Use pywikibot to add population data to Wikidata

This is an example of how to add India population data to Wikidata using pywikibot's API. It shows one possible approach; there are certainly many others.

import collections
import csv
from pprint import pprint

import pywikibot
from pywikibot.data import sparql

# Handles shared by the rest of the script: the Wikidata site object, its data
# repository, and a SPARQL query helper.
wikidata = pywikibot.Site("wikidata", "wikidata")
wikidata_repo = wikidata.data_repository()
sparql_query = sparql.SparqlQuery()
# Read and parse the rows from the CSV file.
# The with-block guarantees the file is closed, and csv.reader copes with
# quoted fields and strips the line terminator from the last column.
rows = []
with open('census.csv', 'r', newline='') as census_file:
    for row in csv.reader(census_file):
        if not row:
            # Blank line (e.g. a trailing newline at the end of the file).
            continue
        # Keep only the columns we need, keyed by a readable name.
        rows.append({
            "state": row[6],
            "district": row[7],
            "level": row[8],
            "name": row[9],
            "_type": row[10],
            "population": row[12],
        })

# The first parsed row holds the column titles; everything after it is data.
header = rows[0]
rows = rows[1:]
# "data" collects one entry for each value we want to add to Wikidata

data = []

def is_same_place(val1, val2):
    """Return True when both rows agree on state, district, name and level."""
    # Key order matches the order in which the fields are compared.
    compared_keys = ('state', 'district', 'name', 'level')
    return all(val1[key] == val2[key] for key in compared_keys)

# A "Total" row wins over "Rural"/"Urban" rows of the same place.
# Assumption: no place has a "Rural" AND an "Urban" row without a "Total" row.
for row in rows:
    if row['_type'] != "Total":
        # Keep a partial row only if no "Total" row for this place is stored yet.
        total_already_stored = any(
            kept['_type'] == "Total" and is_same_place(kept, row)
            for kept in data)
        if not total_already_stored:
            data.append(row)
        continue

    # "Total" row: discard every row stored so far for this place, then keep it.
    data[:] = [kept for kept in data if not is_same_place(kept, row)]
    data.append(row)
print("data has", len(data), "items, out of the", len(rows), "rows.", len(rows) - len(data), "items removed.")
data has 131 items, out of the 137 rows. 6 items removed.
def search_items(site, item_title, language="en"):
    """
    Search the website using the search API to fetch Items which match the given title

    :param site:       The pywikibot.Site to search in.
    :param item_title: The title to search for
    :param language:   Language code passed to the API as the search language.
                       Defaults to "en", which was previously hard-coded.
    :return:           Whatever ``request.submit()`` returns for the
                       ``wbsearchentities`` action; callers in this file read
                       the hits from its ``'search'`` entry.
    """
    request = pywikibot.data.api.Request(
        site=site, parameters={"action": "wbsearchentities",
                               "format": "json",
                               "type": "item",
                               "language": language,
                               "search": item_title})
    return request.submit()
q_state = 'Q22424'  # Punjab
q_country = 'Q668'  # India

# One entry per element of "data", in the same order:
#   country_matches - search hits whose country (P17) is q_country
#   state_matches   - the subset of those that link to q_state through any property
#   matches         - the list actually chosen for the place (see below)
matches = []
country_matches = []
state_matches = []
for item in data:
    # Strip the unnecessary brackets in the name: keep the text before the first '('.
    sanitized_name = item['name'].partition('(')[0].strip()

    # Search for it and match whether there is a property with the state in it.
    search = search_items(wikidata, sanitized_name)

    country_search = []
    state_search = []
    for search_item in search['search']:
        search_itempage = pywikibot.page.ItemPage(wikidata, search_item['id'])
        search_itempage.get()
        if 'P17' not in search_itempage.claims:
            # Skip because "country" property is not defined.
            continue
        # getattr guards against statements without a concrete target
        # ("unknown value" / "no value"), where getTarget() is None.
        has_country = any(
            getattr(country_claim.getTarget(), 'id', None) == q_country
            for country_claim in search_itempage.claims['P17'])
        if not has_country:
            # Skip because the country value is not correct.
            continue

        country_search.append(search_item)

        # Does the item point at the state through any property at all?
        query = 'SELECT ?prop WHERE { wd:%s ?prop wd:%s . }' % (search_item['id'], q_state)
        # Truthiness covers both an empty result and a None result from a failed query.
        if sparql_query.select(query):
            state_search.append(search_item)

    if len(country_search) > 1 and state_search:
        # If too many country search, try state search.
        filtered_search = state_search
    else:
        filtered_search = country_search
    print(sanitized_name, len(filtered_search), len(country_search), len(state_search))

    country_matches.append(country_search)
    state_matches.append(state_search)
    matches.append(filtered_search)
Barnala 1 4 1
Kot Duna 0 0 0
Pandher 1 2 1
Rajia 1 3 1
Aspal kalan 0 0 0
Aspal Khurd 0 0 0
Kaleke 0 0 0
Dhanaula 1 1 0
Kuba 0 0 0
Attargarh 0 0 0
Bhura 2 2 0
Harigarh 0 0 0
Badbar 0 0 0
Bhaini Mehraj 0 0 0
Bhathlan 0 0 0
Kattu 3 3 0
Dangarh 0 0 0
Uppali 0 0 0
Rajgarh 5 5 0
Pharwahi 1 1 1
Rasulpur 5 5 0
Dhanaula Khurd 0 0 0
Bhaini Jassa 0 0 0
Fatehgarh Chhanna 0 0 0
Khudi Kalan 0 0 0
Patti Sohlan 0 0 0
Patti Sekhwan 0 0 0
Barnala 1 4 1
Sanghera 0 0 0
Harbanspura 0 0 0
Shekha 1 1 0
Jhaloor 0 0 0
Thulewal 0 0 0
Nangal 4 4 0
Karamgarh 0 0 0
Thikriwala 1 3 1
Naiwala 0 0 0
Hamidi 0 0 0
Gurm 0 0 0
Gumti 1 3 1
Thuliwal 0 0 0
Manal 1 1 0
Mangewal 0 0 0
Rura Khurd 0 0 0
Pakho 1 1 1
Rura Kalan 0 0 0
Dhurkot 1 1 0
Badra 1 1 0
Bhaini Fatta 0 0 0
Kahneke 0 0 0
Dhaula 2 2 0
Khudi Khurd 0 0 0
Sehna 1 1 0
Jodhpur 3 3 0
Kaire 0 0 0
Cheema 4 4 0
Pakhoke 0 0 0
Malian 0 0 0
Bakhatgarh 0 0 0
Tajoke 0 0 0
Mehta 0 0 0
Ghunas 0 0 0
Dhilwan 1 2 1
Dhilwan 1 2 1
Patti Khatar 0 0 0
Tappa 1 1 0
Draka 0 0 0
Daraj 0 0 0
Jaimal Singhwala 0 0 0
Mauran 1 1 0
Baloke 0 0 0
Sandhu Kalan 0 0 0
Nainewala 0 0 0
Chhana Gulabsingh 0 0 0
Jangiana 0 0 0
Alkran 0 0 0
Kharak Singhwala 0 0 0
Talwandi 1 6 1
Majuke 0 0 0
Deepgarh 0 0 0
Badhata 0 0 0
Sehna 1 1 0
Burj Fatehgarh 0 0 0
Patti Draka 0 0 0
Mauran 1 1 0
Ugoke 0 0 0
Bhotna 0 0 0
Chung 0 0 0
Talewal 0 0 0
Ramgarh 4 4 0
Patti Bir Singh 0 0 0
Patti Deep Singh 0 0 0
Patti Mohar Singh 0 0 0
Mehal Kalan 1 1 1
Amla Singhwala 0 0 0
Bhadalwad 0 0 0
Moom 0 0 0
Chiniwal kalan 0 0 0
Chakrohi 0 0 0
Chananwal 1 1 0
Raisar 0 0 0
Raisar 0 0 0
Kalala 0 0 0
Chuhanke Khurd 0 0 0
Sehjra 0 0 0
Chuhanke Kalan 0 0 0
Bazidke Kalan 0 0 0
Bazidke Khurd 0 0 0
Kurar 1 1 0
Khiali 1 2 1
Sahore 0 0 0
Mahal kalan 0 0 0
Dhaner 2 2 0
Kalal Majra 0 0 0
Kirpal Singhwala 0 0 0
Gangohar 0 0 0
Mahal Khurd 0 0 0
Pandori 3 3 0
Nihaluwala 0 0 0
Bahmania 1 1 0
Kutba 0 0 0
Hardaspura 0 0 0
Chhapa 3 3 0
Lohgarh 1 4 1
Bhila 5 5 0
Gehal 0 0 0
Naraingarh Sohian 0 0 0
Diwana 0 0 0
Chhiniwal Khurd 0 0 0
Sadowal 0 0 0
Gagewal 0 0 0
# Summary statistics over the per-place search results.
place_count = len(matches)
places_with_match = sum(1 for candidates in matches if len(candidates) > 0)
print("Matches", places_with_match)
print("Average matches", float(sum(len(candidates) for candidates in matches)) / place_count)
print('Average country matches',
      float(sum(len(candidates) for candidates in country_matches)) / len(country_matches))
print('Average state matches',
      float(sum(len(candidates) for candidates in state_matches)) / len(state_matches))
# Share of places that have at least one match.
print('Ideal Average', float(places_with_match) / place_count)
Matches 40
Average matches 0.549618320610687
Average country matches 0.732824427480916
Average state matches 0.10687022900763359
ideal Average 0.3053435114503817
# Dump the state-filtered search hits: one list per entry of "data", empty when
# no hit linked to the state.
pprint(state_matches)
[[{'concepturi': 'http://www.wikidata.org/entity/Q25991089',
   'id': 'Q25991089',
   'label': 'Barnala Assembly Constituency',
   'match': {'language': 'en',
             'text': 'Barnala Assembly Constituency',
             'type': 'label'},
   'pageid': 27943823,
   'title': 'Q25991089',
   'url': '//www.wikidata.org/wiki/Q25991089'}],
 [],
 [{'concepturi': 'http://www.wikidata.org/entity/Q7130621',
   'description': 'village in India',
   'id': 'Q7130621',
   'label': 'Pandher Kheri',
   'match': {'language': 'en', 'text': 'Pandher Kheri', 'type': 'label'},
   'pageid': 7027093,
   'title': 'Q7130621',
   'url': '//www.wikidata.org/wiki/Q7130621'}],
 [{'concepturi': 'http://www.wikidata.org/entity/Q7286142',
   'description': 'village in India',
   'id': 'Q7286142',
   'label': 'Rajiana',
   'match': {'language': 'en', 'text': 'Rajiana', 'type': 'label'},
   'pageid': 7190704,
   'title': 'Q7286142',
   'url': '//www.wikidata.org/wiki/Q7286142'}],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [{'concepturi': 'http://www.wikidata.org/entity/Q17082861',
   'description': 'village in India',
   'id': 'Q17082861',
   'label': 'Pharwahi',
   'match': {'language': 'en', 'text': 'Pharwahi', 'type': 'label'},
   'pageid': 18682614,
   'title': 'Q17082861',
   'url': '//www.wikidata.org/wiki/Q17082861'}],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [{'concepturi': 'http://www.wikidata.org/entity/Q25991089',
   'id': 'Q25991089',
   'label': 'Barnala Assembly Constituency',
   'match': {'language': 'en',
             'text': 'Barnala Assembly Constituency',
             'type': 'label'},
   'pageid': 27943823,
   'title': 'Q25991089',
   'url': '//www.wikidata.org/wiki/Q25991089'}],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [{'concepturi': 'http://www.wikidata.org/entity/Q16901591',
   'description': 'village in India',
   'id': 'Q16901591',
   'label': 'Thikriwala (Barnala)',
   'match': {'language': 'en',
             'text': 'Thikriwala (Barnala)',
             'type': 'label'},
   'pageid': 18501664,
   'title': 'Q16901591',
   'url': '//www.wikidata.org/wiki/Q16901591'}],
 [],
 [],
 [],
 [{'concepturi': 'http://www.wikidata.org/entity/Q5618253',
   'description': 'village in India',
   'id': 'Q5618253',
   'label': 'Gumti Kalan',
   'match': {'language': 'en', 'text': 'Gumti Kalan', 'type': 'label'},
   'pageid': 5379208,
   'title': 'Q5618253',
   'url': '//www.wikidata.org/wiki/Q5618253'}],
 [],
 [],
 [],
 [],
 [{'concepturi': 'http://www.wikidata.org/entity/Q2890335',
   'id': 'Q2890335',
   'label': 'Pakho kalan',
   'match': {'language': 'en', 'text': 'Pakho kalan', 'type': 'label'},
   'pageid': 2766876,
   'title': 'Q2890335',
   'url': '//www.wikidata.org/wiki/Q2890335'}],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [{'concepturi': 'http://www.wikidata.org/entity/Q5269555',
   'description': 'village in India',
   'id': 'Q5269555',
   'label': 'Dhilwan Kalan',
   'match': {'language': 'en', 'text': 'Dhilwan Kalan', 'type': 'label'},
   'pageid': 5039040,
   'title': 'Q5269555',
   'url': '//www.wikidata.org/wiki/Q5269555'}],
 [{'concepturi': 'http://www.wikidata.org/entity/Q5269555',
   'description': 'village in India',
   'id': 'Q5269555',
   'label': 'Dhilwan Kalan',
   'match': {'language': 'en', 'text': 'Dhilwan Kalan', 'type': 'label'},
   'pageid': 5039040,
   'title': 'Q5269555',
   'url': '//www.wikidata.org/wiki/Q5269555'}],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [{'concepturi': 'http://www.wikidata.org/entity/Q17144103',
   'description': 'village in India',
   'id': 'Q17144103',
   'label': 'Talwandi Aklia',
   'match': {'language': 'en', 'text': 'Talwandi Aklia', 'type': 'label'},
   'pageid': 18737704,
   'title': 'Q17144103',
   'url': '//www.wikidata.org/wiki/Q17144103'}],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [{'concepturi': 'http://www.wikidata.org/entity/Q25991090',
   'id': 'Q25991090',
   'label': 'Mehal Kalan Assembly Constituency',
   'match': {'language': 'en',
             'text': 'Mehal Kalan Assembly Constituency',
             'type': 'label'},
   'pageid': 27943824,
   'title': 'Q25991090',
   'url': '//www.wikidata.org/wiki/Q25991090'}],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [{'concepturi': 'http://www.wikidata.org/entity/Q6401543',
   'description': 'village in India',
   'id': 'Q6401543',
   'label': 'Khiali Chehlan Wali',
   'match': {'language': 'en',
             'text': 'Khiali Chehlan Wali',
             'type': 'label'},
   'pageid': 6215483,
   'title': 'Q6401543',
   'url': '//www.wikidata.org/wiki/Q6401543'}],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [],
 [{'concepturi': 'http://www.wikidata.org/entity/Q3258089',
   'description': 'village in India',
   'id': 'Q3258089',
   'label': 'Lohgarh',
   'match': {'language': 'en', 'text': 'Lohgarh', 'type': 'label'},
   'pageid': 3109145,
   'title': 'Q3258089',
   'url': '//www.wikidata.org/wiki/Q3258089'}],
 [],
 [],
 [],
 [],
 [],
 [],
 []]
p_population = 'P1082'
p_point_in_time = 'P585'
q_census_of_india = 'Q16056280'


def _is_census_source(source_claim):
    """Return True if a reference claim names the Census of India (by text or by item id)."""
    target = source_claim.getTarget()
    return target == 'Census of India' or getattr(target, 'id', None) == q_census_of_india


# state_matches is parallel to "data" (one entry per place), so enumerate with
# the slice offset to keep irow aligned with the matching entry of "data".
for irow, row_state_matches in enumerate(state_matches[2:15], start=2):
    if len(row_state_matches) != 1:
        # Only handle places with exactly one unambiguous candidate.
        continue
    for match in row_state_matches:
        itempage = pywikibot.page.ItemPage(wikidata, match['id'])
        itempage.get()
        print("English label:", itempage.labels['en'])

        # Check if the population property already exists
        population_exists = p_population in itempage.claims
        # Census value for this place; not written anywhere yet (see TODO below).
        population_census = pywikibot.WbQuantity(amount=data[irow]['population'])

        if population_exists:
            has_census_population = False
            for claim in itempage.claims[p_population]:
                # Qualifiers map a property id to a list of claims.
                dated_2011 = any(
                    getattr(qualifier.getTarget(), 'year', None) == 2011
                    for qualifier in claim.qualifiers.get(p_point_in_time, []))
                if not dated_2011:
                    continue
                # Each source is a mapping of property id -> list of claims.
                if any(_is_census_source(source_claim)
                       for source in claim.getSources()
                       for source_claims in source.values()
                       for source_claim in source_claims):
                    has_census_population = True
                    break

            if has_census_population:
                # TODO: the original cell stops at this condition without a body;
                # decide what to do when a 2011 census figure is already present.
                pass
English label: Pandher Kheri
English label: Rajiana
# Load the item Q668 and collect its population statements with their sources.
itempage = pywikibot.page.ItemPage(wikidata, 'Q668')
itempage.get()
print("English label:", itempage.labels['en'])
# Does the item carry any population statement at all?
population_exists = p_population in itempage.claims
if population_exists:
    populations = [
        {"population": population_claim.getTarget(), "sources": population_claim.getSources()}
        for population_claim in itempage.claims[p_population]
    ]
        
English label: India
{
    "amount": "+1263200000",
    "lowerBound": "+1263200000",
    "unit": "1",
    "upperBound": "+1263200000"
}
# Earlier experiment with the sources of the first population claim, kept disabled:
#source = itempage.claims[p_population][0].getSources()[0]
#print(source)
#source_key, source_value = list(source.items())[0]
#source_value[0].getTarget()

# Look at the first qualifier of the first population claim and show its year.
first_population_claim = itempage.claims[p_population][0]
qual = first_population_claim.qualifiers
qual_key, qual_val = list(qual.items())[0]
qual_val[0].getTarget().year
2014
# Attribute lookup with a None default on the qualifier's target.
# NOTE(review): 'yedar' may be a typo for 'year', or a deliberate probe of the
# None fallback for a missing attribute — confirm before relying on it.
getattr(qual_val[0].getTarget(), 'yedar', None)