How to create a bot

Use pywikibot

Pywikibot is a Python library built on top of the MediaWiki API. In this notebook we will see how to use that API from Python with pywikibot, and lay the groundwork for later developing a bot or tool for Wikidata.

Use pywikibot for Wikidata:

If you want to set up pywikibot on your computer, check this tutorial: https://www.wikidata.org/wiki/Wikidata:Pywikibot_-_Python_3_Tutorial/Setting_up_Shop

Quick steps:

1. Create a new directory for your project.
2. Clone pywikibot into this directory: `git clone --recursive https://gerrit.wikimedia.org/r/pywikibot/core.git pywikibot`
3. Run `python generate_user_files.py` to create `user-config.py`.
4. Run `python pwb.py login` to log in with your account.

# install the dependencies
!pip install requests
import pywikibot
import requests
import csv
import time
# Connect to the live Wikidata site. For experiments, the test instance can
# be used instead: pywikibot.Site("test", "wikidata")
site = pywikibot.Site("wikidata", "wikidata")
repo = site.data_repository()
# Load item Q72 from the repository and display what get() returned
zurich_page = pywikibot.ItemPage(repo, 'Q72')
zurich = zurich_page.get()
zurich

Let's create a bot to keep the population counts up to date

The data is available from Open Data Zurich, in the dataset Bevölkerung nach Stadtquartier, seit 1970.

# Ask the Open Data Zurich portal for the metadata of the population dataset
result = requests.get(
    'https://data.stadt-zuerich.ch/api/3/action/package_show?id=bev_bestand_jahr_quartier_seit1970_od3240'
)
# fail early on an HTTP error instead of trying to parse an error page
result.raise_for_status()
dataset = result.json()['result']
# the first resource of the dataset is expected to be the CSV file
population_url = dataset['resources'][0]['url']
data = requests.get(population_url)
data.raise_for_status()
# csv.reader needs text (str) on Python 3, so decode the raw bytes first
decoded_data = data.content.decode('utf-8')
cr = csv.reader(decoded_data.splitlines(), delimiter=',')
rows = list(cr)
for row in rows:
    print(row)
# get rid of header (slicing also copes with an empty download)
rows = rows[1:]
# use the sandbox items on wikidata.org
# keys are the quarter numbers as they appear in the CSV (second column),
# values are the Wikidata item ids the population claims are written to
zurich_quarters = {
    '123': "Q13406268",
    '122': "Q15397819",
}
def load_item_from_repo(repo, item_id):
    """Return the ItemPage for ``item_id`` after fetching its content.

    :param repo: data repository the item lives in
    :param item_id: id of the item, e.g. ``"Q72"``
    :return: the ``pywikibot.ItemPage`` on which ``get()`` has been called
    """
    page = pywikibot.ItemPage(repo, item_id)
    # get() is called for its side effect of loading the item's data
    page.get()
    return page
def existing_claim_from_year(item, year):
    """Return the population claim of ``item`` that is dated to ``year``.

    A claim matches when one of its ``P585`` qualifiers has the same time
    string as ``pywikibot.WbTime(year=year)``.

    :param item: loaded item whose ``claims`` mapping is searched
    :param year: year (int) to look for
    :return: the first matching ``P1082`` claim, or ``None`` if the item has
        no ``P1082`` claims or none of them is qualified with that year
    """
    try:
        claims = item.claims['P1082']
    except KeyError:
        # the item has no population claims at all
        return None

    time_str = pywikibot.WbTime(year=year).toTimestr()
    for claim in claims:
        try:
            qualifiers = claim.qualifiers['P585']
        except KeyError:
            # A claim without a P585 qualifier must not end the search:
            # a later claim may still match the requested year.
            continue
        for qualifier_value in qualifiers:
            target = qualifier_value.getTarget()
            # skip qualifiers that carry no value
            if target is not None and target.toTimestr() == time_str:
                return claim
    return None
def create_popultation_claim(site, repo, value, year, url):
    """Add a population claim with a year qualifier and a URL source.

    NOTE: the claim is added to the module-level ``item``, and the final
    message reports the module-level ``item_id``; both must be set by the
    caller before this function is invoked.

    :param site: site passed on to ``pywikibot.WbQuantity``
    :param repo: data repository used to build the claims
    :param value: population count; converted with ``int()``
    :param year: year (int) used for the ``P585`` qualifier
    :param url: URL stored as the ``P854`` source of the claim
    """
    population_prop_id = 'P1082'
    time_prop_id = 'P585'
    url_prop_id = 'P854'

    print("Add new value: %s (%s)" % (value, year))
    print()

    # population claim
    population_claim = pywikibot.Claim(repo, population_prop_id)
    population_claim.setTarget(
        pywikibot.WbQuantity(amount=int(value), site=site)
    )
    # the claim is added to the item first; qualifier and source are then
    # attached to the claim
    item.addClaim(population_claim)

    # time qualifier
    qualifier = pywikibot.Claim(repo, time_prop_id)
    yearObj = pywikibot.WbTime(year=year)
    qualifier.setTarget(yearObj)
    population_claim.addQualifier(qualifier)

    # source
    source = pywikibot.Claim(repo, url_prop_id)
    source.setTarget(url)
    population_claim.addSource(source)

    # Format inside the call: print() returns None, so applying % to its
    # result raises TypeError on Python 3.
    print("Added population claim to %s for year %d" % (item_id, year))
# Loop over CSV file
# Each row is read as: year, quarter number, quarter name, population
# (column meaning taken from the variable names -- verify against the CSV).
for row in rows:
    year = int(row[0])
    qnr = row[1]
    quarter = row[2]
    population_value = row[3]

    # load item
    try:
        item_id = zurich_quarters[qnr]
    except KeyError:
        print("No mapping for qnr %s found." % qnr)
        continue

    # check if we already have an existing claim
    try:
        # the item is reloaded for every row before it is checked
        item = load_item_from_repo(repo, item_id)
        population_claim = existing_claim_from_year(item, year)
        if population_claim is None:
            # add a new statement
            create_popultation_claim(site, repo, population_value, year, population_url)
        else:
            # Format inside the call: print() returns None, so applying %
            # to its result raises TypeError on Python 3.
            print("Population claim already exists on %s for year %d, skipping" % (item_id, year))
    except pywikibot.data.api.APIError as e:
        # stop at the first API error instead of continuing with further rows
        print("API Error: %s" % (e))
        break