Wikidata API Example

This notebook demonstrates how to use the MediaWiki API to read and write information on Wikidata.

# install the dependencies
!pip install requests python-dotenv
Requirement already satisfied: requests in /srv/paws/lib/python3.6/site-packages
Requirement already satisfied: python-dotenv in /srv/paws/lib/python3.6/site-packages
Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /srv/paws/lib/python3.6/site-packages (from requests)
Requirement already satisfied: chardet<3.1.0,>=3.0.2 in /srv/paws/lib/python3.6/site-packages (from requests)
Requirement already satisfied: idna<2.9,>=2.5 in /srv/paws/lib/python3.6/site-packages (from requests)
Requirement already satisfied: certifi>=2017.4.17 in /srv/paws/lib/python3.6/site-packages (from requests)
import datetime
import json
import os

import pandas as pd
import requests
from dotenv import load_dotenv

# One shared HTTP session for the whole notebook; every API call below is
# sent through it.
session = requests.Session()

# Read sensitive settings (like passwords) from the env.txt file into the
# process environment so they do not have to be written into the notebook.
load_dotenv(dotenv_path='env.txt')

Request an item from the API

# Endpoint of the API on wikidata.org; used by every request in this notebook.
WIKIDATA_API_URL = 'https://www.wikidata.org/w/api.php'

# ID of the item all following cells work on (a sandbox item).
item = 'Q15397819'

# Fetch the entity via `wbgetentities` and display the decoded JSON response.
res = session.get(
    WIKIDATA_API_URL,
    params=dict(action='wbgetentities', ids=item, format='json'),
)
result = res.json()
result
{'entities': {'Q15397819': {'pageid': 17254039,
   'ns': 0,
   'title': 'Q15397819',
   'lastrevid': 1043569022,
   'modified': '2019-11-02T20:13:14Z',
   'type': 'item',
   'id': 'Q15397819',
   'labels': {'ko': {'language': 'ko', 'value': '세 번째 연습장'},
    'en': {'language': 'en', 'value': 'Wikidata Sandbox 3'},
    'en-ca': {'language': 'en-ca', 'value': 'Third sandbox'},
    'el': {'language': 'el', 'value': 'Τρίτο Πρόχειρο'},
    'ru': {'language': 'ru', 'value': 'Песочница: Эпизод III'},
    'zh-hans': {'language': 'zh-hans', 'value': '维基数据沙盒3'},
    'mk': {'language': 'mk', 'value': 'Трет песочник'},
    'eo': {'language': 'eo', 'value': 'tria provejo'},
    'nl': {'language': 'nl', 'value': 'derde zandbak'},
    'cs': {'language': 'cs', 'value': 'třetí pískoviště'},
    'it': {'language': 'it', 'value': 'terza pagina delle prove'},
    'sv': {'language': 'sv', 'value': 'Tredje sandlådan'},
    'ja': {'language': 'ja', 'value': 'ウィキデータ・サンドボックス3'},
    'fr': {'language': 'fr', 'value': 'troisième bac à sable de Wikidata'},
    'nan': {'language': 'nan', 'value': 'Test2'},
    'da': {'language': 'da', 'value': 'Wikidatas tredje sandkasse'},
    'zh': {'language': 'zh', 'value': '维基数据沙盒3'},
    'tr': {'language': 'tr', 'value': 'Kullanıcı mesaj:HakanIST/luatest'},
    'fi': {'language': 'fi', 'value': 'Third sandbox'},
    'es': {'language': 'es', 'value': 'Plantilla:Ficha de faro/test1'},
    'de': {'language': 'de', 'value': 'Sandkasten 3'},
    'gl': {'language': 'gl', 'value': 'Modelo:Faro/páxina de probas'},
    'ar': {'language': 'ar', 'value': 'ملعب ويكي بيانات 3'}},
   'descriptions': {'gl': {'language': 'gl',
     'value': 'Páxina de probas de Wikipedia'},
    'fr': {'language': 'fr',
     'value': 'bac à sable pour amuser les contributeurs'},
    'en': {'language': 'en',
     'value': 'My super wikidata bot, current time: 2019-11-02T19:53:40.301974'}},
   'aliases': {'nan': [{'language': 'nan', 'value': 'Test'}],
    'fr': [{'language': 'fr', 'value': 'bac à sable 3'}]},
   'claims': {'P31': [{'mainsnak': {'snaktype': 'value',
       'property': 'P31',
       'hash': 'e5ef3cd1a671227be40e6978474d86d17df8cbb4',
       'datavalue': {'value': {'entity-type': 'item',
         'numeric-id': 14204246,
         'id': 'Q14204246'},
        'type': 'wikibase-entityid'},
       'datatype': 'wikibase-item'},
      'type': 'statement',
      'id': 'Q15397819$db64e926-46bb-5001-a673-23dfbb1a0132',
      'rank': 'normal'},
     {'mainsnak': {'snaktype': 'value',
       'property': 'P31',
       'hash': 'ad7d38a03cdd40cdc373de0dc4e7b7fcbccb31d9',
       'datavalue': {'value': {'entity-type': 'item',
         'numeric-id': 5,
         'id': 'Q5'},
        'type': 'wikibase-entityid'},
       'datatype': 'wikibase-item'},
      'type': 'statement',
      'id': 'Q15397819$A79B0AF3-5ED6-41D2-ADBF-0C2239146B7F',
      'rank': 'normal'}]},
   'sitelinks': {'eswiki': {'site': 'eswiki',
     'title': 'Plantilla:Ficha de faro/zona de pruebas',
     'badges': []},
    'frwiki': {'site': 'frwiki',
     'title': 'Modèle:Infobox Phare/Test',
     'badges': []},
    'trwiki': {'site': 'trwiki',
     'title': 'Kullanıcı mesaj:HakanIST/luatest',
     'badges': []},
    'wikidatawiki': {'site': 'wikidatawiki',
     'title': 'Wikidata:Sandbox/3',
     'badges': []}}}},
 'success': 1}

Log in to Wikidata

# Step 1: ask the API for a login token.
# Note: the login is stored in the cookies of the requests session, so every
#       subsequent call has to go through this same `session` object.
res = session.get(
    WIKIDATA_API_URL,
    params=dict(action='query', meta='tokens', type='login', format='json'),
)
tokens = res.json()['query']['tokens']

# Step 2: log in with the credentials read from the environment
# (BOT_NAME / BOT_PASSWORD) together with the login token from step 1.
res = session.post(
    WIKIDATA_API_URL,
    data=dict(
        action='login',
        lgname=os.getenv('BOT_NAME'),
        lgpassword=os.getenv('BOT_PASSWORD'),
        lgtoken=tokens['logintoken'],
        format='json',
    ),
)
login = res.json()
login
{'login': {'result': 'Success', 'lguserid': 733408, 'lgusername': 'Metaodi'}}

Update the description of an item (+CSRF)

# Fetch a CSRF token through the session; it is sent as `token` with the
# edit request below.
res = session.get(WIKIDATA_API_URL, params={
    'action': 'query',
    'meta': 'tokens',
    'type': 'csrf',
    'format': 'json',
})
csrf = res.json()['query']['tokens']['csrftoken']

# The current time is embedded in the description so that every run produces
# a visibly different value.
now = datetime.datetime.now()

# Set the English description of the sandbox item and display the API's reply.
res = session.post(WIKIDATA_API_URL, data={
    'action': 'wbsetdescription',
    'id': item,
    'token': csrf,
    'language': 'en',
    'value': 'My super wikidata bot, current time: %s' % now.isoformat(),
    'format': 'json',
})
res.json()
{'entity': {'descriptions': {'en': {'language': 'en',
    'value': 'My super wikidata bot, current time: 2019-11-02T20:14:42.285335'}},
  'id': 'Q15397819',
  'type': 'item',
  'lastrevid': 1043569760},
 'success': 1}
# Request the item again to check that the new description was stored.
res = session.get(WIKIDATA_API_URL, params={
    'action': 'wbgetentities',
    'ids': item,
    'format': 'json',
})
result = res.json()
# Look the entity up via `item` instead of repeating the ID literal, so this
# cell keeps working when `item` is pointed at a different entity.
result['entities'][item]['descriptions']['en']
{'language': 'en',
 'value': 'My super wikidata bot, current time: 2019-11-02T20:14:42.285335'}

Let's add a triple

To add a triple, we use the wbcreateclaim action. Let's see what parameters are needed.

# Fetch a CSRF token through the session; it is sent as `token` with the
# edit request below.
res = session.get(WIKIDATA_API_URL, params={
    'action': 'query',
    'meta': 'tokens',
    'type': 'csrf',
    'format': 'json',
})
csrf = res.json()['query']['tokens']['csrftoken']

# Add a triple (item, P106, Q212238) to the item.
# `value` has to be sent as a JSON-encoded string: when a dict is passed
# directly, requests form-encodes it by iterating over its keys (sending
# value=entity-type&value=numeric-id), which the API rejects with
# 'invalid-snak'.
res = session.post(WIKIDATA_API_URL, data={
    'action': 'wbcreateclaim',
    'entity': item,
    'token': csrf,
    'snaktype': 'value',
    'property': 'P106',
    'value': json.dumps({'entity-type': 'item', 'numeric-id': 212238}),
    'format': 'json',
})
res.json()
{'error': {'code': 'invalid-snak',
  'info': 'Invalid snak data.',
  'messages': [{'name': 'wikibase-api-invalid-snak',
    'parameters': [],
    'html': {'*': 'Invalid snak data.'}}],
  '*': 'See https://www.wikidata.org/w/api.php for API usage. Subscribe to the mediawiki-api-announce mailing list at &lt;https://lists.wikimedia.org/mailman/listinfo/mediawiki-api-announce&gt; for notice of API deprecations and breaking changes.'},
 'servedby': 'mw1315'}