Sample code for adding Structured Data on Commons via API

Structured Data on Commons supports some basic properties – captions and depicts (P180) – but you can add any Wikibase statement via the API, P180 or otherwise. This sample code is based on Maarten Dammers' GitHub repo:

https://github.com/multichill/toollabs/blob/master/bot/commons/digital_representation_paintings.py

#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
Bot code to add "digital representation of" (P6243) to a Wikimedia Commons image.
This is just a proof of concept to show that this is possible.
"""

import pywikibot
import re
import pywikibot.data.sparql
import datetime
from pywikibot.comms import http
import json
from pywikibot import pagegenerators

class DigitalRepresentationBot:
    """
    Bot to add digital representation of statements on Commons.

    For every Wikidata item produced by the SPARQL generator, the Commons file
    used as the item's image (P18) receives a "depicts" (P180) and a
    "digital representation of" (P6243) statement pointing back at the item.
    Files that already have a MediaInfo entity are skipped.
    """
    def __init__(self):
        """
        Grab generator based on SPARQL to work on.

        Sets ``self.site`` (Wikimedia Commons), ``self.repo`` (its data
        repository) and ``self.generator`` (preloaded items matching the query).
        """
        self.site = pywikibot.Site(u'commons', u'commons')
        self.repo = self.site.data_repository()

        # Lookup an artwork via Met Object ID (P3634), returning the associated Commons image
        pywikibot.output(u'Pre SPARQL')
        query = u"""SELECT DISTINCT ?item ?image WHERE {
  ?item wdt:P3634 '16908' .
  ?item wdt:P18 ?image .
} LIMIT 1"""
        pywikibot.output(query)

        sparqlgenerator = pagegenerators.WikidataSPARQLPageGenerator(query, site=self.repo)
        self.generator = pagegenerators.PreloadingEntityGenerator(sparqlgenerator)

    def run(self):
        """
        Run on the items: hand every item of the generator to handlePaintingItem.
        """
        for item in self.generator:
            self.handlePaintingItem(item)

    def handlePaintingItem(self, item):
        """
        Add P180 and P6243 statements to the Commons file used as image of an item.

        Returns without editing when the item has no P18 claim, when the first
        P18 claim has no usable target file, or when the file already has a
        MediaInfo entity.

        :param item: The item to work on (its first P18 claim is used)
        :return: None
        """
        data = item.get()
        claims = data.get('claims')

        if u'P18' not in claims:
            pywikibot.output(u'handlePaintingItem: no P18, returning')
            return

        filepage = claims.get('P18')[0].getTarget()

        # getTarget() gives None for "unknown value"/"no value" claims; without
        # this guard the .exists() call below would raise AttributeError.
        if filepage is None or not filepage.exists():
            pywikibot.output(u'handlePaintingItem: no filepage, returning')
            return

        # MediaInfo entity IDs are the file's page ID prefixed with "M"
        mediaid = u'M%s' % (filepage.pageid,)
        if self.mediaInfoExists(mediaid):
            pywikibot.output(u'handlePaintingItem: mediainfoexists already, returning')
            return

        # Add P180 statement
        qid = item.title()
        summary = u'this file depicts [[:d:%s]] (based on image usage)' % (qid,)
        self.addClaim(mediaid, u'P180', qid, summary)

        # Add P6243 statement
        summary = u'this file is a digital representation of [[:d:%s]] (based on image usage)' % (qid,)
        self.addClaim(mediaid, u'P6243', qid, summary)

        # After adding these statements you can search Commons using:
        # "haswbstatement:P180=Q1752990"
        # "haswbstatement:P6243=Q1752990"
        # Ex: https://commons.wikimedia.org/w/index.php?search=haswbstatement%3AP6243%3DQ1752990&title=Special%3ASearch&go=Go&ns0=1&ns6=1&ns12=1&ns14=1&ns100=1&ns106=1

    def addClaim(self, mediaid, pid, qid, summary=''):
        """
        Add a single item-valued statement to a MediaInfo entity via wbcreateclaim.

        A CSRF token is fetched first, then the claim is posted. If the API
        answers with an error (or something that is not JSON), the failure is
        written to the pywikibot output instead of being silently dropped.

        :param mediaid: The entity ID of the file (like M1234)
        :param pid: The property ID of the statement (like P180)
        :param qid: The item ID used as value (like Q1234)
        :param summary: The edit summary
        :return: None
        """
        pywikibot.output(u'Adding %s->%s to %s. %s' % (pid, qid, mediaid, summary))

        tokenrequest = http.fetch(u'https://commons.wikimedia.org/w/api.php?action=query&meta=tokens&type=csrf&format=json')

        tokendata = json.loads(tokenrequest.text)
        token = tokendata.get(u'query').get(u'tokens').get(u'csrftoken')

        postvalue = {"entity-type":"item","numeric-id": qid.replace(u'Q', u'')}

        postdata = {u'action' : u'wbcreateclaim',
                    u'format' : u'json',
                    u'entity' : mediaid,
                    u'property' : pid,
                    u'snaktype' : u'value',
                    u'value' : json.dumps(postvalue),
                    u'token' : token,
                    u'summary' : summary
                    }
        apipage = http.fetch(u'https://commons.wikimedia.org/w/api.php', method='POST', data=postdata)

        # The API reports rejected edits (bad token, permissions, invalid value)
        # in the response body, so the body has to be inspected.
        problem = self._describeApiError(apipage.text)
        if problem:
            pywikibot.output(u'addClaim: adding %s->%s to %s failed: %s' % (pid, qid, mediaid, problem))

    @staticmethod
    def _describeApiError(responsetext):
        """
        Turn the body of an API response into an error description.

        :param responsetext: The raw text of a format=json API response
        :return: None when the response reports no error, otherwise a short
            text describing the error
        """
        try:
            result = json.loads(responsetext)
        except (TypeError, ValueError):
            return u'API response is not valid JSON'
        if not isinstance(result, dict):
            return u'API response is not a JSON object'
        error = result.get(u'error')
        if not error:
            return None
        if isinstance(error, dict):
            return u'%s: %s' % (error.get(u'code'), error.get(u'info'))
        return u'%s' % (error,)

    def mediaInfoExists(self, mediaid):
        """
        Check if the media info exists or not
        :param mediaid: The entity ID (like M1234, pageid prefixed with M)
        :return: True if it exists, otherwise False
        """
        # https://commons.wikimedia.org/w/api.php?action=wbgetentities&format=json&ids=M52611909
        # https://commons.wikimedia.org/w/api.php?action=wbgetentities&format=json&ids=M10038
        request = self.site._simple_request(action='wbgetentities',ids=mediaid)
        data = request.submit()
        # The check treats an entity record carrying a truthy "pageid" as existing
        return bool(data.get(u'entities').get(mediaid).get(u'pageid'))

def main():
    """
    Instantiate DigitalRepresentationBot and call its run() method.
    """
    bot = DigitalRepresentationBot()
    bot.run()

# Only start the bot when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()
Pre SPARQL
SELECT DISTINCT ?item ?image WHERE {
  ?item wdt:P3634 '16908' .
  ?item wdt:P18 ?image .
} LIMIT 1
Adding P180->Q20178219 to M58742409. this file depicts [[:d:Q20178219]] (based on image usage)
Adding P6243->Q20178219 to M58742409. this file is a digital representation of [[:d:Q20178219]] (based on image usage)

After adding these statements you can search Commons using strings such as:

  • "haswbstatement:P180=Q1752990"
  • "haswbstatement:P6243=Q1752990"

Example: https://commons.wikimedia.org/w/index.php?search=haswbstatement%3AP6243%3DQ1752990&title=Special%3ASearch&go=Go&ns0=1&ns6=1&ns12=1&ns14=1&ns100=1&ns106=1