pywikibot for Wikidata tutorial 5

Correcting items that use fruits as colours

This notebook also tries to fix the tutorial itself, because the tutorial is based on old software specs that no longer work.

In [8]:
import pywikibot
from pywikibot import pagegenerators as pg

# Connect to Wikidata and obtain the data repository used to load items.
site = pywikibot.Site("wikidata", "wikidata")
repo = site.data_repository()

# Property that the SPARQL queries below filter on (wdt:P462).
property = "P462"

# Maps each mistakenly used item to the colour item that should replace it.
# Trailing comments read: name - "what the key is": "what the value is".
error_dict = {
    "Q13191": "Q39338",       # orange - "fruit": "color"
    "Q897": "Q208045",        # gold - "element": "color"
    "Q753": "Q2722041",       # copper - "element": "color"
    "Q25381": "Q679355",      # amber - "material": "color"
    "Q134862": "Q5069879",    # champagne - "drink": "color"
    "Q1090": "Q317802",       # silver - "element": "color"
    "Q1173": "Q797446",       # burgundy - "region": "color"
    "Q13411121": "Q5148721",  # peach - "fruit": "color"
}

# First, check that the queries can be generated correctly.
# The query file is written for Q13191, so that id acts as the placeholder
# that gets swapped for each key of error_dict.
with open('colours.rq') as query_file:
    QUERY = query_file.read()

queries_dict = {key: QUERY.replace('Q13191', key) for key in error_dict}

# Show every generated query so it can be inspected by eye.
for key in queries_dict:
    print(queries_dict[key])
SELECT ?item WHERE {
  ?item wdt:P462 wd:Q13191.
}
SELECT ?item WHERE {
  ?item wdt:P462 wd:Q897.
}
SELECT ?item WHERE {
  ?item wdt:P462 wd:Q753.
}
SELECT ?item WHERE {
  ?item wdt:P462 wd:Q25381.
}
SELECT ?item WHERE {
  ?item wdt:P462 wd:Q134862.
}
SELECT ?item WHERE {
  ?item wdt:P462 wd:Q1090.
}
SELECT ?item WHERE {
  ?item wdt:P462 wd:Q1173.
}
SELECT ?item WHERE {
  ?item wdt:P462 wd:Q13411121.
}
In [11]:
#then get the generators

# Build one page generator per mistaken colour value, keyed by the same ids
# as queries_dict, then pass each through site.preloadpages.
generator_dict = {}

for key, query in queries_dict.items():
    # Run the query prepared for this key. The unmodified template QUERY
    # always asks for Q13191, so using it here would make every generator
    # return the same results. `site` is the Site object created in the first
    # cell; the previously used name `wikidata_site` is not defined in the
    # cells shown and would raise NameError on a fresh kernel.
    generator_dict[key] = pg.WikidataSPARQLPageGenerator(query, site=site)
    generator_dict[key] = site.preloadpages(generator_dict[key], pageprops=True)

print(generator_dict)
{'Q13191': <generator object APISite.preloadpages at 0x7fdaadee4fc0>, 'Q897': <generator object APISite.preloadpages at 0x7fdaada131a8>, 'Q753': <generator object APISite.preloadpages at 0x7fdaada13468>, 'Q25381': <generator object APISite.preloadpages at 0x7fdaada13888>, 'Q134862': <generator object APISite.preloadpages at 0x7fdaada135c8>, 'Q1090': <generator object APISite.preloadpages at 0x7fdaada130a0>, 'Q1173': <generator object APISite.preloadpages at 0x7fdaada13a40>, 'Q13411121': <generator object APISite.preloadpages at 0x7fdaada13200>}
In [18]:
# For every mistaken colour value, print its English label and description,
# then list each item produced by that value's generator.
for key in error_dict:
    colour_item = pywikibot.ItemPage(repo, key)
    colour_dict = colour_item.get()
    # An item may have no English label or description; use a placeholder
    # instead of letting a KeyError abort the whole report.
    colour_label = colour_dict['labels'].get('en', '(no English label)')
    colour_desc = colour_dict['descriptions'].get('en', '(no English description)')
    print('Items incorrectly coloured:', colour_label, ':', colour_desc)

    for item in generator_dict[key]:
        item_dict = item.get()
        # Same fallback for the affected items, which are arbitrary query results.
        item_label = item_dict['labels'].get('en', '(no English label)')
        print(item, ' ', item_label)
Items incorrectly coloured: orange : fruit (of Citrus × sinensis: Q3355098)‎
Items incorrectly coloured: gold : chemical element with the atomic number of 79
Items incorrectly coloured: copper : chemical element with the atomic number of 29
Items incorrectly coloured: amber : fossilized tree resin
Items incorrectly coloured: Champagne : sparkling wine from Champagne, France
Items incorrectly coloured: silver : chemical element with atomic number 47
Items incorrectly coloured: Burgundy : former administrative region of France
Items incorrectly coloured: peach : fruit, use Q13189 for the species
In [19]:
# Exploration: list the attributes and methods available on `item`, the loop
# variable of the inner `for item in generator_dict[key]` loop in the cell above.
# NOTE(review): `item` is only bound if one of those generators yielded at
# least one result; on a fresh kernel with empty results this raises NameError.
dir(item)
Out[19]:
['__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__unicode__',
 '__weakref__',
 '_cache_attrs',
 '_cmpkey',
 '_content',
 '_contentmodel',
 '_cosmetic_changes_hook',
 '_defined_by',
 '_diff_to',
 '_getInternals',
 '_get_parsed_page',
 '_isredir',
 '_latest_cached_revision',
 '_link',
 '_namespace',
 '_namespace_obj',
 '_normalizeData',
 '_normalizeLanguages',
 '_pageid',
 '_pageprops',
 '_revid',
 '_revisions',
 '_save',
 '_timestamp',
 'addClaim',
 'aliases',
 'applicable_protections',
 'aslink',
 'autoFormat',
 'backlinks',
 'botMayEdit',
 'canBeEdited',
 'categories',
 'change_category',
 'claims',
 'clear_cache',
 'concept_uri',
 'concept_url',
 'content_model',
 'contributingUsers',
 'contributors',
 'coordinates',
 'data_item',
 'data_repository',
 'defaultsort',
 'delete',
 'depth',
 'descriptions',
 'editAliases',
 'editDescriptions',
 'editEntity',
 'editLabels',
 'editTime',
 'embeddedin',
 'encoding',
 'entity_type',
 'exists',
 'expand_text',
 'extlinks',
 'fromPage',
 'from_entity_uri',
 'fullVersionHistory',
 'full_url',
 'get',
 'getCategoryRedirectTarget',
 'getCreator',
 'getDeletedRevision',
 'getID',
 'getLatestEditors',
 'getMovedTarget',
 'getOldVersion',
 'getRedirectTarget',
 'getReferences',
 'getRestrictions',
 'getSitelink',
 'getTemplates',
 'getVersionHistory',
 'getVersionHistoryTable',
 'getdbName',
 'id',
 'image_repository',
 'imagelinks',
 'interwiki',
 'isAutoTitle',
 'isCategory',
 'isCategoryRedirect',
 'isDisambig',
 'isEmpty',
 'isFlowPage',
 'isImage',
 'isIpEdit',
 'isRedirectPage',
 'isStaticRedirect',
 'isTalkPage',
 'is_categorypage',
 'is_filepage',
 'is_flow_page',
 'is_valid_id',
 'iterlanglinks',
 'iterlinks',
 'itertemplates',
 'labels',
 'langlinks',
 'lastNonBotUser',
 'latestRevision',
 'latest_revision',
 'latest_revision_id',
 'linkedPages',
 'loadDeletedRevisions',
 'markDeletedRevision',
 'mergeInto',
 'merge_history',
 'move',
 'moved_target',
 'namespace',
 'oldest_revision',
 'pageAPInfo',
 'page_image',
 'pageid',
 'permalink',
 'preloadText',
 'previousRevision',
 'previous_revision_id',
 'properties',
 'protect',
 'protection',
 'purge',
 'put',
 'put_async',
 'removeClaims',
 'removeImage',
 'removeSitelink',
 'removeSitelinks',
 'replaceImage',
 'repo',
 'revision_count',
 'revisions',
 'save',
 'section',
 'sectionFreeTitle',
 'setSitelink',
 'setSitelinks',
 'set_redirect_target',
 'site',
 'sitelinks',
 'templates',
 'text',
 'title',
 'titleForFilename',
 'titleWithoutNamespace',
 'title_pattern',
 'toJSON',
 'toggleTalkPage',
 'touch',
 'undelete',
 'urlname',
 'userName',
 'version',
 'watch']