In [4]:
import os
import pywikibot
import requests
import sys
import time
from pywikibot import pagegenerators
from pywikibot.data.api import APIError

# Pin this process's local timezone to Europe/Berlin. The TZ variable is only
# picked up by the C library once tzset() is called (available on Unix).
os.environ.update(TZ='Europe/Berlin')
time.tzset()

# Site object for Wikidata (both arguments are 'wikidata'); it is passed to the
# SPARQL page generator further down.
wikidata_site = pywikibot.Site('wikidata', 'wikidata')
# Data repository belonging to that site. NOTE(review): `repo` is not
# referenced anywhere else in the visible code.
repo = wikidata_site.data_repository()

# Edit-summary template: first placeholder is the number of labels added,
# second is the label text that was copied.
editSummary = 'Update {} language(s) with labels "{}"'

# Language codes that receive a copy of the English label when they have no
# label of their own. Order is preserved from the original single-line list.
langcodes = [
    'af', 'an', 'ast', 'bar', 'bm', 'br', 'ca', 'co', 'cs', 'cy', 'da', 'de',
    'de-at', 'de-ch', 'en', 'en-ca', 'en-gb', 'eo', 'es', 'et', 'eu', 'fi',
    'fr', 'frc', 'frp', 'fur', 'ga', 'gd', 'gl', 'gsw', 'hr', 'hu', 'ia',
    'id', 'ie', 'io', 'it', 'jam', 'kab', 'kg', 'lb', 'li', 'lij', 'lt', 'mg',
    'mi', 'nap', 'nb', 'nds', 'nds-nl', 'nl', 'nn', 'nrm', 'min', 'ms', 'oc',
    'pap', 'pcd', 'pl', 'pms', 'prg', 'pt', 'pt-br', 'rgn', 'rm', 'ro', 'sc',
    'scn', 'sco', 'sk', 'sl', 'sq', 'sr-el', 'sv', 'sw', 'tr', 'vec', 'vi',
    'vls', 'vmf', 'vo', 'wa', 'wo', 'zu',
]

# Alternative SPARQL query. NOTE(review): `dataset_query2` is not referenced
# anywhere else in the visible code; only `dataset_query` is executed.
#
# What the query text asks for:
#   * named subquery `%s1`: distinct items having a P106 statement whose value
#     reaches wd:Q26270618 through zero or more P279 links;
#   * the item has a P27 statement whose value is one of the listed countries;
#   * the item HAS an English label (OPTIONAL + FILTER(BOUND(...)));
#   * the item has NO German label (OPTIONAL + FILTER(!BOUND(...)));
#   * rows are grouped per item and only groups with exactly one row are kept.
#
# `%s1` is not a Python format placeholder: no %-formatting is applied to this
# string in the visible code, so it would reach the query service literally
# (presumably as a named-subquery identifier - confirm against the query
# service documentation).
dataset_query2 = """SELECT ?item (COUNT(*) AS ?cnt) WITH {
  SELECT DISTINCT ?item WHERE {
    ?item p:P106/ps:P106/wdt:P279* wd:Q26270618 .
  }
} AS %s1 WHERE {
  INCLUDE %s1 .
  VALUES ?country { wd:Q30 wd:Q29 wd:Q183 wd:Q664 wd:Q16 wd:Q145 wd:Q39 wd:Q258 wd:Q20 wd:Q38 wd:Q29999 wd:Q408 wd:Q142 wd:Q35 wd:Q36 wd:Q174193 wd:Q218 wd:Q954 wd:Q33 wd:Q27 wd:Q155 wd:Q215 wd:Q213 wd:Q40 wd:Q28 wd:Q45 wd:Q34 wd:Q33946 wd:Q224 wd:Q37 wd:Q414 wd:Q8646 wd:Q191 wd:Q16957 wd:Q241 wd:Q43 wd:Q265 wd:Q298 wd:Q4628 wd:Q96 wd:Q713750 wd:Q77 wd:Q419 wd:Q25 wd:Q214 wd:Q227 wd:Q217 wd:Q252 wd:Q334 wd:Q733 wd:Q172579 wd:Q114 wd:Q235 wd:Q43287 wd:Q717 wd:Q734 wd:Q686 wd:Q736 wd:Q774 wd:Q22 wd:Q833 wd:Q1009 wd:Q928 wd:Q792 wd:Q916 wd:Q945 wd:Q1032 wd:Q739 wd:Q783 wd:Q754 wd:Q778 wd:Q1033 wd:Q23635 wd:Q117 wd:Q39193 }
  ?item p:P27/ps:P27 ?country .
  OPTIONAL {
    ?item rdfs:label ?label .
    FILTER(LANG(?label) = 'en') .
  }
  FILTER(BOUND(?label)) .
  OPTIONAL {
    ?item rdfs:label ?label2 .
    FILTER(LANG(?label2) = 'de') .
  }
  FILTER(!BOUND(?label2)) .
} GROUP BY ?item HAVING(?cnt=1)"""

# SPARQL query that selects the items to process; it is handed unchanged to
# WikidataSPARQLPageGenerator below.
#
# What the query text asks for:
#   * named subquery `%s`: items having a P106 statement whose value reaches
#     wd:Q26270618 through zero or more P279 links, a P27 statement whose value
#     is one of the listed countries, and an English label; grouped per item,
#     keeping only groups with exactly one row;
#   * outer query: joins each of those items with all of its rdfs:label values
#     (any language), groups per item and keeps groups with ?cnt < 20.
#
# `%s` is not a Python format placeholder: the string is passed to the
# generator without any %-formatting, so it reaches the query service
# literally (presumably as a named-subquery identifier - confirm).
#
# NOTE(review): ?cnt is projected by the inner subquery AND re-declared by the
# outer `COUNT(*) AS ?cnt`. After INCLUDE the inner ?cnt is already in scope,
# so the outer HAVING(?cnt < 20) may not be testing the number of labels as it
# appears to intend - verify the result set against the query service.
dataset_query = """SELECT ?item (COUNT(*) AS ?cnt) WITH {
  SELECT ?item (COUNT(*) AS ?cnt) WHERE {
    ?item p:P106/ps:P106/wdt:P279* wd:Q26270618 .
    VALUES ?country { wd:Q30 wd:Q29 wd:Q183 wd:Q664 wd:Q16 wd:Q145 wd:Q39 wd:Q258 wd:Q20 wd:Q38 wd:Q29999 wd:Q408 wd:Q142 wd:Q35 wd:Q36 wd:Q174193 wd:Q218 wd:Q954 wd:Q33 wd:Q27 wd:Q155 wd:Q215 wd:Q213 wd:Q40 wd:Q28 wd:Q45 wd:Q34 wd:Q33946 wd:Q224 wd:Q37 wd:Q414 wd:Q8646 wd:Q191 wd:Q16957 wd:Q241 wd:Q43 wd:Q265 wd:Q298 wd:Q4628 wd:Q96 wd:Q713750 wd:Q77 wd:Q419 wd:Q25 wd:Q214 wd:Q227 wd:Q217 wd:Q252 wd:Q334 wd:Q733 wd:Q172579 wd:Q114 wd:Q235 wd:Q43287 wd:Q717 wd:Q734 wd:Q686 wd:Q736 wd:Q774 wd:Q22 wd:Q833 wd:Q1009 wd:Q928 wd:Q792 wd:Q916 wd:Q945 wd:Q1032 wd:Q739 wd:Q783 wd:Q754 wd:Q778 wd:Q1033 wd:Q23635 wd:Q117 wd:Q39193 }
    ?item p:P27/ps:P27 ?country .
    ?item rdfs:label ?label .
    FILTER(LANG(?label) = 'en') .
  } GROUP BY ?item HAVING(?cnt=1)
} AS %s WHERE {
  INCLUDE %s .
  ?item rdfs:label ?label2 .
} GROUP BY ?item HAVING(?cnt < 20)
"""

# Main job: for every item returned by `dataset_query`, copy its English label
# into each language of `langcodes` that has no label yet, in one edit per item.
for Qitem in pagegenerators.WikidataSPARQLPageGenerator(dataset_query, site=wikidata_site):
    # SPARQL results can lag behind the live wiki, so an item may have been
    # deleted or merged (turned into a redirect) in the meantime. Loading such
    # an item with get() raises and would abort the whole batch; skip it and
    # log instead, like edit failures below.
    if not Qitem.exists():
        print('{}\tskipped: item does not exist'.format(Qitem.title()))
        continue
    if Qitem.isRedirectPage():
        print('{}\tskipped: item is a redirect'.format(Qitem.title()))
        continue

    Qitem.get()
    labels = Qitem.labels

    if not labels:
        print('Item {} does not have any labels'.format(Qitem.title()))
        continue

    if 'en' not in labels:
        print('Item {} does not have an English label'.format(Qitem.title()))
        continue

    # Cache the source label once; it is used for the new labels, the edit
    # summary and the log lines.
    english = labels['en']

    # Items carrying this exact English label are skipped silently.
    if english == 'unknown cox':
        continue

    # Only fill gaps: languages that already have a label are never touched.
    addLabels = {code: english for code in langcodes if code not in labels}
    cntLabels = len(addLabels)

    if not addLabels:
        print('{}\tnothing to update'.format(Qitem.title()))
        # The pause only happens on this no-edit path.
        # NOTE(review): presumably meant to pace read-only iterations, on the
        # assumption that pywikibot throttles writes itself - confirm.
        time.sleep(1)
        continue

    try:
        Qitem.editLabels(addLabels, summary=editSummary.format(cntLabels, english))
    except APIError as e:
        # A failed save is logged and the job moves on to the next item.
        print('{}\tcannot save label {} due to APIerror: {}'.format(Qitem.title(), english, e))
    else:
        print('{}\t{} labels\t"{}"'.format(Qitem.title(), cntLabels, english))

print('All done, job finished')
All done, job finished
In [ ]: