import json
import pywikibot
from pywikibot import pagegenerators
from pywikibot.data.api import APIError
import re
import requests

# Site handle for Wikidata (site code 'wikidata', family 'wikidata'); created
# once at module level and shared by the rest of the script.
wikidata_site = pywikibot.Site('wikidata', 'wikidata')
# Data repository of that site; passed to ItemPage/Claim constructors below.
repo = wikidata_site.data_repository()

# Syntax check for GND identifiers. The alternatives are wrapped in a
# non-capturing group so that ``^`` and ``$`` apply to every alternative, not
# just the first and last one; otherwise an identifier embedded in other text
# (or followed by trailing characters) would be accepted. The raw string keeps
# ``\d`` from being treated as an (invalid) string escape.
gndPattern = re.compile(
    r'^(?:1[01]?\d{7}[0-9X]|[47]\d{6}-\d|[1-9]\d{0,7}-[0-9X]|3\d{7}[0-9X])$'
)
# URL template for the VIAF "justlinks" JSON of one cluster; format() with the VIAF id.
viafUrlPattern = 'https://viaf.org/viaf/{}/justlinks.json'
# Request headers sent to VIAF with every lookup (browser-like User-Agent).
viafHeaders = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:62.0) Gecko/20100101 Firefox/62.0',
}

def extractGndIds(viafLinks):
    """Return the GND identifiers listed under 'DNB' in a parsed justlinks.json payload.

    ``viafLinks`` is whatever ``json.loads`` produced for the VIAF response.
    The payload is not always a JSON object (a bare integer has been observed,
    which made ``'DNB' in viafLinks`` raise TypeError), so anything that is
    not a dict yields an empty list. A single string under 'DNB' is treated
    like a one-element list. Each identifier is the last path segment of the
    link, so both http:// and https:// forms of the d-nb.info URL work.

    The returned identifiers are NOT validated against the GND syntax.
    """
    if not isinstance(viafLinks, dict):
        return []
    dnbLinks = viafLinks.get('DNB', [])
    if isinstance(dnbLinks, str):
        dnbLinks = [dnbLinks]
    elif not isinstance(dnbLinks, (list, tuple)):
        return []
    return [
        link.strip().rstrip('/').rsplit('/', 1)[-1]
        for link in dnbLinks
        if isinstance(link, str) and link.strip()
    ]


# Q-ids of the Wikidata items to process; fill in before running.
items = []
# attention: does not check GND's retrieved from VIAF for their type; there may be Tn's in VIAF
for item in items:
    Qitem = pywikibot.ItemPage(repo, item)

    # Alternative driver (replace the loop header and the ItemPage line above):
    # for Qitem in pagegenerators.WikidataSPARQLPageGenerator(dataset_query, site=wikidata_site):
    Qitem.get()

    if not Qitem.claims or 'P214' not in Qitem.claims:
        print('{}: no claims, or no P214 claim'.format(Qitem.title()))
        continue

    # GND values (P227) already on the item, so no duplicate statement is added.
    allExistingGnds = set()
    if 'P227' in Qitem.claims:
        for claim2 in Qitem.claims['P227']:
            existingGnd = claim2.getTarget()
            if existingGnd:  # skip statements without a usable string target
                allExistingGnds.add(existingGnd.strip())

    for claim in Qitem.claims['P214']:
        viafTarget = claim.getTarget()
        if not viafTarget:
            # Nothing to look up for a statement without a usable string target.
            continue
        viaf = viafTarget.strip()

        try:
            # The timeout keeps one stalled connection from hanging the whole run.
            r = requests.get(viafUrlPattern.format(viaf), headers=viafHeaders, timeout=30)
        except requests.RequestException as e:
            print('{}: could not retrieve VIAF justlinks.json, request failed: {} (VIAF {})'.format(Qitem.title(), e, viaf))
            continue
        if r.status_code != 200:
            print('{}: could not retrieve VIAF justlinks.json, status {} (VIAF {})'.format(Qitem.title(), r.status_code, viaf))
            continue

        try:
            viafLinks = json.loads(r.text)
        except ValueError:  # json.JSONDecodeError is a ValueError subclass
            print('{}: VIAF justlinks.json is not valid JSON (VIAF {})'.format(Qitem.title(), viaf))
            continue

        newGnds = extractGndIds(viafLinks)
        if not newGnds:
            print('{}: no GND link found in VIAF justlinks.json (VIAF {})'.format(Qitem.title(), viaf))
            continue

        for newGnd in newGnds:
            # fullmatch requires the whole identifier to fit the GND syntax,
            # independent of how the pattern itself is anchored.
            if gndPattern.fullmatch(newGnd) is None:
                print('{}: new GND identifier also invalid (VIAF {}; newGND {})'.format(Qitem.title(), viaf, newGnd))
                continue

            print('{}: new GND found: {} (VIAF {})'.format(Qitem.title(), newGnd, viaf))

            if newGnd in allExistingGnds:
                continue

            try:
                newGndClaim = pywikibot.Claim(repo, 'P227')
                newGndClaim.setTarget(value=newGnd)
                Qitem.addClaim(newGndClaim, summary='add GND identifier GND:{} based on VIAF:{}'.format(newGnd, viaf))
            except APIError as e:
                print('* Cannot update oldGND to newGND for {} due to APIerror: {}'.format(Qitem.title(), e))
            else:
                # Remember the value so a second VIAF claim pointing to the
                # same GND does not add the statement again.
                allExistingGnds.add(newGnd)
                print('* Add new claim, as the old one was already removed in a previous iteration')

print('Job done, all finished')
Q3083041: new GND found: 142958077 (VIAF 84062164)
* Add new claim, as the old one was already removed in a previous iteration
Q3167089: no GND link found in VIAF justlinks.json (VIAF 44429297)
Q3570590: no GND link found in VIAF justlinks.json (VIAF 237642056)
Q3731205: new GND found: 10000153X (VIAF 27404343)
* Add new claim, as the old one was already removed in a previous iteration
Q4102404: new GND found: 15623565X (VIAF 43217058)
* Add new claim, as the old one was already removed in a previous iteration
Q3659368: no GND link found in VIAF justlinks.json (VIAF 304920435)
Q4060313: no GND link found in VIAF justlinks.json (VIAF 249559411)
Q4754971: no GND link found in VIAF justlinks.json (VIAF 51931478)
Q5536027: no GND link found in VIAF justlinks.json (VIAF 97603135)
Q5553266: no GND link found in VIAF justlinks.json (VIAF 239055067)
Q5518269: new GND found: 1089533934 (VIAF 268635532)
* Add new claim, as the old one was already removed in a previous iteration
Q5221079: no GND link found in VIAF justlinks.json (VIAF 77923524)
Q4793379: no GND link found in VIAF justlinks.json (VIAF 94088397)
Q4895511: no GND link found in VIAF justlinks.json (VIAF 48230659)
Q4794802: no GND link found in VIAF justlinks.json (VIAF 105765998)
Q5971957: no GND link found in VIAF justlinks.json (VIAF 108918026)
Q4567235: no GND link found in VIAF justlinks.json (VIAF 2746095)
Q4773389: new GND found: 100553753 (VIAF 44650396)
* Add new claim, as the old one was already removed in a previous iteration
Q5448513: new GND found: 142436127 (VIAF 170333900)
* Add new claim, as the old one was already removed in a previous iteration
Q5371321: no claims, or no P214 claim
Q5401313: no GND link found in VIAF justlinks.json (VIAF 39479795)
Q4419971: new GND found: 103680020 (VIAF 37333946)
* Add new claim, as the old one was already removed in a previous iteration
Q6089489: no claims, or no P214 claim
Q4765627: no GND link found in VIAF justlinks.json (VIAF 3857769)
Q5340101: no GND link found in VIAF justlinks.json (VIAF 25939377)
Q5299001: new GND found: 18337049X (VIAF 200575685)
* Add new claim, as the old one was already removed in a previous iteration
Q5395639: no GND link found in VIAF justlinks.json (VIAF 93973138)
Q6015248: new GND found: 184993539 (VIAF 12371716)
* Add new claim, as the old one was already removed in a previous iteration
Q6072463: no claims, or no P214 claim
Q6780511: no claims, or no P214 claim
Q6759293: new GND found: 1089230435 (VIAF 58445)
* Add new claim, as the old one was already removed in a previous iteration
Q5565417: new GND found: 188492399 (VIAF 200750440)
* Add new claim, as the old one was already removed in a previous iteration
Q5650578: no GND link found in VIAF justlinks.json (VIAF 15437531)
Q5950301: new GND found: 1024248127 (VIAF 71349345)
* Add new claim, as the old one was already removed in a previous iteration
Q6015119: new GND found: 103644326 (VIAF 163874)
* Add new claim, as the old one was already removed in a previous iteration
Q6171522: no GND link found in VIAF justlinks.json (VIAF 75906602)
Q5203576: no claims, or no P214 claim
Q5236148: new GND found: 103214321 (VIAF 56703668)
* Add new claim, as the old one was already removed in a previous iteration
Q5107520: new GND found: 140384561 (VIAF 167932763)
* Add new claim, as the old one was already removed in a previous iteration
Q5017960: no GND link found in VIAF justlinks.json (VIAF 41868528)
Q6209837: no GND link found in VIAF justlinks.json (VIAF 19664841)
Q5984542: new GND found: 100239587 (VIAF 29653449)
* Add new claim, as the old one was already removed in a previous iteration
Q11267859: no claims, or no P214 claim
Q11305732: new GND found: 188496912 (VIAF 46944631)
* Add new claim, as the old one was already removed in a previous iteration
Q21341145: new GND found: 158098501 (VIAF 163089131)
* Add new claim, as the old one was already removed in a previous iteration
Q6176407: new GND found: 10208789X (VIAF 89462021)
* Add new claim, as the old one was already removed in a previous iteration
Q5681931: no GND link found in VIAF justlinks.json (VIAF 27310288)
Q7945894: no claims, or no P214 claim
Q6305481: no GND link found in VIAF justlinks.json (VIAF 21421203)
Q7279446: no GND link found in VIAF justlinks.json (VIAF 5104208)
Q7067009: new GND found: 103646973 (VIAF 12417642)
* Add new claim, as the old one was already removed in a previous iteration
Q6721195: new GND found: 114135274 (VIAF 5018075)
* Add new claim, as the old one was already removed in a previous iteration
Q6448432: no claims, or no P214 claim
Q6536369: new GND found: 101883315 (VIAF 20056132)
* Add new claim, as the old one was already removed in a previous iteration
Q5881287: no GND link found in VIAF justlinks.json (VIAF 77860480)
Q7940581: no GND link found in VIAF justlinks.json (VIAF 58641471)
Q8051262: new GND found: 103667687 (VIAF 7804275)
* Add new claim, as the old one was already removed in a previous iteration
Q6761419: no GND link found in VIAF justlinks.json (VIAF 44595933)
Q11530974: no GND link found in VIAF justlinks.json (VIAF 120599679)
Q7927151: no GND link found in VIAF justlinks.json (VIAF 77707074)
Q7372349: no GND link found in VIAF justlinks.json (VIAF 51784897)
Q7372334: no GND link found in VIAF justlinks.json (VIAF 11312024)
Q12048488: new GND found: 103701796 (VIAF 262328062)
* Add new claim, as the old one was already removed in a previous iteration
Q8563904: no GND link found in VIAF justlinks.json (VIAF 77781642)
Q13560361: no GND link found in VIAF justlinks.json (VIAF 28744015)
Q13562572: no GND link found in VIAF justlinks.json (VIAF 23456693)
Q12418526: no claims, or no P214 claim
Q12372324: no GND link found in VIAF justlinks.json (VIAF 63000770)
Q12382774: no GND link found in VIAF justlinks.json (VIAF 55496998)
Q12402788: new GND found: 185229131 (VIAF 311582385)
* Add new claim, as the old one was already removed in a previous iteration
Q12390809: new GND found: 189626216 (VIAF 188255608)
* Add new claim, as the old one was already removed in a previous iteration
Q9093471: new GND found: 1087780705 (VIAF 1642145857002722920884)
* Add new claim, as the old one was already removed in a previous iteration
Q12167724: new GND found: 15599560X (VIAF 12148120798494791872)
* Add new claim, as the old one was already removed in a previous iteration
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-3-ba5f986055d7> in <module>()
     37 
     38         viafLinks = json.loads(r.text)
---> 39         if 'DNB' not in viafLinks:
     40             print('{}: no GND link found in VIAF justlinks.json (VIAF {})'.format(Qitem.title(), viaf))
     41             continue

TypeError: argument of type 'int' is not iterable