import os
import pywikibot
import requests
import rdflib
import sys
import time
from pywikibot import pagegenerators
from rdflib.plugins.sparql import prepareQuery
from rdflib.namespace import Namespace, NamespaceManager

# Switch the process to the Europe/Berlin time zone: export TZ, then let
# time.tzset() re-read it (tzset is only available on Unix).
os.environ.update(TZ='Europe/Berlin')
time.tzset()

# Site object for Wikidata plus its data repository; `repo` is what the
# ItemPage and Claim objects created further down are bound to.
wikidata_site = pywikibot.Site(code='wikidata', fam='wikidata')
repo = wikidata_site.data_repository()

# Wikidata property whose claims this script inspects (Qitem.claims[gndP]).
gndP = 'P227'
# URL template for one GND record; "{}" receives the GND identifier and the
# response body is parsed as Turtle.
urlFormatter = 'https://d-nb.info/gnd/{}/about/lds'
# RDF namespaces handed to the prepared SPARQL queries as the "gndo" and "owl"
# prefixes (see the initNs arguments below).
GNDO = Namespace('http://d-nb.info/standards/elementset/gnd#')
OWL = Namespace('http://www.w3.org/2002/07/owl#')

# Edit summaries for the changes written to Wikidata; every "{}" is filled in
# with str.format at the call site.
summaryStringRemoveTn = 'remove undifferentiated GND "{}" (type Tn)'
summaryStringRemoveRedirectAlreadySet = (
    'remove redirecting GND "{}"; '
    'redirect target GND "{}" is already set'
)
summaryStringUpdateRedirect = (
    'update redirecting GND "{}" '
    'to redirect target "{}"'
)
summaryStringAddNamedAsQualifier = (
    'add [[Property:P1810]] qualifier '
    'to GND "{}" with value "{}"'
)

# HTTP headers sent with every request for a GND record.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:62.0) '
        'Gecko/20100101 Firefox/62.0'
    ),
}

# Pattern templates for an undifferentiated-person triple and an owl:sameAs
# redirect; nothing in the code visible here reads them.
undiff = '<http://d-nb.info/gnd/{}> a gndo:UndifferentiatedPerson'
redir = 'owl:sameAs <http://d-nb.info/gnd/{}>'

# Item titles (Q-ids) that the main loop skips.
blacklist = []

# SPARQL query for the Wikidata Query Service, used by item-selection option 1.
#
# The inner (named) subquery keeps items that are instances of wd:Q5 and carry
# more than one non-deprecated P227 statement. The outer part then returns the
# distinct items among those that have at least one non-deprecated P227
# statement without a P1810 qualifier.
#
# NOTE: "%s" is NOT a printf/str.format placeholder. It is the name of the
# named subquery (Blazegraph syntax: WITH { ... } AS %name / INCLUDE %name),
# so this string must be passed to the query service unformatted.
dataset_query = """SELECT DISTINCT ?item WITH {
  SELECT ?item WHERE {
    ?item p:P227 ?s; wdt:P31 wd:Q5 .
    MINUS { ?s wikibase:rank wikibase:DeprecatedRank }
  } GROUP BY ?item HAVING(COUNT(*) > 1)
} AS %s WHERE {
  INCLUDE %s .
  ?item p:P227 ?s .
  MINUS { ?s wikibase:rank wikibase:DeprecatedRank }
  ?s ps:P227 ?gnd .
  OPTIONAL {
    ?s pq:P1810 ?namedAs .
  }
  FILTER(!BOUND(?namedAs)) .
}"""

## Item selection: enable exactly one option. Whichever option is active has
## to leave `items` as a list whose entries are Q-ids (str) or
## pywikibot.ItemPage objects; the processing loop below accepts both and
## needs len(items) for its progress output.

## option 1
#items = list(pagegenerators.WikidataSPARQLPageGenerator(dataset_query, site=wikidata_site))

## option 2
#touch_categories = [ 'Kategorie:Wikipedia:GND in Wikipedia weicht von GND in Wikidata ab' ]
#wikipedia_site = pywikibot.Site('de', 'wikipedia')
#items = []
#for touch_category in touch_categories:
#    for page in pagegenerators.CategorizedPageGenerator(pywikibot.Category(wikipedia_site, touch_category)):
#        items.append(page.data_item())

## option 3
items = [ 'Q672957', 'Q672957', 'Q2471942', 'Q1683841', 'Q2075443', 'Q1518672', 'Q156019', 'Q156019', 'Q174769', 'Q2324941', 'Q19820049', 'Q95145' ]

## end options

# Prefix shared by the record URIs the queries below are bound to.
# NOTE(review): every URI here uses the http:// scheme; confirm that the
# Turtle served under urlFormatter still names records that way, otherwise
# none of the queries will match anything.
gndUriPrefix = 'http://d-nb.info/gnd/'
# rdf:type value that marks a record as undifferentiated ("Tn").
undifferentiatedPersonType = 'http://d-nb.info/standards/elementset/gnd#UndifferentiatedPerson'
# Qualifier property handled by the (currently disabled) last step.
namedAsP = 'P1810'
# Upper bound in seconds for one GND request, so that a stalled connection
# cannot hang the whole run.
requestTimeout = 60
# The qualifier step is work in progress: even when enabled it only reports
# what it would add, the addQualifier call itself is still commented out.
addNamedAsQualifiers = False

# The queries do not depend on the item or claim, so prepare them only once.
typeQuery = prepareQuery(
    """SELECT ?gnd ?type WHERE {
                ?gnd rdf:type ?type .
            }"""
)
redirQuery = prepareQuery(
    """SELECT ?targetGnd ?redirGnd WHERE {
                ?targetGnd owl:sameAs ?redirGnd .
            }""",
    initNs = { 'owl': OWL }
)
nameQuery = prepareQuery(
    """SELECT ?gnd ?name WHERE {
                ?gnd gndo:preferredNameForThePerson ?name .
            }""",
    initNs = { 'gndo': GNDO }
)


def _gnd_uri(gndIdentifier):
    """Return the URIRef the queries use for the record *gndIdentifier*."""
    return rdflib.term.URIRef('{}{}'.format(gndUriPrefix, gndIdentifier))


def _gnd_id(uri):
    """Return the part of *uri* that follows gndUriPrefix."""
    return str(uri)[len(gndUriPrefix):]


def _fetch_gnd_graph(Qitem, gndIdentifier):
    """Download and parse the Turtle description of one GND record.

    Returns the parsed rdflib.Graph, or None (after printing the reason) when
    the request fails or answers with a status other than 200/301.
    """
    url = urlFormatter.format(gndIdentifier)
    try:
        gndRequest = requests.get(url, headers=headers, timeout=requestTimeout)
    except requests.RequestException as error:
        # A single unreachable record must not abort the whole run.
        print('{}: {} could not be requested: {}'.format(Qitem.title(), gndIdentifier, error))
        return None

    if gndRequest.status_code not in [ 200, 301 ]:
        print('{}: {} with status code {}'.format(Qitem.title(), gndIdentifier, gndRequest.status_code))
        return None

    graph = rdflib.Graph()
    graph.parse(data=gndRequest.text, format='turtle')
    return graph


def _remove_if_undifferentiated(Qitem, claim, graph, gndIdentifier, allCurrentGnds):
    """Print the record's types and remove *claim* if the record is of type Tn.

    Returns True when the claim was removed; *gndIdentifier* is then also
    dropped from *allCurrentGnds*.
    """
    removed = False
    for row in graph.query(typeQuery, initBindings={ 'gnd' : _gnd_uri(gndIdentifier) }):
        gnd = _gnd_id(row[0])
        gndType = str(row[1])
        print('{}: {}={} has type {}'.format(Qitem.title(), gndIdentifier, gnd, gndType))
        if gndType == undifferentiatedPersonType and not removed:
            print('{}: {} to be removed (Tn)'.format(Qitem.title(), gndIdentifier))
            Qitem.removeClaims(claim, summary=summaryStringRemoveTn.format(gnd))
            allCurrentGnds.remove(gndIdentifier)
            removed = True
    return removed


def _resolve_redirect(Qitem, claim, graph, gndIdentifier, allCurrentGnds):
    """Handle a record that another GND record declares owl:sameAs.

    If the redirect target is already among *allCurrentGnds* the claim is
    removed, otherwise the claim's value is changed to the target.
    *allCurrentGnds* is updated accordingly. Returns True when the claim was
    removed or changed.
    """
    for row in graph.query(redirQuery, initBindings={ 'redirGnd' : _gnd_uri(gndIdentifier) }):
        print('{}: {}={} redirects to {}'.format(Qitem.title(), gndIdentifier, row[1], row[0]))
        if not str(row[0]).startswith(gndUriPrefix):
            # Not a GND record URI: slicing it would yield a bogus identifier.
            continue
        redirGnd = _gnd_id(row[1])
        targetGnd = _gnd_id(row[0])
        if redirGnd != gndIdentifier or targetGnd == gndIdentifier:
            # Only act on a redirect of this very claim to a different record.
            continue

        if targetGnd in allCurrentGnds:
            print('{}: remove redirect {} as target {} is already set'.format(Qitem.title(), redirGnd, targetGnd))
            Qitem.removeClaims(claim, summary=summaryStringRemoveRedirectAlreadySet.format(redirGnd, targetGnd))
            allCurrentGnds.remove(gndIdentifier)
        else:
            print('{}: remove redirect {} and add target {}'.format(Qitem.title(), redirGnd, targetGnd))
            claim.changeTarget(value=targetGnd, summary=summaryStringUpdateRedirect.format(redirGnd, targetGnd))
            allCurrentGnds.remove(gndIdentifier)
            allCurrentGnds.append(targetGnd)
        # The claim has been dealt with; acting on a further row would edit
        # it a second time and fail on the second list removal.
        return True
    return False


def _preferred_names(Qitem, graph, gndIdentifier):
    """Print and return the gndo:preferredNameForThePerson values of a record."""
    names = []
    for row in graph.query(nameQuery, initBindings={ 'gnd' : _gnd_uri(gndIdentifier) }):
        print('{}: {} with name "{}"'.format(Qitem.title(), _gnd_id(row[0]), row[1].value))
        names.append(row[1].value)
    return names


def _add_named_as_qualifiers(Qitem, setNamedAsQualifiers):
    """Report the P1810 qualifiers that would be added to the item's GND claims.

    *setNamedAsQualifiers* maps a GND identifier to the list of preferred
    names found for it. A claim is skipped when it already carries one of
    those names as qualifier, or when there is not exactly one name.
    """
    print(setNamedAsQualifiers)
    for claim in Qitem.claims[gndP]:
        gndIdentifier = claim.getTarget()
        names = setNamedAsQualifiers.get(gndIdentifier)
        if not names:
            continue
        # has_qualifier compares against a single target value, so every
        # collected name is checked on its own.
        if any(claim.has_qualifier(namedAsP, name) for name in names):
            print('{}: claim {} has already P1810 qualifier'.format(Qitem.title(), gndIdentifier))
            continue
        if len(names) == 1:
            print('{}: {} add P1810 qualifier with value "{}"'.format(Qitem.title(), gndIdentifier, names[0]))
            newQualifier = pywikibot.Claim(repo, namedAsP)
            newQualifier.setTarget(value=names[0])
            #claim.addQualifier(newQualifier, summary=summaryStringAddNamedAsQualifier.format(gndIdentifier, names[0]))


def process_item(Qitem):
    """Clean up the GND (gndP) claims of one already loaded item.

    For every non-deprecated GND claim the record is fetched and then
    - removed if the record is an undifferentiated person (Tn),
    - removed or retargeted if the record redirects to another record,
    - otherwise its preferred names are collected for the qualifier step.
    """
    if not Qitem.claims:
        print('Item {} does not have any claims'.format(Qitem.title()))
        return

    if gndP not in Qitem.claims:
        print('Item {} does not have a claim {}'.format(Qitem.title(), gndP))
        return

    # Deprecated claims are neither checked nor counted as "already set".
    # Filtering them up front (instead of while iterating) keeps the result
    # independent of the order in which the claims are stored.
    activeClaims = [claim for claim in Qitem.claims[gndP] if claim.getRank() != 'deprecated']
    allCurrentGnds = [claim.getTarget() for claim in activeClaims]
    setNamedAsQualifiers = {}

    for claim in activeClaims:
        gndIdentifier = claim.getTarget()

        graph = _fetch_gnd_graph(Qitem, gndIdentifier)
        if graph is None:
            continue
        if _remove_if_undifferentiated(Qitem, claim, graph, gndIdentifier, allCurrentGnds):
            continue
        if _resolve_redirect(Qitem, claim, graph, gndIdentifier, allCurrentGnds):
            continue

        names = _preferred_names(Qitem, graph, gndIdentifier)
        if names:
            setNamedAsQualifiers.setdefault(gndIdentifier, []).extend(names)

    if addNamedAsQualifiers and len(setNamedAsQualifiers) > 1:
        _add_named_as_qualifiers(Qitem, setNamedAsQualifiers)


for i, item in enumerate(items):
    # Options 1 and 2 deliver ItemPage objects, option 3 plain Q-ids.
    Qitem = item if isinstance(item, pywikibot.ItemPage) else pywikibot.ItemPage(repo, item)
    Qitem.get()
    if Qitem.title() in blacklist:
        continue

    # Pause between items to throttle the requests and edits.
    time.sleep(5)
    print()
    print('{}/{}'.format(i+1, len(items)))

    process_item(Qitem)
1/12
Q672957: 1086298004=1086298004 has type http://d-nb.info/standards/elementset/gnd#CorporateBody
Q672957: 1086184866=http://d-nb.info/gnd/1086184866 redirects to http://d-nb.info/gnd/5531832-0
Q672957: remove redirect 1086184866 as target 5531832-0 is already set
Q672957: 1086071344=http://d-nb.info/gnd/1086071344 redirects to http://d-nb.info/gnd/5531832-0
Q672957: remove redirect 1086071344 as target 5531832-0 is already set
Q672957: 5531832-0=5531832-0 has type http://d-nb.info/standards/elementset/gnd#CorporateBody

2/12
Q672957: 1086298004=1086298004 has type http://d-nb.info/standards/elementset/gnd#CorporateBody
Q672957: 5531832-0=5531832-0 has type http://d-nb.info/standards/elementset/gnd#CorporateBody

3/12
Q2471942: 106537268X=106537268X has type http://d-nb.info/standards/elementset/gnd#CorporateBody
Q2471942: 1088740480=http://d-nb.info/gnd/1088740480 redirects to http://d-nb.info/gnd/515685-3
Q2471942: remove redirect 1088740480 and add target 515685-3

4/12
Q1683841: 1065858396=1065858396 has type http://d-nb.info/standards/elementset/gnd#CorporateBody
Q1683841: 1035166534=1035166534 has type http://d-nb.info/standards/elementset/gnd#CorporateBody
Q1683841: 1092120181=http://d-nb.info/gnd/1092120181 redirects to http://d-nb.info/gnd/1035166534
Q1683841: remove redirect 1092120181 as target 1035166534 is already set

5/12
Q2075443: 2017795-1=2017795-1 has type http://d-nb.info/standards/elementset/gnd#CorporateBody
Q2075443: 1086722256=http://d-nb.info/gnd/1086722256 redirects to http://d-nb.info/gnd/2017795-1
Q2075443: remove redirect 1086722256 as target 2017795-1 is already set

6/12
Q1518672: 5010007-5=5010007-5 has type http://d-nb.info/standards/elementset/gnd#CorporateBody
Q1518672: 108821987X=http://d-nb.info/gnd/108821987X redirects to http://d-nb.info/gnd/5010007-5
Q1518672: remove redirect 108821987X as target 5010007-5 is already set
Q1518672: 1088898971=1088898971 has type http://d-nb.info/standards/elementset/gnd#CorporateBody

7/12
Q156019: 1088260055=http://d-nb.info/gnd/1088260055 redirects to http://d-nb.info/gnd/1127242172
Q156019: remove redirect 1088260055 and add target 1127242172
Q156019: 2123842-X=2123842-X has type http://d-nb.info/standards/elementset/gnd#CorporateBody
Q156019: 2019974-0=2019974-0 has type http://d-nb.info/standards/elementset/gnd#CorporateBody

8/12
Q156019: 1127242172=1127242172 has type http://d-nb.info/standards/elementset/gnd#CorporateBody
Q156019: 2123842-X=2123842-X has type http://d-nb.info/standards/elementset/gnd#CorporateBody
Q156019: 2019974-0=2019974-0 has type http://d-nb.info/standards/elementset/gnd#CorporateBody

9/12
Q174769: 5321482-1=5321482-1 has type http://d-nb.info/standards/elementset/gnd#CorporateBody
Q174769: 1085287521=http://d-nb.info/gnd/1085287521 redirects to http://d-nb.info/gnd/5321482-1
Q174769: remove redirect 1085287521 as target 5321482-1 is already set
Q174769: 1086367294=1086367294 has type http://d-nb.info/standards/elementset/gnd#CorporateBody

10/12
Q2324941: 10103286-9=10103286-9 has type http://d-nb.info/standards/elementset/gnd#CorporateBody

11/12
Q19820049: 1024221342=1024221342 has type http://d-nb.info/standards/elementset/gnd#DifferentiatedPerson
Q19820049: 1024221342 with name "Pons, Bartholomeus"

12/12
Q95145: 7701946-5=7701946-5 has type http://d-nb.info/standards/elementset/gnd#CorporateBody