import json
import pywikibot
import re
import requests
import time
from pywikibot import pagegenerators

# ---------------------------------------------------------------------------
# Bot configuration
# ---------------------------------------------------------------------------

# Tag appended to every edit summary written by this task.
taskHashtag = '#msynbotTask2'

# Connection to Wikidata and the format used for progress timestamps.
wikidata_site = pywikibot.Site('wikidata', 'wikidata')
repo = wikidata_site.data_repository()
timestampformat = '%Y-%m-%d %H:%M:%S'

# References in claims of these properties are looked at.
# Use [ '*' ] to process all properties of an item, or a specified list.
claimProperties = [
    'P269', 'P268', 'P214', 'P213', 'P244', 'P349', 'P227',
    'P1006', 'P691', 'P1015', 'P396', 'P409', 'P950', 'P1017',
    'P1005', 'P3280', 'P245', 'P906', 'P1670', 'P1207', 'P1273',
    'P949', 'P1309', 'P1695', 'P2163', 'P271', 'P951', 'P1375',
    'P1368', 'P496', 'P1946', 'P1890', 'P1025', 'P1315', 'P1255',
    'P345', 'P535', 'P646', 'P723', 'P947', 'P2409', 'P3065',
]

# The reference snak is moved from `fromRefProperty` to `toRefProperty`.
fromRefProperty = 'P143'
# None (just moves everything) or a list of Q-IDs (recommended): only
# references whose source value is listed here are touched.
requireFromRefValue = ['Q54919']
toRefProperty = 'P248'
# None or a Q-ID; when set, it replaces the target value of the moved snak.
setToRefValue = None

# Placeholders, in order: source property, target property, added property,
# task hashtag.
editSummary = 'update reference: move qualifier from [[Property:{}]] to [[Property:{}]] and add [[Property:{}]] {}'

# Query selecting items to work on. It is only used by the (currently
# disabled) page generator below.
# NOTE(review): the '%s' tokens are never %-formatted in this file; presumably
# they are meant as the name of the SPARQL named subquery -- confirm.
dataset_query = """SELECT DISTINCT ?item WITH {
  SELECT ?ref WHERE { ?ref pr:P143 wd:Q54919; pr:P813 [] }
} AS %s WHERE {
  INCLUDE %s .
  ?item ?any [ prov:wasDerivedFrom ?ref ] .
} LIMIT 100"""

# Items to process; a fixed single item unless the generator is re-enabled.
inputdata = ['Q65204']
#for itemkey in pagegenerators.WikidataSPARQLPageGenerator(dataset_query, site=wikidata_site):
    #inputdata.append(itemkey.title())

print('Found {} items to process'.format(len(inputdata)))

# VIAF lookup settings: URL template taking a VIAF ID, and the HTTP headers
# sent with each request.
viafLinksUrl = 'https://viaf.org/viaf/{}/justlinks.json'
headers = {
    'User-Agent': 'python3 (Wikidata bot by User:MisterSynergy; mailto:mister.synergy@yahoo.com)',
}

# Property added to a rewritten reference; its value is the matching VIAF ID.
importTarget = 'P214'

# Per Wikidata property: 'key' is the entry name looked up in the VIAF links
# data, 'urlFormat' is how the claim value is formatted before it is searched
# for in that entry ('{}' means the bare value).
claimPropertyMap = {
    prop: {'key': viafKey, 'urlFormat': urlFormat}
    for prop, viafKey, urlFormat in (
        ('P269', 'SUDOC', '{}'),
        ('P268', 'BNF', 'http://catalogue.bnf.fr/ark:/12148/cb{}'),
        ('P214', 'viafID', '{}'),
        ('P213', 'ISNI', '{}'),
        ('P244', 'LC', '{}'),
        ('P349', 'NDL', '{}'),
        ('P227', 'DNB', 'http://d-nb.info/gnd/{}'),
        ('P1006', 'NTA', '{}'),
        ('P691', 'NKC', '{}'),
        ('P1015', 'BIBSYS', '{}'),
        ('P409', 'NLA', '{}'),
        ('P950', 'BNE', '{}'),
        ('P1207', 'NUKAT', '{}'),
        ('P1695', 'NLP', '{}'),
        ('P2163', 'FAST', '{}'),
        ('P949', 'NLI', '{}'),
        ('P1017', 'BAV', '{}'),
        ('P906', 'SELIBR', '{}'),
        ('P1670', 'LAC', '{}'),
    )
}

def _fetch_viaf_links(Qitem):
    """Collect the item's VIAF IDs (P214) and download the links data of each.

    Returns a list of dicts ``{'viaf': <VIAF ID>, 'links': <parsed JSON>}``.
    ``links`` stays an empty dict when the request fails, does not answer with
    HTTP 200, or does not deliver a JSON object.
    """
    viafIds = []
    if 'P214' in Qitem.claims:
        for viafClaim in Qitem.claims['P214']:
            viafValue = viafClaim.getTarget()
            if viafValue is None:  # claim without a value: nothing to look up
                continue
            viafIds.append({'viaf': viafValue, 'links': {}})

    for viafId in viafIds:
        try:
            # the timeout prevents one stalled connection from blocking the whole run
            response = requests.get(viafLinksUrl.format(viafId['viaf']), headers=headers, timeout=30)
            if response.status_code == 200:
                links = json.loads(response.text)
                if isinstance(links, dict):
                    viafId['links'] = links
        except (requests.RequestException, ValueError) as e:
            # network trouble or invalid JSON: treat this VIAF ID as having no links
            print(e)
        time.sleep(3)  # pause between consecutive VIAF requests
    return viafIds


def _find_viaf_source(viafIds, claimProperty, claimValue):
    """Return the VIAF ID whose links data lists ``claimValue``, or None.

    Only properties configured in ``claimPropertyMap`` can be matched; the
    claim value is formatted with the property's ``urlFormat`` before it is
    searched for in the entry named by the property's ``key``.
    """
    if claimProperty not in claimPropertyMap:
        return None
    linkKey = claimPropertyMap[claimProperty]['key']
    expected = claimPropertyMap[claimProperty]['urlFormat'].format(claimValue)
    for viafId in viafIds:
        if linkKey in viafId['links'] and expected in viafId['links'][linkKey]:
            return viafId['viaf']
    return None


def _reference_is_movable(reference):
    """Tell whether a reference (in claim JSON form) qualifies for rewriting.

    A reference qualifies when it consists of exactly one ``fromRefProperty``
    snak, optionally accompanied by P813, and that snak points to one of the
    values in ``requireFromRefValue`` (if such a restriction is configured).
    """
    snaks = reference['snaks']
    allowedOrders = (
        [fromRefProperty, 'P813'],
        ['P813', fromRefProperty],
        [fromRefProperty],
    )
    if reference['snaks-order'] not in allowedOrders:
        return False
    if fromRefProperty not in snaks:  # fromRefProperty not found
        return False
    if toRefProperty in snaks:  # toRefProperty with any value already present
        return False
    if len(snaks[fromRefProperty]) != 1:  # more than one snak for fromRefProperty
        return False
    if requireFromRefValue is not None:
        datavalue = snaks[fromRefProperty][0].get('datavalue')
        if datavalue is None:  # snak carries no value, so it cannot match
            return False
        if 'Q' + str(datavalue['value']['numeric-id']) not in requireFromRefValue:
            return False  # target value of the snak to move is wrong
    if importTarget in snaks:  # would be overwritten by the rewrite
        return False
    return True


def _rewrite_reference(reference, importId):
    """Rewrite one reference (claim JSON form) in place.

    Moves the ``fromRefProperty`` snak to ``toRefProperty``, adds an
    ``importTarget`` snak holding ``importId``, and sets P813 to the current
    date (taken from the local clock, as ``time.strftime`` is called without
    an explicit time tuple).
    """
    snaks = reference['snaks']
    order = reference['snaks-order']

    # move the snak to the new property, optionally replacing its target item
    snaks[toRefProperty] = snaks.pop(fromRefProperty)
    for snak in snaks[toRefProperty]:
        snak['property'] = toRefProperty
        if setToRefValue is not None:  # change target Q-ID
            value = snak['datavalue']['value']
            value['numeric-id'] = int(setToRefValue[1:])
            if 'id' in value:  # keep the redundant string form consistent
                value['id'] = setToRefValue
    order[order.index(fromRefProperty)] = toRefProperty

    # add the VIAF ID the claim value was found in
    snaks[importTarget] = [ { 'snaktype': 'value', 'property': importTarget, 'datatype': 'external-id', 'datavalue': { 'value': importId, 'type': 'string' } } ]
    order.append(importTarget)

    # set (or add) the date of retrieval, with day precision
    today = time.strftime('+0000000%Y-%m-%dT00:00:00Z')
    calendarmodel = 'http://www.wikidata.org/entity/Q1985727'
    if 'P813' in snaks:
        for snak in snaks['P813']:
            value = snak['datavalue']['value']
            value['time'] = today
            value['precision'] = 11
            value['calendarmodel'] = calendarmodel
    else:
        snaks['P813'] = [ { 'snaktype': 'value', 'property': 'P813', 'datatype': 'time', 'datavalue': { 'value' : { 'time' : today, 'precision': 11, 'after': 0, 'before': 0, 'timezone': 0, 'calendarmodel': calendarmodel }, 'type': 'time' } } ]
        order.append('P813')

    # normalise the snak order to: source, VIAF ID, retrieval date
    if order == [toRefProperty, 'P813', importTarget] or order == ['P813', toRefProperty, importTarget]:
        reference['snaks-order'] = [toRefProperty, importTarget, 'P813']


def action_moveP(Qitem, claimProps): # adapted from https://github.com/Pascalco/DeltaBot/blob/master/fixClaims/fixClaims.py#L223 on 2017-03-06
    """Rewrite qualifying references of an item and submit them in one edit.

    For every claim of the selected properties, each reference that consists
    only of ``fromRefProperty`` (plus optionally P813) is rewritten, provided
    that the claim value is listed in the links data of one of the item's VIAF
    IDs. Changed claims are submitted as "remove old claim, add new claim"
    command pairs in a single ``editEntity`` call.

    Parameters:
        Qitem: the item page to work on; its ``claims`` must already be loaded.
        claimProps: list of property IDs whose claims are examined, or
            ``['*']`` for all properties of the item. The list is not modified.

    Returns:
        The number of claims that were rewritten and submitted; 0 when nothing
        qualified or when the edit raised an exception (the exception is
        printed and followed by a one-minute pause).
    """
    commands = {} # this is the "data" parameter of the API at https://www.mediawiki.org/wiki/Wikibase/API#wbeditentity
    commands['claims'] = [] # we only work on claims in this script; this list takes all claim commands

    viafIds = _fetch_viaf_links(Qitem)
    if not viafIds: # without a VIAF ID no claim value can be confirmed
        return 0

    # Resolve the wildcard into a local list; the caller's list must stay
    # untouched so that '*' is still in effect for the next item.
    if claimProps == [ '*' ]:
        propertiesToCheck = list(Qitem.claims.keys())
    else:
        propertiesToCheck = claimProps

    for claimProperty in propertiesToCheck:
        if claimProperty not in Qitem.claims:
            continue
        for claim in Qitem.claims[claimProperty]:
            fromClaimJSON = claim.toJSON()
            if 'references' not in fromClaimJSON: # no references found
                continue

            # the lookup depends on the claim only, so do it once per claim
            importId = _find_viaf_source(viafIds, claimProperty, claim.getTarget())
            if importId is None:
                continue

            claimHasChanged = False
            for reference in fromClaimJSON['references']:
                if _reference_is_movable(reference):
                    _rewrite_reference(reference, importId)
                    claimHasChanged = True

            if claimHasChanged:
                commands['claims'].append({'id': fromClaimJSON['id'], 'remove':''}) # first command: removal of the old claim, identified by statement ID
                fromClaimJSON.pop('id', None) # drop statement ID, since a new one needs to be generated for the new claim
                commands['claims'].append(fromClaimJSON) # second command: addition of the modified claim

    if not commands['claims']:
        return 0

    try:
        Qitem.editEntity(commands, summary=editSummary.format(fromRefProperty, toRefProperty, importTarget, taskHashtag))
    except Exception as e:
        print(e)
        time.sleep(60) # take a 1 minute break, in order to drastically reduce load in case something goes wrong
        return 0 # nothing was saved
    return len(commands['claims']) // 2 # two commands (remove + add) per moved claim

# Driver: load every input item, rewrite its references and report progress.
for i, itemkey in enumerate(inputdata):
    Qitem = pywikibot.ItemPage(repo, itemkey)
    Qitem.get()
    if not Qitem.claims: # item does not have any claims
        continue

    moved_claims = action_moveP(Qitem, claimProperties)
    # The leading `True or` disables the every-100-items throttle, so a
    # progress line is printed for every processed item.
    if True or (len(inputdata)-i-1)%100==0:
        # The line is labelled "(UTC)", so format the UTC time explicitly;
        # time.strftime() without a time tuple would use local time.
        print('{} (UTC), {}: mv {:d} pr:{} -> pr:{}; {:d}/{:d} item(s) to go'.format(time.strftime(timestampformat, time.gmtime()), Qitem.title(), moved_claims, fromRefProperty, toRefProperty, len(inputdata)-i-1, len(inputdata)))

print('all done, job finished')
Found 1 items to process
[{'snaktype': 'value', 'property': 'P813', 'datatype': 'time', 'datavalue': {'value': {'time': '+00000002018-08-09T00:00:00Z', 'precision': 11, 'after': 0, 'before': 0, 'timezone': 0, 'calendarmodel': 'http://www.wikidata.org/entity/Q1985727'}, 'type': 'time'}}]
[{'snaktype': 'value', 'property': 'P813', 'datatype': 'time', 'datavalue': {'value': {'time': '+00000002018-08-09T00:00:00Z', 'precision': 11, 'after': 0, 'before': 0, 'timezone': 0, 'calendarmodel': 'http://www.wikidata.org/entity/Q1985727'}, 'type': 'time'}}]
2018-08-09 23:32:25 (UTC), Q65204: mv 2 pr:P143 -> pr:P248; 0/1 item(s) to go
all done, job finished