import json
import sys
import urllib.request, urllib.parse
import datetime
from time import strftime

import pywikibot
#from pywikibot import pagegenerators
#from pywikibot import pagegenerators as pg, textlib, WikidataBot
from pywikibot import textlib

# Dutch month names, 1-indexed: maanden[1] == 'januari'.  Index 0 is a
# placeholder ('nl') so that datetime.month can be used directly as index.
maanden=['nl','januari','februari','maart','april','mei','juni','juli','augustus','september','oktober','november','december']

# Debug toggle: flip by commenting out the second line.  When True the
# report is printed instead of saved to the wiki.
debugmodus = True
debugmodus = False

# Accumulators for the wikitable rows, keyed by page category
# ('taxon', 'person', 'dp', 'list', 'misc', plus infobox names added at runtime).
wikistr={'taxon':'','person':'','dp':'','list':'','misc':''}
# Target report page per language.
wikiurl={'nl':u'Wikipedia:Wikidata/Ongekoppelde pagina\'s'}

# Template names (lower-cased, "Sjabloon:" prefix stripped) that are ignored
# when looking for a real infobox.
skip_templates = ['','infobox/breedte','!!','0','afbeelding gewenst','infobox generiek','nl','nl-vlag','be','be-vlag','afbeeldingbreedte']
# Infobox names that indicate the article is about a person.
person_templates = ['acteur','artiest','artiest klassieke muziek','atleet','atletiek','auteur','filmregisseur','hoogleraar','medicus','journalist','kunstenaar','persoon','politicus nederland','presentator','sporter','voetballer','wielrenner']
# Templates marking a disambiguation page.
disamb_templates = ['dpintro','dp']
# Deletion/maintenance nominations: pages carrying one of these are skipped.
nomination_templates = ['auteur','ne','nuweg','reclame','wb','wiu','samenvoegen naar','weg','samenvoegen']
mustbe='Navigatie infoboxen personen'

# Wikidata item for "Wikimedia disambiguation page".
isdisamb='Q4167410'

def is_person_template(checktemplate):
  """Return True if template *checktemplate* (namespace 10) embeds one of the
  person-navigation templates, i.e. the template marks a biography page."""
  person_markers = ('Sjabloon:Navigatie infoboxen personen',
                    'Sjabloon:Afbeelding gewenst persoon')
  wiki = pywikibot.Site()
  template_page = pywikibot.Page(wiki, checktemplate, 10)
  return any(embedded.title() in person_markers
             for embedded in template_page.templates())

def try2find_taxon(repo,searchstr):
  """
  Read the page's Taxobox, take its 'worms' (WoRMS id) field and try to
  match it against the P850 claim of Wikidata full-text search results.

  Parameters:
    repo      -- Wikidata repository (site.data_repository())
    searchstr -- page title to resolve

  Returns (candidates_checked, item_title_or_None).
  """
  site=pywikibot.Site()
  page=pywikibot.Page(site,searchstr)
  pagetext=page.get()
  templates = textlib.extract_templates_and_params(pagetext)
  worms2find=''
  for (templ, fielddict) in templates:
    if templ=='Taxobox':
      for field, value in fielddict.items():
        if field=='worms':
          # strip surrounding whitespace/newlines the template parser may keep,
          # otherwise the exact-match against P850 below never succeeds
          worms2find=value.strip()

  limit=99  # cap on candidates; renamed from 'max' to avoid shadowing the builtin
  c=0
  searchres = repo.search(searchstr,[0])
  for onetaxon in searchres:
    c+=1
    if c>limit: return c,None
    wditem = pywikibot.ItemPage(repo,onetaxon.title())
    wditem.get(get_redirect=True)
    if 'P850' in wditem.claims:
      thisworms = wditem.claims.get('P850')[0].getTarget()
      if thisworms==worms2find:
        return c,wditem.title()
  return c,None
  
def try2find_person(repo,searchstr):
  """
  Match a biography page against Wikidata search results using the IMDb id
  (P345) or the full Dutch-formatted birth date (P569) from its infobox.

  Parameters:
    repo      -- Wikidata repository
    searchstr -- page title to resolve

  Returns (candidates_checked, item_title_or_None).
  """
  savegebdate = saveimdb = None

  site=pywikibot.Site()
  page=pywikibot.Page(site,searchstr)
  pagetext=page.get()
  templates = textlib.extract_templates_and_params(pagetext)
  for (templ,fielddict) in templates:
    for field,value in fielddict.items():
      if field in ['geboortedatum']:
        savegebdate = value
      if field in ['imdb']:
        saveimdb = value

  searchres = repo.search(searchstr,[0])
  c=0
  for oneperson in searchres:
    c += 1
    wditem = pywikibot.ItemPage(repo,oneperson.title())
    wditem.get(get_redirect=True)
    if (saveimdb is not None) and ('P345' in wditem.claims):  #imdb
      for thisclaim in wditem.claims.get('P345'):
        # P345 is an external-id claim: getTarget() is already a plain string.
        # The old code called .title() on it, which title-cased the id
        # ('nm123' -> 'Nm123') and broke the comparison.
        thisimdb = thisclaim.getTarget()
        # BUG FIX: str.rfind returns -1 when NOT found (truthy) and 0 when
        # found at position 0 (falsy); test explicitly for a real match.
        if thisimdb.rfind(saveimdb) != -1:
          return c,wditem.title()
    if (savegebdate is not None) and ('P569' in wditem.claims):  #check if same birth date (full date)
       try:
         thisdate = wditem.claims.get('P569')[0].getTarget()   #WbTime of found wikidata-item
         xstr = str(savegebdate).replace('[','').replace(']','')  #date without [[]] linking brackets
         ystr = '%d %s %d' % (thisdate.day,maanden[thisdate.month],thisdate.year)   #str-date with Dutch named months
         if xstr==ystr:
           return c,wditem.title()
       except (AttributeError, IndexError, TypeError):
         # partial dates (year-only precision) have no usable day/month; skip
         pass
  return c,None

def try2find_dp(repo,searchstr):
    """
    Search Wikidata for an existing disambiguation item (P31 = Q4167410,
    see module constant `isdisamb`) that already has sitelinks.

    Parameters:
      repo      -- Wikidata repository
      searchstr -- page title to resolve

    Returns (candidates_checked, item_title_or_None).
    """
    limit=99  # cap on candidates; renamed from 'max' to avoid shadowing the builtin
    c=0
    searchres = repo.search(searchstr,[0])
    for oneitem in searchres:
     c+=1
     if (c>limit): return c,None
     wdpage=pywikibot.ItemPage(repo,oneitem.title())
     wdpage.get()
     if 'P31' in wdpage.claims:
       claim31=wdpage.claims.get('P31')[0].getTarget().title()
       if claim31==isdisamb:
         if (wdpage.sitelinks):
           return c,wdpage.title()
    # BUG FIX: report the real count (was a hard-coded 0), consistent with
    # try2find_taxon / try2find_person.
    return c,None
       
def action(pagename):
  """
  Classify one unconnected nl-wiki page (person / taxon / disambiguation /
  infobox / misc), try to suggest a matching Wikidata item, and append a
  wikitable row to the global `wikistr` accumulator.

  Pages carrying a deletion-nomination template are skipped entirely.
  """
  isPerson=False
  isRedirect=False
  isList=False
  isDisambigue=False
  isNominated=False
  hasInfobox=None
  gotInfobox=False
  hasCategory=None
  isTaxon=False
  global wikistr
  suggest_wd=None
  level=0

  site = pywikibot.Site('nl')
  repo = site.data_repository()
  page = pywikibot.Page(site,pagename)
  for ptemplate in page.templates():
    thistemplate = ptemplate.title()[9:]   # strip 'Sjabloon:' namespace prefix
    if thistemplate.lower() in nomination_templates:
      return                               # nominated for deletion: skip page
    if (hasInfobox is None):
      if (thistemplate=='Taxobox'):
        hasInfobox = 'Taxobox'
        isTaxon=True
      if ((thistemplate[0:7]=='Infobox') and not(thistemplate.lower() in skip_templates)):
        gotInfobox = True                  # found one, save its name
        hasInfobox = ptemplate.title()[17:]  # strip 'Sjabloon:Infobox ' prefix

    isPerson = isPerson or is_person_template(thistemplate)
    # BUG FIX: accumulate like isPerson above — the old plain assignment was
    # overwritten on every iteration, so only the page's LAST template counted.
    isDisambigue = isDisambigue or (thistemplate.lower() in disamb_templates)

  for pcategory in page.categories():
    thiscat = pcategory.title()[10:]       # strip 'Categorie:' namespace prefix
    if hasCategory is None:
      if thiscat[0:9] != 'Wikipedia':      # first non-maintenance category wins
        hasCategory = thiscat

  if isTaxon:
    level,suggest_wd = try2find_taxon(repo,pagename)
  if isDisambigue:
    level,suggest_wd = try2find_dp(repo,pagename)
  if isPerson:
    level,suggest_wd = try2find_person(repo,pagename)

  if (suggest_wd is not None):
    suggest_wd = "[[:d:%s]]" % suggest_wd  # interwiki link to the suggestion

  if hasCategory is None: hasCategory='None'
  if hasInfobox is None: hasInfobox='None'
  onestr = "\n|-\n|[[%s]]\n|%s\n|%s\n|%s\n|%s\n|%s\n|%i\n" % (pagename.replace('_',' '),isDisambigue,isPerson,hasInfobox.replace('_',' '),hasCategory.replace('_',' '), suggest_wd, level )
  if   isTaxon:      wikistr['taxon'] += onestr
  elif isPerson:     wikistr['person'] += onestr
  elif isDisambigue: wikistr['dp'] += onestr
  elif isList:       wikistr['list'] += onestr
  else:
    if (gotInfobox):
      # group remaining pages by infobox name, creating buckets on the fly
      if (hasInfobox in wikistr.keys()):
        wikistr[hasInfobox] += onestr
      else:
        wikistr[hasInfobox] = onestr
    else:
      wikistr['misc'] += onestr
    
def main():
  """
  Fetch all nl-wiki articles without a Wikidata item via a PetScan query,
  run action() on each to classify it, and build (and in non-debug mode,
  save) the report wikitable.
  """
  #exit()  #avoid scheduled runs from crontab
  dparray = []
  global wikistr

  mylanguage=u'nl'
  myproject=u'wikipedia'
  mydepth=u'12'
  # last assignment wins; earlier ones kept as quick-toggle alternatives
  mycategory=u'Wikipedia:Doorverwijspagina'
  mycategory=u'Nederlands persoon'
  mycategory=u'Alles'

  query = 'https://petscan.wmflabs.org/?'\
         'language='+mylanguage+\
         '&project='+myproject+\
         '&depth='+mydepth+\
         '&categories='+urllib.parse.quote_plus(mycategory)+\
         '&combination=subset'\
         '&negcats='\
         '&ns%5B0%5D=1'\
         '&larger='\
         '&smaller='\
         '&minlinks='\
         '&maxlinks='\
         '&before='\
         '&after='\
         '&max_age='\
         '&show_redirects=no'\
         '&edits%5Bbots%5D=both'\
         '&edits%5Banons%5D=both'\
         '&edits%5Bflagged%5D=both'\
         '&templates_yes='\
         '&templates_any='\
         '&templates_no='\
         '&outlinks_yes='\
         '&outlinks_any='\
         '&outlinks_no='\
         '&sparql='\
         '&manual_list='\
         '&manual_list_wiki='\
         '&pagepile='\
         '&common_wiki=cats'\
         '&output_compatability=catscan'\
         '&sortby=none'\
         '&sortorder=ascending'\
         '&wikidata_item=without'\
         '&wikidata_label_language='\
         '&regexp_filter='\
         '&doit='\
         '&interface_language=en'\
         '&active_tab=tab_output'\
         '&format=json'

  if False: #or os.isatty(sys.stdin.fileno()):  #detect run from cron or from console
     # manual test pages for console runs
     action('Fred McLeod')
     #action('Joo Kang-eun')
     #action('Lijst van personages uit Smeris')
     #action('Alias Jimmy Valentine')
     #action('Achillas')
  else:
     print("get query")
     response = urllib.request.urlopen(query)
     print("process pages")
     rawdate = response.read()
     decoded=rawdate.decode('utf8')
     dps=json.loads(decoded)

     # PetScan 'catscan' output: page list lives at ['*'][0]['a']['*']
     for dp in dps['*'][0]['a']['*']:
      dparray.append(dp['title'])

     print("-------")
     for i in range(0,len(dparray)):
      print("%i - %s" % (i,dparray[i]))
      action(dparray[i])

  # BUG FIX: timestamp used %H:%m (hour:MONTH); minutes are %M.
  # Header also gained the 'Niveau' column so it matches the 7 cells per row.
  wikiString = (u'Dit zijn [[Speciaal:OngekoppeldePaginas|pagina\'s die niet gekoppeld zijn aan items]] minus de pagina\'s die genomineerd zijn voor verwijdering.\n\n'+\
               u'Deze pagina wordt automatisch gegenereerd, handmatige updates hebben dus geen zin!\n\n'+\
               u'aangemaakt op %s\n\n' +\
               '{| class=\"wikitable sortable\"\n|-\n!Pagina || Dp || Persoon || Infobox || Categorie || Suggestie || Niveau \n')  % '{:%d-%m-%Y %H:%M}'.format(datetime.datetime.now())

  print("=======")
  for k in wikistr.keys():
    print(wikistr[k])
    wikiString+=wikistr[k]
  wikiString += "\n|}\n"

  if not debugmodus:
     pass#pywikibot.Page(pywikibot.getSite('nl'), wikiurl['nl']).put(wikiString, comment='Update') #Save page
  else:
     print("<!----!>")
     print(wikiString)
# Guard the entry point so importing this module does not trigger a full run.
if __name__ == '__main__':
  main()
get query
process pages
-------
0 - Ziektebeeld
WARNING: API error mwoauth-invalid-authorization-invalid-user: The authorization headers in your request are for a user that does not exist here
---------------------------------------------------------------------------
NoUsername                                Traceback (most recent call last)
<ipython-input-1-27f47cbfe463> in <module>()
    254     print(wikiString)
    255 
--> 256 main()

<ipython-input-1-27f47cbfe463> in main()
    238     for i in range(0,len(dparray)):
    239      print("%i - %s" % (i,dparray[i]))
--> 240      action(dparray[i])
    241 
    242  wikiString = (u'Dit zijn [[Speciaal:OngekoppeldePaginas|pagina\'s die niet gekoppeld zijn aan items]] minus de pagina\'s die genomineerd zijn voor verwijdering.\n\n'+              u'Deze pagina wordt automatisch gegenereerd, handmatige updates hebben dus geen zin!\n\n'+              u'aangemaakt op %s\n\n' +              '{| class=\"wikitable sortable\"\n|-\n!Pagina || Dp || Persoon || Infobox || Categorie || Suggestie \n')  % '{:%d-%m-%Y %H:%m}'.format(datetime.date.today())

<ipython-input-1-27f47cbfe463> in action(pagename)
    157         hasInfobox = ptemplate.title()[17:]
    158 
--> 159     isPerson = isPerson or is_person_template(thistemplate)
    160     #isPerson = thistemplate[8:].lower() in person_templates
    161     #if not(hasInfobox==None):

<ipython-input-1-27f47cbfe463> in is_person_template(checktemplate)
     28 def is_person_template(checktemplate):
     29   site=pywikibot.Site()
---> 30   page=pywikibot.Page(site,checktemplate,10)
     31   for templ in page.templates():
     32     if templ.title() in ['Sjabloon:Navigatie infoboxen personen','Sjabloon:Afbeelding gewenst persoon']:

/srv/paws/pwb/pywikibot/tools/__init__.py in wrapper(*__args, **__kw)
   1738                              cls, depth)
   1739                     del __kw[old_arg]
-> 1740             return obj(*__args, **__kw)
   1741 
   1742         if not __debug__:

/srv/paws/pwb/pywikibot/page.py in __init__(self, source, title, ns)
   2270                 raise ValueError('Title must be specified and not empty '
   2271                                  'if source is a Site.')
-> 2272         super(Page, self).__init__(source, title, ns)
   2273 
   2274     @property

/srv/paws/pwb/pywikibot/page.py in __init__(self, source, title, ns)
    194 
    195         if isinstance(source, pywikibot.site.BaseSite):
--> 196             self._link = Link(title, source=source, default_namespace=ns)
    197             self._revisions = {}
    198         elif isinstance(source, Page):

/srv/paws/pwb/pywikibot/tools/__init__.py in wrapper(*__args, **__kw)
   1738                              cls, depth)
   1739                     del __kw[old_arg]
-> 1740             return obj(*__args, **__kw)
   1741 
   1742         if not __debug__:

/srv/paws/pwb/pywikibot/page.py in __init__(self, text, source, default_namespace)
   5502         # See bug T104864, default_namespace might have been deleted.
   5503         try:
-> 5504             self._defaultns = self._source.namespaces[default_namespace]
   5505         except KeyError:
   5506             self._defaultns = default_namespace

/srv/paws/pwb/pywikibot/site.py in namespaces(self)
   1021         """Return dict of valid namespaces on this wiki."""
   1022         if not hasattr(self, '_namespaces'):
-> 1023             self._namespaces = NamespacesDict(self._build_namespaces())
   1024         return self._namespaces
   1025 

/srv/paws/pwb/pywikibot/site.py in _build_namespaces(self)
   2643         # For versions lower than 1.14, APISite needs to override
   2644         # the defaults defined in Namespace.
-> 2645         is_mw114 = self.mw_version >= '1.14'
   2646 
   2647         for nsdata in self.siteinfo.get('namespaces', cache=False).values():

/srv/paws/pwb/pywikibot/site.py in mw_version(self)
   2777         mw_ver, cache_time = getattr(self, '_mw_version_time', (None, None))
   2778         if mw_ver is None or time.time() - cache_time > 60 * 60 * 24:
-> 2779             mw_ver = MediaWikiVersion(self.version())
   2780             setattr(self, '_mw_version_time', (mw_ver, time.time()))
   2781         return mw_ver

/srv/paws/pwb/pywikibot/site.py in version(self)
   2760             try:
   2761                 version = self.siteinfo.get('generator',
-> 2762                                             expiry=1).split(' ')[1]
   2763             except pywikibot.data.api.APIError:
   2764                 # May occur if you are not logged in (no API read permissions).

/srv/paws/pwb/pywikibot/site.py in get(self, key, get_default, cache, expiry)
   1670                 elif not Siteinfo._is_expired(cached[1], expiry):
   1671                     return copy.deepcopy(cached[0])
-> 1672         preloaded = self._get_general(key, expiry)
   1673         if not preloaded:
   1674             preloaded = self._get_siteinfo(key, expiry)[key]

/srv/paws/pwb/pywikibot/site.py in _get_general(self, key, expiry)
   1616                     .format("', '".join(props)), _logger)
   1617             props += ['general']
-> 1618             default_info = self._get_siteinfo(props, expiry)
   1619             for prop in props:
   1620                 self._cache[prop] = default_info[prop]

/srv/paws/pwb/pywikibot/site.py in _get_siteinfo(self, prop, expiry)
   1539         request._warning_handler = warn_handler
   1540         try:
-> 1541             data = request.submit()
   1542         except api.APIError as e:
   1543             if e.code == 'siunknown_siprop':

/srv/paws/pwb/pywikibot/data/api.py in submit(self)
   2426         cached_available = self._load_cache()
   2427         if not cached_available:
-> 2428             self._data = super(CachedRequest, self).submit()
   2429             self._write_cache(self._data)
   2430         else:

/srv/paws/pwb/pywikibot/data/api.py in submit(self)
   2253                     continue
   2254                 raise NoUsername('Failed OAuth authentication for %s: %s'
-> 2255                                  % (self.site, info))
   2256             if code == 'cirrussearch-too-busy-error':  # T170647
   2257                 self.wait()

NoUsername: Failed OAuth authentication for wikipedia:test: The authorization headers in your request are for a user that does not exist here