import pywikibot
import urllib
import requests
import os
import time  # time.sleep() for throttling and time.time() for the cache-age check
import datetime
import random
from pywikibot import pagegenerators as pg

""" query all players with no dollars yet 
SELECT ?item ?wtaID ?dob WHERE {
  ?item wdt:P31 wd:Q5;
    wdt:P106 wd:Q10833314;
    wdt:P597 ?wtaID .
  optional {?item wdt:P2121 ?dollars} .
  optional {?item wdt:P569 ?dob}
  filter (!bound(?dollars))
  filter (?dob>"2002-01-01T00:00:00Z"^^xsd:dateTime)      
}
"""
wta_html_srch_dollar='<div class="player-header-stats__section">'
wta_html_srch_balans_single='<div class="player-header-stats__value">'
wta_html_srch_career_high='Career High</div>'
oneweek=604800  # number of seconds in one week
site=pywikibot.Site('wikidata','wikidata')
repo=site.data_repository()
Psingle='P564'
Pdouble='P555'
Pwtaid='P597'
Prank='P1352'
Pprizemoney='P2121'
Qdollar='Q4917' 

def wd_sparql_query(spq):
  generator=pg.WikidataSPARQLPageGenerator(spq,site=pywikibot.Site('wikidata','wikidata'))
  for wd in generator:
    try:
      wd.get(get_redirect=True)
      yield wd
    except Exception:
      pass  # skip items that cannot be loaded
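
# Usage sketch (not called anywhere): a trimmed-down variant of the SPARQL query in the
# docstring above, fed straight into wd_sparql_query(); it prints the Wikidata IDs of WTA
# players that have no prize-money (P2121) statement yet. The function name is illustrative.
def example_list_players_without_prize_money():
  spq=('SELECT ?item WHERE { ?item wdt:P31 wd:Q5; wdt:P106 wd:Q10833314; wdt:P597 ?wtaID . '
       'optional {?item wdt:P2121 ?dollars} filter (!bound(?dollars)) }')
  for wd in wd_sparql_query(spq):
    print(wd.title())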
        
def all_from_cat(thissite,thiscat):
  site=pywikibot.Site(thissite)
  cat=pywikibot.Category(site,thiscat)
  gen=pg.CategorizedPageGenerator(cat,recurse=12)  # walk subcategories up to 12 levels deep
  for player in gen:
    wd=player.data_item()
    wd.get(get_redirect=True)
    yield wd.title()


class WTA:
  def __init__(self):  
    self.wtatxt=''
    self.dollar=0
    self.career_high_single_value=0
    self.career_high_single_date=None
    self.career_high_double_value=0
    self.career_high_double_date=None
    self.balans_single_win=0
    self.balans_single_loose=0
    self.balans_double_win=0
    self.balans_double_loose=0
    self.insta=''
    self.wdID=None #like Q239758 -> Arantxa Rus on wikidata
    self.wditem=None

  def getWTAdata(self):
    ptxt=self.wtatxt or ''
    sdate=''
    ddate=''
    try:
      start=ptxt.find(wta_html_srch_dollar)+len(wta_html_srch_dollar)+1
      xstr=ptxt[start:start+60]
      print(f'1Start={start}, str=[{xstr}]')
      starts=start+ptxt[start:].find('data-single=')+13
      print(f'2starts={starts}, str=[{ptxt[starts:starts+60]}]')
      starts+=ptxt[starts:].find('data-single=')+13
      print(f'3starts={starts}, str=[{ptxt[starts:starts+60]}]')
      end=ptxt[starts:].find('"')
      print(f'end={end}, str={ptxt[starts:starts+end]}')
      sdate=ptxt[starts:starts+end]

      startd=start+ptxt[start:].find('data-double=')+13
      startd+=ptxt[startd:].find('data-double=')+13
      end=ptxt[startd:].find('"')
      ddate=ptxt[startd:startd+end]
    except Exception:
      self.career_high_single_date=''
      self.career_high_double_date=''
      print('error getting career-high dates')
    try:
      self.career_high_single_date=datetime.datetime.strptime(sdate,'%b %d %Y')
      self.career_high_double_date=datetime.datetime.strptime(ddate,'%b %d %Y')
    except ValueError:
      print(f'Date format issue\nsingle: [{sdate}]\ndouble: [{ddate}]\n')
    try:  
      dtxt=ptxt[ptxt.find(wta_html_srch_dollar):]
      start=dtxt.find('Prize Money</div>')
      dollartxt=dtxt[start:]
      start=dollartxt.find('<div data-single=\"')
      end=dollartxt[18+start:].find('\"')  
      self.dollar=int(dollartxt[18+start:18+start+end])
    except Exception:
      self.dollar=0
    try:
      stxt=ptxt[len(wta_html_srch_balans_single)+ptxt.find(wta_html_srch_balans_single):] #skip the first (YTD) value
      stxt=stxt[stxt.find(wta_html_srch_balans_single):] #move on to the career value
      start=stxt.find('data-single="')
      end=stxt[start+13:].find('"')
      balanstxt=stxt[13+start:13+start+end]
      self.balans_single_win=int(balanstxt)
      start=stxt.find('data-double="')
      end=stxt[start+13:].find('"')
      balanstxt=stxt[13+start:13+start+end]
      self.balans_double_win=int(balanstxt)
      stxt=stxt[start+1:]  #move to the next section (losses)
      start=stxt.find('data-single="')
      end=stxt[start+13:].find('"')
      balanstxt=stxt[13+start:13+start+end]
      self.balans_single_loose=int(balanstxt)
      start=stxt.find('data-double="')
      end=stxt[start+13:].find('"')
      balanstxt=stxt[13+start:13+start+end]
      self.balans_double_loose=int(balanstxt)
    except Exception:
      self.balans_single_win=0
      self.balans_single_loose=0
      self.balans_double_win=0
      self.balans_double_loose=0
      print('No balance')
    try:
      stxt=ptxt[ptxt.find(wta_html_srch_career_high):]
      start=stxt.find('data-single="')
      end=stxt[start+13:].find('\"')
      careerhightxt=stxt[13+start:13+start+end]
      self.career_high_single_value=int(careerhightxt)
      start=stxt.find('data-double="')
      end=stxt[start+13:].find('\"')
      careerhightxt=stxt[13+start:13+start+end]
      self.career_high_double_value=int(careerhightxt)
    except Exception:
      self.career_high_single_value=0
      self.career_high_single_date=''
      self.career_high_double_value=0
      self.career_high_double_date=''
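
  # Regex-based sketch of the same data-single="..."/data-double="..." extraction used in
  # getWTAdata() above, assuming the WTA profile page still carries these attributes. It is
  # not used by the class; it only offers a less offset-sensitive way to inspect the raw
  # attribute values in self.wtatxt while debugging. The method name is illustrative.
  def extractDataAttrs(self):
    import re
    singles=re.findall(r'data-single="([^"]*)"',self.wtatxt or '')
    doubles=re.findall(r'data-double="([^"]*)"',self.wtatxt or '')
    return singles,doubles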

  def url2filename(self,url):
    return 'WTA/'+url.replace('/','^')  # cache files live in the WTA/ subdirectory, which must exist

  def filename2url(self,filename):
    return filename[4:].replace('^','/')  # strip the 'WTA/' prefix and restore the slashes
  
  def URLfromid(self,id):  
    return f'https://www.wtatennis.com/players/{id}'

  def readWTAprofileFromWeb(self,id):
    url=self.URLfromid(id)
    print(f'Update from WTA-website: {id}')
    f=requests.get(url)  
    self.wtatxt=f.text
    fname=self.url2filename(id)
    print(fname)
    self.writeWTAfileToCache(fname)
    time.sleep(60+int(60 * random.random()))  # wait between 1 and 2 minutes to not overload WTA-website
    
  def writeWTAfileToCache(self,filename):
    with open(filename,'w') as outfile:  # overwrite: appending would pile up stale copies
      outfile.write(str(self.wtatxt))

  def readWTAfileFromCache(self,id):
    filename=self.url2filename(id)
    try:
      if os.path.exists(filename):
        if (time.time()-os.path.getctime(filename))>oneweek:  # cache entry older than one week: refresh from the web
          print('read from web')
          self.readWTAprofileFromWeb(self.filename2url(filename))  # more than 7 days old; this also rewrites the cache
        else:
          print('read from cache')
          with open(filename) as rfile:
            self.wtatxt=rfile.read()
      else:
        self.readWTAprofileFromWeb(id)  # not cached yet: fetch from the web
    except Exception as errcode:
      print('File error... reason: %s' % str(errcode))
      raise  # stop here; a broken cache should not be silently ignored
    
  def readbyWTAid(self,id,update=True):
    self.readWTAfileFromCache(id)
    if (update):
      self.getWTAdata()  
  
  def read_wd(self,usewdID=None):
    if usewdID is None:
      usewdID=self.wdID
    else:
      self.wdID=usewdID
    self.wditem=pywikibot.ItemPage(repo,usewdID)    
    self.wditem.get(get_redirect=True)
    if Pwtaid in self.wditem.claims:
       wtaID=self.wditem.claims[Pwtaid][0].getTarget()
       print(f'found id {wtaID}')
       self.readbyWTAid(wtaID)


  def print(self):
    print(f'Single: {self.balans_single_win}–{self.balans_single_loose}')
    print(f'Double: {self.balans_double_win}–{self.balans_double_loose}')
    print(f'${self.dollar}')
    print(f'Career high: singles: {self.career_high_single_value} on {self.career_high_single_date}')
    print(f'             doubles: {self.career_high_double_value} on {self.career_high_double_date}')
    
  def wdUpdate(self):
    self.read_wd()
    try:
      if self.wditem.claims['P21'][0].getTarget().title()!='Q6581072': #if not a woman, don't update
        print('Not a woman, but: %s'% self.wditem.claims['P21'][0].getTarget().title())
        return(0)
    except Exception:
      print('Oioioi: exception! for %s' % self.wditem.title())
      return(0)  #catch all errors; on error, assume it is not a woman
    fsingle='%s–%s'%(self.balans_single_win,self.balans_single_loose)
    fdouble='%s–%s'%(self.balans_double_win,self.balans_double_loose)
    
    if fsingle!='0–0':
      if Psingle in self.wditem.claims:
        wd_balans_single=self.wditem.claims[Psingle][0].getTarget()
        if wd_balans_single!=fsingle:
          claim=self.wditem.claims[Psingle][0]
          claim.changeTarget(fsingle,summary='balance single games from WTA-website')
      else:
        claim=pywikibot.Claim(repo,Psingle)
        claim.setTarget(fsingle)
        self.wditem.addClaim(claim,summary='new single balance from WTA-website')

    if fdouble!='0–0':
      if Pdouble in self.wditem.claims:
        wd_balans_double=self.wditem.claims[Pdouble][0].getTarget()
        if wd_balans_double!=fdouble:
          claim=self.wditem.claims[Pdouble][0]
          claim.changeTarget(fdouble,summary='balance double games from WTA-website')
      else:
        claim=pywikibot.Claim(repo,Pdouble)
        claim.setTarget(fdouble)
        self.wditem.addClaim(claim,summary='new double balance from WTA-website')
            
    if Pprizemoney in self.wditem.claims:
      dt=self.wditem.claims[Pprizemoney][0].getTarget()
      if dt is not None:
        wd_dollar=dt.amount
        if self.dollar>wd_dollar:
          claimDollar=self.wditem.claims[Pprizemoney][0]
          target=pywikibot.WbQuantity(self.dollar,'http://www.wikidata.org/entity/'+Qdollar,site=site)
          claimDollar.changeTarget(target,summary='update new $$$-value from WTA-website')
    else:
      claimDollar=pywikibot.Claim(repo,Pprizemoney)
      target=pywikibot.WbQuantity(self.dollar,'http://www.wikidata.org/entity/'+Qdollar,site=site)
      claimDollar.setTarget(target)
      self.wditem.addClaim(claimDollar,summary='add earned $$$ from WTA website')

    if Prank in self.wditem.claims:
      for onerank in self.wditem.claims[Prank]:
        if 'P641' in onerank.qualifiers:
          sport=onerank.qualifiers['P641'][0].getTarget()
          if sport is not None: sport=sport.title()
          rank=onerank.getTarget().amount
          if sport=='Q18123880': #tennis singles
            if (rank > self.career_high_single_value) and (self.career_high_single_value>0):
              target=pywikibot.WbQuantity(self.career_high_single_value,site=site)
              onerank.changeTarget(target,summary='new highest single position from WTA-website')
              updatePointInTime(onerank,self.career_high_single_date)
              print(f'-----------------Single: {onerank.getTarget().amount}')
              raise SystemExit('manual check: verify the new single ranking')
          elif sport=='Q18123885': #tennis doubles
            if (rank > self.career_high_double_value) and (self.career_high_double_value>0):
              target=pywikibot.WbQuantity(self.career_high_double_value,site=site)
              onerank.changeTarget(target,summary='new highest double position from WTA-website')
              updatePointInTime(onerank,self.career_high_double_date)
              raise SystemExit('manual check: verify the new double ranking')
    else:
      # no ranking claims yet; note that no P641 sport qualifier is added here
      if self.career_high_single_value>0:
        claim=pywikibot.Claim(repo,Prank)
        target=pywikibot.WbQuantity(self.career_high_single_value,site=site)
        claim.setTarget(target)
        self.wditem.addClaim(claim,summary='add new single highest position from WTA-website')
        raise SystemExit('manual check: verify the new single ranking claim')
      if self.career_high_double_value>0:
        claim=pywikibot.Claim(repo,Prank)
        target=pywikibot.WbQuantity(self.career_high_double_value,site=site)
        claim.setTarget(target)
        self.wditem.addClaim(claim,summary='add new double highest position from WTA-website')
        raise SystemExit('manual check: verify the new double ranking claim')
        
def updatePointInTime(rankingclaim,datestr):
  if datestr=='' or datestr is None:
    return  # nothing to do without a parsed date; only the guard is implemented (see the sketch below)
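
# Sketch of the intended body for updatePointInTime(), assuming the date argument is a
# datetime produced by getWTAdata() and that the ranking claim should only get a P585
# (point in time) qualifier when it does not have one yet. It is not called anywhere;
# wire it into updatePointInTime() once the behaviour has been verified.
def addCareerHighDateQualifier(rankingclaim,careerhighdate):
  if careerhighdate in ('',None):
    return
  if 'P585' in rankingclaim.qualifiers:
    return  # a point-in-time qualifier already exists; leave it alone
  qualifier=pywikibot.Claim(repo,'P585')  # P585 = point in time
  qualifier.setTarget(pywikibot.WbTime(year=careerhighdate.year,month=careerhighdate.month,day=careerhighdate.day))
  rankingclaim.addQualifier(qualifier,summary='career-high date from WTA-website')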
    
    
"""
main
"""    
    
print('Start...')
id='Q20968624'  # leftover test item; the loop below overrides it
#for id in all_from_cat('nl','Categorie:Tennisser naar nationaliteit'):
#for wd in wd_sparql_query('SELECT ?item ?wtaID ?dob WHERE {  ?item wdt:P31 wd:Q5;    wdt:P106 wd:Q10833314;    wdt:P597 ?wtaID .  optional {?item wdt:P2121 ?dollars} .  optional {?item wdt:P569 ?dob}  filter (!bound(?dollars))  filter (?dob>"2001-01-01T00:00:00Z"^^xsd:dateTime)      }'):
for id in ['Q30812']:  # single test item; swap in one of the commented generators above for a full run
  print('-----------------------------------+')
  player=WTA()
  #player.read_wd(wd.title())  # use this line when iterating with wd_sparql_query
  player.read_wd(id)
  print('-----------------------------------|')
  player.print()
  print('-----------------------------------|')
  player.wdUpdate()
  print('-----------------------------------+')
print('Done...')
Start...
-----------------------------------+
Sleeping for 5.0 seconds, 2020-01-22 19:40:59
[... ~115 more pywikibot throttle messages up to 2020-01-22 19:50:58 omitted ...]
found id 314206/petra-kvitova
1Start=42, str=[]
2starts=54, str=[]
3starts=66, str=[]
end=-1, str=
Date format issue
single: []
double: []

No balance
-----------------------------------|
Single: 0–0
Double: 0–0
$0
Career high: singles: 0 on 
             doubles: 0 on 
-----------------------------------|
found id 314206/petra-kvitova
1Start=42, str=[]
2starts=54, str=[]
3starts=66, str=[]
end=-1, str=
Date format issue
single: []
double: []

No balance
-----------------------------------+
Done...
'Q105868',
'Q10996110',
'Q11052500',
'Q11459',
'Q115079',
'Q11578',
'Q11583',
'Q11657',
'Q11659',
'Q11662',
'Q11666',
'Q122632',
'Q1293027',
'Q132147',
'Q13426427',
'Q13512756',
'Q13636960',
'Q13637616',
'Q13646075',
'Q13850958',
'Q14286',
'Q144343',
'Q14616346',
'Q14618664',
'Q14626025',
'Q14751169',
'Q14751256',
'Q14863648',
'Q15269',
'Q15469035',
'Q15478936',
'Q15634509',
'Q15642606',