import pywikibot 

# Site object for language code 'en' of the 'wiktionary' family.
site = pywikibot.Site('en','wiktionary')
# Category whose member pages are scanned for stem tables further down.
cat = pywikibot.Category(site,'Category:Navajo roots')
from pywikibot import pagegenerators
# Generator over the pages of `cat`; consumed once by the collection loop below.
gen = pagegenerators.CategorizedPageGenerator(cat)

import re, datetime
# Log the start time of the run.
print(datetime.datetime.now().strftime("%X"))

# Pattern used to scrape root pages.  Exactly one group is set per match:
#   group(1): the arguments of a "nv-stem-set-row" template call, i.e. the
#             text between the first "|" and the closing "}}"
#   group(2): the text of a single-"#" definition line, up to the next "#"
#             or the end of the line
# Written as a raw string so that "\|" and "\}" reach the regex engine
# unchanged instead of being invalid string escape sequences.
m = re.compile(r"nv-stem-set-row *\|([^\}]+)\}\}|[^#]# *([^#\n]+)")

#############################

import re, collections

class Node:
    """A heading-delimited span of wikitext.

    Attributes:
        beg: offset of the first character of the heading.
        end: offset at which the span stops (exclusive when slicing).
        nodes: ordered mapping of sub-heading name -> Node.
    """

    def __init__(self, beg, end):
        self.beg = beg
        self.end = end
        self.nodes = collections.OrderedDict()


def addSection(text, title, newsection):
    """Insert or replace the Navajo "Stem" section of a wiki page.

    The language name ("Navajo") and section name ("Stem") are hard-coded.

    The page is split on level-2 (``==Language==``) and level-3
    (``===Section===``) headings; other heading levels are ignored, so they
    stay part of the enclosing level-3 section.

    Args:
        text: current wikitext of the page.
        title: language heading, prepended to ``newsection`` only when the
            page has no ``==Navajo==`` heading yet.
        newsection: wikitext of the Stem section, heading included.

    Returns:
        The updated wikitext, or ``""`` when the text at the target position
        already equals ``newsection`` (ignoring surrounding whitespace).
    """
    heading = re.compile("(=*) *([^=\n]*) *(=*)")

    tree = collections.OrderedDict()
    lastlanguage = None
    lastsection = None

    for p in heading.finditer(text):
        balise = p.group(1)
        start = p.start()
        if balise == "==":
            # A new language closes both the open section and the open
            # language just before the newline that precedes this heading.
            if lastsection is not None:
                lastsection.end = start - 1
            if lastlanguage is not None:
                lastlanguage.end = start - 1
            lastlanguage = tree[p.group(2).strip()] = Node(start, len(text))
            lastsection = None
        elif balise == "===":
            if lastsection is not None:
                lastsection.end = start - 1
            if lastlanguage is None:
                # A level-3 heading before any language heading belongs to
                # no language; skip it rather than fail on a missing key.
                lastsection = None
                continue
            lastsection = lastlanguage.nodes[p.group(2).strip()] = Node(
                start, len(text))

    beg = 0
    end = 0
    needtitle = False
    if "Navajo" in tree:
        lang = tree["Navajo"]
        if "Stem" in lang.nodes:
            beg = lang.nodes["Stem"].beg
            end = lang.nodes["Stem"].end
            print("found Navajo stems ")
        else:
            # Default insertion point, used when no section name sorts before
            # "Stem": just before the first Navajo subsection, or at the end
            # of the Navajo entry when it has no subsection.  This keeps the
            # new section inside the Navajo entry instead of at offset 0.
            if lang.nodes:
                first = next(iter(lang.nodes.values()))
                beg = end = first.beg - 1
            else:
                beg = end = lang.end
            # The last section (in page order) whose name sorts before
            # "Stem" wins; the new section goes right after it.
            for sect in lang.nodes:
                if sect < "Stem":
                    beg = end = lang.nodes[sect].end
                    print("found Navajo", sect)
    else:
        needtitle = True
        # Insert after the last language (in page order) whose name sorts
        # before "Navajo"; stays at offset 0 when there is none.
        for lang in tree:
            if lang < "Navajo":
                beg = end = tree[lang].end

    if text[beg:end].strip() == newsection.strip():
        print("Section HASN'T CHANGED")
        return ""

    if needtitle:
        newsection = title + newsection

    prefix = text[0:beg].strip()
    if prefix:
        # Only separate from preceding text when there is some, so that the
        # page never starts with an empty line.
        prefix += "\n"
    return prefix + newsection.strip() + "\n" + text[end:]
 

############################

class Stem:
    """One stem spelling together with the roots that use it.

    Attributes:
        stem: the stem spelling as passed to the constructor.
        rootsByAspect: dict filled by the caller; starts empty and is
            private to each instance.
    """

    def __init__(self, stem):
        self.rootsByAspect = dict()
        self.stem = stem
# Number of root pages processed so far (only used for progress output).
count = 0

# Maps each stem spelling found on the root pages to its Stem record.
stemlist = {}
for page in gen:
    count += 1
    # Debugging aids: uncomment to restrict the run to a slice of the pages.
    #if count >2:
    # break
    #if count < 100:
    #  continue
    text = page.text
    root = page.title()
    gloss = ""      # definition lines seen since the last stem row
    prevgloss = ""  # most recent non-empty gloss, attached to stem rows
    print(datetime.datetime.now().strftime("%X"), count, page.title())
    for p in re.finditer(m, text):
        if p.group(2):
            # Definition line: accumulate its text, with bold markup removed
            # and cut at the first "(", as a comma-separated gloss.
            if gloss != "":
                gloss += ","
            gloss += p.group(2).replace("'''", "").split('(')[0].strip()

        if p.group(1):
            # Stem row: it is attached to the definitions collected just
            # before it; an empty gloss keeps the previous one.
            if gloss != "":
                prevgloss = gloss
            gloss = ""

            s = p.group(1).split('|')
            aspect = s[0]
            # Fields 1..5 hold the stems; slicing tolerates rows with fewer
            # fields instead of raising IndexError.
            for i, field in enumerate(s[1:6], start=1):
                ss = field.strip().replace("(", "").replace(")", "")
                if ss == "" or ss == "-":
                    continue

                stem = stemlist.get(ss)
                if stem is None:
                    stem = Stem(ss)
                    stemlist[ss] = stem

                # One list per stem field, created on first use of the aspect.
                slots = stem.rootsByAspect.setdefault(
                    aspect, [[], [], [], [], []])
                slots[i - 1].append((root, prevgloss))

        

# Wikitext shared by every generated stem page.
title = "==Navajo==\n"
body = (
    "===Stem===\n"
    "{{head|nv|stem}} \n\n"
    "<!-- WARNING: Do not manually update this page! It is automatically generated based on the root pages -->\n"
    "# ''Verbal stem occurring in the following root, aspect, and mode combinations'':\n"
)

# Fragments of the report page, joined once all stems are processed.
report = []
for ss in sorted(stemlist):
    # Debugging aid: uncomment to process a single stem.
    #if ss!="tsoh":
    # continue
    stem = stemlist[ss]

    # One template row per aspect, one "|"-separated cell per stem field,
    # each cell listing the linked roots with their gloss.
    pieces = ["::{{nv-stem-set-header|indent}}\n"]
    for asp, cells in stem.rootsByAspect.items():
        pieces.append("{{nv-stem-set-row2|" + asp)
        for roots in cells:
            pieces.append("|")
            pieces.extend(
                "{{l|nv|" + entry[0] + "}} (" + entry[1] + ")<br />"
                for entry in roots
            )
        pieces.append("}}\n")
    pieces.append("|}")
    table = "".join(pieces).strip()
    wikitext = title + body + table

    # To test in user space, use "User:Julien_Daux/-" as the page prefix.
    page = pywikibot.Page(site, "-" + stem.stem)
    print(stem.stem, page.exists(), len(page.text), len(wikitext))
    report.append("\n[[-" + stem.stem + "]] \n\n")

    if page.text == wikitext:
        print(stem.stem + " : page already up-to-date. ")
        report.append(":page already up-to-date.\n")
    elif page.text == "":
        print(stem.stem + " : page doesn't exist yet, will create.")
        report.append(":page doesn't exist yet, will create.\n\n")
        page.text = wikitext
        page.save()
    else:
        # The page has other content: replace the Navajo section only.
        newText = addSection(page.text, title, body + table)
        if newText != "":
            report.append(":Navajo section has changed, will update.\n\n")
            page.text = newText
            page.save()
        else:
            report.append(":Navajo section is the same. ")

# Publish the per-stem status report.
links = "".join(report)
page = pywikibot.Page(site, "User:Julien_Daux/Navajo_stems_reversed")
page.text = links
page.save()
16:11:38
16:11:38 1 -CHĄĄʼ
16:11:38 2 -CHĄ́Ą́ʼ
16:11:38 3 -CHĮ́
16:11:38 4 -CHʼIIZH
16:11:39 5 -CHʼIL
16:11:39 6 -DĮĮD
16:11:39 7 -DÍÍN
16:11:39 8 -DIZ
16:11:39 9 -DLĄ́Ą́ʼ
16:11:39 10 -DLOʼ
16:11:39 11 -DZÍÍʼ
16:11:39 12 -GHĄ́Ą́ʼ
16:11:39 13 -GHAAZH
16:11:39 14 -GHÁÁZH
16:11:39 15 -GHAZH
16:11:39 16 -ʼĮĮD
16:11:39 17 -JOOL
16:11:39 18 -LAA
16:11:40 19 -LĮ́Į́ʼ
16:11:40 20 -LIZH
16:11:40 21 -NIIʼ
16:11:40 22 -TʼEʼ
16:11:40 23 -TŁʼǪ́
16:11:40 24 -TSʼÍÍSÍ
16:11:40 25 -TSOH
16:11:40 26 -TSʼÓÓZÍ
16:11:40 27 -YAA
16:11:40 28 -YĄ́Ą́ʼ
16:11:40 29 -ZHIIZH
chin True 398 400
found Navajo stems 
Page [[wiktionary:en:-chin]] saved
chon True 332 332
chon : page already up-to-date. 
chxon True 332 332
chxon : page already up-to-date. 
chí True 338 338
chí : page already up-to-date. 
chííh True 462 462
chííh : page already up-to-date. 
chííł True 505 505
chííł : page already up-to-date. 
chąąʼ True 389 389
chąąʼ : page already up-to-date. 
chą́ą́ʼ True 469 469
chą́ą́ʼ : page already up-to-date. 
chįįh True 561 561
chįįh : page already up-to-date. 
chįįł True 425 425
chįįł : page already up-to-date. 
chįʼ True 359 359
chįʼ : page already up-to-date. 
chį́ True 419 419
chį́ : page already up-to-date. 
chį́į́ł True 331 331
chį́į́ł : page already up-to-date. 
chʼiish True 357 357
chʼiish : page already up-to-date. 
chʼiizh True 325 325
chʼiizh : page already up-to-date. 
chʼil True 381 381
chʼil : page already up-to-date. 
chʼish True 547 547
chʼish : page already up-to-date. 
chʼił True 381 381
chʼił : page already up-to-date. 
chʼíízh True 329 329
chʼíízh : page already up-to-date. 
chʼííł True 381 381
chʼííł : page already up-to-date. 
din True 573 336
found Navajo stems 
Section HASN'T CHANGED
dis True 347 347
dis : page already up-to-date. 
diz True 380 380
diz : page already up-to-date. 
dlish True 327 327
dlish : page already up-to-date. 
dloh True 349 349
dloh : page already up-to-date. 
dloʼ True 321 321
dloʼ : page already up-to-date. 
dlóóh True 349 349
dlóóh : page already up-to-date. 
dlą́ True 324 324
dlą́ : page already up-to-date. 
dlą́ą́ True 385 385
dlą́ą́ : page already up-to-date. 
dlį́į́h True 478 478
dlį́į́h : page already up-to-date. 
dlį́į́ł True 385 385
dlį́į́ł : page already up-to-date. 
dzih True 379 379
dzih : page already up-to-date. 
dziih True 379 379
dziih : page already up-to-date. 
dzííʼ True 336 336
dzííʼ : page already up-to-date. 
díís True 347 347
díís : page already up-to-date. 
dįįd True 333 333
dįįd : page already up-to-date. 
dįįh True 413 413
dįįh : page already up-to-date. 
dįįł True 333 333
dįįł : page already up-to-date. 
ghaazh True 409 409
ghaazh : page already up-to-date. 
ghash True 835 835
ghash : page already up-to-date. 
ghazh True 379 379
ghazh : page already up-to-date. 
gháásh True 493 493
gháásh : page already up-to-date. 
gháázh True 424 424
gháázh : page already up-to-date. 
ghą́ True 342 325
found Navajo stems 
Section HASN'T CHANGED
ghą́ą́h True 510 510
ghą́ą́h : page already up-to-date. 
ghą́ą́ł True 449 449
ghą́ą́ł : page already up-to-date. 
ghą́ą́ʼ True 542 542
ghą́ą́ʼ : page already up-to-date. 
jool True 470 434
found Navajo stems 
Page [[wiktionary:en:-jool]] saved
jooł True 329 365
found Navajo stems 
Sleeping for 9.5 seconds, 2016-10-22 16:11:51
Page [[wiktionary:en:-jooł]] saved
joł True 365 365
joł : page already up-to-date. 
laa True 319 319
laa : page already up-to-date. 
leeh True 347 347
leeh : page already up-to-date. 
leeł True 320 320
leeł : page already up-to-date. 
leʼ True 346 346
leʼ : page already up-to-date. 
lish True 417 417
lish : page already up-to-date. 
lizh True 447 447
lizh : page already up-to-date. 
lyaa True 328 328
lyaa : page already up-to-date. 
lééh True 319 319
lééh : page already up-to-date. 
líísh True 353 353
líísh : page already up-to-date. 
lííł True 319 319
lííł : page already up-to-date. 
lį́ True 323 323
lį́ : page already up-to-date. 
lį́į́ʼ True 320 320
lį́į́ʼ : page already up-to-date. 
neʼ True 472 472
neʼ : page already up-to-date. 
nih True 431 431
nih : page already up-to-date. 
niih True 347 347
niih : page already up-to-date. 
niiʼ True 377 377
niiʼ : page already up-to-date. 
nééh True 328 328
nééh : page already up-to-date. 
nííh True 347 347
nííh : page already up-to-date. 
nííł True 472 472
nííł : page already up-to-date. 
tso True 326 326
tso : page already up-to-date. 
tsoh True 1521 326
found Navajo stems 
Section HASN'T CHANGED
tsʼíísí True 338 338
tsʼíísí : page already up-to-date. 
tłʼó True 324 324
tłʼó : page already up-to-date. 
tłʼóóh True 412 412
tłʼóóh : page already up-to-date. 
tłʼóół True 441 441
tłʼóół : page already up-to-date. 
tłʼǫ́ True 383 383
tłʼǫ́ : page already up-to-date. 
tʼeʼ True 321 321
tʼeʼ : page already up-to-date. 
tʼé True 321 321
tʼé : page already up-to-date. 
wosh True 538 538
wosh : page already up-to-date. 
yą́ True 321 321
yą́ : page already up-to-date. 
yą́ą́ʼ True 435 435
yą́ą́ʼ : page already up-to-date. 
yį́į́h True 407 407
yį́į́h : page already up-to-date. 
yį́į́ł True 379 379
yį́į́ł : page already up-to-date. 
zhiish True 345 345
zhiish : page already up-to-date. 
zhiizh True 427 427
zhiizh : page already up-to-date. 
zhish True 583 583
zhish : page already up-to-date. 
zhíísh True 397 397
zhíísh : page already up-to-date. 
ʼįįd True 407 407
ʼįįd : page already up-to-date. 
ʼįįh True 540 540
ʼįįh : page already up-to-date. 
ʼį́ True 335 335
ʼį́ : page already up-to-date. 
Sleeping for 3.5 seconds, 2016-10-22 16:12:07
Page [[wiktionary:en:User:Julien Daux/Navajo stems reversed]] saved