import pywikibot 
 
from pywikibot import pagegenerators

# Wiki to work on: the 'en' site of the 'wiktionary' family.
site = pywikibot.Site('en', 'wiktionary')

# Category whose member pages are scanned by the rest of the script.
cat = pywikibot.Category(site, 'Category:Navajo verbs')

# Generator over the pages that belong to that category.
gen = pagegenerators.CategorizedPageGenerator(cat)

import re, datetime
# Print the wall-clock time at which the run starts.
print(datetime.datetime.now().strftime("%X"))

# Patterns applied to the wikitext of each entry.  They are raw strings so
# that regex escapes such as \| and \} reach the regex engine untouched
# instead of being parsed as (invalid) Python string escapes.

# nv-prefixes template: group 1 is everything between the first "|" and "}}".
m = re.compile(r"nv-prefixes *\|([^\}]+)\}\}")
# nv-paradigm template: group 1 is the pipe-separated argument text.
m2 = re.compile(r"nv-paradigm *\|([^\}]+)\}\}")
# nv-link-to-stem template: group 1 is the pipe-separated argument text.
m3 = re.compile(r"nv-link-to-stem *\|([^\}]+)\}\}")
# Bulleted "* {{l|nv|...}}" item: group 1 is the text after "l|nv|".
m4 = re.compile(r"\* \{\{l\|nv\|([^\}]+)\}\}")
# Bulleted "* [[...]]" item: group 1 is the link content.
m5 = re.compile(r"\* \[\[([^\]]+)\]\]")
# nv-verbtable / nv-verbtable-ext whose first argument starts with "FUT":
# group 2 is that first argument, group 3 the remaining argument text.
m6 = re.compile(r"nv-verbtable(-ext)* *\|(FUT[^\|]+)\|([^\}]+)\}\}")
# A "#" directly after a newline: group 1 is the rest of that line.
m7 = re.compile(r"\n#(.*)")


    
class Verb:
    """Mutable record holding what the script extracts for one verb entry.

    Every field except ``verb`` starts out empty and is filled in by the
    module-level processing loops.
    """

    def __init__(self, verb):
        # Page title of the entry.
        self.verb = verb
        # Position of the page in the category listing (0 until assigned).
        self.count = 0

        # Arguments of the nv-prefixes and nv-paradigm templates.
        self.prefixes = []
        self.paradigm = []
        # Eleven independent empty lists.
        self.slots = [[] for _ in range(11)]

        # Text fields, all initially the empty string.
        for field in ("stem", "conj", "disj", "obj", "cl",
                      "conjugation", "dashift"):
            setattr(self, field, "")
    
from itertools import islice

verbList = []     # Verb objects for pages that contain an nv-prefixes template
prefixes = set()  # every distinct prefix label found on those pages

# 1-based positions (in generator order) of the first and last page handled
# by this run.
FIRST_PAGE = 451
LAST_PAGE = 750

# islice skips the pages before the window and stops right after its last
# page; enumerate keeps `count` equal to each page's 1-based position.
for count, page in enumerate(islice(gen, FIRST_PAGE - 1, LAST_PAGE),
                             start=FIRST_PAGE):
    text = page.text
    verb = page.title()
    v = Verb(verb)

    # Prefix list.  Only pages that have one are kept in verbList.
    p = m.search(text)
    if p:
        print(datetime.datetime.now().strftime("%X"), count, verb)
        s = p.group(1).split('|')
        verbList.append(v)
        v.count = count
        v.prefixes = s
        prefixes.update(s)

    # Paradigm arguments, plus the da-shift flag read from the raw wikitext.
    # (Previously commented-out experiments that appended marker characters
    # to v.paradigm[1] are not carried over.)
    p2 = m2.search(text)
    if p2:
        v.paradigm = p2.group(1).split('|')
        if "|da}}" in text:
            v.dashift = "1"
        elif "|da-partial}}" in text:
            v.dashift = "3"

    # Stem: first argument of nv-link-to-stem, stripped, hyphens removed.
    p3 = m3.search(text)
    if p3:
        v.stem = p3.group(1).split('|')[0].strip().replace("-", "")

    # NOTE(review): the first 200 characters of the page are excluded from
    # the verb-table search; the reason is not visible here -- confirm.
    p6 = m6.search(text[200:])
    if p6:
        # m6 only matches when group 2 starts with "FUT", so a separate
        # check of the mode would never fail.
        print(p6.group(2))
        # Flatten the table arguments into one space-separated string.
        v.conjugation = p6.group(3).replace("\n", " ").replace("|", " ")




### prefixes classification

def _prefix_slot(p):
    """Return the slot number assigned to prefix label *p*.

    The rules are tried in order and the first match wins; labels matching
    no rule fall into slot 1.  Only slots 0, 1, 4, 6, 7, 8 and 9 are ever
    returned.
    """
    parts = p.split('-')
    first = parts[0]
    # First two hyphen-separated parts, or a blank placeholder that matches
    # no rule when the label has fewer than two parts.
    first_two = parts[0] + "-" + parts[1] if len(parts) >= 2 else " "

    if p == "-" or first in ('Ø', 'l', 'ł', 'd', 'L'):
        return 9
    if p in ('a-away', 'da-death', 'ni-disj', 'di-disj'):
        return 1
    if p in ('ni', 'si', 'yi-semel', 'yi-trans', 'yi-prog', 'yi'):
        return 7
    if first_two in ('ni-3s', 'si-3s', 'yi-3s', 'yi-prog'):
        return 7
    if first in ('di', 'hi', 'hí', 'ni', 'ní', 'yi', 'yíní', 'dini', 'yini',
                 'sh', 'dzi'):
        return 6
    if p in ('a', 'ho', '3o', 'ah', '4i'):
        return 4
    if p in ('3s', 'y-peg'):
        return 8
    if p in ('b', 'ał', 'ah-disj'):
        return 0
    if len(p) >= 2 and p[0] == "-":
        return 0
    return 1


# prefixslot:   prefix label -> slot number
# slotprefixes: slot number  -> list of prefix labels (slots 0-9 always exist)
prefixslot = {}
slotprefixes = {i: [] for i in range(10)}
for p in prefixes:
    slot = _prefix_slot(p)
    prefixslot[p] = slot
    # setdefault creates a missing bucket AND stores the prefix; the previous
    # fallback created the bucket but left the prefix out.
    slotprefixes.setdefault(slot, []).append(p)


def _display_form(vp):
    """Return the text written to the output for prefix label *vp*.

    Uses exact comparisons: a parenthesised string such as ('3s') is a plain
    string, so ``vp in ('3s')`` was a substring test that also matched
    labels like '3' or 's'.
    """
    # A bare hyphen is shown as the zero sign.  The empty label keeps the
    # rendering it had before (it used to match the substring test).
    if vp in ('', '-'):
        return "Ø"
    if vp[0] == "-":  # postpositions: drop the leading hyphen
        return vp[1:]
    fixed = {'3s': '-', '3o': 'y', '4i': 'ʼa', 'L': 'ł'}
    if vp in fixed:
        return fixed[vp]
    # Default: the part of the label before its first hyphen.
    return vp.split('-')[0]


# Distribute each verb's prefixes over its disj / obj / conj / cl fields
# according to the slot computed in the classification step.
for verb in verbList:
    for j, vp in enumerate(verb.prefixes):
        slot = prefixslot[vp]

        # A slot-4 prefix in first position is treated as slot 0 when the
        # prefix right after it is a slot-0 prefix.  The length check avoids
        # an IndexError on verbs that have a single prefix.
        if (slot == 4 and j == 0 and len(verb.prefixes) > 1
                and verb.prefixes[1] in slotprefixes[0]):
            slot = 0

        vpp = _display_form(vp)

        # Prefixes in slots 7 and 8 are not recorded in any field.
        if slot in (0, 1):
            verb.disj += vpp + ":"
        elif slot == 4:
            verb.obj = vpp
        elif slot == 6:
            verb.conj += vpp + ":"
        elif slot == 9:
            verb.cl = vpp



       
# Short codes for the paradigm names, keyed by the lower-cased, stripped name.
_PARADIGM_CODES = {
    "momentaneous": "MOM",
    "neuter imperfective": "N-IMP",
    "neuter perfective": "N-PERF",
    "neuter repetitive": "N-REP",
    "neuter absolute": "N-ABS",
    "neuter comparative": "N-COMP",
    "repetitive": "REP",
    "conclusive": "CONCL",
    "continuative": "CONT",
    "conative": "CONAT",
    "semelfactive": "SEM",
    "transitional": "TRANS",
    "durative": "DUR",
    "progressive": "PROG",
    "reversative": "REV",
}

# Replace the first paradigm argument of every verb with its short code.
# Names without a code are left exactly as they were.
for verb in verbList:
    if not verb.paradigm:
        continue
    full_name = verb.paradigm[0]
    lookup_key = full_name.lower().strip()
    verb.paradigm[0] = _PARADIGM_CODES.get(lookup_key, full_name)
    
    
###### print table
# Splits a word into (group 1) a leading run of characters that are not in
# the vowel class and (group 2) the final vowel run plus whatever follows it.
mc = re.compile("([^aąáą́eęéę́iįíį́oǫóǫ́]+)([aąáą́eęéę́iįíį́oǫóǫ́]+[^aąáą́eęéę́iįíį́oǫóǫ́]*)$")

# One text chunk per verb; they are concatenated into `s` after the loop.
entries = []
for verb in verbList:
    # Verbs for which no conjugation was extracted are left out.
    if verb.conjugation == "":
        continue

    # Rebuild the stem: keep the leading part of the stored stem and take
    # the vowel-plus-tail part from the first form of the conjugation.
    first_form = verb.conjugation.split(" ")[0]
    form_match = mc.search(first_form)
    stem_match = mc.search(verb.stem)
    if form_match and stem_match:
        verb.stem = stem_match.group(1) + form_match.group(2)

    mode = "fut"
    asp = "Ø"  # the aspect field is always written as "Ø"
    disj = verb.disj.strip(":")
    conj = verb.conj.strip(":")
    obj = verb.obj
    cl = verb.cl

    # Key: mode, optional disj/obj/conj fields, then aspect, classifier, stem.
    fragments = ["[\"", mode]
    if disj != "":
        fragments.append("|disj=" + disj)
    if obj != "":
        fragments.append("|obj=" + obj)
    if conj != "":
        fragments.append("|conj=" + conj)
    fragments.extend(["|", asp, "|", cl, "|", verb.stem])

    # Value: the flattened conjugation, followed by a blank line.
    fragments.extend(["\"] = \"", verb.conjugation, "\",", "\n\n"])
    entries.append("".join(fragments))

s = "".join(entries)





# Write the generated table to the wiki page "User:Julien_Daux/Testcases",
# replacing the page's whole text, and save it.
page=pywikibot.Page(site,"User:Julien_Daux/Testcases") 
page.text=s
page.save()
19:23:58
19:23:58 451 yibadooghááh
19:23:58 452 yibah
19:23:59 453 yibéézh
19:23:59 454 yibizh
FUTURE

19:23:59 455 yicha
FUTURE

19:23:59 456 yichʼid
FUTURE

19:23:59 457 yichííh
FUTURE

19:23:59 458 yíchííł
19:23:59 459 yichʼiish
FUTURE

19:23:59 460 yichʼish
FUTURE

19:23:59 461 yidą́
FUTURE

19:23:59 462 yidąąh
19:23:59 463 yídéeltǫʼ
19:23:59 464 yideeł
FUTURE

19:24:00 466 yidiiłtłʼíísh
19:24:00 467 yidiitsʼį́į́h
FUTURE

19:24:00 468 yidiłhį́į́h
19:24:00 469 yidiłid
19:24:00 470 yidiséí
FUTURE

19:24:00 471 yidiz
19:24:00 472 yidlą́
FUTURE

19:24:00 473 yidleeʼ
19:24:00 474 yidleesh
FUTURE

19:24:00 475 yidloh
19:24:01 476 yidlóóh
19:24:01 477 yidzííd
FUTURE

19:24:01 478 yidziih
FUTURE

19:24:01 479 yidzį́į́s
19:24:01 480 yiʼeeł
19:24:01 481 yiʼéés
19:24:01 482 yiʼeesh
FUTURE

19:24:01 483 yiʼéésh
19:24:01 484 yigááł
19:24:01 485 yigan
19:24:02 486 yigeeh
FUTURE

19:24:02 487 yighą́
19:24:02 488 yigháád
19:24:02 489 yighááh
19:24:02 490 yíghááh
19:24:02 491 yighaał
19:24:02 492 yighaas
19:24:03 493 yigháázh
19:24:03 494 yigháʼdíłdlaad
19:24:03 495 yigoh
19:24:03 496 yihah
19:24:03 497 yihę́ę́s
19:24:04 498 yíhoołʼaah
FUTURE

19:24:04 499 yiʼį́
19:24:04 500 yiibááh
19:24:04 501 yiichííh
19:24:04 502 yiigááh
19:24:04 503 yiijį́į́h
19:24:04 504 yiiltééh
19:24:04 505 yiitsóóh
19:24:04 506 yiʼiz
19:24:05 507 yiizįįh
19:24:05 508 yijááh
19:24:05 509 yíjááh
FUTURE

19:24:05 510 yijah
19:24:05 511 yijeeh
FUTURE

19:24:05 512 yijį́į́ł
19:24:05 513 yikʼá
FUTURE

19:24:05 514 yikaah
FUTURE

19:24:05 515 yikááh
19:24:05 516 yikʼąąs
19:24:05 517 yikʼaash
19:24:05 518 yikad
FUTURE

19:24:06 519 yikah
19:24:06 520 yikʼaih
19:24:06 521 yikʼeed
FUTURE

19:24:06 522 yikʼęęh
19:24:06 523 yikʼíididis
FUTURE

19:24:06 525 yilʼá
19:24:06 526 yilʼaah
FUTURE

19:24:06 527 yilchʼąął
19:24:06 528 yilchʼął
19:24:06 529 yildééh
FUTURE

19:24:06 530 yildeeł
19:24:06 531 yildin
19:24:07 532 yildlóósh
FUTURE

19:24:07 533 yíldlóósh
FUTURE

19:24:07 534 yildlosh
19:24:07 535 yildził
19:24:07 536 yilé
19:24:07 537 yileeh
FUTURE

19:24:07 538 yilééh
FUTURE

19:24:07 539 yileeł
19:24:07 540 yilghał
19:24:07 541 yilʼis
19:24:07 542 yiljoł
19:24:07 543 yilkeed
19:24:07 544 yilnood
FUTURE

19:24:08 545 yilóós
19:24:08 546 yiltał
19:24:08 547 yiltłʼééh
FUTURE

19:24:08 548 yiltłʼééł
19:24:08 549 yiltʼood
FUTURE

19:24:08 550 yiltsą́
19:24:08 551 yilwoł
19:24:08 552 yilyé
FUTURE

19:24:08 553 yilyeed
FUTURE

19:24:08 554 yilzhah
19:24:08 555 yilzhóóh
FUTURE

19:24:08 556 yiłʼá
FUTURE

19:24:08 557 yiłąąh
19:24:08 558 yiłbąąs
19:24:09 559 yiłbal
19:24:09 560 yiłbéézh
FUTURE

19:24:09 561 yiłchʼal
FUTURE

19:24:09 562 yiłchí
FUTURE

19:24:09 563 yiłchʼil
FUTURE

19:24:09 564 yiłchin
FUTURE

19:24:09 565 yiłchǫǫh
FUTURE

19:24:09 566 yiłchozh
FUTURE

19:24:09 567 yiłchxǫǫh
FUTURE

19:24:09 568 yiłdééh
FUTURE

19:24:09 569 yiłdeeł
FUTURE

19:24:09 570 yiłdin
19:24:10 571 yiłdlaad
19:24:10 572 yiłdǫʼ
19:24:10 573 yiłdon
19:24:10 574 yíłdóóh
FUTURE

19:24:10 575 yiłdzééh
19:24:10 576 yiłdzid
19:24:10 577 yiłʼeeł
19:24:10 578 yiłgan
19:24:10 579 yiłhaał
19:24:10 580 yiłhį́į́h
19:24:10 581 yiłhiz
19:24:10 582 yiłhod
19:24:11 583 yiłhozh
19:24:11 584 yiłjiid
19:24:11 585 yiłjįzh
FUTURE

19:24:11 586 yiłjooł
19:24:11 587 yiłkaad
19:24:11 588 yiłkááh
19:24:11 589 yiłkąąh
19:24:11 590 yiłkeed
19:24:11 591 yiłmáás
19:24:11 592 yíłmáás
FUTURE

19:24:11 593 yiłmaz
19:24:12 594 yiłnaad
FUTURE

19:24:12 595 yiłnááh
FUTURE

19:24:12 596 yiłneʼ
19:24:12 597 yiłní
19:24:12 598 yiłtʼá
19:24:12 599 yiłtʼááh
19:24:12 600 yiłtązh
19:24:12 601 yiłtʼeʼ
19:24:12 602 yiłteeh
19:24:12 603 yiłtʼééh
19:24:12 604 yiłtʼees
FUTURE

19:24:13 605 yiłtin
19:24:13 606 yiłtłah
19:24:13 607 yiłtłʼis
19:24:13 608 yiłtʼoʼ
19:24:13 609 yiłtʼood
19:24:13 610 yiłtsʼééh
19:24:13 611 yíłtseii
19:24:13 612 yiłtsih
FUTURE

19:24:13 613 yiłtsʼil
19:24:13 614 yiłtsoʼ
19:24:13 615 yiłtsóós
19:24:13 616 yimáás
19:24:13 617 yimal
19:24:14 618 yimas
19:24:14 619 yinaalnish
19:24:14 620 yiʼnah
19:24:14 621 yináldzid
19:24:14 622 yinééh
FUTURE

19:24:14 623 yiʼnééh
FUTURE

19:24:14 624 yinééł
19:24:14 625 yiniih
19:24:14 626 yiniʼįįh
19:24:14 627 yinííł
19:24:14 628 yiniisííł
19:24:15 629 yiníłʼį́
FUTURE

19:24:15 630 yiniłtséés
FUTURE

19:24:15 631 yinizh
19:24:15 632 yinízin
19:24:15 633 yinóʼááh
19:24:15 634 yiʼoł
19:24:15 635 yisał
19:24:15 636 yisdéíłʼeeł
FUTURE

19:24:15 637 yisééh
19:24:15 638 yishééh
FUTURE

19:24:15 639 yishíísh
19:24:16 640 yishood
19:24:16 641 yishóóh
FUTURE

19:24:16 642 yishǫǫh
19:24:16 643 yishóósh
19:24:16 644 yishtłizh
19:24:16 645 yisį́į́h
19:24:16 646 yitaał
19:24:16 647 yitʼeesh
19:24:16 648 yitiʼ
19:24:16 649 yitʼį́
19:24:16 650 yitiih
19:24:17 651 yitįįh
FUTURE

19:24:17 652 yitʼįįh
FUTURE

19:24:17 653 yitił
19:24:17 654 yitin
19:24:17 655 yitłeeh
19:24:17 656 yitłíísh
19:24:17 657 yitłʼił
19:24:17 658 yitłʼin
FUTURE

19:24:17 659 yitłish
19:24:17 660 yitłʼó
19:24:17 661 yitʼood
FUTURE

19:24:17 662 yitseeł
19:24:17 663 yitsʼiʼ
19:24:17 664 yitsid
19:24:18 665 yitsʼííd
19:24:18 666 yitsʼǫǫs
19:24:18 667 yiwod
19:24:18 668 yiwozh
19:24:18 669 yiyą́
FUTURE

19:24:18 670 yiyeeh
19:24:18 671 yíyeeh
FUTURE

19:24:18 672 yiyiighas
19:24:18 673 yiyiigis
19:24:18 674 yiyiigish
FUTURE

19:24:18 675 yiyiilįh
FUTURE

19:24:18 676 yiyiiłbááh
19:24:19 677 yiyiiłchííh
19:24:19 678 yiyiiłchxosh
19:24:19 679 yiyiiłgááh
19:24:19 680 yiyiiłgąsh
FUTURE

19:24:19 682 yiyiiłhash
FUTURE

19:24:19 683 yiyiiłhé
FUTURE

19:24:19 684 yiyiiłoh
19:24:19 685 yiyiiłtʼoh
19:24:19 686 yiyiiłtsééh
FUTURE

19:24:19 687 yiyiiłtsóód
19:24:19 688 yiyiiłtsóóh
19:24:20 689 yiyiinih
19:24:20 690 yiyiishį́į́h
19:24:20 691 yiyiitał
19:24:20 692 yiyiitsʼǫs
FUTURE

19:24:20 693 yiyiizoh
19:24:20 694 yiyizh
19:24:20 695 yizéés
19:24:20 696 yizhizh
19:24:20 697 yizííd
19:24:20 698 yízííd
19:24:20 699 yizǫ́ǫ́s
19:24:20 700 yóbé
19:24:20 701 yólgę́ę́zh
19:24:20 702 yółtaʼ
FUTURE

19:24:21 703 yooʼááł
19:24:21 704 yoodlą́
19:24:21 705 yoodzį́į́s
19:24:21 706 yooʼį́
19:24:21 707 yooʼish
19:24:21 708 yoojih
19:24:21 709 yookááł
19:24:21 710 yoolééł
19:24:21 711 yooloh
19:24:21 712 yoolóós
19:24:21 713 yoołʼah
19:24:21 714 yoołbąs
19:24:21 715 yoołbįįh
19:24:22 716 yoołchʼął
19:24:22 717 yoołdoh
19:24:22 718 yoołjił
19:24:22 719 yoołjoł
19:24:22 720 yoołkááł
19:24:22 721 yoołkił
19:24:22 722 yoołʼoł
19:24:22 723 yoołtʼááł
19:24:22 724 yoołtʼah
19:24:22 725 yoołtééł
19:24:22 726 yoołtʼih
19:24:22 727 yoołtʼoł
19:24:22 728 yoołtsʼǫł
19:24:22 729 yoołtsos
19:24:23 730 yoosas
19:24:23 731 yooshoł
19:24:23 732 yootʼááł
19:24:23 733 yootį́į́ł
19:24:23 734 yootłʼį́į́ł
19:24:23 735 yootłoh
19:24:23 736 yootłʼóół
19:24:23 737 yooyééł
19:24:23 738 yooznah
FUTURE

19:24:23 739 yoozoh
19:24:23 740 yoozǫ́ǫ́s
19:24:23 741 yózhí
19:24:23 742 yózhíh
WARNING: API error badtoken: Invalid CSRF token.
Sleeping for 9.9 seconds, 2017-01-25 19:24:24
Page [[wiktionary:en:User:Julien Daux/Testcases]] saved