import pywikibot
import re
from pywikibot import pagegenerators
# English-language Wikipedia site object; used to build the page handle below.
site = pywikibot.Site('en', 'wikipedia')
# Page handle in the "User:Trialpears" user space.  Nothing in the visible code
# reads or writes it -- presumably generated results are saved here; TODO confirm.
resultpage = pywikibot.Page(site, u"User:Trialpears/Automatic biography short descriptions")
# Titles/honorifics whose trailing period does not end a sentence.  The leading
# \b keeps ordinary words that merely end in one of these letter sequences
# ("film.", "first.", "drum.") from being mistaken for an abbreviation.
_ABBREVIATION_RE = re.compile(
    r"\b(?:br|chan|chapln|dr|fr|gov|miss|mr|mrs|ms|mme|m|msgr|pres|prof|rep|rev"
    r"|revs|sen|sr|sra|srta|hon|esq|jr|ret|lt|col|sgt|gen|cpl|capt|bg|adm|cwo"
    r"|ens|maj|msgt|st)\.",
    re.IGNORECASE,
)
# A period, one non-space character, then another period, e.g. "U.S." or "J.R.".
_INITIALISM_RE = re.compile(r"\.\S\.")


def extractfirst(text):
    """Return the first sentence of a wikitext string as plain text.

    Wiki markup (category/file/image links, section headings and everything
    after the first one, bold/italic quotes, link brackets, templates,
    parenthesised asides, <ref> tags and HTML comments) is stripped, then the
    text is cut after the first '.', '!' or '?' that is followed by whitespace
    and a capital letter, or that ends the text.

    Args:
        text: Wikitext to process.

    Returns:
        The cleaned first sentence, or "" when that sentence contains a known
        abbreviation ("St.", "Dr.", ...) or an initialism such as "U.S.",
        because the sentence boundary cannot be trusted in that case.
    """
    result = text
    # Remove these links entirely; their labels are not part of the prose.
    for namespace in ("[Cc]ategory", "[Ff]ile", "[Ii]mage"):
        result = re.sub(r"\[\[" + namespace + r":[^\]]*]]", "", result)
    # Flatten to one line so the heading cut below removes the rest of the page.
    result = result.replace("\n", " ")
    result = re.sub(r"==.*", "", result)
    result = re.sub(r"''+", "", result)
    # [[target|label]] -> label, [[target]] -> target.
    result = re.sub(r"\[\[([^\|\]\[]*\|)?([^\|\]\[]*)]]", r"\2", result)
    # Each pass strips only innermost constructs; five passes handle nesting
    # up to five levels deep.
    for _ in range(5):
        result = re.sub(r"\{\{[^{}]*\}\}", "", result)
        result = re.sub(r"\([^()]*\)", "", result)
        result = re.sub(r"<ref[^<>]*>[^<>]*</ref>", "", result)
        result = re.sub(r"<ref[^<>]*/>", "", result)
        result = re.sub(r"<!--[^<>]*-->", "", result)
    result = re.sub(r" +", " ", result)
    # Keep everything up to the first sentence terminator; text without one is
    # left unchanged.
    result = re.sub(r"(^.*?[.!?](?=\s[A-Z]|$)).*", r"\1", result)
    # Removed markup can leave a gap before punctuation ("name , born").
    result = re.sub(r"\s+(?=[,.])", "", result).strip()
    if _INITIALISM_RE.search(result) or _ABBREVIATION_RE.search(result):
        return ""
    return result
# Sentence opening "<Capitalised words> is/was a/an <rest>"; group 1 captures
# the rest of the line.
_LEAD_IN_RE = re.compile(
    r'^(?:(?:"?[A-Z][a-z]*"?|Jr\.),? )*?(?:is|was) (?:a|an) (.*)'
)
# Phrases that start a trailing clause; the description is cut from the
# earliest one (including an optional preceding comma) to the end of the line.
_TRAILING_CLAUSE_RE = re.compile(
    r",? (?:who|currently|as well|better known|best known|also known"
    r"|most known|mostly known|generally known|especially known|well known"
    r"|particularly known|primarily known|known for).*"
)
# Words a description must start with to be accepted (case-sensitive prefix
# match).  The alternation is kept verbatim, including duplicates and
# fragments of multi-word names ("The", "of", "and").
# NOTE(review): entries are regex fragments -- the dots in "U.S." and the "?"
# in "African?" are unescaped; confirm whether that is intended.
_NATIONALITY_RE = re.compile(
    r"(Afghan|Albanian|Algerian|Andorran|Angolan|and|Barbuda|Antiguan|Barbudan"
    r"|Argentine|Armenian|Australian|Austrian|Azerbaijani|Azeri|Bahamas|Bahamian"
    r"|Bahraini|Bengali|Barbadian|Belarusian|Belgian|Belizean|Beninese|Beninois"
    r"|Bhutanese|Bolivian|Bosnian|Herzegovinian|Motswana|Botswanan|Brazilian"
    r"|Bruneian|Bulgarian|Faso|Burkinabé|Burmese|Burundian|Verde|Cabo|Verdean"
    r"|Cambodian|Cameroonian|Canadian|African|Chadian|Chilean|Chinese|Colombian"
    r"|Comoran|Comorian|Congolese|Rican|Ivorian|Croatian|Cuban|Cypriot|Republic"
    r"|Czech|Danish|Djiboutian|Dominican|Republic|Dominican|Timor|Timorese"
    r"|Ecuadorian|Egyptian|Salvador|Salvadoran|Guinea|Equatorial|Guinean"
    r"|Equatoguinean|Eritrean|Estonian|Ethiopian|Fijian|Finnish|French|Gabonese"
    r"|The|Gambian|Georgian|German|Ghanaian|Gibraltar|Greek|Hellenic|Grenadian"
    r"|Guatemalan|Guinean|Bissau|Guinean|Guyanese|Haitian|Honduran|Hungarian"
    r"|Magyar|Icelandic|Indian|Indonesian|Iranian|Persian|Iraqi|Irish|Israeli"
    r"|Italian|Coast|Ivorian|Jamaican|Japanese|Jordanian|Kazakhstani|Kazakh"
    r"|Kenyan|Kiribati|Korea|North|Korean|Korea|South|Korean|Kuwaiti"
    r"|Kyrgyzstani|Kyrgyz|Kirgiz|Kirghiz|Lao|Laotian|Latvian|Lettish|Lebanese"
    r"|Basotho|Liberian|Libyan|Liechtensteiner|Lithuanian|Luxembourg"
    r"|Luxembourgish|Macedonian|Malagasy|Malawian|Malaysian|Maldivian|Malian"
    r"|Malinese|Maltese|Islands|Marshallese|Martiniquais|Martinican|Mauritanian"
    r"|Mauritian|Mexican|Micronesian|Moldovan|Monégasque|Monacan|Mongolian"
    r"|Montenegrin|Moroccan|Mozambican|Namibian|Nauruan|Nepali|Nepalese|Dutch"
    r"|Netherlandic|Zealand|Zealand|Zelanian|Nicaraguan|Nigerien|Nigerian"
    r"|Marianan|Norwegian|Omani|Pakistani|Palauan|Palestinian|Panamanian|Guinea"
    r"|Papua|Guinean|Papuan|Paraguayan|Peruvian|Filipino|Philippine|Polish"
    r"|Portuguese|Rico|Puerto|Rican|Qatari|Romanian|Russian|Rwandan|Kitts|and"
    r"|Nevis|Kittitian|Nevisian|Saint|Lucian|Saint|Vincentian|Vincentian|Samoan"
    r"|Marino|Sammarinese|Tomé|Príncipe|São|Toméan|Arabia|Saudi|Arabian"
    r"|Senegalese|Serbian|Seychellois|Leone|Sierra|Leonean|Singapore"
    r"|Singaporean|Slovak|Slovenian|Slovene|Islands|Solomon|Island|Somali"
    r"|African?|South|African|Sudan|South|Sudanese|Spanish|Lanka|Sri|Lankan"
    r"|Sudanese|Surinamese|Swazi|Swedish|Swiss|Syrian|Tajikistani|Tanzanian"
    r"|Thai|Leste|Timorese|Togolese|Tokelauan|Tongan|Tobago|Trinidadian"
    r"|Tobagonian|Tunisian|Turkish|Turkmen|Tuvaluan|Ugandan|Ukrainian|Arab"
    r"|Emirates|Emirati|Emirian|Emiri|Kingdom|Great|Britain|Northern|Ireland"
    r"|UK|British|States|of|America|United|States|U.S.|American|Uruguayan"
    r"|Uzbekistani|Uzbek|Vanuatu|Vanuatuan|Vatican|Venezuelan|Vietnamese|Yemeni"
    r"|Zambian|Zimbabwean)"
)


def extractdescription(text, max_length=40):
    """Derive a short description from a biography's first sentence.

    The sentence must open with capitalised words followed by "is"/"was" and
    "a"/"an".  What follows is trimmed at the first trailing clause
    ("who ...", "best known ...", "; she was ...", ...), stripped of a final
    period, and its first ASCII letter is upper-cased.

    Args:
        text: A plain-text sentence.
        max_length: Longest description (in characters) that is accepted.

    Returns:
        The description string when it is at most ``max_length`` characters
        long and starts with one of the listed nationality words; otherwise
        ``False``.
    """
    lead_in = _LEAD_IN_RE.match(text)
    if lead_in is None:
        return False
    # Keep what follows "is a"/"was an" on the first line, plus any later lines.
    result = lead_in.group(1) + text[lead_in.end():]
    result = _TRAILING_CLAUSE_RE.sub("", result)
    result = re.sub(r"\.$", "", result)
    result = re.sub(r"[,;]? (he|she|they) (is|are|were|was).*", "", result)
    # Upper-case only the first ASCII letter found.
    result = re.sub(
        r"([a-zA-Z])", lambda found: found.group(1).upper(), result, count=1
    )
    result = re.sub(r"\.$", "", result)
    if len(result) <= max_length and _NATIONALITY_RE.match(result):
        return result
    return False
# Manual smoke checks for extractfirst(): each numbered heading is followed by
# one output line per sample.
print("1:", extractfirst("St."), extractfirst("Potatoes. St."), sep="\n")
print(
    "2:",
    extractfirst("Amelia Curran is a Canadian singer-songwriter from St. John's, Newfoundland and Labrador. The National Post describes her music as a bit like Leonard Cohen being channeled in a dusty saloon by Patsy Cline.[1] "),
    sep="\n",
)
# Output printed by the calls above:
# 1:
#
# Potatoes.
# 2: