#
# (C) Toto Azéro, 2012-2013
#
# Distribué sous licence GNU GPLv3
# Distributed under the terms of the GNU GPLv3 license
# http://www.gnu.org/licenses/gpl.html
#

import re
import pywikibot
import time

def extract_titles(text, beginning, match_title):
    """
    Extracts all the titles of a text, starting at 'beginning'.
    Setting beginning to '' or None will start at the beginning of the text.
    Setting beginning to any other string will ignore all the titles
    before the first occurrence of that phrase.
    
    match_title should be a compiled regular expression (use re.compile).
    
    Returns a dictionary mapping title_number (int) to title (unicode).
    """
    if not text:
        return None
    if beginning:
        # Skip everything before the first occurrence of 'beginning'.
        text = text[text.index(beginning):]
    
    titles = {}
    i = 0
    match = re.search(match_title, text)
    while match:
        titles[i] = match.group(0)
        
        # Drop a leading newline if the regex captured one.
        if titles[i].startswith('\n'):
            titles[i] = titles[i][1:]
        
        # Resume the search after the current title.
        text = text[text.index(titles[i]) + len(titles[i]):]
        i += 1
        match = re.search(match_title, text)
    
    return titles
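
# A minimal usage sketch with made-up wikitext (the sample text and the
# expected result below are illustrative, not taken from the live wiki):
#
#   >>> sample = u"intro\n=== Foo ===\nbody A\n=== Bar ===\nbody B\n"
#   >>> extract_titles(sample, None, re.compile(u"=== *([^=].*?) *==="))
#   {0: u'=== Foo ===', 1: u'=== Bar ==='}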

def extract_sections(text, titles):
    """
    Extracts all the sections of a text, based on a dict of titles.
    You can use extract_titles() to be given this dict.
    
    Returns a dictionary as follows:
        section_number (int): section_value (unicode)
    NB: section_value includes the section's title.
    """
    if not titles:
        return None
    
    sections = {}
    
    for section_number in titles:
        # If this title is the last one, take everything up to the end of
        # the text, since there is no next section title to search for.
        if (section_number + 1) != len(titles):
            sections[section_number] = text[text.index(titles[section_number]):text.index(titles[section_number + 1])]
            text = text[text.index(sections[section_number]) + len(sections[section_number]):]
        else:
            sections[section_number] = text[text.index(titles[section_number]):]
    
    return sections
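
# Continuing the sketch above, the titles dict can be fed back in to cut
# the text into sections (again an illustrative, made-up example):
#
#   >>> titles = {0: u'=== Foo ===', 1: u'=== Bar ==='}
#   >>> extract_sections(sample, titles)
#   {0: u'=== Foo ===\nbody A\n', 1: u'=== Bar ===\nbody B\n'}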
    

def extract_sections_with_titles(text, beginning, match_title):
    """
    Extracts all the titles and sections of a text, starting at 'beginning'.
    
    match_title should be a compiled regular expression (use re.compile).
    
    Returns a dictionary as follows:
        section_title (unicode): section_value (unicode)
    NB: section_value includes the section's title.
    """
    titles = extract_titles(text, beginning, match_title)
    if not titles:
        return None
    
    if beginning:
        # Stay consistent with extract_titles(): slice the text from the
        # same starting point before looking the titles up in it.
        text = text[text.index(beginning):]
    
    sections = {}
    
    for section_number in titles:
        # If this title is the last one, take everything up to the end of
        # the text, since there is no next section title to search for.
        current_title = titles[section_number]
        if (section_number + 1) != len(titles):
            sections[current_title] = text[text.index(current_title):text.index(titles[section_number + 1])]
            text = text[text.index(sections[current_title]) + len(sections[current_title]):]
        else:
            sections[current_title] = text[text.index(current_title):]
    
    return sections
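
# Same sketch as above, keyed by title instead of section number
# (illustrative):
#
#   >>> extract_sections_with_titles(sample, None, re.compile(u"=== *([^=].*?) *==="))
#   {u'=== Foo ===': u'=== Foo ===\nbody A\n', u'=== Bar ===': u'=== Bar ===\nbody B\n'}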


class WarnBot:
    def __init__(self):
        self.site = pywikibot.Site('fr', 'wikipedia')
        self.main_page = pywikibot.Page(self.site, u"Wikipédia:Respect de l'obligation de transparence à vérifier/En cours")
        # Matches level-3 request titles such as u"=== Some request ===".
        self.match_titre_requete = re.compile(u"=== *([^=].*?) *===")
        
        
    def analyse_une_section(self, page, match_debut):
        # TODO: handle, or at least avoid, problems caused by duplicate titles.
        
        text = page.get()

        # Keep only the text containing the requests to examine, since
        # several sections share the same page.
        if match_debut == u'Comptes bloqués en attente':
            text = text[0:text.index(u"= Comptes bloqués en attente =")]
        elif match_debut == u'En cours':
            text = text[text.index(u"= En cours ="):]
        
        titres = extract_titles(text, beginning=None, match_title=self.match_titre_requete)
        sections = extract_sections(text, titres)
        
        return {
            'titres': titres,
            'sections': sections,
        }
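
# analyse_une_section() returns, schematically (the request title below is
# made up for illustration):
#   {'titres':   {0: u'=== Some request ===', ...},
#    'sections': {0: u'=== Some request ===\nStatut : ...', ...}}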

# Archiving pass: collect every request section whose status is {{Terminé}},
# remove it from the work page, then save the trimmed page.
bot = WarnBot()
analyse = bot.analyse_une_section(page=bot.main_page, match_debut=u'En cours')

regexp = re.compile(r'Statut : {{Terminé}}')
site = pywikibot.Site('fr', 'wikipedia')
page = pywikibot.Page(site, u"Wikipédia:Respect de l'obligation de transparence à vérifier/En cours")
texte = page.get()

sections_archivees = []
for section in (analyse['sections'] or {}).values():
    if regexp.search(section):
        sections_archivees.append(section)
        texte = texte.replace(section, '')
print(len(sections_archivees))

page.text = texte
page.save(summary="[[wp:bot|robot]] : archivage de " + str(len(sections_archivees)) + " sections")

# Wait two minutes before writing the collected sections to the archive page.
time.sleep(120)

site = pywikibot.Site('fr', 'wikipedia')
page = pywikibot.Page(site, u"Wikipédia:Respect de l'obligation de transparence à vérifier/Archives")
try:
    texte_archives = page.get()
except pywikibot.NoPage:
    # The archive page may not exist yet; start from an empty text.
    texte_archives = ''

for section in sections_archivees:
    texte_archives += section
page.text = texte_archives
page.save(summary="[[wp:bot|robot]] : archivage de " + str(len(sections_archivees)) + " sections")
print(texte_archives)