import pywikibot
import re
import os
import pymysql
site = pywikibot.Site('zh', 'wikipedia')
site.login()
host = os.environ['MYSQL_HOST']
user = os.environ['MYSQL_USERNAME']
password = os.environ['MYSQL_PASSWORD']
conn = pymysql.connect(
    host=host,
    user=user,
    password=password,
    charset="utf8"
)
with conn.cursor() as cur:
    cur.execute('use zhwiki_p')
    cur.execute("""
        SELECT ct_rev_id, actor_name, comment_text, page_namespace, page_title
        FROM (
          SELECT *
          FROM change_tag
          WHERE ct_tag_id = 1
          ORDER BY ct_id DESC
          LIMIT 5000
        ) change_tag
        LEFT JOIN revision
        ON ct_rev_id = rev_id
        LEFT JOIN actor
        ON rev_actor = actor_id
        LEFT JOIN comment
        ON rev_comment_id = comment_id
        LEFT JOIN page
        ON rev_page = page_id
    """)
    res = cur.fetchall()
def parse_type(summary, ns, title):
    # arv
    if ns == 4 and title == '元維基用戶查核請求' and re.search(r'^(報告|报告)', summary):
        return 'arv', 'reportrfcu'
    if ns == 3 and re.search(r'通知用戶查核請求|通知用户查核请求', summary):
        return 'arv', 'noticerfcu'
    if ns == 4 and title == '当前的破坏' and re.search(r'^(報告|报告)', summary):
        return 'arv', 'reportaiv'
    if ns == 4 and title == '需要管理員注意的用戶名' and re.search(r'^(报告|新提報|新提报)', summary):
        return 'arv', 'reportuaa'
        
    # block
    if ns == 3 and re.search(r'{{uw-(ublock|block1|block2|block3|vblock|dblock)', summary):
        return 'block', 'notice'
    if ns == 2 and re.search(r'(標記被永久封禁的用戶頁|标记被永久封禁的用户页)', summary):
        return 'block', 'taguser'
    
    # close
    CLOSEREASON = (r'(快速保留|請求理由消失|请求理由消失|重定向|请求无效|請求無效|保留|删除|刪除|提删者未取得提删资格'
        +'|提刪者未取得提刪資格|转移至维基共享资源|移动|并入|併入|轉交侵權|转交侵权|无共识|重複提出|移動|無共識|重复提出|转移至)')
    if (ns == 4 and re.search(r'^(頁面存廢討論|檔案存廢討論)/記錄/', title)
            and re.search(CLOSEREASON, summary)):
        return 'close', 'close'
    if re.search(r'Wikipedia:(頁面存廢討論|檔案存廢討論)/記錄/\d+/\d+/\d+\]\]:.*'+CLOSEREASON, summary):
        return 'close', 'tagtalk'
    if re.search(r'(存廢討論關閉|存废讨论关闭):', summary):
        return 'close', 'rmtem'
    
    # config
    if ns == 2 and re.search(r'(保存Twinkle参数设置|儲存Twinkle偏好設定)', summary):
        return 'config', 'config'
    
    # copyvio
    if ns == 4 and title == '頁面存廢討論/疑似侵權' and re.search(r'(添加|加入)\[\[', summary):
        return 'copyvio', 'report'
    if ns == 3 and re.search(r'通知:页面.+疑似侵犯版权', summary):
        return 'copyvio', 'notice'
    if re.search(r'(本頁面疑似侵犯版權|本页面疑似侵犯版权)', summary):
        return 'copyvio', 'tag'
    
    # fluff
    if re.search(r'^(回退|撤销|还原)', summary):
#         if '出於[[WP:AGF|善意]]的編輯' in summary:
        return 'fluff', 'fluff'
#         if '回退到由' in summary:
#             return 'fluff', 'torev'
#         if '做出的最后一个修订版本' in summary:
#             return 'fluff', 'norm'
    
    # image
    if ns == 4 and title == '檔案存廢討論/無版權訊息或檔案來源' and re.search(r'^(添加|加入)', summary):
        return 'image', 'report'
        
    # protect
    if ns == 4 and title == '请求保护页面' and re.search(r'请求对.+保护', summary):
        return 'protect', 'request'
    if re.search(r'移除保護模板|(加入|添加){{pp-', summary):
        return 'protect', 'tag'
    if re.search(r'已保护|移除保护|保护等级', summary):
        return 'protect', 'protect'
    
    # speedy
    if ns == 2 and re.search(r'(记录对|記錄對).+(的快速删除提名|的快速刪除提名)', summary):
        return 'speedy', 'record'
    if ns == 3 and re.search(r'通知:.+(快速删除提名|快速刪除提名)', summary):
        return 'speedy', 'notice'
    if re.search('(请求|請求).*(快速删除|快速刪除)', summary):
        return 'speedy', 'tag'

    # tag
    if ns in [0, 4, 10, 118] and re.search(r'(添加|加入).+(标记|標記)', summary):
        return 'tag', 'tag'
    if ns == 6 and re.search(r'^添加{{', summary):
        return 'tag', 'tag'
    if ns == 4 and title == '关注度/提报' and re.search(r'(添加|加入)\[\[', summary):
        return 'tag', 'reportnp'
    if ns == 1 and re.search(r'(请求|請求).+(合并|合併)', summary):
        return 'tag', 'mergetagtalk'
    if ns == 1 and re.search(r'(请求移动至|請求移動至)', summary):
        return 'tag', 'movetagtalk'
    
    # talkback
    if ns == 3 and re.search(r'(回复通告|回覆通告|通知:有新郵件)', summary):
        return 'talkback', 'talkback'
    
    # unlink
    if re.search(r'(注释出文件使用|取消链接到)', summary):
        return 'unlink', 'unlink'
    
    # warn
    if ns == 3 and re.search(r'((層級|层级)(1|2|3|4|4im)|单层级通知|單層級通知|单层级警告|單層級警告|提示):|您翻譯的質量有待改善', summary):
        return 'warn', 'warn'
    
    # xfd
    if ns == 4 and re.search(r'^(頁面存廢討論|檔案存廢討論)/記錄/', title) and re.search(r'(添加|加入)\[\[', summary):
        return 'xfd', 'report'
    if ns == 3 and re.search(r'通知:(页面|文件).+存废讨论提名', summary):
        return 'xfd', 'notice'
    if re.search(r'(页面存废讨论|頁面存廢討論|檔案存廢討論|文件存废讨论):', summary):
        return 'xfd', 'tag'
    
    return 'unknown', 'unknown'
BRAMCHS = {
    '94rain': ['94rain'],
    'Vanished user 1929210': ['S099001', 'R96340'],
    'Kuon.Haku': ['Kuon.Haku', 'Fran1001hk'],
    'gadget': ['Emojiwiki', 'ZEdzEd3168', 'Dannylee0417', 'Clafiok', 'Poem', 'Bxxiaolin', 'Jyxyl9', 'Yenhochia',
              'Techyan', 'N0099', '佛壁灯', 'Yenhochia', 'Silvermetals', 'Silvermetals', 'Lanwi1', 'JuneAugust',
              'No1lovesu', 'Njzjz', 'Hyijun', '宝安区浩哥', 'Richardcai', 'Peter Yeung', 'BrianYWH', 'Kriz Ju',
              'Ngguls', '泡泡小号028', '杨高南路', 'Yangfl', 'Cwek', 'Richard923888', '420peace', 
               # not confirm
               'Zuohaocheng', '淺藍雪', 'Angrydog001', 'Marcushsu', 'Jpcomic', 'CHNAQW', 'Shizhao', '啦啦啦巴拿马',
               'Leiem', 'DAN DAN 66', 'Tombus20032000', '葉又嘉', 'FRDian', 'South Africa No.1', 'Matthew hk',
               'Pbdragonwang', 'C933103', 'Solelyeagle', 'Wi24rd', 'Iokseng', 'Des Cascade', 'Johnson.Xia', 'GnolizX',
               'Kencola', 'ChenKB91', 'Hkjacksonhk', 'Vakrieger', 'Bagakuco', '燃玉', 'Shenchen13', 'Xl7010',
               'BakaChinatsu', 'Amazingloong', 'GZWDer', 'Aa1388zz', 'BoyuZhang1998', 'Sunyiming steven',
               'Fire-and-Ice', 'Kanashimi', 'Mys 721tx', 'Formuse', 'KKD123', 'Nightwayyyyy', 'Ma3r', 'Assanges',
               'Supaplex', 'Softyu', 'Moonian', 'Viztor', '風鳴', 'Hosiet', 'Nissangeniss', 'O-ring', 'Oligophasia',
               '百战天虫', 'Vozhuo', 'YFdyh000', 'Ch.Andrew', 'Timmyboger', 'Outlookxp', 'Bowleerin', 'Terry850324',
               '林天蓬', '痛心疾首', 'Z7504', 'PlyrStar93', 'Billytanghh', 'Pokman817', '舞月書生', 'SickManWP',
               '笔尖留痕', 'Dirrival', 'An Macanese', 'Jared-li', 'Billy382', 'Naturenice', 'S20131417', 'Uranus1781',
               'MStargazer', 'Arnie97', 'Sdf', 'Koala0090', 'TrutH SuiTeR', 'Morrigan', 'Editbility', 'Suezcuts', 'Meodel',
               'Weirichmond', 'Clear Sky C', '北極企鵝觀賞團', 'Unravel17', 'Pazakui', 'Edison2017', 'Gslin', '台北人',
               'Siyuwj', 'Thyj', 'Shamrockwikiedit', 'Hkzense', 'Chenyijia001', 'Yexiaoxing', 'M940504', 'AngeCI',
               'Antigng', 'Legolas1024', 'ZySun', 'CopperSulfate', 'DreamLiner', 'Liu116', 'Cdip150',
               '大诺史', 'PECCO', 'Cbliu', 'Bluedeck', 'Rowe Wilson Frederisk Holme', '照大江', 'Hikaruangeel', 'Mimeyuen1219',
               'Kegns', 'Kiki59123', 'Sunbread', 'Weesteed', 'Yxh1433', 'K.Y.K.Z.K.', 'Alvinz', 'Xiaoxiaoluopsu', 'Liaon98',
               'E8xE8', 'Kiraclyne', '微月吨日亿万千'
              ],
    'xiplus': ['Xiplus', 'MCC214', 'Sanmosa', '蟲蟲飛', 'Sunny00217', 'A1Cafel', 'Willy1018', 'Ohtashinichiro',
                'Hamish', 'Joshua Zhan', '-Zest', '社会我佩奇', 'SCP-2000', 'BenedictusFX', 'Wcam', '人人生來平等',
              'Wolfch', 'Tigerzeng', 'SmallTim',
              'TimWu007', 'DW YoungDLS', 'KirkLU', 'だ*ぜ', 'A84396572', 'StevenK234', 'Cohaf', 'SSYoung', 'Aotfs2013',
               'Alneth', 'Renamed user EPgiXc0zK4m0gk8j', 'AINH', 'SH6188', 'Michael Chan', 'Jimmyshjj', 'LimSoo-jung', 'Dabao qian', 'WQL',
               'A2569875', 'Stang', 'AlexLeeCN', 'Vikarna', 'Super Wang', 'Ericliu1912', 'Taiwania Justo', '無聊龍',
               'TimChen', 'Masdggg', '和平至上', 'Catherine Laurence', '游魂', 'Suaveness', 'Rowingbohe', 'Soaring swallow',
               'WhitePhosphorus', 'Manchiu', 'Brror', 'AT', 'Kalicine730', '夢蝶葬花',
               '春卷柯南', '靖天子', 'Subscriptshoe9', 'EveryDayMood', 'ToothpasteLake',
               'Kuailong', '高文海', 'Dqwyy', 'EtaoinWu', '1233',
              ],
}
def get_branch(user):
    for branch in BRAMCHS:
        if user in BRAMCHS[branch]:
            return branch
    return 'unknown'
ADMINS = ['AT', 'Acepatrick', 'AddisWang', 'Alberth2', 'Alexander Misel', 'Alexsh', 'Alltonight', 'Antigng', 'Aoke1989', 'Aotfs2013', 'Bencmq', 'Bluedeck', 'BrockF5', 'Cdip150', 'Ch.Andrew', 'Chiefwei', 'Cloudcolors', 'Cp111', 'DreamLiner', 'Father vice', 'Ffaarr', 'Gakmo', 'Gzdavidwong', 'Hat600', 'Htchien', 'Iokseng', 'Jasonzhuocn', 'Jimmy Xu', 'Jimmy-abot', 'Jusjih', 'KOKUYO', 'Kalicine730', 'Kallgan', 'Kegns', 'Kevinhksouth', 'Koika', 'Kolyma', 'Kuailong', 'Kuon.Haku', 'Lakokat', 'Lanwi1', 'Liangent', 'Liangent-adminbot', 'Manchiu', 'Minghong', 'Mongol', 'Munford', 'Mys 721tx', 'Nbfreeh', 'Nlu', 'Outlookxp', 'Pedist', 'SElephant', 'Seasurfer', 'Shinjiman', 'Shizhao', 'Stang', 'Stewart', 'Subscriptshoe9', 'T.A Shirakawa', 'Techyan', 'Tigerzeng', 'Wcam', 'WhitePhosphorus', 'Wing', 'Wong128hk', 'Wright.one', 'Ws227', 'Xiplus', 'Xiplus-abot', 'Yhz1221', 'Zy26', '乌拉跨氪', '唐戈', '妙詩人', '春卷柯南', '武藏', '淺藍雪', '滥用过滤器', '燃玉', '霧島聖']
PATROLLERS = ['*angys*', '-Zest', '1233', '2thuriel', '94rain', 'A1Cafel', 'A22234798', 'A2569875', 'A635683851', 'A84396572', 'AINH', 'Aa1388zz', 'Alvin Lee', 'Alvinz', 'Amazingloong', 'An Macanese', 'AndyAndyAndyAlbert', 'Anthony Fok', 'ArthurLau1997', 'Assanges', 'BakaChinatsu', 'Billytanghh', 'Bowleerin', 'BoyuZhang1998', 'Brror', 'Bxxiaolin', 'CBNWGBB', 'Catherine Laurence', 'Cbliu', 'Changnick', 'Chenyijia001', 'Choihei', 'Clear Sky C', 'Coekon', 'Cohaf', 'CopperSulfate', 'Cwek', 'DAN DAN 66', 'DW YoungDLS', 'Datou 1996', 'Dingar', 'Dirrival', 'DukeAnt', 'E8xE8', 'Eartheater', 'Ehime', 'EricDiao', 'Ericliu1912', 'Esiymbro', 'EveryDayMood', 'FRDian', 'Ffffnm', 'Formuse', 'Fu ht', 'GC MathTeacher', 'Ginson Lim', 'Gslin', 'Hamish', 'Happy60907', 'HellWK', 'Hijk910', 'Irycatewi', 'Jacklamf1d14', 'Jane9306', 'Jimmyshjj', 'Johnpoint', 'Joshua Zhan', 'Jpcomic', 'JuneAugust', 'Jyxyl9', 'KEPWAK111', 'KKD123', 'Kenwong', 'Kinghongkong', 'Kiraclyne', 'KirkLU', 'Koala0090', 'Kou Dou', 'Lam3011', 'Leiem', 'Lily135', 'Lokionly', 'Lt2818', 'Ltdccba', 'Meidosensei', 'NHC', 'Nightwayyyyy', 'Oymai', 'Pbdragonwang', 'Peter Yeung', 'Pokman817', 'R96340', 'Richard923888', 'Rowingbohe', 'Sanmosa', 'Shenchen13', 'Smallpei', 'Snowkylin', 'Soaring swallow', 'South Africa No.1', 'StevenK234', 'Suaveness', 'SyaNHs', 'Symplectopedia', 'Taiwania Justo', 'Tazkeung', 'Temp3600', 'Tenbeens', 'Theodore Xu', 'Tombus20032000', 'Universehk', 'Unseaingly', 'Vikarna', 'Waihorace', 'Wasami007', 'Wetrace', 'Whisper of the heart', 'Wi24rd', 'Will629', 'WilliamSkyWalk', 'Willy1018', 'Wolfch', 'Xiaoxiaoluopsu', 'Yangfl', 'Yenhochia', 'Yexiaoxing', 'Ypzrgy1225', 'Yuriy kosygin', 'Yvtou', 'ZEdzEd3168', 'Zetifree', 'ZhengZhou', 'だ*ぜ', '卡達', '吉太小唯', '和平至上', '夢蝶葬花', '大虾番', '太假真人', '小躍', '慕尼黑啤酒', '星巴克女王', '林天蓬', '無聊龍', '百战天虫', '胡葡萄', '脳内補完', '葉又嘉', '虚空幻翼', '蟲蟲飛', '金善賢', '靖天子', '魯班']
record = {}
alltypes = set()
ignore = 0
printunknownbranch = []
for row in res:
    revid = row[0]
    if row[1] is None:
#         print('ignore', row)
        ignore += 1
        continue
    user = row[1].decode()
    if row[2] is None:
#         print('ignore', row)
        ignore += 1
        continue
    summary = row[2].decode()
    ns = row[3]
    title = row[4].decode()
    
    atype = parse_type(summary, ns, title)
    if atype == ('unknown', 'unknown'):
        print(revid, atype, summary, ns, title, user)
    if get_branch(user) == 'unknown':
        if user not in printunknownbranch:
            print('unknown branch', user)
            printunknownbranch.append(user)
    
    if user not in record:
        record[user] = {}
        record[user]['total'] = 0
    if atype not in record[user]:
        record[user][atype] = 0
    record[user][atype] += 1
    record[user]['total'] += 1
    alltypes.add(atype)
print('ignore', ignore)
# print(record)
unknown branch 瑞丽江的河水
unknown branch Cz4523
56023414 ('unknown', 'unknown') 层级2:编辑测试,于[[:Supipara - Alice the magical conductor.]] 2 SmileYuki Bagakuco
unknown branch Larryasou
unknown branch Green Starbucks
unknown branch UsFTBu3TR
unknown branch 白布飘扬
unknown branch Lakokat
unknown branch Pseudo-Class
55970562 ('unknown', 'unknown') {{uw-3block}}: 違反[[WP:3RR|回退不過三原則]] 3 AnonymousPasser Manchiu
unknown branch Amigarse
unknown branch Temp3600
unknown branch 2679D
unknown branch 1j1z2
unknown branch Jane9306
unknown branch SyaNHs
unknown branch Johnpoint
55933909 ('unknown', 'unknown') 通知:有新邮件 3 Red16 WhitePhosphorus
unknown branch 解放的高加索
55924171 ('unknown', 'unknown') {{uw-3block}}: 违反[[WP:3RR|回退不过三原则]] 3 2001:B011:380E:2691:D42A:B794:A839:5BC0 Tigerzeng
55924165 ('unknown', 'unknown') {{uw-3block}}: 违反[[WP:3RR|回退不过三原则]] 3 Itw Tigerzeng
unknown branch *angys*
unknown branch 如沐西风
unknown branch Weather Synchronize
unknown branch YouTable
55907159 ('unknown', 'unknown') 通知:有新邮件 3 Stang Camouflaged Mirage
unknown branch Camouflaged Mirage
unknown branch TSN789
unknown branch ErwinTATP
unknown branch Jonashtand
unknown branch A2093064-test
ignore 834
alltypes = sorted(alltypes)
record = sorted(record.items(), key=lambda v:v[1]['total'], reverse=True)
# print(record)
text = """* [https://paws-public.wmflabs.org/paws-public/User:Xiplus/Twinkle%20usage.ipynb 來源]
* 本表分析了過去5000筆帶有[{} Twinkle標籤]的操作,根據摘要分析該操作屬於哪個功能
* 由於無法檢查已刪編輯,所以speedy-tag及相同性質的數據極度不正確
* 使用版本是檢查common.js而產生,不一定正確
""".format(
'https://zh.wikipedia.org/w/index.php?hidebots=1&hideWikibase=1&tagfilter=Twinkle&limit=500&days=30&title=Special:%E6%9C%80%E8%BF%91%E6%9B%B4%E6%94%B9&urlversion=2')

text += """{| class="wikitable sortable"
!用戶
!身分
!使用版本
!總和"""
for atype in alltypes:
    text += '\n! {}<br>{}'.format(atype[0], atype[1])
for user, userrecord in record:
    text += '\n|-'
    text += '\n'
    text += '| {} |'.format(user)
    if user in ADMINS:
        text += '| sysop |'
    elif user in PATROLLERS:
        text += '| patroller |'
    else:
        text += '| |'
    text += '| {} |'.format(get_branch(user))
    text += '| {} |'.format(userrecord['total'])
    for atype in alltypes:
        if atype in userrecord:
            text += '| {} |'.format(userrecord[atype])
        else:
            text += '| |'
text += "\n|}"
# print(text)
page = pywikibot.Page(site, "User:Xiplus/Twinkle使用統計")
page.text = text
page.save(summary="更新", minor=False)
Sleeping for 35.6 seconds, 2019-09-10 02:48:12
Page [[zh:User:Xiplus/Twinkle使用統計]] saved