import pywikibot
import re
import os
import pymysql
site = pywikibot.Site('zh', 'wikipedia')
site.login()
host = os.environ['MYSQL_HOST']
user = os.environ['MYSQL_USERNAME']
password = os.environ['MYSQL_PASSWORD']
conn = pymysql.connect(
    host=host,
    user=user,
    password=password,
    charset="utf8"
)
with conn.cursor() as cur:
    cur.execute('use zhwiki_p')
    cur.execute("""
        SELECT ct_rev_id, actor_name, comment_text, page_namespace, page_title
        FROM (
          SELECT *
          FROM change_tag
          WHERE ct_tag_id = 1
          ORDER BY ct_id DESC
          LIMIT 5000
        ) change_tag
        LEFT JOIN revision
        ON ct_rev_id = rev_id
        LEFT JOIN actor
        ON rev_actor = actor_id
        LEFT JOIN comment
        ON rev_comment_id = comment_id
        LEFT JOIN page
        ON rev_page = page_id
    """)
    res = cur.fetchall()
def parse_type(summary, ns, title):
    # arv
    if ns == 4 and title == '元維基用戶查核請求' and re.search(r'^(報告|报告)', summary):
        return 'arv', 'reportrfcu'
    if ns == 3 and re.search(r'通知用戶查核請求|通知用户查核请求', summary):
        return 'arv', 'noticerfcu'
    if ns == 4 and title == '当前的破坏' and re.search(r'^(報告|报告)', summary):
        return 'arv', 'reportaiv'
    if ns == 4 and title == '需要管理員注意的用戶名' and re.search(r'^(报告|新提報|新提报)', summary):
        return 'arv', 'reportuaa'

    # batch
    if re.search(r'移除已被刪除檔案', summary):
        return 'batch', 'delete'

    # block
    if ns == 3 and re.search(r'{{uw-(ublock|block1|block2|block3|vblock|dblock|3block)|{{blocked proxy}}', summary):
        return 'block', 'notice'
    if ns == 2 and re.search(r'(標記被永久封[禁鎖]的用戶頁|标记被永久封禁的用户页)', summary):
        return 'block', 'taguser'
    
    # close
    CLOSEREASON = (r'(轉移至維基導遊|快速保留|請求理由消失|请求理由消失|重定向|请求无效|請求無效|保留|删除|刪除|提删者未取得提删资格'
        +'|提刪者未取得提刪資格|转移至维基共享资源|移动|并入|併入|轉交侵權|转交侵权|无共识|重複提出|移動|無共識|重复提出|转移至|目标页面或档案不存在,无效)')
    if (ns == 4 and re.search(r'^(頁面存廢討論|檔案存廢討論)/記錄/', title)
            and re.search(CLOSEREASON, summary)):
        return 'close', 'close'
    if re.search(r'Wikipedia:(頁面存廢討論|檔案存廢討論)/記錄/\d+/\d+/\d+\]\]:.*'+CLOSEREASON, summary):
        return 'close', 'tagtalk'
    if re.search(r'(存廢討論關閉|存废讨论关闭):', summary):
        return 'close', 'rmtem'
    
    # config
    if ns == 2 and re.search(r'(保存Twinkle参数设置|儲存Twinkle偏好設定)', summary):
        return 'config', 'config'
    
    # copyvio
    if ns == 4 and title == '頁面存廢討論/疑似侵權' and re.search(r'(添加|加入)\[\[', summary):
        return 'copyvio', 'report'
    if ns == 3 and re.search(r'通知:[页頁]面.+(疑似侵犯版权|疑似侵犯版權)', summary):
        return 'copyvio', 'notice'
    if re.search(r'(本頁面疑似侵犯版權|本页面疑似侵犯版权)', summary):
        return 'copyvio', 'tag'
    
    # fluff
    if re.search(r'^(回退|撤销|还原)', summary):
#         if '出於[[WP:AGF|善意]]的編輯' in summary:
        return 'fluff', 'fluff'
#         if '回退到由' in summary:
#             return 'fluff', 'torev'
#         if '做出的最后一个修订版本' in summary:
#             return 'fluff', 'norm'
    
    # image
    if ns == 4 and title == '檔案存廢討論/無版權訊息或檔案來源' and re.search(r'^(添加|加入)', summary):
        return 'image', 'report'
        
    # protect
    if ns == 4 and title == '请求保护页面' and re.search(r'请求对.+保护', summary):
        return 'protect', 'request'
    if re.search(r'移除保護模板|(加入|添加){{pp-', summary):
        return 'protect', 'tag'
    if re.search(r'已保护|移除保护|保护等级', summary):
        return 'protect', 'protect'
    
    # speedy
    if ns == 2 and re.search(r'(记录对|記錄對).+(的快速删除提名|的快速刪除提名)', summary):
        return 'speedy', 'record'
    if ns == 3 and re.search(r'通知:.+(快速删除提名|快速刪除提名)', summary):
        return 'speedy', 'notice'
    if re.search('(请求|請求).*(快速删除|快速刪除)', summary):
        return 'speedy', 'tag'

    # tag
    if ns in [0, 4, 10, 118] and re.search(r'(添加|加入|移除).+(标记|標記)', summary):
        return 'tag', 'tag'
    if (ns == 6 or (ns == 4 and title == '沙盒')) and re.search(r'^添加{{', summary):
        return 'tag', 'tag'
    if ns == 4 and title == '关注度/提报' and re.search(r'(添加|加入)\[\[', summary):
        return 'tag', 'reportnp'
    if ns == 1 and re.search(r'(请求|請求).+(合并|合併)', summary):
        return 'tag', 'mergetagtalk'
    if ns == 1 and re.search(r'(请求移动至|請求移動至)', summary):
        return 'tag', 'movetagtalk'
    
    # talkback
    if ns == 3 and re.search(r'(回复通告|回覆通告|通知:有新郵件)', summary):
        return 'talkback', 'talkback'
    
    # unlink
    if re.search(r'(注释出文件使用|取消链接到)', summary):
        return 'unlink', 'unlink'
    
    # warn
    if ns == 3 and re.search(r'((層級|层级)(1|2|3|4|4im)|单层级通知|單層級通知|单层级警告|單層級警告|提示):|您翻譯的質量有待改善', summary):
        return 'warn', 'warn'
    
    # xfd
    if ns == 4 and re.search(r'^(頁面存廢討論|檔案存廢討論)/記錄/', title) and re.search(r'(添加|加入)\[\[', summary):
        return 'xfd', 'report'
    if ns == 3 and re.search(r'通知:(页面|文件).+存废讨论提名', summary):
        return 'xfd', 'notice'
    if re.search(r'(页面存废讨论|頁面存廢討論|檔案存廢討論|文件存废讨论):', summary):
        return 'xfd', 'tag'
    
    return 'unknown', 'unknown'
BRAMCHS = {
    'xiplus': [
        '-Zest',
        '1233',
        '500000000006城',
        '5LZ',
        '94rain',
        'A1Cafel',
        'A2093064-test',
        'A2569875',
        'A84396572',
        'AAparajitaa',
        'AINH',
        'Air7538',
        'AlexLeeCN',
        'AlexLeePublic',
        'Alneth',
        'Amigarse',
        'Amijide',
        'AndyAndyAndyAlbert',
        'Aotfs2013',
        'ArthurLau1997',
        'AT',
        'Bagakuco',
        'BenedictusFX',
        'Bnb674',
        'Brror',
        'Camouflaged Mirage',
        'Catherine Laurence',
        'Classy Melissa',
        'Dabao qian',
        'Dqwyy',
        'Dukawana',
        'Ericliu1912',
        'EtaoinWu',
        'EveryDayMood',
        'Exert yourself',
        'FireFeather',
        'Formosa loves river',
        'Fran1001hk',
        'Hal 0005',
        'Hamish',
        'Hjh474',
        'Hktyphoon',
        'Howew',
        "It's gonna be awesome",
        'Jimmyshjj',
        'Johnpoint',
        'Joshua Zhan',
        'Kalicine730',
        'Kenpoon',
        'KirkLU',
        'Kuailong',
        'Kuon.Haku',
        'Lakokat',
        'Leolokey',
        'LiangZhiPeng',
        'LimSoo-jung',
        'Lnnocentius',
        'Mageharu',
        'Manchiu',
        'Masdggg',
        'MCC214',
        'MeritTim',
        'Michael Chan',
        'NanoKid',
        'NHC',
        'Njzjz',
        'Ohtashinichiro',
        'OoXoxoXoO',
        'Pseudo Classes',
        'Rowingbohe',
        'Sanmosa',
        'SCP-2000',
        'Shangkuanlc',
        'SmallTim',
        'Soaring swallow',
        'South Africa No.1',
        'Spaghet-Ti',
        'SSYoung',
        'Stang',
        'SteepPeak',
        'StevenK234',
        'Streetdeck',
        'Suaveness',
        'Subscriptshoe9',
        'Sunny00217',
        'Super Wang',
        'Taiwania Justo',
        'Temp3600',
        'Tenbeens',
        'Tigerzeng',
        'TimChen',
        'TimWu007',
        'Tommyang',
        'TongcyDai',
        'ToothpasteLake',
        'Toxic-Carrotkit',
        'UsFTBu3TR',
        'Vikarna',
        'Waikijacky',
        'Walter Grassroot',
        'Wcam',
        'Weesteed',
        'WhitePhosphorus',
        'WilliamSkyWalk',
        'Willy1018',
        'Wolfch',
        'Wong128hk',
        'Xiplus',
        'YouTable',
        'え06',
        'だ*ぜ',
        '人人生來平等',
        '吉太小唯',
        '夢蝶葬花',
        '小老虎3018',
        '春卷柯南',
        '李炯霄',
        '無聊龍',
        '燃灯',
        '社会我佩奇',
        '維基小精靈',
        '舞月書生',
        '蟲蟲飛',
        '靖天子',
        '高文海',
    ],
    '94rain': [],
    'Vanished user 1929210': [
        '333-blue',
        'BrandNew Jim Zhang',
        'Jackchows',
        'Jesus estw',
        'QBear',
        'S099001',
        'WAN233',
        'Wanchens',
        'Wcam',
        '哈哈常笑',
        '夢蝶葬花',
        '星巴克女王',
    ],
    'Kuon.Haku': [
        'Fran1001hk'
    ],
}


def get_branch(user):
    for branch in BRAMCHS:
        if user in BRAMCHS[branch]:
            return branch
    return 'gadget'
ADMINS = ['AT', 'Acepatrick', 'AddisWang', 'Alberth2', 'Alexander Misel', 'Alexsh', 'Alltonight', 'Antigng', 'Aoke1989', 'Aotfs2013', 'Bencmq', 'Bluedeck', 'BrockF5', 'Cdip150', 'Ch.Andrew', 'Chiefwei', 'Cloudcolors', 'Cp111', 'DreamLiner', 'Father vice', 'Ffaarr', 'Gakmo', 'Gzdavidwong', 'Hat600', 'Htchien', 'Iokseng', 'Jasonzhuocn', 'Jimmy Xu', 'Jimmy-abot', 'Jusjih', 'KOKUYO', 'Kalicine730', 'Kallgan', 'Kegns', 'Kevinhksouth', 'Koika', 'Kolyma', 'Kuailong', 'Kuon.Haku', 'Lakokat', 'Lanwi1', 'Liangent', 'Liangent-adminbot', 'Manchiu', 'Minghong', 'Mongol', 'Munford', 'Mys 721tx', 'Nbfreeh', 'Nlu', 'Outlookxp', 'Pedist', 'SElephant', 'Seasurfer', 'Shinjiman', 'Shizhao', 'Stang', 'Stewart', 'Subscriptshoe9', 'T.A Shirakawa', 'Techyan', 'Tigerzeng', 'Wcam', 'WhitePhosphorus', 'Wing', 'Wong128hk', 'Wright.one', 'Ws227', 'Xiplus', 'Xiplus-abot', 'Yhz1221', 'Zy26', '乌拉跨氪', '唐戈', '妙詩人', '春卷柯南', '武藏', '淺藍雪', '滥用过滤器', '燃玉', '霧島聖', '蟲蟲飛']
PATROLLERS = ['*angys*', '-Zest', '1233', '2thuriel', '94rain', 'A1Cafel', 'A22234798', 'A2569875', 'A635683851', 'A84396572', 'AINH', 'Aa1388zz', 'Alvin Lee', 'Alvinz', 'Amazingloong', 'An Macanese', 'AndyAndyAndyAlbert', 'Anthony Fok', 'ArthurLau1997', 'Assanges', 'BakaChinatsu', 'Billytanghh', 'Bowleerin', 'BoyuZhang1998', 'Brror', 'Bxxiaolin', 'CBNWGBB', 'Catherine Laurence', 'Cbliu', 'Changnick', 'Chenyijia001', 'Choihei', 'Clear Sky C', 'Coekon', 'Cohaf', 'CopperSulfate', 'Cwek', 'DAN DAN 66', 'DW YoungDLS', 'Datou 1996', 'Dingar', 'Dirrival', 'DukeAnt', 'E8xE8', 'Eartheater', 'Ehime', 'EricDiao', 'Ericliu1912', 'Esiymbro', 'EveryDayMood', 'FRDian', 'Ffffnm', 'Formuse', 'Fu ht', 'GC MathTeacher', 'Ginson Lim', 'Gslin', 'Hamish', 'Happy60907', 'HellWK', 'Hijk910', 'Irycatewi', 'Jacklamf1d14', 'Jane9306', 'Jimmyshjj', 'Johnpoint', 'Joshua Zhan', 'Jpcomic', 'JuneAugust', 'Jyxyl9', 'KEPWAK111', 'KKD123', 'Kenwong', 'Kinghongkong', 'Kiraclyne', 'KirkLU', 'Koala0090', 'Kou Dou', 'Lam3011', 'Leiem', 'Lily135', 'Lokionly', 'Lt2818', 'Ltdccba', 'Meidosensei', 'NHC', 'Nightwayyyyy', 'Oymai', 'Pbdragonwang', 'Peter Yeung', 'Pokman817', 'R96340', 'Richard923888', 'Rowingbohe', 'Sanmosa', 'Shenchen13', 'Smallpei', 'Snowkylin', 'Soaring swallow', 'South Africa No.1', 'StevenK234', 'Suaveness', 'SyaNHs', 'Symplectopedia', 'Taiwania Justo', 'Tazkeung', 'Temp3600', 'Tenbeens', 'Theodore Xu', 'Tombus20032000', 'Universehk', 'Unseaingly', 'Vikarna', 'Waihorace', 'Wasami007', 'Wetrace', 'Whisper of the heart', 'Wi24rd', 'Will629', 'WilliamSkyWalk', 'Willy1018', 'Wolfch', 'Xiaoxiaoluopsu', 'Yangfl', 'Yenhochia', 'Yexiaoxing', 'Ypzrgy1225', 'Yuriy kosygin', 'Yvtou', 'ZEdzEd3168', 'Zetifree', 'ZhengZhou', 'だ*ぜ', '卡達', '吉太小唯', '和平至上', '夢蝶葬花', '大虾番', '太假真人', '小躍', '慕尼黑啤酒', '星巴克女王', '林天蓬', '無聊龍', '百战天虫', '胡葡萄', '脳内補完', '葉又嘉', '虚空幻翼', '金善賢', '靖天子', '魯班']
record = {}
alltypes = set()
ignore = 0
printunknownbranch = []
for row in res:
    revid = row[0]
    if row[1] is None:
#         print('ignore', row)
        ignore += 1
        continue
    user = row[1].decode()
    if row[2] is None:
#         print('ignore', row)
        ignore += 1
        continue
    summary = row[2].decode()
    ns = row[3]
    title = row[4].decode()
    
    atype = parse_type(summary, ns, title)
    if atype == ('unknown', 'unknown'):
        print(revid, atype, summary, ns, title, user)
    if get_branch(user) == 'unknown':
        if user not in printunknownbranch:
            print('unknown branch', user)
            printunknownbranch.append(user)
    
    if user not in record:
        record[user] = {}
        record[user]['total'] = 0
    if atype not in record[user]:
        record[user][atype] = 0
    record[user][atype] += 1
    record[user]['total'] += 1
    alltypes.add(atype)
print('ignore', ignore)
# print(record)
ignore 493
alltypes = sorted(alltypes)
record = sorted(record.items(), key=lambda v:v[1]['total'], reverse=True)
# print(record)
text = """* [https://paws-public.wmflabs.org/paws-public/User:Xiplus/Twinkle%20usage.ipynb 來源]
* 本表分析了過去5000筆帶有[{} Twinkle標籤]的操作,根據摘要分析該操作屬於哪個功能
* 由於無法檢查已刪編輯,所以speedy-tag及相同性質的數據極度不正確
* 使用版本是檢查common.js而產生,不一定正確
""".format(
'https://zh.wikipedia.org/w/index.php?hidebots=1&hideWikibase=1&tagfilter=Twinkle&limit=500&days=30&title=Special:%E6%9C%80%E8%BF%91%E6%9B%B4%E6%94%B9&urlversion=2')

text += """{| class="wikitable sortable"
!用戶
!身分
!使用版本
!總和"""
for atype in alltypes:
    text += '\n! {}<br>{}'.format(atype[0], atype[1])
for user, userrecord in record:
    text += '\n|-'
    text += '\n'
    text += '| {} |'.format(user)
    if user in ADMINS:
        text += '| sysop |'
    elif user in PATROLLERS:
        text += '| patroller |'
    else:
        text += '| |'
    text += '| {} |'.format(get_branch(user))
    text += '| {} |'.format(userrecord['total'])
    for atype in alltypes:
        if atype in userrecord:
            text += '| {} |'.format(userrecord[atype])
        else:
            text += '| |'
text += "\n|}"
# print(text)
page = pywikibot.Page(site, "User:Xiplus/Twinkle使用統計")
page.text = text
page.save(summary="更新", minor=False)
Page [[zh:User:Xiplus/Twinkle使用統計]] saved