#setup
import pywikibot
import datetime

# Wiki that is scanned for new pages and that hosts the league table
site = pywikibot.Site("zh-yue", "wikipedia")

# Where the league table is published
league_page_ns = 2 # 2=User
league_page_title = 'Deryck Chan/League'

# Target number of table rows, and how many days back to look
league_length = 10
days_to_trace = 2 #Positive integer

# Time window: from the server's current time back to `days_to_trace` days ago.
# site.server_time() gives a pywikibot.Timestamp, which accepts a
# datetime.timedelta being added to it.
py_timestamp_now = site.server_time()
py_timestamp_oldlimit = py_timestamp_now + datetime.timedelta(days=-days_to_trace)

# hanzi checker
# Adapted from
# https://stackoverflow.com/questions/30069846/how-to-find-out-chinese-or-japanese-character-in-a-string-in-python
#
# Inclusive code point ranges that count as CJK characters. Each entry is a
# dict with integer "from" and "to" keys. Block names follow the Unicode
# block list.
# Japanese Hiragana (U+3040-U+309F) and Katakana (U+30A0-U+30FF) are not
# included; they were commented out in the original table.
ranges = [
    {"from": 0x3300, "to": 0x33FF},    # CJK Compatibility
    {"from": 0xFE30, "to": 0xFE4F},    # CJK Compatibility Forms
    {"from": 0xF900, "to": 0xFAFF},    # CJK Compatibility Ideographs
    {"from": 0x2F800, "to": 0x2FA1F},  # CJK Compatibility Ideographs Supplement
    {"from": 0x2E80, "to": 0x2EFF},    # CJK Radicals Supplement
    {"from": 0x4E00, "to": 0x9FFF},    # CJK Unified Ideographs
    {"from": 0x3400, "to": 0x4DBF},    # CJK Unified Ideographs Extension A
    {"from": 0x20000, "to": 0x2A6DF},  # CJK Unified Ideographs Extension B
    {"from": 0x2A700, "to": 0x2B73F},  # CJK Unified Ideographs Extension C
    {"from": 0x2B740, "to": 0x2B81F},  # CJK Unified Ideographs Extension D
    {"from": 0x2B820, "to": 0x2CEAF},  # Extension E, included as of Unicode 8.0
    # Ideograph blocks added by later Unicode versions; without them the
    # characters in these blocks are silently left out of the count.
    {"from": 0x2CEB0, "to": 0x2EBEF},  # Extension F (Unicode 10.0)
    {"from": 0x2EBF0, "to": 0x2EE5F},  # Extension I (Unicode 15.1)
    {"from": 0x30000, "to": 0x3134F},  # Extension G (Unicode 13.0)
    {"from": 0x31350, "to": 0x323AF},  # Extension H (Unicode 15.0)
]

def is_cjk(char):
    """Return True if `char` lies inside any of the code point ranges in `ranges`.

    `char` must be a single-character string, because it is passed to ord();
    ord() raises TypeError for anything else.
    """
    codepoint = ord(char)
    # Generator rather than a list so any() can stop at the first match; the
    # loop variable is not called `range`, to avoid shadowing the builtin.
    return any(block["from"] <= codepoint <= block["to"] for block in ranges)

# This function is Deryck's new work
def cjkcount(string):
    """Return how many characters of `string` are CJK according to is_cjk()."""
    return sum(1 for char in string if is_cjk(char))

# Plan:
#   1. tag every new page with its title, creator, and size in CJK chars
#   2. sort the new pages by that size
#   3. take the top N entries

# New pages created between now and the old limit
yue_new_pages = site.newpages(
    start=py_timestamp_now,
    end=py_timestamp_oldlimit,
    namespaces={''},
)

# Count and tag
page_list = []
for new_page_record in yue_new_pages:
    # Element 0 of each record is a page object; element 4 is a string of
    # the creator username
    article = new_page_record[0]
    # Only the CJK character count of the wikitext is kept, not the text
    page_list.append({
        "cjkcount": cjkcount(article.get()),
        "creator": new_page_record[4],
        "title": article.title(),
    })

num_pages = len(page_list)
    

# Order the tagged pages from most to fewest CJK characters
def page_list_sortkey(listentry):
    """Sort key for page_list entries: the entry's CJK character count."""
    return listentry["cjkcount"]

page_list.sort(key=page_list_sortkey, reverse=True)


# Now get top N entries

def _pick_league_entries(sorted_entries, slots):
    """Choose up to `slots` entries, giving each creator one pick per round.

    `sorted_entries` is a list of dicts with a "creator" key, already in the
    order they should be considered (largest first). In every round the list
    of still-unpicked entries is walked once and the first entry of each
    creator not yet picked in that round is taken. Rounds repeat until
    `slots` entries are chosen or nothing is left.

    Returns a tuple (chosen, remaining); both lists keep the input order.
    The input list is not modified.
    """
    chosen = []
    remaining = list(sorted_entries)
    while len(chosen) < slots and remaining:
        users_represented = set()
        leftover = []
        # The survivors are collected in a new list instead of calling
        # remove() on the list being iterated: removing during iteration
        # makes the loop skip the entry that follows each picked one.
        for entry in remaining:
            if len(chosen) < slots and entry["creator"] not in users_represented:
                users_represented.add(entry["creator"])
                chosen.append(entry)
            else:
                leftover.append(entry)
        remaining = leftover
    return chosen, remaining


# Never ask for more rows than there are pages
league_length = min(league_length, num_pages)

# page_list_popping holds the entries that were not taken
league_list, page_list_popping = _pick_league_entries(page_list, league_length)
league_filled = len(league_list)

# print(league_list)

# Create Wikitext
# The table is assembled as a list of wikitext lines and joined at the end.
# Caption and column headings first:
wikitext_lines = [
    '{| class="wikitable" style="text-align: center"',
    "|+ 最近 " + str(days_to_trace) + " 日新作",
    "|-",
    "! 文",
    "! 作者",
    "! 字數",
]

# One table row per league entry: article link, creator's user page link,
# CJK character count
for row in league_list:
    wikitext_lines.extend([
        "|-",
        "| [[" + row["title"] + "]]",
        "| [[User:" + row["creator"] + "|" + row["creator"] + "]]",
        "| " + str(row["cjkcount"]),
    ])

# Close the table
wikitext_lines.append("|}")

league_wikitext = "\n".join(wikitext_lines)

# print(league_wikitext)

# Publish the table, but only onto a page that already exists and can be edited
league_page = pywikibot.Page(site, league_page_title, ns=league_page_ns)

page_is_writable = league_page.exists() and league_page.canBeEdited()
if page_is_writable:
    league_page.put(league_wikitext, summary="更新新文章排名")
else:
    print('failed')
# Output of a previous run: Page [[zh-yue:User:Deryck Chan/League]] saved