#setup
import pywikibot
import datetime

# Wiki to scan: the "zh-yue" Wikipedia.
site = pywikibot.Site("zh-yue", "wikipedia")

# Upper bound on the number of rows in the league table.
league_length = 10
# How many days back to look for new pages; must be a positive integer.
days_to_trace = 7

# Time window, newest to oldest. server_time() returns a pywikibot.Timestamp,
# which supports datetime.timedelta arithmetic.
py_timestamp_now = site.server_time()
py_timestamp_oldlimit = py_timestamp_now - datetime.timedelta(days=days_to_trace)
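# Hanzi (CJK ideograph) checker, adapted from:
# https://stackoverflow.com/questions/30069846/how-to-find-out-chinese-or-japanese-character-in-a-string-in-python
# NOTE: the original snippet carried a "coding: utf-8" declaration here; Python
# only honours such a declaration on line 1 or 2 of a file, so it is inert at this position.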
# Inclusive Unicode code-point ranges that count as CJK characters.
# Japanese Hiragana (U+3040-U+309F) and Katakana (U+30A0-U+30FF) are
# intentionally NOT listed, so kana never counts towards a page's size.
ranges = [
    {"from": 0x3300, "to": 0x33FF},    # CJK Compatibility
    {"from": 0xFE30, "to": 0xFE4F},    # CJK Compatibility Forms
    {"from": 0xF900, "to": 0xFAFF},    # CJK Compatibility Ideographs
    {"from": 0x2F800, "to": 0x2FA1F},  # CJK Compatibility Ideographs Supplement
    {"from": 0x2E80, "to": 0x2EFF},    # CJK Radicals Supplement
    {"from": 0x4E00, "to": 0x9FFF},    # CJK Unified Ideographs
    {"from": 0x3400, "to": 0x4DBF},    # CJK Unified Ideographs Extension A
    {"from": 0x20000, "to": 0x2A6DF},  # CJK Unified Ideographs Extension B
    {"from": 0x2A700, "to": 0x2B73F},  # CJK Unified Ideographs Extension C
    {"from": 0x2B740, "to": 0x2B81F},  # CJK Unified Ideographs Extension D
    {"from": 0x2B820, "to": 0x2CEAF},  # Extension E (included as of Unicode 8.0)
]


def is_cjk(char):
    """Return True if the single character ``char`` falls in any of ``ranges``.

    Args:
        char: A string of length one.

    Returns:
        bool: True when the code point of ``char`` lies inside at least one
        inclusive ``from``/``to`` range, False otherwise.

    Raises:
        TypeError: Propagated from ``ord`` when ``char`` is not exactly one
            character long.
    """
    # Look the code point up once instead of once per range.
    code_point = ord(char)
    # A generator lets any() stop at the first matching range, and the loop
    # variable no longer shadows the builtin ``range``.
    return any(span["from"] <= code_point <= span["to"] for span in ranges)

# This function is Deryck's new work
def cjkcount(string):
    """Return how many characters of ``string`` satisfy ``is_cjk``.

    An empty string yields 0.
    """
    return sum(1 for char in string if is_cjk(char))
# The aim is to tag all pages with their title, creators, and size in CJK chars
# Then sort all the new pages by size
# Then get top N entries

# Pages created inside the time window, requested from newest to oldest.
yue_new_pages = site.newpages(start=py_timestamp_now, end=py_timestamp_oldlimit, namespaces={''})

# Tag every new page with its CJK character count, creator and title.
page_list = []
for page in yue_new_pages:
    # Each item is indexable: [0] is the page object, [4] the creator's username.
    new_page, creator_name = page[0], page[4]
    # Only the count derived from the wikitext is stored, not the wikitext itself.
    page_list.append({
        "cjkcount": cjkcount(new_page.get()),
        "creator": creator_name,
        "title": new_page.title(),
    })

# One entry was recorded per page seen.
num_pages = len(page_list)
    

# Sort by size
def page_list_sortkey(listentry):
    """Sort key for page entries: the value stored under ``"cjkcount"``."""
    size = listentry["cjkcount"]
    return size

# Order the entries in place, largest CJK count first.
page_list.sort(key=page_list_sortkey, reverse=True)

# The league can never hold more entries than there are new pages.
if num_pages < league_length:
    league_length = num_pages

# Fill the league in rounds. Within one round the remaining entries are walked
# in their current (size-descending) order and each creator may contribute at
# most one page; pages that were not taken stay available for the next round.
# Rounds repeat until the league holds ``league_length`` entries.
league_list = []
# Entries not yet placed in the league. Working on a copy leaves page_list intact.
page_list_popping = page_list.copy()
while len(league_list) < league_length and page_list_popping:
    users_represented = set()  # creators already picked in this round
    still_available = []
    for listentry in page_list_popping:
        league_is_full = len(league_list) >= league_length
        if league_is_full or listentry["creator"] in users_represented:
            # Keep for a later round. The remaining list is rebuilt rather than
            # calling remove() on the list being iterated, which made the
            # iterator skip the entry right after every pick.
            still_available.append(listentry)
            continue
        users_represented.add(listentry["creator"])
        league_list.append(listentry)
    page_list_popping = still_available

# Number of entries actually placed in the league.
league_filled = len(league_list)

print(league_list)
# Output of print(league_list) from an earlier run:
# [{'cjkcount': 4637, 'creator': '13568zhu', 'title': '第9屆中華民國立法委員選舉'}, {'cjkcount': 1920, 'creator': 'PQ77wd', 'title': '白雲東平站'}, {'cjkcount': 901, 'creator': 'Olympianngkithim', 'title': '西印度群島聖公會'}, {'cjkcount': 795, 'creator': 'SC96', 'title': 'Bristol K型'}, {'cjkcount': 743, 'creator': 'HenryLi', 'title': '百子里公園'}, {'cjkcount': 478, 'creator': 'Milegateh', 'title': '自願醫保計劃'}, {'cjkcount': 377, 'creator': 'Webeauty', 'title': '台北當代藝術館'}, {'cjkcount': 261, 'creator': 'Triroosy', 'title': '首部劇情電影計劃'}, {'cjkcount': 226, 'creator': '202.40.137.199', 'title': '潘霜霜'}, {'cjkcount': 166, 'creator': 'Morrishong', 'title': '走佬去馬拉'}]
# Assemble the league as a wikitable, one wikitext line per list element.
league_lines = [
    '{| class="wikitable" style="text-align: center"',
    "|+ 尋日新作",
    "|-",
    "! 文",
    "! 作者",
    "! 字數",
]

# One table row per league entry: article link, creator's user-page link, count.
for listentry in league_list:
    league_lines.append("|-")
    league_lines.append("| [[" + listentry["title"] + "]]")
    league_lines.append("| [[User:" + listentry["creator"] + "|" + listentry["creator"] + "]]")
    league_lines.append("| " + str(listentry["cjkcount"]))

# Close the table and join everything with newlines (no trailing newline).
league_lines.append("|}")
league_wikitext = "\n".join(league_lines)

print(league_wikitext)
# Output of print(league_wikitext) from an earlier run:
# {| class="wikitable" style="text-align: center"
# |+ 尋日新作
# |-
# ! 文
# ! 作者
# ! 字數
# |-
# | [[第9屆中華民國立法委員選舉]]
# | [[User:13568zhu|13568zhu]]
# | 4637
# |-
# | [[白雲東平站]]
# | [[User:PQ77wd|PQ77wd]]
# | 1920
# |-
# | [[西印度群島聖公會]]
# | [[User:Olympianngkithim|Olympianngkithim]]
# | 901
# |-
# | [[Bristol K型]]
# | [[User:SC96|SC96]]
# | 795
# |-
# | [[百子里公園]]
# | [[User:HenryLi|HenryLi]]
# | 743
# |-
# | [[自願醫保計劃]]
# | [[User:Milegateh|Milegateh]]
# | 478
# |-
# | [[台北當代藝術館]]
# | [[User:Webeauty|Webeauty]]
# | 377
# |-
# | [[首部劇情電影計劃]]
# | [[User:Triroosy|Triroosy]]
# | 261
# |-
# | [[潘霜霜]]
# | [[User:202.40.137.199|202.40.137.199]]
# | 226
# |-
# | [[走佬去馬拉]]
# | [[User:Morrishong|Morrishong]]
# | 166
# |}
# Page that receives the generated table.
league_page = pywikibot.Page(source=site, title="User:Deryck Chan/Sandbox", ns=2)

# Write only when the page already exists and reports itself as editable;
# otherwise just report the failure on stdout.
# NOTE(review): canBeEdited() may be deprecated in newer pywikibot releases;
# confirm against the installed version.
if not league_page.exists() or not league_page.canBeEdited():
    print('failed')
else:
    league_page.put(league_wikitext, summary="更新新文章排名")
# Output from an earlier run: Page [[zh-yue:User:Deryck Chan/Sandbox]] saved