from IPython import display
import pywikibot
from pywikibot import pagegenerators

# Site handles for the two wikis this notebook compares: the English
# Wikipedia (local files) and Wikimedia Commons (the files being shadowed).
enwp = pywikibot.Site('en', 'wikipedia')
commons = pywikibot.Site('commons', 'commons')
# English Wikipedia tracking category whose members gen_file_lists() walks.
cat = pywikibot.Category(enwp, 'Category:Wikipedia files that shadow a file on Wikimedia Commons')


def commons_file_exists(cpage):
    """Report whether the page for the shadowed Commons file is present.

    A missing page means either that the Commons file has been deleted,
    or that the enwiki file was moved without the template being removed.

    Returns True when the page exists and False when it does not.
    """
    page_is_present = cpage.exists()
    return page_is_present


def local_deletion_process(page):
    """Check if the enwiki file is awaiting deletion.

    The categories of ``page`` are inspected in the order that
    ``page.categories()`` yields them, and the first one that is a known
    deletion category decides the result.

    Returns 'DR' if listed at FfD, 'DI' if a delayed speedy deletion,
    'SD' for speedy deletion, and '' for no deletion processes.
    """
    # Enwiki adds "All ..." categories for deletable images, so it is enough
    # to compare the page's own categories against these exact titles.
    sd_cats = (
        'Category:Candidates for speedy deletion',
        'Category:All Wikipedia files with the same name on Wikimedia Commons',
        'Category:All Wikipedia files with a different name on Wikimedia Commons',
    )
    di_cats = (
        'Category:All files proposed for deletion',
        'Category:All Wikipedia files with unknown source',
        'Category:All replaceable non-free use Wikipedia files',
        'Category:All orphaned non-free use Wikipedia files',
        'Category:All Wikipedia files with no non-free use rationale',
        'Category:All Wikipedia files with unknown copyright status',
        'Category:All Wikipedia files missing evidence of permission',
    )
    # The trailing comma matters: without it this is a plain string and a
    # membership test would match any substring of the title.
    dr_cats = ('Category:Wikipedia files for discussion',)

    # Exact-title lookup table. Later updates win, which keeps the
    # SD > DR > DI precedence should a title ever appear in two groups.
    status_by_title = dict.fromkeys(di_cats, 'DI')
    status_by_title.update(dict.fromkeys(dr_cats, 'DR'))
    status_by_title.update(dict.fromkeys(sd_cats, 'SD'))

    for pagecat in page.categories():
        status = status_by_title.get(pagecat.title(), '')
        if status:
            return status

    # No deletion category found: return the documented empty string
    # rather than falling off the end with None.
    return ''


def commons_deletion_process(cpage):
    """Check if the Commons file is awaiting deletion.

    The categories placed directly on ``cpage`` are checked first, because
    some deletion processes categorize straight into the base category.
    If none of them match, the parent categories of each page category are
    checked as well.

    Returns 'DR' if listed at DR, 'DI' if a delayed speedy deletion,
    'SD' for speedy deletion, and '' for no deletion processes.
    """
    # Commons does not add "All..." categories, so we have to climb the
    # category tree. Titles are compared exactly.
    # The trailing commas on the one-element tuples matter: without them the
    # values are plain strings and membership tests match any substring.
    sd_cats = ('Category:Candidates for speedy deletion',)
    di_cats = (
        'Category:Media without a license',
        'Category:Media without a source',
        'Category:Media missing permission',
    )
    dr_cats = ('Category:Deletion requests',)

    # Later updates win, preserving SD > DR > DI precedence on any overlap.
    status_by_title = dict.fromkeys(di_cats, 'DI')
    status_by_title.update(dict.fromkeys(dr_cats, 'DR'))
    status_by_title.update(dict.fromkeys(sd_cats, 'SD'))

    # Categories at this level (0 = on the page, 1 = parent, ...) and above
    # are never inspected.
    # NOTE(review): with a limit of 2 only the page's categories and their
    # direct parents are examined, although the original comment spoke of
    # "one or two categories above" -- confirm the intended depth.
    max_level = 2

    def check_cats(category):
        """Return the deletion code for a known deletion category, else ''."""
        return status_by_title.get(category.title(), '')

    def recurse_check_cats(category, level):
        """Check a category, then its parents, for a deletion category.

        Returns '' once the level limit or Category:CommonsRoot is reached,
        the deletion code if this category or one of its inspected ancestors
        is a known deletion category, and '' otherwise.
        """
        if level >= max_level or category.title() == 'Category:CommonsRoot':
            return ''
        status = check_cats(category)
        if status:
            return status
        if level + 1 >= max_level:
            # Every parent would be rejected by the level check above, so
            # do not fetch them at all.
            return ''
        for parent in category.categories():
            status = recurse_check_cats(parent, level + 1)
            if status:
                return status
        return ''

    # Fetch the page's categories once; both passes below reuse them.
    page_cats = list(cpage.categories())

    # First pass: only the categories directly on the page.
    for pagecat in page_cats:
        status = check_cats(pagecat)
        if status:
            return status

    # Second pass: nothing matched directly, so look at parent categories.
    for pagecat in page_cats:
        status = recurse_check_cats(pagecat, 0)
        if status:
            return status

    return ''
        

def gen_file_lists(ignore):
    """Walk the tracking category and sort every file into a bucket.

    Titles contained in ``ignore`` are skipped. Every bucket entry is a
    ``(page, cpage)`` tuple holding the enwiki page and the Commons page
    with the same title.

    Returns the tuple ``(no_commons_file, enwp_deletion, enwp_speedy,
    commons_deletion, commons_speedy, shadowing)``.
    """
    no_commons_file, shadowing = [], []
    enwp_deletion, enwp_speedy = [], []
    commons_deletion, commons_speedy = [], []

    for page in pagegenerators.CategorizedPageGenerator(cat):
        title = page.title()
        cpage = pywikibot.Page(commons, title)
        if title in ignore:
            continue

        pair = (page, cpage)

        if not commons_file_exists(cpage):
            # Nothing on Commons under this title (any more).
            no_commons_file.append(pair)
            continue

        commons_status = commons_deletion_process(cpage)
        local_status = local_deletion_process(page)

        if not (commons_status or local_status):
            # Not awaiting deletion on either wiki: a plain shadow (most common).
            shadowing.append(pair)
            continue

        # The two wikis are evaluated independently, so one file can end up
        # in both an enwiki list and a Commons list.
        if local_status in ('DR', 'DI'):
            enwp_deletion.append(pair)
        elif local_status == 'SD':
            enwp_speedy.append(pair)

        if commons_status in ('DR', 'DI'):
            commons_deletion.append(pair)
        elif commons_status == 'SD':
            commons_speedy.append(pair)

    return (no_commons_file, enwp_deletion, enwp_speedy,
            commons_deletion, commons_speedy, shadowing)
                
                
def output_lists(lists):
    """Render the sorted file lists as one markdown document.

    ``lists`` must unpack into six iterables of ``(page, cpage)`` pairs in
    the order ``(no_commons_file, enwp_deletion, enwp_speedy,
    commons_deletion, commons_speedy, shadowing)``.

    Returns the markdown text as a single string.
    """
    bullet = '* [{title}]({elink}) ([Commons]({clink}))\n'

    def bullets(pairs):
        # One bullet per pair: local title and link, then the Commons link.
        return ''.join(
            bullet.format(
                title=local.title(), elink=local.full_url(),
                clink=remote.full_url())
            for local, remote in pairs)

    (no_commons_file, enwp_deletion, enwp_speedy,
     commons_deletion, commons_speedy, shadowing) = lists

    sections = [
        '# {{Shadows Commons}}\n',
        '## Files with {{Shadows Commons}} but no Commons file\n',
        'These files may have shadowed a Commons file at some point, but they don\'t  anymore.\n\n',
        bullets(no_commons_file),
        '\n## Files that shadow Commons\n\n',
        bullets(shadowing),
        '\n## Files awaiting deletion on the English Wikipedia\n',
        '### Speedy Deletion\n\n',
        bullets(enwp_speedy),
        '### Delayed Deletion or FfD\n\n',
        bullets(enwp_deletion),
        '\n## Files awaiting deletion on Commons\n',
        '### Speedy Deletion\n\n',
        bullets(commons_speedy),
        '### Delayed Deletion or DR\n\n',
        bullets(commons_deletion),
    ]
    return ''.join(sections)
    
def __main__():
    """Build the report and show it as markdown in the notebook."""
    # An empty ignore list: no titles are excluded from the report.
    report = output_lists(gen_file_lists([]))
    display.display_markdown(report, raw=True)


__main__()

{{Shadows Commons}}

Files with {{Shadows Commons}} but no Commons file

These files may have shadowed a Commons file at some point, but they don't anymore.

Files that shadow Commons

Files awaiting deletion on the English Wikipedia

Speedy Deletion

Files awaiting deletion on Commons

Speedy Deletion

Delayed Deletion or DR

Licensing

Copyright 2019 AntiCompositeNumber

Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.

To fork:

  1. Open the raw page and save it to your computer
  2. Go to your PAWS control panel and sign in using OAuth
  3. Click Upload and upload the file from step 1
  4. To run, open the notebook and click Cell > Run All