#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
Bot to generate property usage statistics for authors, grouped by writing language

<s>Stolen</s>Adapted from https://github.com/multichill/toollabs/blob/master/bot/wikidata/painting_property_statistics.py

These are published at https://www.wikidata.org/wiki/Wikidata:WikiProject_Q5/Authors_by_language

"""
import logging

import pywikibot
import pywikibot.data.sparql
import collections

class AuthorsPropertyStatistics:
    """
    Generate statistics for authors by language in Wikidata.

    For every language found through P6886 the bot counts the matching
    authors and how many of them use each tracked property, then publishes
    the result as a sortable wikitable on ``targetPageTitle``.
    """

    # Tracked properties in column order: (Pid, wikitext column header).
    # Each header links to the property it actually reports on.
    DEFAULT_PROPERTIES = (
        (u'P18', u'[[Property:P18|image]]'),
        (u'P735', u'[[Property:P735|given name]]'),
        (u'P734', u'[[Property:P734|family name]]'),
        (u'P21', u'[[Property:P21|sex]]'),
        (u'P569', u'[[Property:P569|dob]]'),
        (u'P19', u'[[Property:P19|pob]]'),
        (u'P570', u'[[Property:P570|dod]]'),
        (u'P20', u'[[Property:P20|pod]]'),
        (u'P27', u'[[Property:P27|citizenship]]'),
        (u'P103', u'[[Property:P103|native language]]'),
    )

    # Graph pattern shared by every query so that all counts are taken over
    # the same set of authors. The UNION can match one author several times,
    # which is why the queries count DISTINCT ?author.
    AUTHOR_PATTERN = (
        u'  ?author wdt:P31 wd:Q5 .\n'
        u'  {?author wdt:P106 wd:Q36180.}'
        u' UNION {?author wdt:P106 [wdt:P279* wd:Q36180].}'
        u' UNION {?author wdt:P106 wd:Q482980.}'
    )

    # Prefix stripped from entity URIs to obtain the bare Qid.
    ENTITY_PREFIX = u'http://www.wikidata.org/entity/'

    def __init__(self):
        """
        Set what to work on and other variables here.
        """
        site = pywikibot.Site('en', 'wikipedia')
        self.repo = site.data_repository()
        # Only groups with strictly more hits than these thresholds are kept.
        self.collection_threshold = 0
        self.property_threshold = 0
        self.targetPageTitle = u'Wikidata:WikiProject Q5/Authors by language'
        self.properties = collections.OrderedDict(self.DEFAULT_PROPERTIES)
        # Filled by run(): Pid -> {language Qid: number of authors using it}.
        self.propertyData = {}

    @classmethod
    def _language_query(cls, threshold, prop=None):
        """
        Build the SPARQL query counting authors per language.

        :param threshold: Only languages with more authors than this are kept
        :param prop: Optional Pid; if set, only authors using it are counted
        :return: The query as a string
        """
        lines = [
            u'SELECT ?item (COUNT(DISTINCT ?author) as ?count) WHERE {',
            cls.AUTHOR_PATTERN,
            u'  ?author wdt:P6886 ?item .',
        ]
        if prop:
            lines.append(u'  FILTER EXISTS { ?author p:%s [] } .' % (prop,))
        lines.extend([
            u'} GROUP BY ?item',
            u'HAVING (?count > %s)' % (threshold,),
            u'ORDER BY DESC(?count)',
            u'LIMIT 1000',
        ])
        return u'\n'.join(lines)

    @classmethod
    def _totals_query(cls, prop=None):
        """
        Build the SPARQL query counting all authors.

        :param prop: Optional Pid; if set, only authors using it are counted
        :return: The query as a string
        """
        lines = [
            u'SELECT (COUNT(DISTINCT ?author) as ?count) WHERE {',
            cls.AUTHOR_PATTERN,
        ]
        if prop:
            lines.append(u'  FILTER EXISTS { ?author p:%s [] } .' % (prop,))
        lines.append(u'}')
        return u'\n'.join(lines)

    @staticmethod
    def _select(query):
        """
        Run a SPARQL query and return its result rows.

        :param query: The SPARQL query to run
        :return: List of result rows (dictionaries)
        :raises RuntimeError: If the query service returned no data. Failing
            here keeps run() from publishing a page built on missing data.
        """
        rows = pywikibot.data.sparql.SparqlQuery().select(query)
        if rows is None:
            raise RuntimeError('SPARQL query returned no data:\n%s' % (query,))
        return rows

    def _count_by_language(self, query):
        """
        Run a per-language count query and collect the counts.

        :param query: Query returning ?item (entity URI) and ?count
        :return: (Ordered) dictionary Qid -> count, in result order
        """
        counts = collections.OrderedDict()
        for row in self._select(query):
            qid = row.get('item').replace(self.ENTITY_PREFIX, u'')
            counts[qid] = int(row.get('count'))
        return counts

    @staticmethod
    def _percentage(part, whole):
        """
        Return part/whole as a percentage rounded to two decimals.

        :return: The percentage, or 0 when whole is zero or missing
        """
        if not whole:
            return 0
        return round(1.0 * part / whole * 100, 2)

    def get_collection_information(self):
        """
        Get the number of authors for every language.

        :return: (Ordered) dictionary with the author count per language Qid
        """
        query = self._language_query(self.collection_threshold)
        return self._count_by_language(query)

    def get_property_info(self, prop):
        """
        Get the usage counts for a property for the authors by writing languages

        :param prop: Wikidata Pid of the property
        :return: (Ordered) dictionary with the counts per language Qid
        """
        query = self._language_query(self.property_threshold, prop)
        return self._count_by_language(query)

    def get_author_totals(self, prop=None):
        """
        Get the total number of authors.

        The same author pattern as the per-language queries is used, so the
        totals row of the table really counts authors.

        :param prop: Wikidata Pid of the property. If set, just get the count
            of authors with that property
        :return: Number of authors found (0 if the query gave no row)
        """
        for row in self._select(self._totals_query(prop)):
            # Just one result, return that right away
            return int(row.get('count'))
        return 0

    def _build_page_text(self, language_counts, total_authors, property_totals):
        """
        Render the statistics as wikitext. Does no network access.

        Reads self.properties, self.propertyData and both thresholds.

        :param language_counts: Dictionary language Qid -> number of authors
        :param total_authors: Total number of authors for the bottom row
        :param property_totals: Dictionary Pid -> authors using that property
        :return: The complete page text
        """
        parts = [
            u'{{/Header}}\n{| class="wikitable sortable"\n',
            u'! colspan="2" |Top languages (Minimum %s authors)\n'
            % (self.collection_threshold,),
            u'! colspan="%s"|Top Properties (used at least %s times per languages)\n'
            % (len(self.properties), self.property_threshold),
            u'|-\n',
            u'! Language\n',
            u'! Count\n',
        ]
        for prop in self.properties:
            parts.append(u'! data-sort-type="number"|%s\n'
                         % (self.properties.get(prop),))

        for language, author_count in language_counts.items():
            parts.append(u'|-\n')
            parts.append(u'| {{Q|%s}}\n' % (language,))
            parts.append(u'| %s \n' % (author_count,))
            for prop in self.properties:
                # A language missing from the property result has no usage.
                prop_count = self.propertyData.get(prop, {}).get(language) or 0
                parts.append(u'| {{/Cell|%s|%s}}\n' % (
                    self._percentage(prop_count, author_count), prop_count))

        parts.append(
            u'|- class="sortbottom"\n'
            u'|\'\'\'Totals\'\'\' <small>(all authors)</small>:\n| %s\n'
            % (total_authors,))
        for prop in self.properties:
            prop_total = property_totals.get(prop) or 0
            parts.append(u'| {{/Cell|%s|%s}}\n' % (
                self._percentage(prop_total, total_authors), prop_total))
        parts.append(u'|}\n')
        parts.append(u'[[Category:WikiProject Q5|Author statistics]]\n')
        return u''.join(parts)

    def run(self):
        """
        Starts the robot and do all the work.

        All data is collected first; the page is only written once every
        query has succeeded.
        """
        logging.info("Retrieving collection information...")
        collections_counts = self.get_collection_information()
        logging.info("Collection retrieved: %s", len(collections_counts))
        for prop in self.properties:
            self.propertyData[prop] = self.get_property_info(prop)

        # Get the totals
        totalworks = self.get_author_totals()
        property_totals = collections.OrderedDict(
            (prop, self.get_author_totals(prop=prop))
            for prop in self.properties)

        text = self._build_page_text(
            collections_counts, totalworks, property_totals)

        page = pywikibot.Page(self.repo, title=self.targetPageTitle)
        summary = u'Author property usage stats'
        page.put(text, summary)


def main(*args):
    """
    Entry point: create the statistics bot and let it do all the work.

    :param args: Accepted but not used.
    """
    AuthorsPropertyStatistics().run()


if __name__ == "__main__":
    main()