import pymysql
import os
from pprint import pprint
import json

def get_mysql():
    """Open a pymysql connection configured from the environment.

    Reads MYSQL_HOST, MYSQL_USERNAME and MYSQL_PASSWORD (raises KeyError
    if any is missing) and returns a new connection.
    """
    env = os.environ
    return pymysql.connect(
        host=env['MYSQL_HOST'],
        user=env['MYSQL_USERNAME'],
        password=env['MYSQL_PASSWORD'],
        charset='utf8',
    )
# Find wikidata Q ids for all pages in category. If the page is in a talk namespace, we have
# to look in the related article namespace for Q ids.

def get_qid(cur, page_id):
    """Look up the wikidata item id ('wikibase_item') for a page.

    Args:
        cur: open cursor on a wiki replica database (e.g. enwiki_p).
        page_id: numeric page id to look up in page_props.

    Returns:
        The raw pp_value for the page's wikidata item (bytes, as returned
        by the driver), or None when the page has no wikibase_item entry.
    """
    # BUG FIX: the parameter was named page_title while the execute() call
    # used the undefined name page_id, so every call raised NameError.
    # Callers pass a page id positionally, so the parameter is renamed.
    cur.execute("""
        select pp_value
        from page_props
        where pp_propname = 'wikibase_item'
        and pp_page = %s
    """, page_id)
    res = cur.fetchall()
    if res:
        return res[0][0]
    return None
    
# Collect the wikidata Q id of every page in the tracking category.
# Category members in the Talk namespace (1) are first mapped to the
# article of the same title in namespace 0, because page_props carries
# wikibase_item only on the article page.
source_pages = {}
with get_mysql().cursor() as cur:
    cur.execute("use enwiki_p")
    cur.execute("""
        select page_namespace, page_title, page_id
        from categorylinks
        join page on page_id = cl_from
        where cl_to = 'WikiProject_Wiki_Loves_Women_translation'
    """)
    for page_namespace, page_title, page_id in cur.fetchall():
        if page_namespace == 1:
            # Talk page: look for the article with the same title.
            cur.execute("""
                select page_id
                from page
                where page_namespace = 0
                and page_title = %s
            """, page_title)
            article = cur.fetchall()
            if article:
                page_namespace, page_id = 0, article[0][0]
        if page_namespace != 0:
            continue
        qid = get_qid(cur, page_id)
        if qid is None:
            continue
        source_pages[page_id] = {
            'page_title': page_title.decode('utf8'),
            'page_id': page_id,
            'qid': qid.decode('utf8'),
        }

pprint(source_pages)

#with open('query.json', 'r') as f:
#    content = json.loads(f.read())
#print(content[0])
{156899: {'page_id': 156899, 'page_title': 'Yaa_Asantewaa', 'qid': 'Q467803'},
 5017425: {'page_id': 5017425, 'page_title': 'Leila_Ahmed', 'qid': 'Q438645'},
 5572213: {'page_id': 5572213,
           'page_title': 'Anna_Tibaijuka',
           'qid': 'Q456003'},
 7788166: {'page_id': 7788166, 'page_title': 'Flora_Nwapa', 'qid': 'Q5460344'},
 8618046: {'page_id': 8618046,
           'page_title': 'Martha_Karua',
           'qid': 'Q3295164'},
 8935089: {'page_id': 8935089,
           'page_title': 'Funmilayo_Ransome-Kuti',
           'qid': 'Q226122'},
 12694687: {'page_id': 12694687,
            'page_title': 'Nozizwe_Madlala-Routledge',
            'qid': 'Q731819'},
 18579790: {'page_id': 18579790,
            'page_title': 'Samia_Yusuf_Omar',
            'qid': 'Q290451'},
 22880663: {'page_id': 22880663,
            'page_title': 'Fatou_Bensouda',
            'qid': 'Q508482'},
 23323043: {'page_id': 23323043,
            'page_title': 'Cri-Zelda_Brits',
            'qid': 'Q3525768'},
 30724416: {'page_id': 30724416,
            'page_title': 'Asmaa_Mahfouz',
            'qid': 'Q289706'},
 34126022: {'page_id': 34126022,
            'page_title': 'Maggie_Laubser',
            'qid': 'Q6730265'},
 34140912: {'page_id': 34140912,
            'page_title': 'Jacqueline_Moudeina',
            'qid': 'Q1677830'},
 43518832: {'page_id': 43518832,
            'page_title': 'Frances_Ames',
            'qid': 'Q17626455'},
 44258772: {'page_id': 44258772,
            'page_title': 'Benin_ivory_mask',
            'qid': 'Q18385381'},
 48046320: {'page_id': 48046320,
            'page_title': 'Chinwendu_Ihezuo',
            'qid': 'Q21621526'},
 49114443: {'page_id': 49114443, 'page_title': 'Malouma', 'qid': 'Q12244275'},
 49333348: {'page_id': 49333348,
            'page_title': 'Deolinda_Rodríguez_de_Almeida',
            'qid': 'Q22665691'},
 49362591: {'page_id': 49362591,
            'page_title': 'Fatima_Massaquoi',
            'qid': 'Q22712178'},
 51509106: {'page_id': 51509106,
            'page_title': 'Nassima_Saifi',
            'qid': 'Q26844464'}}
# Ask SPARQL for all pages in all language wikipedias
# Build one SPARQL query asking for every sitelink ("wikipedia" group)
# of every collected item; the %s expansion plus the ' wd:' join turns
# the qid list into "wd:Q1 wd:Q2 ...".
sparql_query = """
SELECT *
WHERE {
    VALUES ?item { wd:%s }
    ?link schema:about ?item .
    ?link schema:name ?name .
    ?link schema:isPartOf/wikibase:wikiGroup "wikipedia" .
}
""" % (' wd:'.join([x['qid'] for x in source_pages.values()]))
print(sparql_query)

import requests
r = requests.get('https://query.wikidata.org/sparql', params={
    'format': 'json',
    'query': sparql_query,
})
# BUG FIX: on server-side failures (e.g. the "Query deadline is expired"
# timeout seen previously) the endpoint returns a non-JSON error body and
# json.loads raised ValueError. Fail fast with a clear HTTP error before
# attempting to parse the response.
r.raise_for_status()
content = json.loads(r.text)
pprint(content)
SELECT *
WHERE {
    VALUES ?item { wd:Q289706 wd:Q21621526 wd:Q3525768 wd:Q22665691 wd:Q5460344 wd:Q6730265 wd:Q438645 wd:Q12244275 wd:Q290451 wd:Q17626455 wd:Q226122 wd:Q26844464 wd:Q467803 wd:Q18385381 wd:Q456003 wd:Q508482 wd:Q731819 wd:Q1677830 wd:Q3295164 wd:Q22712178 }
    ?link schema:about ?item .
    ?link schema:name ?name .
    ?link schema:isPartOf/wikibase:wikiGroup "wikipedia" .
}

b'SPARQL-QUERY: queryStr=\nSELECT *\nWHERE {\n    VALUES ?item { wd:Q289706 wd:Q21621526 wd:Q3525768 wd:Q22665691 wd:Q5460344 wd:Q6730265 wd:Q438645 wd:Q12244275 wd:Q290451 wd:Q17626455 wd:Q226122 wd:Q26844464 wd:Q467803 wd:Q18385381 wd:Q456003 wd:Q508482 wd:Q731819 wd:Q1677830 wd:Q3295164 wd:Q22712178 }\n    ?link schema:about ?item .\n    ?link schema:name ?name .\n    ?link schema:isPartOf/wikibase:wikiGroup "wikipedia" .\n}\n\njava.util.concurrent.ExecutionException: java.util.concurrent.ExecutionException: org.openrdf.query.QueryInterruptedException: java.lang.RuntimeException: java.util.concurrent.ExecutionException: com.bigdata.bop.engine.QueryTimeoutException: Query deadline is expired.\n\tat java.util.concurrent.FutureTask.report(FutureTask.java:122)\n\tat java.util.concurrent.FutureTask.get(FutureTask.java:192)\n\tat com.bigdata.rdf.sail.webapp.BigdataServlet.submitApiTask(BigdataServlet.java:286)\n\tat com.bigdata.rdf.sail.webapp.QueryServlet.doSparqlQuery(QueryServlet.java:653)\n\tat com.bigdata.rdf.sail.webapp.QueryServlet.doGet(QueryServlet.java:288)\n\tat com.bigdata.rdf.sail.webapp.RESTServlet.doGet(RESTServlet.java:240)\n\tat com.bigdata.rdf.sail.webapp.MultiTenancyServlet.doGet(MultiTenancyServlet.java:271)\n\tat javax.servlet.http.HttpServlet.service(HttpServlet.java:687)\n\tat javax.servlet.http.HttpServlet.service(HttpServlet.java:790)\n\tat org.eclipse.jetty.servlet.ServletHolder.handle(ServletHolder.java:769)\n\tat org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:585)\n\tat org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:143)\n\tat org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:577)\n\tat org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:223)\n\tat org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1125)\n\tat org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:515)\n\tat 
org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:185)\n\tat org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1059)\n\tat org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:141)\n\tat org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:215)\n\tat org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:110)\n\tat org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:97)\n\tat org.eclipse.jetty.server.Server.handle(Server.java:497)\n\tat org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:311)\n\tat org.eclipse.jetty.server.HttpConnection.onFillable(HttpConnection.java:248)\n\tat org.eclipse.jetty.io.AbstractConnection$2.run(AbstractConnection.java:540)\n\tat org.eclipse.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:610)\n\tat org.eclipse.jetty.util.thread.QueuedThreadPool$3.run(QueuedThreadPool.java:539)\n\tat java.lang.Thread.run(Thread.java:745)\nCaused by: java.util.concurrent.ExecutionException: org.openrdf.query.QueryInterruptedException: java.lang.RuntimeException: java.util.concurrent.ExecutionException: com.bigdata.bop.engine.QueryTimeoutException: Query deadline is expired.\n\tat java.util.concurrent.FutureTask.report(FutureTask.java:122)\n\tat java.util.concurrent.FutureTask.get(FutureTask.java:192)\n\tat com.bigdata.rdf.sail.webapp.QueryServlet$SparqlQueryTask.call(QueryServlet.java:864)\n\tat com.bigdata.rdf.sail.webapp.QueryServlet$SparqlQueryTask.call(QueryServlet.java:670)\n\tat com.bigdata.rdf.task.ApiTaskForIndexManager.call(ApiTaskForIndexManager.java:68)\n\tat java.util.concurrent.FutureTask.run(FutureTask.java:266)\n\tat java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)\n\tat java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)\n\t... 
1 more\nCaused by: org.openrdf.query.QueryInterruptedException: java.lang.RuntimeException: java.util.concurrent.ExecutionException: com.bigdata.bop.engine.QueryTimeoutException: Query deadline is expired.\n\tat com.bigdata.rdf.sail.Bigdata2Sesame2BindingSetIterator.hasNext(Bigdata2Sesame2BindingSetIterator.java:184)\n\tat info.aduna.iteration.IterationWrapper.hasNext(IterationWrapper.java:68)\n\tat org.openrdf.query.QueryResults.report(QueryResults.java:155)\n\tat org.openrdf.repository.sail.SailTupleQuery.evaluate(SailTupleQuery.java:76)\n\tat com.bigdata.rdf.sail.webapp.BigdataRDFContext$TupleQueryTask.doQuery(BigdataRDFContext.java:1713)\n\tat com.bigdata.rdf.sail.webapp.BigdataRDFContext$AbstractQueryTask.innerCall(BigdataRDFContext.java:1569)\n\tat com.bigdata.rdf.sail.webapp.BigdataRDFContext$AbstractQueryTask.call(BigdataRDFContext.java:1534)\n\tat com.bigdata.rdf.sail.webapp.BigdataRDFContext$AbstractQueryTask.call(BigdataRDFContext.java:747)\n\t... 4 more\nCaused by: java.lang.RuntimeException: java.util.concurrent.ExecutionException: com.bigdata.bop.engine.QueryTimeoutException: Query deadline is expired.\n\tat com.bigdata.rdf.sail.RunningQueryCloseableIterator.checkFuture(RunningQueryCloseableIterator.java:59)\n\tat com.bigdata.rdf.sail.RunningQueryCloseableIterator._hasNext(RunningQueryCloseableIterator.java:105)\n\tat com.bigdata.rdf.sail.RunningQueryCloseableIterator.hasNext(RunningQueryCloseableIterator.java:79)\n\tat com.bigdata.striterator.ChunkedWrappedIterator.hasNext(ChunkedWrappedIterator.java:197)\n\tat com.bigdata.rdf.sail.Bigdata2Sesame2BindingSetIterator.hasNext(Bigdata2Sesame2BindingSetIterator.java:134)\n\t... 
11 more\nCaused by: java.util.concurrent.ExecutionException: com.bigdata.bop.engine.QueryTimeoutException: Query deadline is expired.\n\tat com.bigdata.util.concurrent.Haltable.get(Haltable.java:273)\n\tat com.bigdata.bop.engine.AbstractRunningQuery.get(AbstractRunningQuery.java:1516)\n\tat com.bigdata.bop.engine.AbstractRunningQuery.get(AbstractRunningQuery.java:104)\n\tat com.bigdata.rdf.sail.RunningQueryCloseableIterator.checkFuture(RunningQueryCloseableIterator.java:46)\n\t... 15 more\nCaused by: com.bigdata.bop.engine.QueryTimeoutException: Query deadline is expired.\n\tat com.bigdata.bop.engine.RunState.checkDeadline(RunState.java:832)\n\tat com.bigdata.bop.engine.RunState.startOp(RunState.java:753)\n\tat com.bigdata.bop.engine.AbstractRunningQuery.startOp(AbstractRunningQuery.java:789)\n\tat com.bigdata.bop.engine.QueryEngine.startOp(QueryEngine.java:1358)\n\tat com.bigdata.bop.engine.ChunkedRunningQuery$ChunkTaskWrapper.run(ChunkedRunningQuery.java:912)\n\tat java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)\n\tat java.util.concurrent.FutureTask.run(FutureTask.java:266)\n\tat com.bigdata.concurrent.FutureTaskMon.run(FutureTaskMon.java:63)\n\tat com.bigdata.bop.engine.ChunkedRunningQuery$ChunkFutureTask.run(ChunkedRunningQuery.java:821)\n\t... 3 more\n'
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-79-3f7a6a4e6e5a> in <module>()
     17 })
     18 print(r.content)
---> 19 content = json.loads(r.text)
     20 pprint(content)

/usr/lib/python3.4/json/__init__.py in loads(s, encoding, cls, object_hook, parse_float, parse_int, parse_constant, object_pairs_hook, **kw)
    316             parse_int is None and parse_float is None and
    317             parse_constant is None and object_pairs_hook is None and not kw):
--> 318         return _default_decoder.decode(s)
    319     if cls is None:
    320         cls = JSONDecoder

/usr/lib/python3.4/json/decoder.py in decode(self, s, _w)
    341 
    342         """
--> 343         obj, end = self.raw_decode(s, idx=_w(s, 0).end())
    344         end = _w(s, end).end()
    345         if end != len(s):

/usr/lib/python3.4/json/decoder.py in raw_decode(self, s, idx)
    359             obj, end = self.scan_once(s, idx)
    360         except StopIteration as err:
--> 361             raise ValueError(errmsg("Expecting value", s, err.value)) from None
    362         return obj, end

ValueError: Expecting value: line 1 column 1 (char 0)
# BUG FIX: `import urllib` alone does not load the urllib.parse
# submodule; it only worked because requests imports it as a side
# effect. Import the submodule explicitly.
import urllib.parse

# Flatten the SPARQL rows into {qid, wikiid, title} records.
# NOTE(review): this expects `content` to be a flat list of dicts with
# 'item', 'link' and 'name' keys (the pre-processed query.json shape),
# not the raw SPARQL JSON envelope — confirm the upstream format.
pages = []
for entry in content:
    item_url = urllib.parse.urlparse(entry['item'])
    link_url = urllib.parse.urlparse(entry['link'])
    pages.append({
        # Last path component of the entity URL, e.g. .../entity/Q42 -> Q42.
        "qid": item_url.path.split('/')[-1],
        # First hostname label + "wiki", e.g. nl.wikipedia.org -> nlwiki.
        "wikiid": "%swiki" % (link_url.netloc.split('.')[0]),
        "title": entry['name'],
    })

print(pages[0])
# Group the sitelink records by wiki id (e.g. 'nlwiki').
bywiki = {}
for page in pages:
    bywiki.setdefault(page['wikiid'], []).append(page)

print(bywiki['nlwiki'])
[{'qid': 'Q26844464', 'wikiid': 'nlwiki', 'title': 'Nassima Saifi'}, {'qid': 'Q508482', 'wikiid': 'nlwiki', 'title': 'Fatou Bensouda'}, {'qid': 'Q289706', 'wikiid': 'nlwiki', 'title': 'Asmaa Mahfouz'}, {'qid': 'Q290451', 'wikiid': 'nlwiki', 'title': 'Samia Yusuf Omar'}, {'qid': 'Q467803', 'wikiid': 'nlwiki', 'title': 'Yaa Asantewaa'}, {'qid': 'Q731819', 'wikiid': 'nlwiki', 'title': 'Nozizwe Madlala-Routledge'}, {'qid': 'Q6730265', 'wikiid': 'nlwiki', 'title': 'Maggie Laubser'}, {'qid': 'Q5460344', 'wikiid': 'nlwiki', 'title': 'Flora Nwapa'}, {'qid': 'Q17626455', 'wikiid': 'nlwiki', 'title': 'Frances Ames'}, {'qid': 'Q12244275', 'wikiid': 'nlwiki', 'title': 'Malouma'}]
# For every wiki holding a translated article, collect the users who
# edited it during the contest window, keyed by user name.
users = {}
# BUG FIX: `conn` was never defined anywhere in this file (NameError at
# runtime); open a connection the same way the earlier cells do.
with get_mysql().cursor() as cur:
    for wikiid in bywiki.keys():
        # NOTE(review): wikiid is interpolated directly into the `use`
        # statement. It is derived from wikidata sitelink hostnames, but a
        # whitelist/identifier check would be safer than raw interpolation.
        cur.execute("use %s_p" % (wikiid))
        for page in bywiki[wikiid]:
            cur.execute("""
                select distinct rev_user_text 
                from revision 
                join  page on page_namespace = 0 AND page_title = %s AND rev_page = page_id
                WHERE rev_user > 0 AND rev_timestamp BETWEEN '20170220000000' AND '20170309000000'
            """, (page['title'].replace(' ', '_')))
            for (user,) in cur.fetchall():
                users.setdefault(user, []).append({
                    "name": user,
                    "title": page['title'],
                    "wikiid": wikiid,
                })
import pprint
pprint.pprint(users[list(users.keys())[0]])
import csv
# Export one CSV row per (user, article, wiki) contribution.
# BUG FIX: the csv module requires the file be opened with newline=''
# so the writer controls line endings; without it, extra blank rows
# appear on Windows and embedded newlines are mishandled.
with open('results_yay.csv', 'w', newline='') as f:
    writer = csv.writer(f, delimiter=',', quotechar='|')
    writer.writerow(['name', 'title', 'wikiid'])
    for name, items in users.items():
        for item in items:
            # rev_user_text comes back as bytes from the replica DB.
            writer.writerow([name.decode('utf8'), item['title'], item['wikiid']])