from datetime import datetime
import os
import pickle
import pymysql
import json
import pandas as pd
def connect_to_db():
    """
    Connect to the public enwiki replica database.

    Credentials are taken from the environment (quarry.wmflabs.org style):
    MYSQL_HOST, MYSQL_USERNAME, MYSQL_PASSWORD.

    Returns:
        pymysql.connections.Connection: an open connection to `enwiki_p`.
    """
    conn = pymysql.connect(
        host=os.environ['MYSQL_HOST'],
        user=os.environ['MYSQL_USERNAME'],
        # Never log or print the password.
        password=os.environ['MYSQL_PASSWORD'],
        database='enwiki_p',
        charset='utf8',
        use_unicode=True
    )
    return conn
# Sample up to 2000 random talk pages tagged by each mid-level WikiProject
# group and dump (page_id, wikiproject_template, namespace, page_title)
# rows to a tab-separated file.
conn = connect_to_db()

# mid-level-wp.json maps each group name to a list of WikiProject
# template names, e.g. {"wikiprojects": {"group": ["Wikiproject Foo", ...]}}.
with open('mid-level-wp.json', 'r') as f:
    wps = json.loads(f.read())['wikiprojects']

results = []
for wp in wps:
    # Strip the 10-character prefix from each template name (presumably
    # "Wikiproject" -- confirm against the JSON contents) and convert
    # spaces to underscores to match DB title conventions.
    templates = [name[10:].replace(' ', '_') for name in wps[wp]]
    # Parameterized IN (...) clause: let the driver quote/escape the
    # titles instead of hand-building quoted SQL strings.
    placeholders = ','.join(['%s'] * len(templates))
    # tl_namespace = 10 restricts to Template: links; page_namespace = 1
    # restricts to Talk: pages; redirects are excluded.
    query = """SELECT tl_from, tl_title, tl_namespace, page.page_title
        FROM templatelinks INNER JOIN page ON templatelinks.tl_from=page.page_id
        WHERE tl_title IN ( {} )
        AND tl_namespace = 10 AND page.page_namespace = 1 AND page.page_is_redirect = 0
        ORDER BY RAND() LIMIT 2000""".format(placeholders)
    print("processing {}".format(wp))
    with conn.cursor() as cur:
        # The connection already selects enwiki_p, so no `USE` is needed.
        cur.execute(query, templates)
        results.extend(cur.fetchall())

df = pd.DataFrame(results)

df.to_csv('wp-pairs', sep='\t', header=['page_id', 'wikiproject', 'namespace', 'page_title'])