# cf. http://paws-public.wmflabs.org/paws-public/User:Jtmorgan/ds4ux/paws-cheatsheet.ipynb
import os 
"""
Your db login credentials are stored in os.environ. 
DO NOT print or run os.environ, or it will expose your credentials in the Notebook
"""
import pymysql
import pandas as pd
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib.dates
import seaborn as sns
from collections import defaultdict
import collections
from datetime import datetime
%%time
!date

conn = pymysql.connect(
    host=os.environ['MYSQL_HOST'],
    user=os.environ['MYSQL_USERNAME'],
    password=os.environ['MYSQL_PASSWORD'],
    database='enwiki_p',
    charset='utf8'
)

with conn.cursor() as cur:
    cur.execute("""
SET @prev=0""")
    cur.execute("""
SET @editnum=0;""")
    cur.execute("""
SELECT pageid, created, enwiki_p.revision_userindex.rev_user AS creator, 
DATE_ADD(enwiki_p.user.user_registration,INTERVAL 4 DAY) AS regtsp4,
tenthedits.rev_timestamp AS tentheditts
FROM (
SELECT ptrp_created AS created, ptrp_page_id AS pageid
FROM enwiki_p.pagetriage_page, enwiki_p.pagetriage_page_tags, enwiki_p.page
WHERE enwiki_p.pagetriage_page.ptrp_page_id = enwiki_p.pagetriage_page_tags.ptrpt_page_id 
AND enwiki_p.page.page_id = enwiki_p.pagetriage_page_tags.ptrpt_page_id
AND ptrp_reviewed = 0 and page_namespace = 0 AND page_is_redirect=0 AND ptrpt_tag_id = 13) AS backlog
INNER JOIN
enwiki_p.revision_userindex
ON pageid = enwiki_p.revision_userindex.rev_page
INNER JOIN 
enwiki_p.user
ON enwiki_p.revision_userindex.rev_user = enwiki_p.user.user_id
LEFT JOIN  # keep users with <10 edits, i.e. no row in 'tenthedits'
(SELECT rev_user, editnum, rev_timestamp FROM ( # Use 'poor man's windowing' while we wait for MariaDB to be upgraded to 10.2.:
    SELECT IF(@prev <> rev_user, @editnum:=1, @editnum:= @editnum +1) AS editnum, 
    rev_timestamp, @prev:= rev_user, rev_user, rev_user_text FROM (
        SELECT rev_timestamp, rev_user, rev_user_text
        FROM enwiki_p.revision_userindex
        WHERE rev_user > 0
        ORDER BY rev_user, rev_timestamp) AS ordered_data
    ORDER BY rev_user, rev_timestamp) AS numberedlist
HAVING editnum = 10) AS tenthedits
ON enwiki_p.user.user_id = tenthedits.rev_user
WHERE revision_userindex.rev_timestamp LIKE '2017021500%' AND revision_userindex.rev_parent_id = 0;""")
    df = cur.fetchall()
Fri Jun  2 22:50:22 UTC 2017
---------------------------------------------------------------------------
OperationalError                          Traceback (most recent call last)
<ipython-input-37-4418978d2f08> in <module>()
----> 1 get_ipython().run_cell_magic('time', '', '!date\n\nconn = pymysql.connect(\n    host=os.environ[\'MYSQL_HOST\'],\n    user=os.environ[\'MYSQL_USERNAME\'],\n    password=os.environ[\'MYSQL_PASSWORD\'],\n    database=\'enwiki_p\',\n    charset=\'utf8\'\n)\n\nwith conn.cursor() as cur:\n    cur.execute("""\nSET @prev=0""")\n    cur.execute("""\nSET @editnum=0;""")\n    cur.execute("""\nSELECT pageid, created, enwiki_p.revision_userindex.rev_user AS creator, \nDATE_ADD(enwiki_p.user.user_registration,INTERVAL 4 DAY) AS regtsp4,\ntenthedits.rev_timestamp AS tentheditts\nFROM (\nSELECT ptrp_created AS created, ptrp_page_id AS pageid\nFROM enwiki_p.pagetriage_page, enwiki_p.pagetriage_page_tags, enwiki_p.page\nWHERE enwiki_p.pagetriage_page.ptrp_page_id = enwiki_p.pagetriage_page_tags.ptrpt_page_id \nAND enwiki_p.page.page_id = enwiki_p.pagetriage_page_tags.ptrpt_page_id\nAND ptrp_reviewed = 0 and page_namespace = 0 AND page_is_redirect=0 AND ptrpt_tag_id = 13) AS backlog\nINNER JOIN\nenwiki_p.revision_userindex\nON pageid = enwiki_p.revision_userindex.rev_page\nINNER JOIN \nenwiki_p.user\nON enwiki_p.revision_userindex.rev_user = enwiki_p.user.user_id\nLEFT JOIN  # keep users with <10 edits, i.e. no row in \'tenthedits\'\n(SELECT rev_user, editnum, rev_timestamp FROM ( # Use \'poor man\'s windowing\' while we wait for MariaDB to be upgraded to 10.2.:\n    SELECT IF(@prev <> rev_user, @editnum:=1, @editnum:= @editnum +1) AS editnum, \n    rev_timestamp, @prev:= rev_user, rev_user, rev_user_text FROM (\n        SELECT rev_timestamp, rev_user, rev_user_text\n        FROM enwiki_p.revision_userindex\n        WHERE rev_user > 0\n        ORDER BY rev_user, rev_timestamp) AS ordered_data\n    ORDER BY rev_user, rev_timestamp) AS numberedlist\nHAVING editnum = 10) AS tenthedits\nON enwiki_p.user.user_id = tenthedits.rev_user\nWHERE revision_userindex.rev_timestamp LIKE \'2017021500%\' AND revision_userindex.rev_parent_id = 0;""")\n    df = cur.fetchall()')

/srv/paws/lib/python3.4/site-packages/IPython/core/interactiveshell.py in run_cell_magic(self, magic_name, line, cell)
   2113             magic_arg_s = self.var_expand(line, stack_depth)
   2114             with self.builtin_trap:
-> 2115                 result = fn(magic_arg_s, cell)
   2116             return result
   2117 

<decorator-gen-59> in time(self, line, cell, local_ns)

/srv/paws/lib/python3.4/site-packages/IPython/core/magic.py in <lambda>(f, *a, **k)
    186     # but it's overkill for just that one bit of state.
    187     def magic_deco(arg):
--> 188         call = lambda f, *a, **k: f(*a, **k)
    189 
    190         if callable(arg):

/srv/paws/lib/python3.4/site-packages/IPython/core/magics/execution.py in time(self, line, cell, local_ns)
   1183         else:
   1184             st = clock2()
-> 1185             exec(code, glob, local_ns)
   1186             end = clock2()
   1187             out = None

<timed exec> in <module>()

/srv/paws/lib/python3.4/site-packages/pymysql/cursors.py in execute(self, query, args)
    164         query = self.mogrify(query, args)
    165 
--> 166         result = self._query(query)
    167         self._executed = query
    168         return result

/srv/paws/lib/python3.4/site-packages/pymysql/cursors.py in _query(self, q)
    320         conn = self._get_db()
    321         self._last_executed = q
--> 322         conn.query(q)
    323         self._do_get_result()
    324         return self.rowcount

/srv/paws/lib/python3.4/site-packages/pymysql/connections.py in query(self, sql, unbuffered)
    850                 sql = sql.encode(self.encoding, 'surrogateescape')
    851         self._execute_command(COMMAND.COM_QUERY, sql)
--> 852         self._affected_rows = self._read_query_result(unbuffered=unbuffered)
    853         return self._affected_rows
    854 

/srv/paws/lib/python3.4/site-packages/pymysql/connections.py in _read_query_result(self, unbuffered)
   1051         else:
   1052             result = MySQLResult(self)
-> 1053             result.read()
   1054         self._result = result
   1055         if result.server_status is not None:

/srv/paws/lib/python3.4/site-packages/pymysql/connections.py in read(self)
   1334     def read(self):
   1335         try:
-> 1336             first_packet = self.connection._read_packet()
   1337 
   1338             if first_packet.is_ok_packet():

/srv/paws/lib/python3.4/site-packages/pymysql/connections.py in _read_packet(self, packet_type)
    981         buff = b''
    982         while True:
--> 983             packet_header = self._read_bytes(4)
    984             if DEBUG: dump_packet(packet_header)
    985 

/srv/paws/lib/python3.4/site-packages/pymysql/connections.py in _read_bytes(self, num_bytes)
   1027             self._force_close()
   1028             raise err.OperationalError(
-> 1029                 CR.CR_SERVER_LOST, "Lost connection to MySQL server during query")
   1030         return data
   1031 

OperationalError: (2013, 'Lost connection to MySQL server during query')
# note: the above cell has now crashed four times with "OperationalError: (2013, 'Lost connection to MySQL server during query')"