Import a bunch of things.

In [73]:
%matplotlib inline

import pymysql
import os

import pandas as pd
import matplotlib.pylab as pylab
import matplotlib.pyplot as plt


pylab.rcParams['figure.figsize'] = 16, 12

Connect to MySQL

In [74]:
host = os.environ['MYSQL_HOST']
user = os.environ['MYSQL_USERNAME']
password = os.environ['MYSQL_PASSWORD']


conn = pymysql.connect(
    host=host,
    user=user,
    password=password
)
In [75]:
conn.select_db('bnwiki_p')

bn_df = pd.read_sql_query("""
    SELECT COUNT(rev_id) Bengali, DATE_FORMAT(rev_timestamp, "%Y-%m-15") month
    FROM revision
    WHERE rev_minor_edit=0
    GROUP BY DATE_FORMAT(rev_timestamp, "%Y-%m-15")
""", conn, index_col='month')
In [76]:
conn.select_db('hiwiki_p')

hi_df = pd.read_sql_query("""
    SELECT COUNT(rev_id) Hindi, DATE_FORMAT(rev_timestamp, "%Y-%m-15") month
    FROM revision
    WHERE rev_minor_edit=0
    GROUP BY DATE_FORMAT(rev_timestamp, "%Y-%m-15")
""", conn, index_col='month')
In [77]:
# bn_ts = pd.Series(bn_df['count'].values, index=bn_df['month'])
# hi_ts = pd.Series(hi_df['count'].values, index=hi_df['month'])

merged = bn_df.join(hi_df, how="outer")
merged.plot()
Out[77]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f65a2f51320>
In [78]:
hi_df.sort_values(by='Hindi', ascending=False).head(n=1)
Out[78]:
Hindi
month
2011-04-15 92956
In [88]:
conn.select_db('hiwiki_p')

df_pages = pd.read_sql_query("""
    SELECT COUNT(rev_id) count, rev_page page
    FROM revision
    WHERE DATE_FORMAT(rev_timestamp, "%Y-%m-15") = "2011-04-15"
    AND
    rev_minor_edit=0
    GROUP BY rev_page
""", conn, index_col='page')

df_users = pd.read_sql_query("""
    SELECT COUNT(rev_id) count, rev_user user
    FROM revision
    WHERE DATE_FORMAT(rev_timestamp, "%Y-%m-15") = "2011-04-15"
    AND
    rev_minor_edit=0
    GROUP BY rev_user
""", conn, index_col='user')
In [89]:
# df_pages.sort_values(by="count", ascending=False).head(n=10)
# df_users.sort_values(by="count", ascending=False).head(n=10)
Out[89]:
count
user
26677 87662
0 1089
6502 954
1634 708
19137 666
42374 382
411 320
6064 190
41510 76
31919 54