Import a bunch of things.

%matplotlib inline

import pymysql
import os

import pandas as pd
import matplotlib.pylab as pylab
import matplotlib.pyplot as plt


pylab.rcParams['figure.figsize'] = 16, 12

Connect to MySQL

host = os.environ['MYSQL_HOST']
user = os.environ['MYSQL_USERNAME']
password = os.environ['MYSQL_PASSWORD']


conn = pymysql.connect(
    host=host,
    user=user,
    password=password
)
conn.select_db('bnwiki_p')

bn_df = pd.read_sql_query("""
    SELECT COUNT(rev_id) Bengali, DATE_FORMAT(rev_timestamp, "%Y-%m-15") month
    FROM revision
    WHERE rev_minor_edit=0
    GROUP BY DATE_FORMAT(rev_timestamp, "%Y-%m-15")
""", conn, index_col='month')
conn.select_db('hiwiki_p')

hi_df = pd.read_sql_query("""
    SELECT COUNT(rev_id) Hindi, DATE_FORMAT(rev_timestamp, "%Y-%m-15") month
    FROM revision
    WHERE rev_minor_edit=0
    GROUP BY DATE_FORMAT(rev_timestamp, "%Y-%m-15")
""", conn, index_col='month')
# bn_ts = pd.Series(bn_df['count'].values, index=bn_df['month'])
# hi_ts = pd.Series(hi_df['count'].values, index=hi_df['month'])

merged = bn_df.join(hi_df, how="outer")
merged.plot()
<matplotlib.axes._subplots.AxesSubplot at 0x7f65a2f51320>
hi_df.sort_values(by='Hindi', ascending=False).head(n=1)
Hindi
month
2011-04-15 92956
conn.select_db('hiwiki_p')

df_pages = pd.read_sql_query("""
    SELECT COUNT(rev_id) count, rev_page page
    FROM revision
    WHERE DATE_FORMAT(rev_timestamp, "%Y-%m-15") = "2011-04-15"
    AND
    rev_minor_edit=0
    GROUP BY rev_page
""", conn, index_col='page')

df_users = pd.read_sql_query("""
    SELECT COUNT(rev_id) count, rev_user user
    FROM revision
    WHERE DATE_FORMAT(rev_timestamp, "%Y-%m-15") = "2011-04-15"
    AND
    rev_minor_edit=0
    GROUP BY rev_user
""", conn, index_col='user')
# df_pages.sort_values(by="count", ascending=False).head(n=10)
# df_users.sort_values(by="count", ascending=False).head(n=10)
count
user
26677 87662
0 1089
6502 954
1634 708
19137 666
42374 382
411 320
6064 190
41510 76
31919 54