from sqlalchemy import create_engine
import sys, os
import pandas as pd
# Assemble the SQLAlchemy URL for the MySQL server (pymysql driver) from
# credentials held in environment variables; a missing variable raises KeyError.
# The URL names no default schema.
# NOTE(review): the password is inserted verbatim -- presumably it contains no
# URL-special characters ('@', ':', '/'); confirm or URL-encode it.
_db_credentials = {
    'user': os.environ['MYSQL_USERNAME'],
    'pwd': os.environ['MYSQL_PASSWORD'],
    'host': os.environ['MYSQL_HOST'],
}
constr = 'mysql+pymysql://{user}:{pwd}@{host}'.format(**_db_credentials)

# Engine object handed to pandas for every query below.
con = create_engine(constr)
# Chunk 3 of the English-Wikipedia revision export: skip 30,000,000 matching
# rows and fetch the next 15,000,000 revisions whose rev_parent_id is 0,
# returning the page id and the revision timestamp cast to DATETIME.
# (rev_parent_id = 0 is taken to mark a page's first revision, hence the
# en_first_edit alias -- confirm against the MediaWiki schema.)
#
# ORDER BY is required for LIMIT/OFFSET paging: without it the server may
# return rows in any order, so separate chunk queries are not guaranteed to
# be disjoint or to cover every row exactly once.
# NOTE(review): rev_id is presumably the primary key of revision, which would
# make this ordering cheap -- confirm on the replica.
q_en_revisions3 = """
SELECT r.rev_page as en_page_id,
       cast(r.rev_timestamp as DATETIME) as en_first_edit
from
    enwiki_p.revision as r
where
    r.rev_parent_id=0
ORDER BY r.rev_id
LIMIT 15000000 OFFSET 30000000
"""

# Load the whole chunk into memory as a DataFrame.
df_en_revisions3 = pd.read_sql(q_en_revisions3, con)

# Persist the chunk; with default arguments to_csv also writes the DataFrame
# index as an unnamed leading column.
df_en_revisions3.to_csv("en_revisions3.csv")

# Notebook display expressions (no effect when run as a plain script).
df_en_revisions3.shape
# Recorded output (from the notebook run made before ORDER BY was added):
# (15000000, 2)
df_en_revisions3.head()
# Recorded output (same run):
#    en_page_id        en_first_edit
# 0    40064002  2013-07-25 05:32:31
# 1    40064003  2013-07-25 05:32:34
# 2    40064004  2013-07-25 05:32:36
# 3    40064005  2013-07-25 05:32:36
# 4    40064006  2013-07-25 05:32:39
# Chunk 2 of the English-Wikipedia revision export: skip 15,000,000 matching
# rows and fetch the next 15,000,000 revisions whose rev_parent_id is 0,
# returning the page id and the revision timestamp cast to DATETIME.
# (rev_parent_id = 0 is taken to mark a page's first revision, hence the
# en_first_edit alias -- confirm against the MediaWiki schema.)
#
# ORDER BY is required for LIMIT/OFFSET paging: without it the server may
# return rows in any order, so separate chunk queries are not guaranteed to
# be disjoint or to cover every row exactly once.
# NOTE(review): rev_id is presumably the primary key of revision, which would
# make this ordering cheap -- confirm on the replica.
q_en_revisions2 = """
SELECT r.rev_page as en_page_id,
       cast(r.rev_timestamp as DATETIME) as en_first_edit
from
    enwiki_p.revision as r
where
    r.rev_parent_id=0
ORDER BY r.rev_id
LIMIT 15000000 OFFSET 15000000
"""

# Load the whole chunk into memory as a DataFrame.
df_en_revisions2 = pd.read_sql(q_en_revisions2, con)

# Persist the chunk; with default arguments to_csv also writes the DataFrame
# index as an unnamed leading column.
df_en_revisions2.to_csv("en_revisions2.csv")

# Notebook display expressions (no effect when run as a plain script).
df_en_revisions2.shape
# Recorded output (from the notebook run made before ORDER BY was added):
# (15000000, 2)
df_en_revisions2.head()
# Recorded output (same run):
#    en_page_id        en_first_edit
# 0    21785102  2009-03-03 03:51:48
# 1    21785104  2009-03-03 03:52:13
# 2    21785106  2009-03-03 03:52:31
# 3    21785107  2009-03-03 03:52:40
# 4    21785108  2009-03-03 03:52:48