import pymysql
# One shared connection to the 'paws-db' host, reused by every query below.
# DictCursor makes each fetched row a dict keyed by column name.
conn = pymysql.Connection(
    host='paws-db',
    user='cscw',
    password='cscw',
    cursorclass=pymysql.cursors.DictCursor,
)

# Build the list of user ids that are members of the "bot" user group.
with conn.cursor() as cur:
    cur.execute('use enwiki_p')
    cur.execute('select user_id  from user_groups join user on user_id = ug_user where ug_group="bot"')
    bots = [row['user_id'] for row in cur.fetchall()]
def revisions_for_user_in_time(user_id, starttime, endtime):
    """Fetch all revisions by one user inside an open time interval.

    Runs against the ``enwiki_p`` database through the module-level
    ``conn`` connection.

    Args:
        user_id: Value matched against ``rev_user``.
        starttime: Exclusive lower bound for ``rev_timestamp`` (callers in
            this file pass 14-digit strings such as ``'20150101000000'``).
        endtime: Exclusive upper bound for ``rev_timestamp``, same form.

    Returns:
        Whatever ``fetchall()`` yields for the query: one row per matching
        revision, with every column of ``revision_userindex``.
    """
    query = 'select * from revision_userindex where rev_user = %s and rev_timestamp > %s and rev_timestamp < %s'
    params = (user_id, starttime, endtime)
    with conn.cursor() as cursor:
        # Select the database on every call rather than relying on state
        # left behind by an earlier cursor.
        cursor.execute('use enwiki_p')
        cursor.execute(query, params)
        rows = cursor.fetchall()
    return rows
# Gather the revisions of every bot account whose timestamp lies strictly
# between the two bounds below.
bot_revs = []
# The loop variable is called bot_id (not id) so the builtin id() is not
# rebound at module scope for the rest of the session.
for bot_id in bots:
    bot_revs.extend(revisions_for_user_in_time(bot_id, '20150101000000', '20150108000000'))
# Bare expression: shows the total only when run interactively.
len(bot_revs)
121012
import random
def sample_random_users(number, starttime, endtime):
    """Return up to ``number`` randomly chosen users who edited in a time window.

    Collects the distinct ``rev_user`` values of revisions whose
    ``rev_timestamp`` lies strictly between ``starttime`` and ``endtime``,
    then draws a random sample without replacement from those rows.

    Args:
        number: Maximum number of users to return. Values below one give an
            empty list.
        starttime: Exclusive lower bound for ``rev_timestamp`` (callers in
            this file pass 14-digit strings such as ``'20150101000000'``).
        endtime: Exclusive upper bound for ``rev_timestamp``, same form.

    Returns:
        A list of at most ``number`` rows from the query. Each row holds a
        single ``rev_user`` column (a dict keyed by ``'rev_user'`` with the
        DictCursor configured on the module-level ``conn``).
    """
    # Check the shared connection first; the True argument asks pymysql to
    # reconnect if it has gone away.
    conn.ping(True)
    with conn.cursor() as cur:
        cur.execute('use enwiki_p')
        cur.execute('SELECT DISTINCT rev_user FROM revision_userindex WHERE rev_timestamp > %s and rev_timestamp < %s', (starttime, endtime))
        candidates = list(cur.fetchall())
    # NOTE(review): the query does not filter on rev_user, so a row standing
    # for anonymous edits may be among the candidates -- confirm whether
    # such rows should be excluded before sampling.
    # Clamp the sample size so random.sample never sees a negative count or
    # one larger than the population (either would raise ValueError).
    sample_size = max(0, min(number, len(candidates)))
    return random.sample(candidates, sample_size)
# Ask for users with revisions strictly between the two timestamps;
# 1000 is passed as the requested sample size.
users = sample_random_users(
    number=1000,
    starttime='20150101000000',
    endtime='20150108000000',
)
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-115-2c3b2d0680f9> in <module>()
----> 1 users = sample_random_users(1000, '20150101000000', '20150108000000')

<ipython-input-114-214414961c08> in sample_random_users(number, starttime, endtime)
      5     with conn.cursor() as cur:
      6         cur.execute('use enwiki_p')
----> 7         cur.execute('SELECT DISTINCT rev_user FROM revision_userindex WHERE rev_timestamp > %s and rev_timestamp < %s', (starttime, endtime))
      8         #max_id = cur.fetchone()['id']
      9         #ids = [random.randint(1, max_id) for _ in range(number)]

/srv/paws/lib/python3.4/site-packages/pymysql/cursors.py in execute(self, query, args)
    144         query = self.mogrify(query, args)
    145 
--> 146         result = self._query(query)
    147         self._executed = query
    148         return result

/srv/paws/lib/python3.4/site-packages/pymysql/cursors.py in _query(self, q)
    294         conn = self._get_db()
    295         self._last_executed = q
--> 296         conn.query(q)
    297         self._do_get_result()
    298         return self.rowcount

/srv/paws/lib/python3.4/site-packages/pymysql/connections.py in query(self, sql, unbuffered)
    817                 sql = sql.encode(self.encoding, 'surrogateescape')
    818         self._execute_command(COMMAND.COM_QUERY, sql)
--> 819         self._affected_rows = self._read_query_result(unbuffered=unbuffered)
    820         return self._affected_rows
    821 

/srv/paws/lib/python3.4/site-packages/pymysql/connections.py in _read_query_result(self, unbuffered)
    999         else:
   1000             result = MySQLResult(self)
-> 1001             result.read()
   1002         self._result = result
   1003         if result.server_status is not None:

/srv/paws/lib/python3.4/site-packages/pymysql/connections.py in read(self)
   1283     def read(self):
   1284         try:
-> 1285             first_packet = self.connection._read_packet()
   1286 
   1287             if first_packet.is_ok_packet():

/srv/paws/lib/python3.4/site-packages/pymysql/connections.py in _read_packet(self, packet_type)
    943         buff = b''
    944         while True:
--> 945             packet_header = self._read_bytes(4)
    946             if DEBUG: dump_packet(packet_header)
    947 

/srv/paws/lib/python3.4/site-packages/pymysql/connections.py in _read_bytes(self, num_bytes)
    969         while True:
    970             try:
--> 971                 data = self._rfile.read(num_bytes)
    972                 break
    973             except (IOError, OSError) as e:

/usr/lib/python3.4/socket.py in readinto(self, b)
    369         while True:
    370             try:
--> 371                 return self._sock.recv_into(b)
    372             except timeout:
    373                 self._timeout_occurred = True

KeyboardInterrupt: 
# Gather the revisions made by each sampled user in the same time window.
user_revs = []
for user in users:
    # sample_random_users selects only the rev_user column, so the account
    # id is stored under 'rev_user'; the rows have no 'user_id' key.
    user_revs.extend(revisions_for_user_in_time(user['rev_user'], '20150101000000', '20150108000000'))
# Bare expression: shows the total only when run interactively.
len(user_revs)
 
[{'rev_comment': b'death date corrected',
  'rev_content_format': None,
  'rev_content_model': None,
  'rev_deleted': 0,
  'rev_id': 640660211,
  'rev_len': 6617,
  'rev_minor_edit': 0,
  'rev_page': 3273865,
  'rev_parent_id': 640655960,
  'rev_sha1': b'gfni74v441g9eit6qmtjb6qs6bqoq1v',
  'rev_text_id': 0,
  'rev_timestamp': b'20150102145516',
  'rev_user': 832814,
  'rev_user_text': b'Pfold'},
 {'rev_comment': b'death date corrected',
  'rev_content_format': None,
  'rev_content_model': None,
  'rev_deleted': 0,
  'rev_id': 640660296,
  'rev_len': 6617,
  'rev_minor_edit': 0,
  'rev_page': 3273865,
  'rev_parent_id': 640660211,
  'rev_sha1': b'2obj5lj8eqjeknkvgnagzulokr3muny',
  'rev_text_id': 0,
  'rev_timestamp': b'20150102145607',
  'rev_user': 832814,
  'rev_user_text': b'Pfold'},
 {'rev_comment': b'/* References */ added Rauch',
  'rev_content_format': None,
  'rev_content_model': None,
  'rev_deleted': 0,
  'rev_id': 640793051,
  'rev_len': 77197,
  'rev_minor_edit': 0,
  'rev_page': 18789453,
  'rev_parent_id': 635946920,
  'rev_sha1': b'c1jmugds4q6fzi9ceb2db4j4ce99450',
  'rev_text_id': 0,
  'rev_timestamp': b'20150103123826',
  'rev_user': 832814,
  'rev_user_text': b'Pfold'},
 {'rev_comment': b'death date confirmed; earlier than date his body was discovered',
  'rev_content_format': None,
  'rev_content_model': None,
  'rev_deleted': 0,
  'rev_id': 641071350,
  'rev_len': 6564,
  'rev_minor_edit': 0,
  'rev_page': 3273865,
  'rev_parent_id': 640671934,
  'rev_sha1': b'fd2kt274o7s0q7ip1ss5l4vrcfssflo',
  'rev_text_id': 0,
  'rev_timestamp': b'20150105105703',
  'rev_user': 832814,
  'rev_user_text': b'Pfold'},
 {'rev_comment': b'rm BLP tag',
  'rev_content_format': None,
  'rev_content_model': None,
  'rev_deleted': 0,
  'rev_id': 641377554,
  'rev_len': 6535,
  'rev_minor_edit': 1,
  'rev_page': 3273865,
  'rev_parent_id': 641071350,
  'rev_sha1': b'761ab0a9vm7i34hquh9ozd27orht3p4',
  'rev_text_id': 0,
  'rev_timestamp': b'20150107084825',
  'rev_user': 832814,
  'rev_user_text': b'Pfold'},
 {'rev_comment': b'I like it!',
  'rev_content_format': None,
  'rev_content_model': None,
  'rev_deleted': 0,
  'rev_id': 640556863,
  'rev_len': 2217,
  'rev_minor_edit': 0,
  'rev_page': 44903385,
  'rev_parent_id': 640551289,
  'rev_sha1': b'egnio5v6xhdnjbeelbca52gwoe23dqz',
  'rev_text_id': 0,
  'rev_timestamp': b'20150101204654',
  'rev_user': 17957094,
  'rev_user_text': b'Staceydolxx'},
 {'rev_comment': b'Just a review to do?',
  'rev_content_format': None,
  'rev_content_model': None,
  'rev_deleted': 0,
  'rev_id': 640559629,
  'rev_len': 4141,
  'rev_minor_edit': 0,
  'rev_page': 44885574,
  'rev_parent_id': 640545606,
  'rev_sha1': b'4cv8j1a8drexsv0z7zng42f1eprnhda',
  'rev_text_id': 0,
  'rev_timestamp': b'20150101210620',
  'rev_user': 17957094,
  'rev_user_text': b'Staceydolxx'},
 {'rev_comment': b'/* Preparing and planning */ making more encyclopaedic',
  'rev_content_format': None,
  'rev_content_model': None,
  'rev_deleted': 0,
  'rev_id': 640782727,
  'rev_len': 17646,
  'rev_minor_edit': 0,
  'rev_page': 37918086,
  'rev_parent_id': 640755674,
  'rev_sha1': b'pay34c6z6jyryszlh4i7m7fccoezsbm',
  'rev_text_id': 0,
  'rev_timestamp': b'20150103105044',
  'rev_user': 17957094,
  'rev_user_text': b'Staceydolxx'},
 {'rev_comment': b'Adding only referenced info (removed rest) - Editing in progress..',
  'rev_content_format': None,
  'rev_content_model': None,
  'rev_deleted': 0,
  'rev_id': 640797470,
  'rev_len': 3963,
  'rev_minor_edit': 0,
  'rev_page': 13576757,
  'rev_parent_id': 639131226,
  'rev_sha1': b'9sla9x5b7hltat8sbuywr5lj2v358qc',
  'rev_text_id': 0,
  'rev_timestamp': b'20150103132350',
  'rev_user': 17957094,
  'rev_user_text': b'Staceydolxx'},
 {'rev_comment': b'Adding more info',
  'rev_content_format': None,
  'rev_content_model': None,
  'rev_deleted': 0,
  'rev_id': 640801174,
  'rev_len': 4524,
  'rev_minor_edit': 0,
  'rev_page': 13576757,
  'rev_parent_id': 640797470,
  'rev_sha1': b'1khihw59t31fqpeww6brg3p3u4c8bet',
  'rev_text_id': 0,
  'rev_timestamp': b'20150103140319',
  'rev_user': 17957094,
  'rev_user_text': b'Staceydolxx'},
 {'rev_comment': b'Adding references',
  'rev_content_format': None,
  'rev_content_model': None,
  'rev_deleted': 0,
  'rev_id': 640805556,
  'rev_len': 4268,
  'rev_minor_edit': 0,
  'rev_page': 3788577,
  'rev_parent_id': 639345135,
  'rev_sha1': b'qguoaygd3p0d8ivqqzeljqs9cge2kgb',
  'rev_text_id': 0,
  'rev_timestamp': b'20150103145128',
  'rev_user': 17957094,
  'rev_user_text': b'Staceydolxx'},
 {'rev_comment': b'Adding references',
  'rev_content_format': None,
  'rev_content_model': None,
  'rev_deleted': 0,
  'rev_id': 640811219,
  'rev_len': 3274,
  'rev_minor_edit': 0,
  'rev_page': 8644989,
  'rev_parent_id': 611885977,
  'rev_sha1': b'd6mxp0p8a6q6kzcjexr1lqq4v4lv8lf',
  'rev_text_id': 0,
  'rev_timestamp': b'20150103154611',
  'rev_user': 17957094,
  'rev_user_text': b'Staceydolxx'},
 {'rev_comment': b'Fixing wiki link',
  'rev_content_format': None,
  'rev_content_model': None,
  'rev_deleted': 0,
  'rev_id': 640922847,
  'rev_len': 4518,
  'rev_minor_edit': 0,
  'rev_page': 13576757,
  'rev_parent_id': 640801174,
  'rev_sha1': b'4wlbldzok642f567dh8283vs2tz1ddx',
  'rev_text_id': 0,
  'rev_timestamp': b'20150104092349',
  'rev_user': 17957094,
  'rev_user_text': b'Staceydolxx'},
 {'rev_comment': b'/* Social behavior */ adding wiki link',
  'rev_content_format': None,
  'rev_content_model': None,
  'rev_deleted': 0,
  'rev_id': 641132210,
  'rev_len': 25916,
  'rev_minor_edit': 0,
  'rev_page': 3286366,
  'rev_parent_id': 638350669,
  'rev_sha1': b'l613pjm4o2dpc2wmx55fmt19zm5r2nu',
  'rev_text_id': 0,
  'rev_timestamp': b'20150105200510',
  'rev_user': 17957094,
  'rev_user_text': b'Staceydolxx'},
 {'rev_comment': b'/* Theory of mind */ Repetition',
  'rev_content_format': None,
  'rev_content_model': None,
  'rev_deleted': 0,
  'rev_id': 641132709,
  'rev_len': 25898,
  'rev_minor_edit': 0,
  'rev_page': 3286366,
  'rev_parent_id': 641132210,
  'rev_sha1': b'27i89vdynimux0k1l79l7t6kv9ho2pe',
  'rev_text_id': 0,
  'rev_timestamp': b'20150105200920',
  'rev_user': 17957094,
  'rev_user_text': b'Staceydolxx'},
 {'rev_comment': b'/* Universal Grammar */ Unfinished sentence - Unable to access source to fix it',
  'rev_content_format': None,
  'rev_content_model': None,
  'rev_deleted': 0,
  'rev_id': 641133266,
  'rev_len': 25861,
  'rev_minor_edit': 0,
  'rev_page': 3286366,
  'rev_parent_id': 641132709,
  'rev_sha1': b'7cf10szbqmomczxubrqaiday5kizq8r',
  'rev_text_id': 0,
  'rev_timestamp': b'20150105201404',
  'rev_user': 17957094,
  'rev_user_text': b'Staceydolxx'},
 {'rev_comment': b'G2G!',
  'rev_content_format': None,
  'rev_content_model': None,
  'rev_deleted': 0,
  'rev_id': 641136157,
  'rev_len': 2405,
  'rev_minor_edit': 0,
  'rev_page': 44440983,
  'rev_parent_id': 640272308,
  'rev_sha1': b'6aib8o1vaseiwju1g9fa9p30j8mxq2a',
  'rev_text_id': 0,
  'rev_timestamp': b'20150105203737',
  'rev_user': 17957094,
  'rev_user_text': b'Staceydolxx'},
 {'rev_comment': b'Fixing my poor editing!',
  'rev_content_format': None,
  'rev_content_model': None,
  'rev_deleted': 0,
  'rev_id': 641136361,
  'rev_len': 2395,
  'rev_minor_edit': 0,
  'rev_page': 44440983,
  'rev_parent_id': 641136157,
  'rev_sha1': b'qq0htbubrvh2h35q8zwvzah50qrnblq',
  'rev_text_id': 0,
  'rev_timestamp': b'20150105203937',
  'rev_user': 17957094,
  'rev_user_text': b'Staceydolxx'},
 {'rev_comment': b'Keep',
  'rev_content_format': None,
  'rev_content_model': None,
  'rev_deleted': 0,
  'rev_id': 641141882,
  'rev_len': 3465,
  'rev_minor_edit': 0,
  'rev_page': 44940940,
  'rev_parent_id': 641066656,
  'rev_sha1': b'7t00i7r3v3dgmq4j6aslmbif3sacjvg',
  'rev_text_id': 0,
  'rev_timestamp': b'20150105212428',
  'rev_user': 17957094,
  'rev_user_text': b'Staceydolxx'},
 {'rev_comment': b'',
  'rev_content_format': None,
  'rev_content_model': None,
  'rev_deleted': 0,
  'rev_id': 641141952,
  'rev_len': 3464,
  'rev_minor_edit': 1,
  'rev_page': 44940940,
  'rev_parent_id': 641141882,
  'rev_sha1': b'4om9dplju3t89h6aa86809ngw0adney',
  'rev_text_id': 0,
  'rev_timestamp': b'20150105212457',
  'rev_user': 17957094,
  'rev_user_text': b'Staceydolxx'},
 {'rev_comment': b'AGF issues but otherwise fine =)',
  'rev_content_format': None,
  'rev_content_model': None,
  'rev_deleted': 0,
  'rev_id': 641145678,
  'rev_len': 2490,
  'rev_minor_edit': 0,
  'rev_page': 44402761,
  'rev_parent_id': 633880313,
  'rev_sha1': b'48qz493z4zmji1d94zy5ay69ga2jyfm',
  'rev_text_id': 0,
  'rev_timestamp': b'20150105215350',
  'rev_user': 17957094,
  'rev_user_text': b'Staceydolxx'},
 {'rev_comment': b'Will come back to this tomorrow!',
  'rev_content_format': None,
  'rev_content_model': None,
  'rev_deleted': 0,
  'rev_id': 641148162,
  'rev_len': 4362,
  'rev_minor_edit': 0,
  'rev_page': 44940940,
  'rev_parent_id': 641148038,
  'rev_sha1': b'k5d93cb9ynos1t3wvdrv4bfrp7erib4',
  'rev_text_id': 0,
  'rev_timestamp': b'20150105221212',
  'rev_user': 17957094,
  'rev_user_text': b'Staceydolxx'},
 {'rev_comment': b'G2G!',
  'rev_content_format': None,
  'rev_content_model': None,
  'rev_deleted': 0,
  'rev_id': 641237982,
  'rev_len': 4190,
  'rev_minor_edit': 0,
  'rev_page': 44378293,
  'rev_parent_id': 641090151,
  'rev_sha1': b'0kdbfenhecjp53rnxirc68ehzmz7yay',
  'rev_text_id': 0,
  'rev_timestamp': b'20150106131017',
  'rev_user': 17957094,
  'rev_user_text': b'Staceydolxx'},
 {'rev_comment': b'Reply to EEng',
  'rev_content_format': None,
  'rev_content_model': None,
  'rev_deleted': 0,
  'rev_id': 641278681,
  'rev_len': 7486,
  'rev_minor_edit': 0,
  'rev_page': 44940940,
  'rev_parent_id': 641278057,
  'rev_sha1': b'e7ogvvj97gt0ih7b4p4kxw2yu3frxnv',
  'rev_text_id': 0,
  'rev_timestamp': b'20150106184756',
  'rev_user': 17957094,
  'rev_user_text': b'Staceydolxx'},
 {'rev_comment': b'/* Views on morality */ Adding bit more info',
  'rev_content_format': None,
  'rev_content_model': None,
  'rev_deleted': 0,
  'rev_id': 641289400,
  'rev_len': 18949,
  'rev_minor_edit': 0,
  'rev_page': 17202440,
  'rev_parent_id': 641263034,
  'rev_sha1': b'5knq4w5rqdgv4hcej6a6rhrdf4te3d0',
  'rev_text_id': 0,
  'rev_timestamp': b'20150106200341',
  'rev_user': 17957094,
  'rev_user_text': b'Staceydolxx'},
 {'rev_comment': b'Adding reference',
  'rev_content_format': None,
  'rev_content_model': None,
  'rev_deleted': 0,
  'rev_id': 641293667,
  'rev_len': 19457,
  'rev_minor_edit': 0,
  'rev_page': 17202440,
  'rev_parent_id': 641289400,
  'rev_sha1': b'lby9m1uqq60w2p8ke6xim7hljquvsxe',
  'rev_text_id': 0,
  'rev_timestamp': b'20150106202759',
  'rev_user': 17957094,
  'rev_user_text': b'Staceydolxx'},
 {'rev_comment': b'Agreed.',
  'rev_content_format': None,
  'rev_content_model': None,
  'rev_deleted': 0,
  'rev_id': 641294716,
  'rev_len': 8686,
  'rev_minor_edit': 0,
  'rev_page': 44940940,
  'rev_parent_id': 641292255,
  'rev_sha1': b'bq4b6gf1vdtrviagdtaa2jk593veugv',
  'rev_text_id': 0,
  'rev_timestamp': b'20150106203445',
  'rev_user': 17957094,
  'rev_user_text': b'Staceydolxx'},
 {'rev_comment': b'/* Biography */ Adding ref and removing unsourced info',
  'rev_content_format': None,
  'rev_content_model': None,
  'rev_deleted': 0,
  'rev_id': 641296555,
  'rev_len': 19194,
  'rev_minor_edit': 0,
  'rev_page': 17202440,
  'rev_parent_id': 641293667,
  'rev_sha1': b'mtvbo49v6ey3tnmm8xa0ylksivskxd6',
  'rev_text_id': 0,
  'rev_timestamp': b'20150106204826',
  'rev_user': 17957094,
  'rev_user_text': b'Staceydolxx'},
 {'rev_comment': b'Keep',
  'rev_content_format': None,
  'rev_content_model': None,
  'rev_deleted': 0,
  'rev_id': 641298900,
  'rev_len': 3091,
  'rev_minor_edit': 0,
  'rev_page': 44955786,
  'rev_parent_id': 641239511,
  'rev_sha1': b'tfoyl9ynazat67zegj3cm7n50003fg9',
  'rev_text_id': 0,
  'rev_timestamp': b'20150106210341',
  'rev_user': 17957094,
  'rev_user_text': b'Staceydolxx'},
 {'rev_comment': b'Merging?',
  'rev_content_format': None,
  'rev_content_model': None,
  'rev_deleted': 0,
  'rev_id': 641302915,
  'rev_len': 3444,
  'rev_minor_edit': 0,
  'rev_page': 44955786,
  'rev_parent_id': 641298900,
  'rev_sha1': b'q6fydrh742swuty3wt8wzwb214nwlve',
  'rev_text_id': 0,
  'rev_timestamp': b'20150106213014',
  'rev_user': 17957094,
  'rev_user_text': b'Staceydolxx'},
 {'rev_comment': b'G2G!',
  'rev_content_format': None,
  'rev_content_model': None,
  'rev_deleted': 0,
  'rev_id': 641415498,
  'rev_len': 4786,
  'rev_minor_edit': 0,
  'rev_page': 44885574,
  'rev_parent_id': 641402963,
  'rev_sha1': b'rnoknabo5en47fe88bb7nnrvxw18siv',
  'rev_text_id': 0,
  'rev_timestamp': b'20150107134005',
  'rev_user': 17957094,
  'rev_user_text': b'Staceydolxx'},
 {'rev_comment': b'/* Food */ Adding wiki link',
  'rev_content_format': None,
  'rev_content_model': None,
  'rev_deleted': 0,
  'rev_id': 641455117,
  'rev_len': 297318,
  'rev_minor_edit': 0,
  'rev_page': 3434750,
  'rev_parent_id': 641305136,
  'rev_sha1': b'4848ytrdxciulpaskq906ai9ojj0504',
  'rev_text_id': 0,
  'rev_timestamp': b'20150107182255',
  'rev_user': 17957094,
  'rev_user_text': b'Staceydolxx'},
 {'rev_comment': b'',
  'rev_content_format': None,
  'rev_content_model': None,
  'rev_deleted': 0,
  'rev_id': 641117929,
  'rev_len': 142622,
  'rev_minor_edit': 0,
  'rev_page': 33422,
  'rev_parent_id': 641105722,
  'rev_sha1': b'nvpnki4z9lgbsm8rlmd1k3k7euwlab6',
  'rev_text_id': 0,
  'rev_timestamp': b'20150105181103',
  'rev_user': 23692578,
  'rev_user_text': b'TheCanadianHitman'}]