import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
from datetime import datetime, timedelta

%matplotlib inline
from pymysql import connect
con = connect(host='paws-db', user='cscw', password='cscw', db='enwiki_p')
cur = con.cursor()
class edit_session():
    def __init__(self, t):
        self.start = t
        self.edits = [t]
        self.end = t
        self.truncate = True
        self.time_deltas = []
        
    def add(self, t):
        self.time_deltas.append(t - self.edits[-1])
        self.edits.append(t)
        self.end = t
        
    def duration(self):
        return((self.end - self.start).seconds + 430)
    
    def num_edits(self):
        return(len(self.edits))
class user_session ():
    def __init__(self, username, monthspec=None, overflowend=False):
        con.ping(True)
        with con.cursor() as cur:
            cur.execute('use enwiki_p;')
            cur.execute('select `rev_timestamp` from revision_userindex where `rev_user_text` = %s ORDER BY `rev_timestamp`', username)
            edit_times = [x[0].decode('utf-8') for x in cur.fetchall()]

            cur.execute('select `ar_timestamp` from archive_userindex where `ar_user_text` = %s  `ORDER BY `ar_timestamp`', username)
            archive_times = [x[0].decode('utf-8') for x in cur.fetchall()]
            
        self.edit_times = edit_times
        self.edit_times.extend(archive_times)
        self.edit_times.sort()
        
        self.username = username
        
        self.get_sessions()
        
        if monthspec:
            if overflowend:
                self.sessions = [x for x in self.sessions if str(x.start).replace('-', '')[0:6] == monthspec]
            else:
                self.edit_times = [x for x in self.edit_times if x[0:6] == monthspec]
                self.get_sessions()
    
    
    def get_sessions(self):
        sessions = []
        session = None
        for t in self.edit_times:
            t = datetime.strptime(t, '%Y%m%d%H%M%S')
            if not session:
                session = edit_session(t)
            elif t - session.end < timedelta(hours=1): # the between current and previous
                session.add(t)
            else:
                sessions.append(session)
                session = edit_session(t)

        sessions.append(session)
        self.sessions = sessions
        
    def total_hours(self):
        return(sum([x.duration() for x in self.sessions]) / 3600)

    def total_sessions(self):
        return(len(self.sessions))
    
    def total_edits(self):
        return(sum([x.num_edits() for x in self.sessions]))

    def summary(self):
        return({'username' : self.username,
                'total_hours' : self.total_hours(),
                'total_sessions' : self.total_sessions(),
                'total_edits' : self.total_edits() })
#con.ping(True)
#with con.cursor() as cur:
#    cur.execute('use enwiki_p;')
#    lots_of_users = cur.execute('SELECT `rev_user_text` FROM revision_userindex LIMIT 10000')
users = ["Materialscientist",
         "Jtmorgan",
         "Kwamikagami",
         "TonyTheTiger",
         "ACP2011",
         "Pinethicket",
         "Armbrust",
         "P.T. Aufrette",
         "Koavf",
         "Derek R Bullamore",
         "MathewTownsend",
         "Crisco 1492",
         "Alarbus"
         "Rich Farmbrough",
         "Alan Liefting",
         "BD2412",
         "Sitush",
         "DBigXray",
         "Allens",
         "Cloudz679"]

for user in users:
    print(user_session(user, monthspec="201203", overflowend=True).summary())
"""
{'total_hours': 454.9038888888889, 'total_edits': 7490, 'total_sessions': 59, 'username': 'Materialscientist'}
{'total_hours': 66.94111111111111, 'total_edits': 1231, 'total_sessions': 52, 'username': 'Jtmorgan'}
{'total_hours': 356.9113888888889, 'total_edits': 9086, 'total_sessions': 80, 'username': 'Kwamikagami'}
{'total_hours': 358.51222222222225, 'total_edits': 6302, 'total_sessions': 140, 'username': 'TonyTheTiger'}
{'total_hours': 338.61027777777775, 'total_edits': 2223, 'total_sessions': 111, 'username': 'ACP2011'}
{'total_hours': 320.50055555555554, 'total_edits': 3916, 'total_sessions': 91, 'username': 'Pinethicket'}
{'total_hours': 312.4475, 'total_edits': 6304, 'total_sessions': 124, 'username': 'Armbrust'}
{'total_hours': 307.38, 'total_edits': 6260, 'total_sessions': 115, 'username': 'P.T. Aufrette'}
{'total_hours': 302.5469444444444, 'total_edits': 44004, 'total_sessions': 84, 'username': 'Koavf'}
{'total_hours': 294.19055555555553, 'total_edits': 4256, 'total_sessions': 89, 'username': 'Derek R Bullamore'}
{'total_hours': 290.1525, 'total_edits': 1864, 'total_sessions': 70, 'username': 'MathewTownsend'}
{'total_hours': 284.4244444444444, 'total_edits': 2837, 'total_sessions': 118, 'username': 'Crisco 1492'}
{'total_hours': 275.67861111111114, 'total_edits': 17177, 'total_sessions': 98, 'username': 'Rich Farmbrough'}
{'total_hours': 280.5886111111111, 'total_edits': 6131, 'total_sessions': 80, 'username': 'Alan Liefting'}
{'total_hours': 277.6847222222222, 'total_edits': 9922, 'total_sessions': 121, 'username': 'BD2412'}
{'total_hours': 270.74944444444446, 'total_edits': 4421, 'total_sessions': 96, 'username': 'Sitush'}
{'total_hours': 270.265, 'total_edits': 7405, 'total_sessions': 86, 'username': 'DBigXray'}
{'total_hours': 280.6222222222222, 'total_edits': 6984, 'total_sessions': 120, 'username': 'Allens'}
{'total_hours': 254.10083333333333, 'total_edits': 9806, 'total_sessions': 112, 'username': 'Cloudz679'}
"""
import zlib

user = user_session('Benjamin Mako Hill')
time_deltas = np.array([i.seconds for x in user.sessions for i in x.time_deltas])

f, axarr = plt.subplots(2)
axarr[0].hist(np.log1p(time_deltas))

print(len(zlib.compress(time_deltas)) / len(time_deltas))

user = user_session('Jtmorgan')
time_deltas = np.array([i.seconds for x in user.sessions for i in x.time_deltas])
axarr[1].hist(np.log1p(time_deltas))

print(len(zlib.compress(time_deltas)) / len(time_deltas))

#stats.entropy(time_deltas)
1.8036233504808767
1.9191006026889197
from ipywidgets import interact_manual
@interact_manual
def do_thing(username='', monthspec='201203'):
    print(user_session(username).summary())
    print(user_session(username, monthspec).summary())
    print(user_session(username, monthspec, overflowend=True).summary())
        
---------------------------------------------------------------------------
ProgrammingError                          Traceback (most recent call last)
<ipython-input-13-f203e079d7c9> in do_thing(username, monthspec)
      2 @interact_manual
      3 def do_thing(username='', monthspec='201203'):
----> 4     print(user_session(username).summary())
      5     print(user_session(username, monthspec).summary())
      6     print(user_session(username, monthspec, overflowend=True).summary())

<ipython-input-11-d895335f70d0> in __init__(self, username, monthspec, overflowend)
      7             edit_times = [x[0].decode('utf-8') for x in cur.fetchall()]
      8 
----> 9             cur.execute('select `ar_timestamp` from archive_userindex where `ar_user_text` = %s  `ORDER BY `ar_timestamp`', username)
     10             archive_times = [x[0].decode('utf-8') for x in cur.fetchall()]
     11 

/srv/paws/lib/python3.4/site-packages/pymysql/cursors.py in execute(self, query, args)
    144         query = self.mogrify(query, args)
    145 
--> 146         result = self._query(query)
    147         self._executed = query
    148         return result

/srv/paws/lib/python3.4/site-packages/pymysql/cursors.py in _query(self, q)
    294         conn = self._get_db()
    295         self._last_executed = q
--> 296         conn.query(q)
    297         self._do_get_result()
    298         return self.rowcount

/srv/paws/lib/python3.4/site-packages/pymysql/connections.py in query(self, sql, unbuffered)
    817                 sql = sql.encode(self.encoding, 'surrogateescape')
    818         self._execute_command(COMMAND.COM_QUERY, sql)
--> 819         self._affected_rows = self._read_query_result(unbuffered=unbuffered)
    820         return self._affected_rows
    821 

/srv/paws/lib/python3.4/site-packages/pymysql/connections.py in _read_query_result(self, unbuffered)
    999         else:
   1000             result = MySQLResult(self)
-> 1001             result.read()
   1002         self._result = result
   1003         if result.server_status is not None:

/srv/paws/lib/python3.4/site-packages/pymysql/connections.py in read(self)
   1283     def read(self):
   1284         try:
-> 1285             first_packet = self.connection._read_packet()
   1286 
   1287             if first_packet.is_ok_packet():

/srv/paws/lib/python3.4/site-packages/pymysql/connections.py in _read_packet(self, packet_type)
    963 
    964         packet = packet_type(buff, self.encoding)
--> 965         packet.check_error()
    966         return packet
    967 

/srv/paws/lib/python3.4/site-packages/pymysql/connections.py in check_error(self)
    392             errno = self.read_uint16()
    393             if DEBUG: print("errno =", errno)
--> 394             err.raise_mysql_exception(self._data)
    395 
    396     def dump(self):

/srv/paws/lib/python3.4/site-packages/pymysql/err.py in raise_mysql_exception(data)
    118 def raise_mysql_exception(data):
    119     errinfo = _get_error_info(data)
--> 120     _check_mysql_exception(errinfo)

/srv/paws/lib/python3.4/site-packages/pymysql/err.py in _check_mysql_exception(errinfo)
    110     errorclass = error_map.get(errno, None)
    111     if errorclass:
--> 112         raise errorclass(errno, errorvalue)
    113 
    114     # couldn't find the right error number

ProgrammingError: (1064, "You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use near '`ORDER BY `ar_timestamp`' at line 1")