import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
from datetime import datetime, timedelta

%matplotlib inline
/srv/paws/lib/python3.4/site-packages/matplotlib/font_manager.py:273: UserWarning: Matplotlib is building the font cache using fc-list. This may take a moment.
  warnings.warn('Matplotlib is building the font cache using fc-list. This may take a moment.')
/srv/paws/lib/python3.4/site-packages/matplotlib/font_manager.py:273: UserWarning: Matplotlib is building the font cache using fc-list. This may take a moment.
  warnings.warn('Matplotlib is building the font cache using fc-list. This may take a moment.')
from pymysql import connect
# Open the module-level connection to the `enwiki_p` database on host
# 'paws-db'. user_session reuses this `con` object for its queries.
con = connect(host='paws-db', user='cscw', password='cscw', db='enwiki_p')
# Module-level cursor; user_session opens its own cursors via con.cursor().
cur = con.cursor()
---------------------------------------------------------------------------
gaierror                                  Traceback (most recent call last)
/srv/paws/lib/python3.4/site-packages/pymysql/connections.py in connect(self, sock)
    889                             sock = socket.create_connection(
--> 890                                 (self.host, self.port), self.connect_timeout)
    891                             break

/usr/lib/python3.4/socket.py in create_connection(address, timeout, source_address)
    490     err = None
--> 491     for res in getaddrinfo(host, port, 0, SOCK_STREAM):
    492         af, socktype, proto, canonname, sa = res

/usr/lib/python3.4/socket.py in getaddrinfo(host, port, family, type, proto, flags)
    529     addrlist = []
--> 530     for res in _socket.getaddrinfo(host, port, family, type, proto, flags):
    531         af, socktype, proto, canonname, sa = res

gaierror: [Errno -2] Name or service not known

During handling of the above exception, another exception occurred:

OperationalError                          Traceback (most recent call last)
<ipython-input-2-059f25303dae> in <module>()
      1 from pymysql import connect
----> 2 con = connect(host='paws-db', user='cscw', password='cscw', db='enwiki_p')
      3 cur = con.cursor()

/srv/paws/lib/python3.4/site-packages/pymysql/__init__.py in Connect(*args, **kwargs)
     88     """
     89     from .connections import Connection
---> 90     return Connection(*args, **kwargs)
     91 
     92 from pymysql import connections as _orig_conn

/srv/paws/lib/python3.4/site-packages/pymysql/connections.py in __init__(self, host, user, password, database, port, unix_socket, charset, sql_mode, read_default_file, conv, use_unicode, client_flag, cursorclass, init_command, connect_timeout, ssl, read_default_group, compress, named_pipe, no_delay, autocommit, db, passwd, local_infile, max_allowed_packet, defer_connect, auth_plugin_map, read_timeout, write_timeout)
    686             self._sock = None
    687         else:
--> 688             self.connect()
    689 
    690     def _create_ssl_ctx(self, sslp):

/srv/paws/lib/python3.4/site-packages/pymysql/connections.py in connect(self, sock)
    935                 exc.traceback = traceback.format_exc()
    936                 if DEBUG: print(exc.traceback)
--> 937                 raise exc
    938 
    939             # If e is neither DatabaseError or IOError, It's a bug.

OperationalError: (2003, "Can't connect to MySQL server on 'paws-db' ([Errno -2] Name or service not known)")
class edit_session():
    """A run of edits by one user that is treated as a single sitting.

    The session is seeded with its first edit time and grows through
    ``add``. Times are expected to be ``datetime`` objects supplied in
    chronological order; this class does not check either property.

    Attributes:
        start: time of the first edit.
        end: time of the most recently added edit.
        edits: every edit time, in insertion order.
        time_deltas: gap between each added edit and the one before it.
        truncate: always set to True here; not read by this class.
    """

    # Seconds added on top of the measured first-to-last span.
    # NOTE(review): presumably an allowance for work done before the first
    # save of a session -- confirm where the value 430 comes from.
    PADDING_SECONDS = 430

    def __init__(self, t):
        """Start a session whose only edit so far happened at *t*."""
        self.start = t
        self.edits = [t]
        self.end = t
        self.truncate = True
        self.time_deltas = []

    def add(self, t):
        """Append an edit at *t*, recording its gap from the previous edit."""
        self.time_deltas.append(t - self.edits[-1])
        self.edits.append(t)
        self.end = t

    def duration(self):
        """Return the session length in whole seconds, padding included.

        ``timedelta.seconds`` alone only holds the sub-day remainder, so the
        ``days`` component is added explicitly; otherwise a session that runs
        past 24 hours would be under-reported.
        """
        span = self.end - self.start
        return span.days * 86400 + span.seconds + self.PADDING_SECONDS

    def num_edits(self):
        """Return the number of edits in the session."""
        return len(self.edits)
class user_session ():
    """Edit sessions of a single user, reconstructed from edit timestamps.

    Timestamps are read from ``revision_userindex`` and
    ``archive_userindex`` through the module-level ``con`` connection,
    merged, sorted, and split into ``edit_session`` objects.

    Attributes:
        username: the user name the queries were run for.
        edit_times: sorted timestamp strings in ``%Y%m%d%H%M%S`` form.
        sessions: list of ``edit_session`` objects (empty if no edits).
    """

    # Two consecutive edits closer together than this share a session.
    SESSION_GAP = timedelta(hours=1)

    def __init__(self, username, monthspec=None, overflowend=False):
        """Load the user's edit times and build the sessions.

        Args:
            username: value matched against ``rev_user_text`` /
                ``ar_user_text``.
            monthspec: optional ``'YYYYMM'`` string restricting the result
                to one month.
            overflowend: only meaningful with *monthspec*. When true,
                sessions are built from all edits and kept if they *start*
                in the month, so a session may run past the month's end.
                When false, edits outside the month are dropped before
                sessions are built.
        """
        # ping(True) lets pymysql reconnect if the connection has dropped.
        con.ping(True)
        with con.cursor() as cur:
            cur.execute('use enwiki_p;')
            cur.execute('select `rev_timestamp` from revision_userindex where `rev_user_text` = %s ORDER BY `rev_timestamp`', (username,))
            edit_times = [x[0].decode('utf-8') for x in cur.fetchall()]

            cur.execute('select `ar_timestamp` from archive_userindex where `ar_user_text` = %s ORDER BY `ar_timestamp`', (username,))
            archive_times = [x[0].decode('utf-8') for x in cur.fetchall()]

        self.username = username

        # Fixed-width digit strings sort chronologically as plain text.
        self.edit_times = edit_times
        self.edit_times.extend(archive_times)
        self.edit_times.sort()

        if monthspec and not overflowend:
            # Strict mode: discard out-of-month edits before sessionizing.
            self.edit_times = [x for x in self.edit_times if x[0:6] == monthspec]

        self.get_sessions()

        if monthspec and overflowend:
            # Overflow mode: keep whole sessions that begin in the month.
            self.sessions = [x for x in self.sessions
                             if x.start.strftime('%Y%m') == monthspec]

    def get_sessions(self):
        """Split ``self.edit_times`` into sessions and store them.

        A new session starts whenever the gap between the current edit and
        the previous one is at least ``SESSION_GAP``. With no edit times
        the result is an empty list.
        """
        sessions = []
        session = None
        for t in self.edit_times:
            t = datetime.strptime(t, '%Y%m%d%H%M%S')
            if session is None:
                session = edit_session(t)
            elif t - session.end < self.SESSION_GAP:
                # Gap between the current and previous edit is short enough.
                session.add(t)
            else:
                sessions.append(session)
                session = edit_session(t)

        # Flush the session still being built; there is none for a user
        # without edits, and appending None would break the totals below.
        if session is not None:
            sessions.append(session)
        self.sessions = sessions

    def total_hours(self):
        """Return the summed ``duration()`` of all sessions, in hours."""
        return sum(x.duration() for x in self.sessions) / 3600

    def total_sessions(self):
        """Return the number of sessions."""
        return len(self.sessions)

    def total_edits(self):
        """Return the number of edits across all sessions."""
        return sum(x.num_edits() for x in self.sessions)

    def summary(self):
        """Return the user name and the three totals as a dict."""
        return {'username': self.username,
                'total_hours': self.total_hours(),
                'total_sessions': self.total_sessions(),
                'total_edits': self.total_edits()}
#con.ping(True)
#with con.cursor() as cur:
#    cur.execute('use enwiki_p;')
#    lots_of_users = cur.execute('SELECT `rev_user_text` FROM revision_userindex LIMIT 10000')

# Users to summarize. Every entry needs its own trailing comma: adjacent
# string literals are concatenated by Python, which would silently merge two
# names into one nonexistent user.
users = ["Materialscientist",
         "Jtmorgan",
         "Kwamikagami",
         "TonyTheTiger",
         "ACP2011",
         "Pinethicket",
         "Armbrust",
         "P.T. Aufrette",
         "Koavf",
         "Derek R Bullamore",
         "MathewTownsend",
         "Crisco 1492",
         "Alarbus",
         "Rich Farmbrough",
         "Alan Liefting",
         "BD2412",
         "Sitush",
         "DBigXray",
         "Allens",
         "Cloudz679"]

# Print one summary per user for March 2012, keeping sessions that start in
# the month even when they run past its end.
for user in users:
    print(user_session(user, monthspec="201203", overflowend=True).summary())
"""
{'total_hours': 454.9038888888889, 'total_edits': 7490, 'total_sessions': 59, 'username': 'Materialscientist'}
{'total_hours': 66.94111111111111, 'total_edits': 1231, 'total_sessions': 52, 'username': 'Jtmorgan'}
{'total_hours': 356.9113888888889, 'total_edits': 9086, 'total_sessions': 80, 'username': 'Kwamikagami'}
{'total_hours': 358.51222222222225, 'total_edits': 6302, 'total_sessions': 140, 'username': 'TonyTheTiger'}
{'total_hours': 237.5222222222222, 'total_edits': 14613, 'total_sessions': 84, 'username': 'Bgwhite'}
{'total_hours': 338.61027777777775, 'total_edits': 2223, 'total_sessions': 111, 'username': 'ACP2011'}
{'total_hours': 320.50055555555554, 'total_edits': 3916, 'total_sessions': 91, 'username': 'Pinethicket'}
{'total_hours': 312.4475, 'total_edits': 6304, 'total_sessions': 124, 'username': 'Armbrust'}
{'total_hours': 307.38, 'total_edits': 6260, 'total_sessions': 115, 'username': 'P.T. Aufrette'}
{'total_hours': 302.5469444444444, 'total_edits': 44004, 'total_sessions': 84, 'username': 'Koavf'}
{'total_hours': 294.19055555555553, 'total_edits': 4256, 'total_sessions': 89, 'username': 'Derek R Bullamore'}
{'total_hours': 290.1525, 'total_edits': 1864, 'total_sessions': 70, 'username': 'MathewTownsend'}
{'total_hours': 284.4244444444444, 'total_edits': 2837, 'total_sessions': 118, 'username': 'Crisco 1492'}
{'total_hours': 275.67861111111114, 'total_edits': 17177, 'total_sessions': 98, 'username': 'Rich Farmbrough'}
{'total_hours': 280.5886111111111, 'total_edits': 6131, 'total_sessions': 80, 'username': 'Alan Liefting'}
{'total_hours': 277.6847222222222, 'total_edits': 9922, 'total_sessions': 121, 'username': 'BD2412'}
{'total_hours': 270.74944444444446, 'total_edits': 4421, 'total_sessions': 96, 'username': 'Sitush'}
{'total_hours': 270.265, 'total_edits': 7405, 'total_sessions': 86, 'username': 'DBigXray'}
{'total_hours': 280.6222222222222, 'total_edits': 6984, 'total_sessions': 120, 'username': 'Allens'}
{'total_hours': 254.10083333333333, 'total_edits': 9806, 'total_sessions': 112, 'username': 'Cloudz679'}
"""
import zlib

# For two users, histogram the log1p-scaled gaps (in seconds) between
# consecutive edits within a session, one subplot per user, and print the
# zlib-compressed size of the gap array divided by its number of elements.
f, axarr = plt.subplots(2)

for panel, username in zip(axarr, ('Benjamin Mako Hill', 'Jtmorgan')):
    user = user_session(username)

    gap_seconds = []
    for sess in user.sessions:
        gap_seconds.extend(delta.seconds for delta in sess.time_deltas)
    time_deltas = np.array(gap_seconds)

    panel.hist(np.log1p(time_deltas))

    compressed_size = len(zlib.compress(time_deltas))
    print(compressed_size / len(time_deltas))

#stats.entropy(time_deltas)
1.8036233504808767
1.9191006026889197
from ipywidgets import interact_manual
@interact_manual
def do_thing(username='', monthspec='201203'):
    """Print three summaries for *username*, one after another.

    First over every edit, then restricted to *monthspec*, then restricted
    to *monthspec* with ``overflowend=True``.
    """
    whole_history = user_session(username)
    print(whole_history.summary())

    month_only = user_session(username, monthspec)
    print(month_only.summary())

    month_with_overflow = user_session(username, monthspec, overflowend=True)
    print(month_with_overflow.summary())