"""
Author: Christian Bouwense

Program that gets the revision data for a user and measures burstiness.
"""

import time
import random
import datetime as dt
import mwapi
import operator
import numpy as np
import dateutil.parser as dup
from matplotlib import pyplot as plt
import matplotlib.patches as mpatches
def get_user_revisions(user, uc_prop='timestamp', uc_start='today', uc_end='2000-01-01T00:00:00Z'):
    """Fetch a user's contribution timestamps and record their interevent times.

    Queries the ``usercontribs`` list of the English Wikipedia API for
    ``user``, following ``uccontinue`` until no ``continue`` section is
    returned, then stores the seconds between consecutive contributions in
    the module-level ``user_data`` dictionary and calls ``get_B`` and
    ``get_M`` for that user.

    Args:
        user: Name of the Wikipedia account to look up.
        uc_prop: Accepted for backward compatibility; it is not forwarded to
            the API, so the API's default set of properties is returned.
        uc_start: Timestamp passed as ``ucstart``. The string ``"today"``
            is replaced by the current UTC time.
        uc_end: Timestamp passed as ``ucend``.

    Returns:
        None. Results are written to ``user_data[user]``.
    """
    # "today" is shorthand for "now". The timestamp carries a "Z" suffix, so
    # it has to be built from UTC rather than from the local clock. Any other
    # value is passed through to the API unchanged.
    if uc_start == 'today':
        uc_start = time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime())

    # Connect to Wikipedia
    session = mwapi.Session('https://en.wikipedia.org', user_agent='cbouwense')

    # These parameters stay the same for every page of results; only the
    # continuation token is added between requests.
    params = {
        'action': 'query',
        'list': 'usercontribs',
        'ucuser': user,
        'uclimit': 'max',
        'ucstart': uc_start,
        'ucend': uc_end,
    }

    print("Retrieving data on %s from Wikipedia..." % user)

    # A "continue" section in the response means the query did not return
    # everything in one go, so keep asking until it disappears.
    rev_timestamps = []
    while True:
        rev_dict = session.get(**params)
        rev_timestamps.extend(
            dup.parse(rev['timestamp'])
            for rev in rev_dict['query']['usercontribs']
        )
        if 'continue' not in rev_dict:
            break
        params['uccontinue'] = rev_dict['continue']['uccontinue']

    # Seconds between each contribution and the next one in the list.
    # NOTE(review): the differences are positive only if the API lists
    # contributions newest first (expected when ucstart is later than ucend)
    # -- confirm.
    interevent_times = [
        (current - following).total_seconds()
        for current, following in zip(rev_timestamps, rev_timestamps[1:])
    ]

    # Create the entry for this user, then let the helpers add 'B' and 'M'.
    user_data[user] = {'interevent_times': interevent_times}
    get_B(user)
    get_M(user)

    print("Data received successfully!")
def get_B(user):
    """Compute B for ``user`` and store it in ``user_data[user]['B']``.

    B is ``(std - mean) / (std + mean)`` of the user's interevent times,
    where ``std`` is NumPy's default (population) standard deviation. When
    every interval is identical and non-zero the result is -1.

    The value is NaN when it is undefined: there are no interevent times,
    or ``std + mean`` is zero (for example when every interval is 0).

    Args:
        user: Key into ``user_data``; the entry must already contain
            ``'interevent_times'``.

    Returns:
        None. The result is written to ``user_data[user]['B']``.
    """
    times = np.asarray(user_data[user]['interevent_times'], dtype=float)

    B = float('nan')
    # Guard the empty case up front: taking the mean of nothing only yields
    # NaN by way of a runtime warning.
    if times.size:
        interevent_mean = times.mean()
        interevent_std_dev = times.std()
        denominator = interevent_std_dev + interevent_mean
        # A zero denominator would be 0/0; leave B as NaN instead.
        if denominator != 0:
            B = (interevent_std_dev - interevent_mean) / denominator

    user_data[user]['B'] = B
def get_M(user):
    """Compute M for ``user`` and store it in ``user_data[user]['M']``.

    M correlates each interevent time with the one that follows it. With
    ``n`` intervals, ``m1``/``s1`` the mean and (population) standard
    deviation of the first ``n - 1`` intervals, and ``m2``/``s2`` those of
    the last ``n - 1`` intervals::

        M = 1 / (n - 1) * sum((t[i] - m1) * (t[i + 1] - m2)) / (s1 * s2)

    The value is NaN when it is undefined: fewer than two intervals (there
    is no consecutive pair), or either standard deviation is zero.

    Args:
        user: Key into ``user_data``; the entry must already contain
            ``'interevent_times'``.

    Returns:
        None. The result is written to ``user_data[user]['M']``.
    """
    times = np.asarray(user_data[user]['interevent_times'], dtype=float)
    pair_count = times.size - 1

    M = float('nan')
    if pair_count >= 1:
        # Line each interval up with its successor.
        earlier = times[:-1]
        later = times[1:]
        spread = earlier.std() * later.std()
        # A zero spread would make every term 0/0; leave M as NaN instead.
        if spread != 0:
            products = (earlier - earlier.mean()) * (later - later.mean())
            # Divide the float sum by the count rather than multiplying by
            # 1 / count, which truncates to 0 under integer division.
            M = np.sum(products) / spread / pair_count

    user_data[user]['M'] = M
# Per-user results keyed by user name. get_user_revisions creates each entry
# with 'interevent_times'; get_B and get_M add 'B' and 'M' to it.
user_data = {}

# Wikipedia accounts to analyse.
users = [
    'Ser Amantio di Nicalao',
    'koavf',
    'Rich Farmbrough',
    'Waacstats',
    'BD2412',
    'Materialscientist',
    'Bearcat',
    'Hmains',
    'Magioladitis',
    'Rjwilmsi',
]

# Download and analyse every user first. Iterating over the list itself
# keeps the loops correct when users are added or removed.
for user in users:
    get_user_revisions(user)

# Then report B followed by M for each user.
for user in users:
    print("%s: " % user)
    print(user_data[user]['B'])
    print(user_data[user]['M'])
    print("\n")

# Output pasted from an earlier run (presumably B, then M, for one user --
# TODO confirm which):
# 0.968527498535
# 0.0167869386751