%matplotlib inline
import csv
import json
import time
import sys
from concurrent.futures import ThreadPoolExecutor

import requests
import mwapi
import mwtypes
import pandas
import seaborn
# Shared MediaWiki API session for English Wikipedia; fetch_historical_scores
# issues its revision queries through it.
session = mwapi.Session("https://en.wikipedia.org", user_agent="ahalfaker@wikimedia.org -- IWSC demo")
# Numeric weight for each wp10 class label. score2sum multiplies every class
# probability by the weight stored here and adds the products together.
WEIGHTS = {'Stub': 1, 'Start': 2, 'C': 3, 'B': 4, 'GA': 5, 'FA': 6}
def score2sum(score_doc):
    """Collapse a score document into one probability-weighted number.

    `score_doc` must carry a 'probability' mapping of class label to
    probability. Each probability is multiplied by the WEIGHTS entry for its
    label and the products are added up. A missing score (None) yields None.
    """
    if score_doc is None:
        return None

    total = 0
    probabilities = score_doc['probability']
    for label in probabilities:
        total = total + WEIGHTS[label] * probabilities[label]
    return total
def fetch_wp10_score(rev_id, timeout=30):
    """Fetch the ORES wp10 score document for one enwiki revision.

    Args:
        rev_id: Revision ID to score.
        timeout: Seconds to wait on the ORES server before giving up.

    Returns:
        The 'score' document from the ORES response, or None when the request
        fails or the response does not contain a score for this revision.
    """
    url = 'https://ores.wikimedia.org/v3/scores/enwiki/{0}/wp10'.format(rev_id)
    try:
        # A timeout keeps one stalled connection from blocking a worker
        # thread indefinitely.
        response = requests.get(url, timeout=timeout)
        return response.json()['enwiki']['scores'][str(rev_id)]['wp10']['score']
    except (requests.RequestException, KeyError, TypeError, ValueError):
        # Best effort: a revision that cannot be scored is reported as None
        # so the rest of the batch still completes. Only request, JSON and
        # lookup failures are caught; KeyboardInterrupt and other unrelated
        # errors are no longer swallowed.
        return None


def fetch_wp10_scores(rev_ids, max_workers=8):
    """Fetch wp10 scores for many revisions concurrently.

    Args:
        rev_ids: Iterable of revision IDs.
        max_workers: Number of worker threads issuing requests in parallel.

    Returns:
        A list holding one fetch_wp10_score result per revision ID, in the
        same order as `rev_ids`.
    """
    # The context manager shuts the pool down when the batch is finished, so
    # repeated calls do not accumulate idle worker threads. The results are
    # collected inside the `with` block because the pool must still be alive
    # while the mapped calls run.
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        return list(executor.map(fetch_wp10_score, rev_ids))

def fetch_historical_scores(page_name):
    """Score every revision of a page and return one record per revision.

    Revisions are requested through the shared `session` in batches of up to
    100 (rvlimit=100, rvdir="newer"), following API continuation until the
    history is exhausted. Each batch of revision IDs is scored with
    fetch_wp10_scores and reduced with score2sum.

    Returns a list of dicts with the keys 'rev_id', 'timestamp' (an
    mwtypes.Timestamp) and 'weighted_sum' (None when no score was available).
    Progress is reported on stderr: one "." per revision and a newline after
    each batch.
    """
    query_params = dict(
        action='query', prop='revisions', titles=page_name,
        rvprop=['ids', 'timestamp'], rvlimit=100, rvdir="newer",
        formatversion=2, continuation=True,
    )

    rows = []
    for batch in session.get(**query_params):
        # Only the first page of each response is read; a single title is
        # requested per query.
        revisions = batch['query']['pages'][0]['revisions']
        scores = fetch_wp10_scores([rev['revid'] for rev in revisions])
        for rev, score in zip(revisions, scores):
            rows.append({
                'rev_id': rev['revid'],
                'timestamp': mwtypes.Timestamp(rev['timestamp']),
                'weighted_sum': score2sum(score),
            })
            sys.stderr.write(".")
            sys.stderr.flush()
        sys.stderr.write("\n")
    return rows
# Score every revision of the "Fernando Pessoa" article and load the records
# into a DataFrame; the rows of dots that follow are the progress output.
historical_scores = pandas.DataFrame(fetch_historical_scores("Fernando Pessoa"))
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
......
# Parse the ISO 8601 timestamps into datetimes and index the frame by them so
# the plot gets a real time axis. Parse failures are left to raise (the
# default): errors='ignore' would silently hand back the unparsed values, and
# pandas has deprecated that option.
historical_scores['time'] = pandas.to_datetime(historical_scores.timestamp, format='%Y-%m-%dT%H:%M:%SZ')
historical_scores = historical_scores.set_index('time')
historical_scores['weighted_sum'].plot()
<matplotlib.axes._subplots.AxesSubplot at 0x7f54ccd4d668>
# Score every revision of the "Tiririca" article and load the records into a
# DataFrame; the dots that follow are the progress output.
historical_scores2 = pandas.DataFrame(fetch_historical_scores("Tiririca"))
...................................................................
# Parse the ISO 8601 timestamps into datetimes and index the frame by them so
# the plot gets a real time axis. Parse failures are left to raise (the
# default): errors='ignore' would silently hand back the unparsed values, and
# pandas has deprecated that option.
historical_scores2['time'] = pandas.to_datetime(historical_scores2.timestamp, format='%Y-%m-%dT%H:%M:%SZ')
historical_scores2 = historical_scores2.set_index('time')
historical_scores2['weighted_sum'].plot()
<matplotlib.axes._subplots.AxesSubplot at 0x7f54cd60acf8>
# Score every revision of the "Ada Lovelace" article. This rebinds
# historical_scores2, replacing the frame built for the previous article.
historical_scores2 = pandas.DataFrame(fetch_historical_scores("Ada Lovelace"))
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
.......................
# Parse the ISO 8601 timestamps into datetimes and index the frame by them so
# the plot gets a real time axis. Parse failures are left to raise (the
# default): errors='ignore' would silently hand back the unparsed values, and
# pandas has deprecated that option.
historical_scores2['time'] = pandas.to_datetime(historical_scores2.timestamp, format='%Y-%m-%dT%H:%M:%SZ')
historical_scores2 = historical_scores2.set_index('time')
historical_scores2['weighted_sum'].plot()
<matplotlib.axes._subplots.AxesSubplot at 0x7f54ccdb9f60>
# Bare expression: the notebook renders the frame as the table that follows.
historical_scores2
rev_id timestamp weighted_sum
time
2001-05-20 14:57:05 233871 2001-05-20T14:57:05Z 1.143507
2001-08-17 00:31:59 234424 2001-08-17T00:31:59Z 1.911449
2001-11-18 04:34:16 429658362 2001-11-18T04:34:16Z 1.894739
2002-02-25 15:51:15 48310 2002-02-25T15:51:15Z 1.868932
2002-04-08 12:22:01 96589 2002-04-08T12:22:01Z 1.874799
2002-06-16 14:25:00 141223 2002-06-16T14:25:00Z 1.862681
2002-08-06 10:15:58 179835 2002-08-06T10:15:58Z 1.862681
2002-08-28 16:40:11 204583 2002-08-28T16:40:11Z 1.862681
2002-09-08 22:45:18 319842 2002-09-08T22:45:18Z 1.896889
2002-09-28 02:20:15 319863 2002-09-28T02:20:15Z 2.119083
2002-09-28 02:31:50 319891 2002-09-28T02:31:50Z 2.157524
2002-09-28 02:45:35 320130 2002-09-28T02:45:35Z 2.153529
2002-09-28 07:02:42 324586 2002-09-28T07:02:42Z 2.122810
2002-09-29 22:31:21 324588 2002-09-29T22:31:21Z 2.138541
2002-09-29 22:31:58 324595 2002-09-29T22:31:58Z 2.134280
2002-09-29 22:34:47 324607 2002-09-29T22:34:47Z 2.132878
2002-09-29 22:38:29 324615 2002-09-29T22:38:29Z 2.137369
2002-09-29 22:40:54 485111 2002-09-29T22:40:54Z 2.168178
2002-12-12 09:14:20 492115 2002-12-12T09:14:20Z 2.179820
2002-12-14 06:36:58 849220 2002-12-14T06:36:58Z 2.358184
2003-04-21 09:21:13 849223 2003-04-21T09:21:13Z 2.401895
2003-04-21 09:22:27 984271 2003-04-21T09:22:27Z 2.383956
2003-05-31 23:44:16 1026978 2003-05-31T23:44:16Z 2.401976
2003-06-12 12:12:22 1026979 2003-06-12T12:12:22Z 2.454424
2003-06-12 12:13:30 1093912 2003-06-12T12:13:30Z 2.459235
2003-06-29 20:10:08 1134633 2003-06-29T20:10:08Z 2.436375
2003-07-10 20:09:34 1467126 2003-07-10T20:09:34Z 2.460329
2003-09-22 12:56:07 1498880 2003-09-22T12:56:07Z 2.474366
2003-09-29 07:16:53 1602322 2003-09-29T07:16:53Z 2.483069
2003-10-21 17:20:49 1828901 2003-10-21T17:20:49Z 2.478755
... ... ... ...
2018-12-14 21:55:53 873756958 2018-12-14T21:55:53Z 5.288435
2018-12-14 21:58:34 873757234 2018-12-14T21:58:34Z 5.288435
2018-12-14 22:06:10 873758087 2018-12-14T22:06:10Z 5.288435
2018-12-14 22:12:36 873758878 2018-12-14T22:12:36Z 5.286761
2018-12-14 22:17:15 873759399 2018-12-14T22:17:15Z 5.286761
2018-12-14 22:20:19 873759764 2018-12-14T22:20:19Z 5.288435
2018-12-14 22:23:32 873760090 2018-12-14T22:23:32Z 5.286761
2018-12-14 22:25:33 873760266 2018-12-14T22:25:33Z 5.286761
2018-12-14 22:28:14 873760567 2018-12-14T22:28:14Z 5.288435
2018-12-14 22:31:24 873760842 2018-12-14T22:31:24Z 5.288435
2018-12-14 22:35:17 873761176 2018-12-14T22:35:17Z 5.290110
2018-12-14 22:39:32 873761589 2018-12-14T22:39:32Z 5.288449
2018-12-14 22:44:11 873762097 2018-12-14T22:44:11Z 5.288186
2018-12-14 22:49:23 873762654 2018-12-14T22:49:23Z 5.285536
2018-12-14 22:52:18 873762940 2018-12-14T22:52:18Z 5.285536
2018-12-18 16:52:00 874337630 2018-12-18T16:52:00Z 5.287613
2018-12-21 16:02:19 874797849 2018-12-21T16:02:19Z 5.287613
2019-01-07 10:59:21 877230067 2019-01-07T10:59:21Z 5.287613
2019-01-07 11:26:26 877232515 2019-01-07T11:26:26Z 5.287613
2019-02-01 15:28:15 881282107 2019-02-01T15:28:15Z 5.287613
2019-02-14 11:33:16 883275437 2019-02-14T11:33:16Z NaN
2019-02-27 11:49:34 885338580 2019-02-27T11:49:34Z 5.281752
2019-03-06 13:44:09 886469956 2019-03-06T13:44:09Z NaN
2019-03-09 19:24:42 886970797 2019-03-09T19:24:42Z NaN
2019-03-09 19:55:07 886974695 2019-03-09T19:55:07Z NaN
2019-03-11 04:39:35 887193894 2019-03-11T04:39:35Z NaN
2019-03-11 13:52:58 887251320 2019-03-11T13:52:58Z NaN
2019-03-11 13:53:02 887251328 2019-03-11T13:53:02Z NaN
2019-03-11 13:53:42 887251377 2019-03-11T13:53:42Z NaN
2019-03-11 13:53:59 887251413 2019-03-11T13:53:59Z NaN

4523 rows × 3 columns

# Score every revision of the "Linus Torvalds" article. This rebinds
# historical_scores2, replacing the frame built for the previous article.
historical_scores2 = pandas.DataFrame(fetch_historical_scores("Linus Torvalds"))
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
....................................................................................................
...............................................................................
# Parse the ISO 8601 timestamps into datetimes and index the frame by them so
# the plot gets a real time axis. Parse failures are left to raise (the
# default): errors='ignore' would silently hand back the unparsed values, and
# pandas has deprecated that option.
historical_scores2['time'] = pandas.to_datetime(historical_scores2.timestamp, format='%Y-%m-%dT%H:%M:%SZ')
historical_scores2 = historical_scores2.set_index('time')
historical_scores2['weighted_sum'].plot()
<matplotlib.axes._subplots.AxesSubplot at 0x7f54ccd37320>