import requests
import json


# Plot inline
%matplotlib inline

# Import pandas & numpy for data manipulation & analysis
import pandas as pd
import numpy as np

import datetime as dt

import matplotlib.pyplot as plt
import matplotlib.ticker as ticker

# Use seaborn for attractive graphs
import seaborn as sns

from pandas.io.json import json_normalize
# Apply seaborn's default theme first; the customised call below builds on it.
sns.set()

# Colour palette for the graphs, given as hex codes.
wikipediacolors = [
    "#3366cc",
    "#00af89",
    "#b32425",
]
wikic = sns.color_palette(wikipediacolors)

# Default styles: 'talk' context, white background, the palette above and a
# 12x4 default figure size. color_codes=False leaves matplotlib's shorthand
# colour codes as they are.
sns.set(
    context='talk',
    style='white',
    palette=wikic,
    rc={'figure.figsize': (12, 4)},
    color_codes=False,
)

# Show the palette as a row of swatches.
sns.palplot(wikic);
# En Wiki LEGACY (all sites)
# Request from the Wikimedia REST API: monthly legacy pagecounts for
# en.wikipedia.org, access-site "all-sites", range 2009010100-2019100100.
api = requests.get(
    "https://wikimedia.org/api/rest_v1/metrics/legacy/pagecounts/aggregate/en.wikipedia.org/all-sites/monthly/2009010100/2019100100",
    timeout=30,  # do not let the cell hang forever if the API stops responding
)
# Surface HTTP errors here instead of as an obscure parsing failure below.
api.raise_for_status()

# pandas >= 1.0 exposes json_normalize at the top level and deprecates the
# pandas.io.json alias (later releases drop it); fall back to the old
# location only on older pandas.
_json_normalize = getattr(pd, "json_normalize", None) or pd.io.json.json_normalize

# One DataFrame row per entry of the response's 'items' list.
df = _json_normalize(api.json(), record_path='items')

df.head()
access-site count granularity project timestamp
0 all-sites 5802681551 monthly en.wikipedia 2009010100
1 all-sites 5547320860 monthly en.wikipedia 2009020100
2 all-sites 6295159057 monthly en.wikipedia 2009030100
3 all-sites 5988817321 monthly en.wikipedia 2009040100
4 all-sites 6267516733 monthly en.wikipedia 2009050100
# En Wiki LEGACY (mobile site)
# Request from the Wikimedia REST API: monthly legacy pagecounts for
# en.wikipedia.org, access-site "mobile-site", range 2009010100-2019100100.
api = requests.get(
    "https://wikimedia.org/api/rest_v1/metrics/legacy/pagecounts/aggregate/en.wikipedia.org/mobile-site/monthly/2009010100/2019100100",
    timeout=30,  # do not let the cell hang forever if the API stops responding
)
# Surface HTTP errors here instead of as an obscure parsing failure below.
api.raise_for_status()

# pandas >= 1.0 exposes json_normalize at the top level and deprecates the
# pandas.io.json alias (later releases drop it); fall back to the old
# location only on older pandas.
_json_normalize = getattr(pd, "json_normalize", None) or pd.io.json.json_normalize

# One DataFrame row per entry of the response's 'items' list.
# NOTE: this rebinds `df`, replacing the all-sites frame loaded earlier.
df = _json_normalize(api.json(), record_path='items')

df.head()
access-site count granularity project timestamp
0 mobile-site 3091546685 monthly en.wikipedia 2014100100
1 mobile-site 3027489668 monthly en.wikipedia 2014110100
2 mobile-site 3278950021 monthly en.wikipedia 2014120100
3 mobile-site 3485302091 monthly en.wikipedia 2015010100
4 mobile-site 3091534479 monthly en.wikipedia 2015020100
# The API returns timestamps as YYYYMMDDHH strings; convert the column to
# real datetimes so it can be used as a time axis.
hourly_stamp_format = '%Y%m%d%H'
raw_stamps = df['timestamp']
df['timestamp'] = pd.to_datetime(raw_stamps, format=hourly_stamp_format)

df.head()
access-site count granularity project timestamp
0 mobile-site 3091546685 monthly en.wikipedia 2014-10-01
1 mobile-site 3027489668 monthly en.wikipedia 2014-11-01
2 mobile-site 3278950021 monthly en.wikipedia 2014-12-01
3 mobile-site 3485302091 monthly en.wikipedia 2015-01-01
4 mobile-site 3091534479 monthly en.wikipedia 2015-02-01
# Line chart of monthly pageviews over time on an explicitly sized axes.
fig, ax = plt.subplots(figsize=(10, 5))

# Use the axes-level lineplot so the line is drawn on `ax`. The figure-level
# relplot builds its own figure (which previously had to be closed by number)
# and, depending on the seaborn version, ignores `ax` altogether.
sns.lineplot(ax=ax, x="timestamp", y="count", data=df)

ax.set(ylabel="Pageviews", xlabel="Month")

# Label y ticks in billions with one decimal, e.g. "3.1 B".
ax.yaxis.set_major_formatter(
    ticker.FuncFormatter(lambda x, pos: '{:,.1f}'.format(x / 1000000000) + ' B')
)