Import libraries

# Makes the plots appear within the notebook
%matplotlib inline

# Two fundamental packages for doing data manipulation
import numpy as np                   # http://www.numpy.org/
import pandas as pd                  # http://pandas.pydata.org/

# Two related packages for plotting data
import matplotlib.pyplot as plt      # http://matplotlib.org/
import seaborn as sb                 # https://stanford.edu/~mwaskom/software/seaborn/

# Package for requesting data via the web and parsing resulting JSON
import requests
import json
from bs4 import BeautifulSoup

# Two packages for accessing the MySQL server
import pymysql                       # http://pymysql.readthedocs.io/en/latest/
import os                            # https://docs.python.org/3.4/library/os.html

# Packages for analyzing complex networks
import networkx as nx                # https://networkx.github.io/
import igraph as ig                  #If required manually in terminal use pip install python-igraph

# Title of the English Wikipedia article that the cells below analyse
page_title = "2013 Egyptian coup d'état"

# Display settings: let pandas print up to 110 rows and 100 columns of a
# DataFrame, and draw plots in seaborn's 'whitegrid' style
pd.options.display.max_rows = 110
pd.options.display.max_columns = 100
sb.set_style('whitegrid')
def link_getter(page_title):
    """Return the title of an English Wikipedia article in each language edition.

    Asks the English Wikipedia API for the article's interlanguage links
    (``prop=langlinks``).

    Parameters
    ----------
    page_title : str
        Title of the article on English Wikipedia.

    Returns
    -------
    dict
        Maps a language code (e.g. ``'fr'``) to the article's title in that
        language edition. ``'en'`` is always present and maps to
        ``page_title`` itself.
    """
    # Let requests build the query string so that titles containing '&',
    # '+', '#', ... are percent-encoded instead of corrupting the URL.
    params = {
        'action': 'query',
        'format': 'json',
        'prop': 'langlinks',
        'titles': page_title,
        'llprop': 'autonym|langname',
        'lllimit': 500,
    }
    req = requests.get('https://en.wikipedia.org/w/api.php', params=params)

    json_string = json.loads(req.text)

    # 'pages' is keyed by page ID; a single title was requested, so take
    # the first (only) entry.
    pages = json_string['query']['pages']
    _pageID = list(pages.keys())[0]

    # A page entry may carry no 'langlinks' key (e.g. an article without any
    # interlanguage links); treat that as an empty list instead of failing.
    _langlink_list = pages[_pageID].get('langlinks', [])

    # In this response format the linked title is stored under the '*' key.
    _langlink_dict = {d['lang']: d['*'] for d in _langlink_list}

    # The English article never lists itself, so add it explicitly.
    _langlink_dict['en'] = page_title

    return _langlink_dict
def name_getter(page_title):
    """Return the language names of the editions an English article links to.

    Asks the English Wikipedia API for the article's interlanguage links
    with ``llprop=autonym|langname`` and keeps the ``langname`` field.

    Parameters
    ----------
    page_title : str
        Title of the article on English Wikipedia.

    Returns
    -------
    dict
        Maps a language code (e.g. ``'fr'``) to the language's name as
        reported by the API (e.g. ``'French'``). ``'en'`` is always present
        and maps to ``'English'``.
    """
    # Let requests build the query string so that titles containing '&',
    # '+', '#', ... are percent-encoded instead of corrupting the URL.
    params = {
        'action': 'query',
        'format': 'json',
        'prop': 'langlinks',
        'titles': page_title,
        'llprop': 'autonym|langname',
        'lllimit': 500,
    }
    req = requests.get('https://en.wikipedia.org/w/api.php', params=params)

    json_string = json.loads(req.text)

    # 'pages' is keyed by page ID; a single title was requested, so take
    # the first (only) entry.
    pages = json_string['query']['pages']
    _pageID = list(pages.keys())[0]

    # A page entry may carry no 'langlinks' key; treat that as an empty list.
    _langlink_list = pages[_pageID].get('langlinks', [])

    _langname_dict = {t['lang']: t['langname'] for t in _langlink_list}

    # The English article never lists itself.  The previous code stored the
    # article title of whichever langlink happened to come last here, which
    # put a foreign article title in the language-name column for 'en'.
    _langname_dict['en'] = 'English'

    return _langname_dict
# Language code -> language name for every edition of the article
names_and_langs = name_getter(page_title)
_langAbrev_dict = dict(names_and_langs)

# Language code -> article title in that edition
pages_and_langs = link_getter(page_title)
titles_and_lang_dict = dict(pages_and_langs)
def get_page_outlinks(page_title,lang='en',redirects=1):
    """Return the titles of the articles linked from a Wikipedia page.

    Fetches the rendered HTML of the page's current version through the
    ``action=parse`` API of the ``lang`` edition, removes all table rows,
    and collects the ``title`` attribute of links found inside paragraphs
    and inside items of unordered lists.

    Parameters
    ----------
    page_title : str
        Title of the page; spaces are replaced with underscores.
    lang : str, optional
        Language code of the Wikipedia edition to query (default ``'en'``).
    redirects : int, optional
        Value sent as the API's ``redirects`` parameter (default ``1``).

    Returns
    -------
    list of str
        Link titles in document order. Duplicates are kept; a link that sits
        in both a paragraph and a list item, or in nested lists, appears
        more than once. Empty when the response has no ``'parse'`` entry.
    """
    # Replace spaces with underscores
    page_title = page_title.replace(' ','_')

    # Titles containing any of these strings are not interesting
    bad_titles = ['Special:','Wikipedia:','Help:','Template:','Category:','International Standard','Portal:','File:']
    # 's:' is only meaningful at the very start of a title; matching it as
    # a substring would also throw away titles such as "Star Wars: ...".
    bad_prefixes = ('s:',)

    def _is_interesting(title):
        # Keep a title unless it matches one of the exclusion rules above.
        if title.startswith(bad_prefixes):
            return False
        return all(bad not in title for bad in bad_titles)

    def _link_titles(container):
        # Titles of the interesting links nested anywhere inside `container`.
        return [link['title'] for link in container.find_all('a')
                if link.has_attr('title') and _is_interesting(link['title'])]

    # Get the response from the API for a query.  The parameters are handed
    # to requests so the title is percent-encoded ('&', '+', '#', ... in a
    # title would otherwise corrupt the query string).
    params = {
        'action': 'parse',
        'format': 'json',
        'page': page_title,
        'redirects': redirects,
        'prop': 'text',
        'disableeditsection': 1,
        'disabletoc': 1,
    }
    req = requests.get('https://{0}.wikipedia.org/w/api.php'.format(lang), params=params)

    # Read the response into JSON to parse and extract the HTML
    json_string = json.loads(req.text)

    # Initialize an empty list to store the links
    outlinks_list = []

    # Without a 'parse' entry there is no HTML to inspect
    if 'parse' not in json_string:
        return outlinks_list

    page_html = json_string['parse']['text']['*']

    # Parse the HTML into Beautiful Soup
    soup = BeautifulSoup(page_html,'lxml')

    # Blank out every table row so that links inside tables are ignored
    for tag in soup.find_all('tr'):
        tag.replace_with('')

    # For each paragraph tag, extract the titles within the links
    for para in soup.find_all('p'):
        outlinks_list.extend(_link_titles(para))

    # For each unordered list, extract the titles within the child links
    for unordered_list in soup.find_all('ul'):
        for item in unordered_list.find_all('li'):
            outlinks_list.extend(_link_titles(item))

    return outlinks_list
# Language code -> list of outlink titles found in that edition's article
outlinks_per_lang = {}

# Language code -> title of the article in that edition
language_titles = link_getter(page_title)

# One API request per language edition
for lang in language_titles:
    title = language_titles[lang]
    outlinks_per_lang[lang] = get_page_outlinks(title, lang)
def get_outlink_translations(outlinks_per_lang):
    """Look up the interlanguage links of every outlink in every language.

    For each language edition, each distinct outlink title is sent to that
    edition's API (``prop=langlinks``, following redirects), one request
    per title.

    Parameters
    ----------
    outlinks_per_lang : dict
        Maps a language code to an iterable of page titles in that edition.

    Returns
    -------
    dict
        ``{lang: {page_title: {other_lang: other_title}}}``.  A title for
        which the API returns no pages or no langlinks maps to an empty
        dict.
    """
    translation_dict = dict()
    for lang, links in outlinks_per_lang.items():
        api_url = 'https://{0}.wikipedia.org/w/api.php'.format(lang)
        translation_dict[lang] = {}

        # set() drops repeated links so each title is requested only once
        for _page_title in set(links):
            # Hand the parameters to requests so the title is
            # percent-encoded; titles such as "AT&T" would otherwise be cut
            # off at the '&' and query the wrong page.
            params = {
                'action': 'query',
                'format': 'json',
                'prop': 'langlinks',
                'titles': _page_title,
                'redirects': 1,
                'lllimit': 500,
                'formatversion': 2,
            }
            response = requests.get(api_url, params=params).json()

            # Every requested title gets an entry, even when nothing is found
            translations = {}
            translation_dict[lang][_page_title] = translations

            # With formatversion=2 'pages' is a list; tolerate responses
            # that lack 'query' or 'pages' altogether.
            pages = response.get('query', {}).get('pages')
            if not pages:
                continue

            for _ll in pages[0].get('langlinks', []):
                translations[_ll['lang']] = _ll['title']

    return translation_dict
# {lang: {outlink title: {other lang: title in that language}}}
translation_dict = get_outlink_translations(outlinks_per_lang)

# Per language edition: number of outlinks that have an English ('en') version
len_transoutlink_dict_counterEn = {
    lang: sum(1 for lang_dict in titles_dict.values() if 'en' in lang_dict)
    for lang, titles_dict in translation_dict.items()
}

# Per language edition: the English titles of those outlinks
len_transoutlink_dict_enList = {
    lang: [lang_dict['en'] for lang_dict in titles_dict.values() if 'en' in lang_dict]
    for lang, titles_dict in translation_dict.items()
}

# Per language edition: number of distinct outlinks that were looked up
len_transoutlink_dict = {
    langlinks_lang: len(titles)
    for langlinks_lang, titles in translation_dict.items()
}
# Run the whole pipeline for page_title.
# NOTE(review): the traceback recorded after this call fails on the
# 'english links' / 'total links' division with "list / int" -- presumably it
# was produced by an earlier version of super_function that stored lists in
# 'english links'; confirm before relying on it.
super_function (page_title)
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
/srv/paws/lib/python3.4/site-packages/pandas/core/ops.py in na_op(x, y)
    651             result = expressions.evaluate(op, str_rep, x, y,
--> 652                                           raise_on_error=True, **eval_kwargs)
    653         except TypeError:

/srv/paws/lib/python3.4/site-packages/pandas/computation/expressions.py in evaluate(op, op_str, a, b, raise_on_error, use_numexpr, **eval_kwargs)
    209         return _evaluate(op, op_str, a, b, raise_on_error=raise_on_error,
--> 210                          **eval_kwargs)
    211     return _evaluate_standard(op, op_str, a, b, raise_on_error=raise_on_error)

/srv/paws/lib/python3.4/site-packages/pandas/computation/expressions.py in _evaluate_standard(op, op_str, a, b, raise_on_error, **eval_kwargs)
     62     with np.errstate(all='ignore'):
---> 63         return op(a, b)
     64 

TypeError: unsupported operand type(s) for /: 'list' and 'int'

During handling of the above exception, another exception occurred:

TypeError                                 Traceback (most recent call last)
/srv/paws/lib/python3.4/site-packages/pandas/core/ops.py in safe_na_op(lvalues, rvalues)
    675             with np.errstate(all='ignore'):
--> 676                 return na_op(lvalues, rvalues)
    677         except Exception:

/srv/paws/lib/python3.4/site-packages/pandas/core/ops.py in na_op(x, y)
    657                 mask = notnull(x) & notnull(y)
--> 658                 result[mask] = op(x[mask], _values_from_object(y[mask]))
    659             elif isinstance(x, np.ndarray):

TypeError: unsupported operand type(s) for /: 'list' and 'int'

During handling of the above exception, another exception occurred:

TypeError                                 Traceback (most recent call last)
<ipython-input-37-dccbe4adb223> in <module>()
----> 1 super_function (page_title)

<ipython-input-36-f056aebeddc8> in super_function(page_title)
     61     df_mix = df_mix[['lang', 'title', 'total links', 'english links']]
     62 
---> 63     df_mix['percent en linkbacks'] = df_mix['english links']/df_mix['total links']
     64 
     65 

/srv/paws/lib/python3.4/site-packages/pandas/core/ops.py in wrapper(left, right, name, na_op)
    713                 lvalues = lvalues.values
    714 
--> 715         result = wrap_results(safe_na_op(lvalues, rvalues))
    716         return construct_result(
    717             left,

/srv/paws/lib/python3.4/site-packages/pandas/core/ops.py in safe_na_op(lvalues, rvalues)
    684                 if is_object_dtype(lvalues):
    685                     return _algos.arrmap_object(lvalues,
--> 686                                                 lambda x: op(x, rvalues))
    687             raise
    688 

pandas/src/algos_common_helper.pxi in pandas.algos.arrmap_object (pandas/algos.c:46672)()

/srv/paws/lib/python3.4/site-packages/pandas/core/ops.py in <lambda>(x)
    684                 if is_object_dtype(lvalues):
    685                     return _algos.arrmap_object(lvalues,
--> 686                                                 lambda x: op(x, rvalues))
    687             raise
    688 

TypeError: ufunc 'true_divide' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''
# One Series per output column, each indexed by language code
s_d1 = pd.Series(_langAbrev_dict)                    # language name
s_d2 = pd.Series(titles_and_lang_dict)               # article title in that edition
s_d3 = pd.Series(len_transoutlink_dict)              # distinct outlinks looked up
s_d4 = pd.Series(len_transoutlink_dict_counterEn)    # outlinks with an English version

# Align the Series on their language codes and fix the column order up front
df_mix = pd.DataFrame(
    {
        'english links': s_d4,
        'total links': s_d3,
        'title': s_d2,
        'lang': s_d1,
    },
    columns=['lang', 'title', 'total links', 'english links'],
)

# Share of outlinks that have an English version, as a fraction;
# NaN for an edition with no outlinks at all (0 / 0)
df_mix['percent en linkbacks'] = df_mix['english links'] / df_mix['total links']

df_mix
lang title total links english links percent en linkbacks
af Afrikaans Egiptiese staatsgreep van 2013 4 2 0.500000
ar Arabic انقلاب 2013 في مصر 110 94 0.854545
arz Egyptian Arabic خريطة المستقبل (مصر) 28 26 0.928571
az Azerbaijani Misirdə hərbi çeviriliş (2013) 0 0 NaN
bg Bulgarian Държавен преврат в Египет (2013 г.) 24 17 0.708333
ca Catalan Cop d'Estat a Egipte l'any 2013 21 21 1.000000
ckb Central Kurdish کودەتای ٢٠١٣ی میسر 11 9 0.818182
de German Militärputsch in Ägypten 2013 267 233 0.872659
el Greek Αιγυπτιακό πραξικόπημα 2013 9 6 0.666667
en 2013年埃及政变 2013 Egyptian coup d'état 262 0 0.000000
es Spanish Golpe de Estado en Egipto de 2013 69 65 0.942029
fa Persian کودتای ۲۰۱۳ مصر 32 30 0.937500
fi Finnish Egyptin vallankaappaus 2013 14 10 0.714286
fr French Coup d'État du 3 juillet 2013 en Égypte 87 74 0.850575
he Hebrew ההפיכה במצרים (2013) 48 45 0.937500
hi Hindi मिस्र में सैन्य तख्तापलट २०१३ 3 3 1.000000
id Indonesian Kudeta Mesir 2013 8 8 1.000000
it Italian Golpe egiziano del 2013 57 50 0.877193
ja Japanese 2013年エジプトクーデター 235 221 0.940426
ko Korean 2013년 이집트 쿠데타 9 9 1.000000
nl Dutch Protesten en staatsgreep in Egypte in 2013 22 22 1.000000
pl Polish Zamach stanu w Egipcie (2013) 17 17 1.000000
pt Portuguese Golpe de Estado no Egito em 2013 26 23 0.884615
ro Romanian Lovitura de stat din Egipt din 2013 29 19 0.655172
ru Russian Военный переворот в Египте (2013) 33 33 1.000000
sr Serbian Државни удар у Египту (2013) 28 24 0.857143
tg Tajik Кудатои 2013 Миср 7 4 0.571429
tr Turkish 2013 Mısır askerî darbesi 39 27 0.692308
uk Ukrainian Військовий переворот в Єгипті 2013 34 32 0.941176
ur Urdu 2013ء مصری فوجی تاخت 2 2 1.000000
vi Vietnamese Đảo chính Ai Cập 2013 45 34 0.755556
zh Chinese 2013年埃及政变 91 78 0.857143
def super_function(page_title):
    """Summarise, per language edition, how many outlinks have an English version.

    Runs the whole pipeline for one English Wikipedia article: find the
    article in every language edition, collect each edition's outlinks, look
    up the interlanguage links of every outlink, and tabulate the counts.

    Parameters
    ----------
    page_title : str
        Title of the article on English Wikipedia.

    Returns
    -------
    pandas.DataFrame
        Indexed by language code, with the columns ``'lang'`` (language
        name), ``'title'`` (article title in that edition), ``'total links'``
        (distinct outlinks looked up), ``'english links'`` (outlinks that
        have an ``'en'`` interlanguage link) and ``'percent en linkbacks'``
        (``english links / total links`` as a fraction; NaN when an edition
        has no outlinks).

    Notes
    -----
    NOTE(review): in the recorded output the ``'en'`` row has 0 English
    links, since only an explicit ``'en'`` interlanguage link is counted --
    confirm whether that row should be treated specially.
    """
    # Each getter costs one API request, so call each exactly once.
    _langAbrev_dict = name_getter(page_title)          # lang code -> language name
    titles_and_lang_dict = link_getter(page_title)     # lang code -> article title

    # One parse request per language edition
    outlinks_per_lang = {}
    for lang, title in titles_and_lang_dict.items():
        outlinks_per_lang[lang] = get_page_outlinks(title, lang)

    # One langlinks request per distinct outlink per edition
    translation_dict = get_outlink_translations(outlinks_per_lang)

    len_transoutlink_dict = {}              # lang code -> distinct outlinks looked up
    len_transoutlink_dict_counterEn = {}    # lang code -> outlinks with an 'en' version
    for lang, titles_dict in translation_dict.items():
        len_transoutlink_dict[lang] = len(titles_dict)
        len_transoutlink_dict_counterEn[lang] = sum(
            1 for lang_dict in titles_dict.values() if 'en' in lang_dict
        )

    # Align everything on the language code and fix the column order
    df_mix = pd.DataFrame(
        {
            'lang': pd.Series(_langAbrev_dict),
            'title': pd.Series(titles_and_lang_dict),
            'total links': pd.Series(len_transoutlink_dict),
            'english links': pd.Series(len_transoutlink_dict_counterEn),
        },
        columns=['lang', 'title', 'total links', 'english links'],
    )

    df_mix['percent en linkbacks'] = df_mix['english links'] / df_mix['total links']

    return df_mix
# Build the per-language summary table for page_title; the returned DataFrame
# is the recorded output that follows.
super_function (page_title)
lang title total links english links percent en linkbacks
af Afrikaans Egiptiese staatsgreep van 2013 4 2 0.500000
ar Arabic انقلاب 2013 في مصر 110 94 0.854545
arz Egyptian Arabic خريطة المستقبل (مصر) 28 26 0.928571
az Azerbaijani Misirdə hərbi çeviriliş (2013) 0 0 NaN
bg Bulgarian Държавен преврат в Египет (2013 г.) 24 17 0.708333
ca Catalan Cop d'Estat a Egipte l'any 2013 21 21 1.000000
ckb Central Kurdish کودەتای ٢٠١٣ی میسر 11 9 0.818182
de German Militärputsch in Ägypten 2013 267 233 0.872659
el Greek Αιγυπτιακό πραξικόπημα 2013 9 6 0.666667
en 2013年埃及政变 2013 Egyptian coup d'état 262 0 0.000000
es Spanish Golpe de Estado en Egipto de 2013 69 65 0.942029
fa Persian کودتای ۲۰۱۳ مصر 32 30 0.937500
fi Finnish Egyptin vallankaappaus 2013 14 10 0.714286
fr French Coup d'État du 3 juillet 2013 en Égypte 87 74 0.850575
he Hebrew ההפיכה במצרים (2013) 48 45 0.937500
hi Hindi मिस्र में सैन्य तख्तापलट २०१३ 3 3 1.000000
id Indonesian Kudeta Mesir 2013 8 8 1.000000
it Italian Golpe egiziano del 2013 57 50 0.877193
ja Japanese 2013年エジプトクーデター 235 221 0.940426
ko Korean 2013년 이집트 쿠데타 9 9 1.000000
nl Dutch Protesten en staatsgreep in Egypte in 2013 22 22 1.000000
pl Polish Zamach stanu w Egipcie (2013) 17 17 1.000000
pt Portuguese Golpe de Estado no Egito em 2013 26 23 0.884615
ro Romanian Lovitura de stat din Egipt din 2013 29 19 0.655172
ru Russian Военный переворот в Египте (2013) 33 33 1.000000
sr Serbian Државни удар у Египту (2013) 28 24 0.857143
tg Tajik Кудатои 2013 Миср 7 4 0.571429
tr Turkish 2013 Mısır askerî darbesi 39 27 0.692308
uk Ukrainian Військовий переворот в Єгипті 2013 34 32 0.941176
ur Urdu 2013ء مصری فوجی تاخت 2 2 1.000000
vi Vietnamese Đảo chính Ai Cập 2013 45 34 0.755556
zh Chinese 2013年埃及政变 91 78 0.857143
2016 Turkish coup d'état attempt
# Articles to process in one batch
page_title_list=["2016 Turkish coup d'état attempt", "2015 Burkinabé coup d'état"]

# Article title -> summary DataFrame, for every article that succeeded
df_dict = {}

# NOTE: the loop variable reuses (and therefore overwrites) the module-level
# page_title; after the loop it holds the last title in the list.
for page_title in page_title_list:

    try:
        df = super_function(page_title)

        # Also keep a copy on disk, named after the article
        df.to_csv(page_title + '.csv')

    # Keep going with the remaining articles, but say what went wrong.
    # KeyboardInterrupt is not an Exception subclass, so Ctrl-C still
    # stops the batch.
    except Exception as err:
        print("{0} had an error!".format(page_title))
        print("    {0}: {1}".format(type(err).__name__, err))

    else:
        df_dict[page_title] = df

# Number of articles processed successfully
len(df_dict)
2