Lab 2 - Hyperlink Networks

Professor Brian Keegan
Department of Information Science, CU Boulder
This notebook is copyrighted and made available under the Apache License, Version 2.0.

This is the second of five lab notebooks that will explore how to do some introductory data extraction and analysis from Wikipedia data. This lab will extend the methods in the prior lab about analyzing a single article's revision histories and use network science methods to analyze the networks of hyperlinks around a single article. You do not need to be fluent in either to complete the lab, but there are many options for extending the analyses we do here by using more advanced queries and scripting methods.

Acknowledgements
I'd like to thank the Wikimedia Foundation for the PAWS system and related Wikitech infrastructure that this workbook runs within. Yuvi Panda, Aaron Halfaker, Jonathan Morgan, and Dario Taraborelli have all provided crucial support and feedback.

Confirm that basic Python commands work

# Sanity check: bind two integers and raise the first to the power of the second
a, b = 3, 4
a ** b

6012084
6012084

Import modules and setup environment

Load up all the libraries we'll need to connect to the database, retrieve information for analysis, and visualize results.

# Makes the plots appear within the notebook
%matplotlib inline

# Two fundamental packages for doing data manipulation
import numpy as np                   # http://www.numpy.org/
import pandas as pd                  # http://pandas.pydata.org/

# Two related packages for plotting data
import matplotlib.pyplot as plt      # http://matplotlib.org/
import seaborn as sb                 # https://stanford.edu/~mwaskom/software/seaborn/

# Package for requesting data via the web and parsing resulting JSON
import requests
import json
from bs4 import BeautifulSoup

# Two packages for accessing the MySQL server
import pymysql                       # http://pymysql.readthedocs.io/en/latest/
import os                            # https://docs.python.org/3.4/library/os.html

# Packages for analyzing complex networks
import networkx as nx                # https://networkx.github.io/
import igraph as ig

# Set up the environment: use seaborn's 'whitegrid' plot style and let pandas
# display up to 100 columns and 110 rows of a DataFrame
sb.set_style('whitegrid')
pd.options.display.max_columns = 100
pd.options.display.max_rows = 110

Define the name of the article you want to use for the rest of the lab.

# Article analysed throughout the rest of the lab
page_title = "2013 Egyptian coup d'état"

# Lookup table from each language code to that language's name, built from
# the 'lang' and 'langname' fields of the records in _langlink_list.
# NOTE(review): _langlink_list is not assigned in this cell - presumably a
# list of langlink dicts left over from an earlier API call; confirm.
_langAbrev_dict = {entry['lang']: entry['langname'] for entry in _langlink_list}
_langAbrev_dict
{'af': 'Afrikaans',
 'als': 'Alemannisch',
 'am': 'Amharic',
 'an': 'Aragonese',
 'ang': 'Old English',
 'ar': 'Arabic',
 'arz': 'Egyptian Arabic',
 'as': 'Assamese',
 'ast': 'Asturian',
 'ay': 'Aymara',
 'az': 'Azerbaijani',
 'azb': 'تۆرکجه',
 'ba': 'Bashkir',
 'bat-smg': 'Samogitian',
 'bcl': 'Bikol Central',
 'be': 'Belarusian',
 'be-x-old': 'беларуская (тарашкевіца)\u200e',
 'bg': 'Bulgarian',
 'bm': 'Bambara',
 'bn': 'Bangla',
 'bpy': 'Bishnupriya',
 'br': 'Breton',
 'bs': 'Bosnian',
 'bxr': 'буряад',
 'ca': 'Catalan',
 'cbk-zam': 'Chavacano de Zamboanga',
 'cdo': 'Min Dong Chinese',
 'ce': 'Chechen',
 'ceb': 'Cebuano',
 'ckb': 'Central Kurdish',
 'co': 'Corsican',
 'cs': 'Czech',
 'cv': 'Chuvash',
 'cy': 'Welsh',
 'da': 'Danish',
 'de': 'German',
 'diq': 'Zazaki',
 'el': 'Greek',
 'eo': 'Esperanto',
 'es': 'Spanish',
 'et': 'Estonian',
 'eu': 'Basque',
 'ext': 'Extremaduran',
 'fa': 'Persian',
 'fi': 'Finnish',
 'fiu-vro': 'Võro',
 'fo': 'Faroese',
 'fr': 'French',
 'frp': 'Arpitan',
 'frr': 'Northern Frisian',
 'fy': 'Western Frisian',
 'ga': 'Irish',
 'gan': 'Gan Chinese',
 'gd': 'Scottish Gaelic',
 'gl': 'Galician',
 'gn': 'Guarani',
 'gom': 'Goan Konkani',
 'gu': 'Gujarati',
 'gv': 'Manx',
 'hak': 'Hakka Chinese',
 'haw': 'Hawaiian',
 'he': 'Hebrew',
 'hi': 'Hindi',
 'hif': 'Fiji Hindi',
 'hr': 'Croatian',
 'ht': 'Haitian Creole',
 'hu': 'Hungarian',
 'hy': 'Armenian',
 'ia': 'Interlingua',
 'id': 'Indonesian',
 'ie': 'Interlingue',
 'ig': 'Igbo',
 'ilo': 'Iloko',
 'io': 'Ido',
 'is': 'Icelandic',
 'it': 'Italian',
 'ja': 'Japanese',
 'jam': 'Jamaican Creole English',
 'jbo': 'Lojban',
 'jv': 'Javanese',
 'ka': 'Georgian',
 'kaa': 'Kara-Kalpak',
 'kab': 'Kabyle',
 'kk': 'Kazakh',
 'km': 'Khmer',
 'kn': 'Kannada',
 'ko': 'Korean',
 'ksh': 'Colognian',
 'ku': 'Kurdish',
 'kw': 'Cornish',
 'ky': 'Kyrgyz',
 'la': 'Latin',
 'lad': 'Ladino',
 'lb': 'Luxembourgish',
 'lez': 'Lezghian',
 'lg': 'Ganda',
 'li': 'Limburgish',
 'lij': 'Ligurian',
 'lmo': 'Lombard',
 'lrc': 'Northern Luri',
 'lt': 'Lithuanian',
 'lv': 'Latvian',
 'mai': 'Maithili',
 'map-bms': 'Basa Banyumasan',
 'mg': 'Malagasy',
 'mk': 'Macedonian',
 'ml': 'Malayalam',
 'mn': 'Mongolian',
 'mr': 'Marathi',
 'ms': 'Malay',
 'mwl': 'Mirandese',
 'my': 'Burmese',
 'mzn': 'Mazanderani',
 'na': 'Nauru',
 'nah': 'Nāhuatl',
 'nds': 'Low German',
 'nds-nl': 'Low Saxon',
 'ne': 'Nepali',
 'new': 'Newari',
 'nl': 'Dutch',
 'nn': 'Norwegian Nynorsk',
 'no': 'Norwegian',
 'nov': 'Novial',
 'nv': 'Navajo',
 'oc': 'Occitan',
 'olo': 'Livvi-Karelian',
 'om': 'Oromo',
 'or': 'Odia',
 'os': 'Ossetic',
 'pa': 'Punjabi',
 'pam': 'Pampanga',
 'pcd': 'Picard',
 'pl': 'Polish',
 'pms': 'Piedmontese',
 'pnb': 'Western Punjabi',
 'ps': 'Pashto',
 'pt': 'Portuguese',
 'qu': 'Quechua',
 'ro': 'Romanian',
 'roa-rup': 'Aromanian',
 'ru': 'Russian',
 'rue': 'Rusyn',
 'sa': 'Sanskrit',
 'sah': 'Sakha',
 'sc': 'Sardinian',
 'scn': 'Sicilian',
 'sco': 'Scots',
 'sd': 'Sindhi',
 'se': 'Northern Sami',
 'sh': 'Serbo-Croatian',
 'si': 'Sinhala',
 'simple': 'Simple English',
 'sk': 'Slovak',
 'sl': 'Slovenian',
 'so': 'Somali',
 'sq': 'Albanian',
 'sr': 'Serbian',
 'su': 'Sundanese',
 'sv': 'Swedish',
 'sw': 'Swahili',
 'szl': 'Silesian',
 'ta': 'Tamil',
 'te': 'Telugu',
 'th': 'Thai',
 'tk': 'Turkmen',
 'tl': 'Tagalog',
 'tpi': 'Tok Pisin',
 'tr': 'Turkish',
 'tt': 'Tatar',
 'tyv': 'Tuvinian',
 'ug': 'Uyghur',
 'uk': 'Ukrainian',
 'ur': 'Urdu',
 'uz': 'Uzbek',
 'vec': 'Venetian',
 'vep': 'Veps',
 'vi': 'Vietnamese',
 'vo': 'Volapük',
 'wa': 'Walloon',
 'war': 'Waray',
 'wo': 'Wolof',
 'wuu': 'Wu Chinese',
 'xmf': 'Mingrelian',
 'yi': 'Yiddish',
 'yo': 'Yoruba',
 'za': 'Zhuang',
 'zea': 'Zeelandic',
 'zh': 'Chinese',
 'zh-min-nan': 'Chinese (Min Nan)',
 'zh-yue': 'Cantonese'}
# Combine the steps above into one function: it returns every language edition's code together with the article's title as written in that language
def link_getter(page_title):
    """Return the title of an English Wikipedia article in each language edition.

    Queries the English Wikipedia API for the ``langlinks`` of ``page_title``
    (up to 500 per request) and maps every language code to the article's
    title in that language. The English title itself is stored under ``'en'``.

    Parameters
    ----------
    page_title : str
        Title of the article on English Wikipedia.

    Returns
    -------
    dict
        ``{language code: article title}``. Holds only the ``'en'`` entry when
        the response lists no language links for the page.
    """
    # Let requests build the query string so that titles containing characters
    # such as '&', '+' or '#' are escaped instead of corrupting the URL
    params = {
        'action': 'query',
        'format': 'json',
        'prop': 'langlinks',
        'titles': page_title,
        'llprop': 'autonym|langname',
        'lllimit': 500,
    }
    req = requests.get('https://en.wikipedia.org/w/api.php', params=params)

    json_string = json.loads(req.text)

    # The response keys its pages by page ID; take the first (only) entry
    pages = json_string['query']['pages']
    _pageID = list(pages)[0]

    # A page without interlanguage links has no 'langlinks' key at all
    _langlink_list = pages[_pageID].get('langlinks', [])

    # In this response format the translated title is stored under the '*' key
    _langlink_dict = {d['lang']: d['*'] for d in _langlink_list}

    _langlink_dict['en'] = page_title
    return _langlink_dict
titles_and_lang = {}

# Fetch the language -> title mapping once and reuse it for the rest of the cell
important_info = link_getter(page_title)

# Disabled draft. The first loop header below had no body, which made the cell
# a syntax error, and language_titles is only assigned in a later cell.
#for lang,title in language_titles.items():

#outlinks_per_lang = {}

#language_titles = link_getter(page_title)

#for lang,title in language_titles.items():
    #print("The language is",lang,"and the article title is", title)
    #outlinks_per_lang[lang] = get_page_outlinks(title,lang)

# Collect the body outlinks of the article in every language edition
_langlink_AllList_dict=dict()

for lang,title in important_info.items():
    LangLinksAll=get_page_outlinks(page_title=title,lang=lang)

    _langlink_AllList_dict[lang]=LangLinksAll

# Display the article's title in each language edition it is published in
important_info
{'af': 'Egiptiese staatsgreep van 2013',
 'ar': 'انقلاب 2013 في مصر',
 'arz': 'خريطة المستقبل (مصر)',
 'az': 'Misirdə hərbi çeviriliş (2013)',
 'bg': 'Държавен преврат в Египет (2013 г.)',
 'ca': "Cop d'Estat a Egipte l'any 2013",
 'ckb': 'کودەتای ٢٠١٣ی میسر',
 'de': 'Militärputsch in Ägypten 2013',
 'el': 'Αιγυπτιακό πραξικόπημα 2013',
 'en': "2013 Egyptian coup d'état",
 'es': 'Golpe de Estado en Egipto de 2013',
 'fa': 'کودتای ۲۰۱۳ مصر',
 'fi': 'Egyptin vallankaappaus 2013',
 'fr': "Coup d'État du 3 juillet 2013 en Égypte",
 'he': 'ההפיכה במצרים (2013)',
 'hi': 'मिस्र में सैन्य तख्तापलट २०१३',
 'id': 'Kudeta Mesir 2013',
 'it': 'Golpe egiziano del 2013',
 'ja': '2013年エジプトクーデター',
 'ko': '2013년 이집트 쿠데타',
 'nl': 'Protesten en staatsgreep in Egypte in 2013',
 'pl': 'Zamach stanu w Egipcie (2013)',
 'pt': 'Golpe de Estado no Egito em 2013',
 'ro': 'Lovitura de stat din Egipt din 2013',
 'ru': 'Военный переворот в Египте (2013)',
 'sr': 'Државни удар у Египту (2013)',
 'tg': 'Кудатои 2013 Миср',
 'tr': '2013 Mısır askerî darbesi',
 'uk': 'Військовий переворот в Єгипті 2013',
 'ur': '2013ء مصری فوجی تاخت',
 'vi': 'Đảo chính Ai Cập 2013',
 'zh': '2013年埃及政变'}

Retrieve the content of the page via API

Write a function that takes an article title and returns the list of links in the body of the article. Note that the reason we don't use the "pagelinks" table in MySQL or the "links" parameter in the API is that this includes links within templates. Articles with templates link to each other forming over-dense clusters in the resulting networks. We only want the links appearing in the body of the text.

We pass a request to the API, which returns a JSON-formatted string containing the HTML of the page. We use BeautifulSoup to parse through the HTML tree and extract the non-template links and return them as a list.

def get_page_outlinks(page_title,lang='en',redirects=1):
    """Return the titles linked from the body of a Wikipedia article.

    Requests the rendered HTML of the current article version from the
    ``action=parse`` API of the ``lang`` edition, discards every table row,
    and collects the ``title`` attribute of the links found inside paragraphs
    and inside items of unordered lists.

    Parameters
    ----------
    page_title : str
        Title of the article; spaces are replaced with underscores.
    lang : str
        Language code used as the Wikipedia subdomain (default ``'en'``).
    redirects : int
        Value sent as the API's ``redirects`` parameter (default ``1``).

    Returns
    -------
    list of str
        Link titles, paragraph links first and list links after, each in
        document order. A title is repeated when several distinct links point
        to it. The list is empty when the response has no ``'parse'`` entry.
    """
    # Replace spaces with underscores
    page_title = page_title.replace(' ','_')

    # A link is ignored when its title contains any of these substrings
    bad_titles = ['Special:','Wikipedia:','Help:','Template:','Category:','International Standard','Portal:','s:','File:']

    # Let requests build the query string so that titles containing characters
    # such as '&', '+' or '#' are escaped instead of corrupting the URL
    params = {
        'action': 'parse',
        'format': 'json',
        'page': page_title,
        'redirects': redirects,
        'prop': 'text',
        'disableeditsection': 1,
        'disabletoc': 1,
    }
    req = requests.get('https://{0}.wikipedia.org/w/api.php'.format(lang), params=params)

    # Read the response into JSON to parse and extract the HTML
    json_string = json.loads(req.text)

    # Initialize an empty list to store the links
    outlinks_list = []

    # Without a 'parse' entry there is no HTML to inspect
    if 'parse' not in json_string:
        return outlinks_list

    # Parse the HTML into Beautiful Soup
    soup = BeautifulSoup(json_string['parse']['text']['*'],'lxml')

    # Delete table rows, the tags the lab associates with templates
    for tag in soup.find_all('tr'):
        tag.replace_with('')

    # Containers whose links count as body links: every paragraph first,
    # then every list item that sits inside an unordered list
    containers = list(soup.find_all('p'))
    for unordered_list in soup.find_all('ul'):
        containers.extend(unordered_list.find_all('li'))

    # Nested lists make the same <a> tag reachable from several containers.
    # Track tags by identity so each link is recorded once; bs4 compares tags
    # by content, so the tags themselves cannot serve as set members here.
    seen_links = set()
    for container in containers:
        for link in container.find_all('a'):
            if id(link) in seen_links:
                continue
            seen_links.add(id(link))

            if not link.has_attr('title'):
                continue

            title = link['title']
            # Ignore links that aren't interesting
            if all(bad not in title for bad in bad_titles):
                outlinks_list.append(title)

    return outlinks_list
# Body outlinks of the German-language article, fetched directly by its title
german_outlinks = get_page_outlinks('Militärputsch in Ägypten 2013', lang='de')

# Title of the article in each language edition, keyed by language code
language_titles = link_getter(page_title)

# Body outlinks of every language edition, keyed by language code
outlinks_per_lang = {
    lang: get_page_outlinks(title, lang)
    for lang, title in language_titles.items()
}

# Show the outlinks gathered for the German edition
outlinks_per_lang['de']
['Putsch',
 'Streitkräfte Ägyptens',
 'Oberster Rat der Streitkräfte',
 'Abd al-Fattah as-Sisi',
 'Ägypten',
 'Mohammed Mursi',
 'Ultimatum',
 'Islamismus',
 'Muslimbrüder',
 'Staatskrise in Ägypten 2013',
 'Vereinigte Staaten von Amerika',
 'Europäische Union',
 'Terrorismus',
 'Sinai-Halbinsel',
 'Gotteskrieger',
 'Menschenrechtsorganisation',
 'Husni Mubarak',
 'Putsch',
 'Koptische Kirche',
 'Patriarch',
 'Tawadros II.',
 'Imam',
 'Al-Azhar-Universität',
 'Ahmed Tayeb',
 'Tamarod',
 'Linksliberalismus',
 'Nationale Heilsfront',
 'Mohammed el-Baradei',
 'Salafisten',
 'Partei des Lichts',
 'Hasim al-Beblawi',
 'Staatskrise in Ägypten 2013',
 'Christentum',
 'Bischof',
 'Tawadros II.',
 'Sanktion',
 'Römisch-katholische Kirche',
 'Deutsche Bischofskonferenz',
 'Muslimbrüder',
 'Tiefer Staat',
 'Husni Mubarak',
 'Revolution in Ägypten 2011',
 'Restauration (Geschichte)',
 'Konterrevolution',
 'Parlament',
 'Verfassunggebende Versammlung',
 'Gremium',
 'Husni Mubarak',
 'Nachrichtendienst',
 'Streitkräfte Ägyptens',
 'Demonstration',
 'Protest',
 'Interessengruppe',
 'Unterschriftenaktion',
 'Tamarod',
 'Naguib Sawiris',
 'Infrastruktur',
 'Partei der Freien Ägypter',
 'Verfassungsgerichtsbarkeit',
 'Tahani al-Gebali',
 'Elektrizitätsversorgung',
 'Kraftstoff',
 'Erdgas',
 'Menschenrechte',
 'Folter',
 'Pressefreiheit',
 'Lebensmittelteuerung (Seite nicht vorhanden)',
 'Arbeitslosigkeit',
 'Tankstelle',
 'Stromausfall',
 'Boykott',
 'Kriminalität',
 'Nachrichtendienst',
 'Tawadros II.',
 'Ahmad Mohammad al-Tayyeb',
 'Mohammed el-Baradei',
 'Islam',
 'Al-Azhar-Universität',
 'Linksliberalismus',
 'Mohammed el-Baradei',
 'Salafismus',
 'Partei des Lichts',
 'Elite',
 'Übergangsregierung',
 'Technokratie',
 'Großunternehmen',
 'Gouvernement',
 'Gouverneur',
 'Abd al-Fattah as-Sisi',
 'Mohammed Hussein Tantawi',
 'Gremium',
 'Arbeitslosigkeit',
 'Inflation',
 'Revolution in Ägypten 2011',
 'Verfassung der Republik Ägypten',
 'Gouvernements in Ägypten',
 'Adel al-Chajat',
 'Gamaa Islamija',
 'Al-Azhar-Moschee',
 'Tamarod',
 'Mohammed Mursi',
 'Tahrir-Platz',
 'Al-Wasat-Partei',
 'Streitkräfte Ägyptens',
 'Barack Obama',
 'Mohamed Kamel Amr',
 'Middle East News Agency (Seite nicht vorhanden)',
 'Abd al-Fattah as-Sisi',
 'Hescham Kandil',
 'Abdel Meguid Mahmud',
 'Koalition (Politik)',
 'Universität Kairo',
 'Al-Dschamāʿa al-islāmiyya',
 'Assem Abdel-Maged (Seite nicht vorhanden)',
 'Anne W. Patterson',
 'Mitteleuropäische Sommerzeit',
 'Heliopolis',
 'Greenwich Mean Time',
 'Übergangsregierung',
 'Abd al-Fattah as-Sisi',
 'Vorgezogene Neuwahl',
 'Mahmoud Badr (Seite nicht vorhanden)',
 'Entführung',
 'Wiki Thawra (Seite nicht vorhanden)',
 'Washington Post',
 'Revolution',
 'Stiftung Wissenschaft und Politik',
 'Volker Perthes',
 'Die Zeit',
 'Kurier (Tageszeitung)',
 'Der Spiegel',
 'Putsch',
 'Politikwissenschaft',
 'NDR Info',
 'Putsch',
 'John Kerry',
 'Martin E. Dempsey',
 'Sedki Sobhi (Seite nicht vorhanden)',
 'Gunter Mulack',
 'Türkei',
 'Recep Tayyip Erdoğan',
 'Tunesien',
 'Ennahda',
 'The Daily Beast',
 'Philip J. Crowley',
 'Außenminister der Vereinigten Staaten',
 'Zentrum für Forschung zur Arabischen Welt (Seite nicht vorhanden)',
 'Deutsche Gesellschaft für Auswärtige Politik',
 'Europäische Union',
 'Krieg gegen den Terror',
 'Israelisch-ägyptischer Friedensvertrag',
 'Menschenrechte',
 'Dirk Emmerich (Seite nicht vorhanden)',
 'N-tv',
 'Tiefer Staat',
 'Judikative',
 'Exekutive',
 'Administrative',
 'Revolution in Ägypten 2011',
 'Wirtschaft',
 'Militär',
 'Staat im Staate',
 'Konterrevolution',
 'Elite',
 'Militärparade',
 'Akademischer Grad',
 'Militärakademie',
 'Vierte Gewalt',
 'The Guardian',
 'The Washington Post',
 'Al-Arabiya',
 'Al Jazeera',
 'Deutsche Welle',
 'BBC Arabic (Seite nicht vorhanden)',
 'Analphabetismus',
 'Kinderschänder',
 'Michael Thumann',
 'Bürgerrecht',
 'Arabic Network for Human Rights Information (Seite nicht vorhanden)',
 'Muhammad Badi’e',
 'Rābiʿa-al-ʿAdawiyya-Moschee',
 'Chairat al-Schater',
 'Saad al-Katatni',
 'Freiheits- und Gerechtigkeitspartei',
 'Rashad Bajumi (Seite nicht vorhanden)',
 'Human Rights Watch',
 'Adli Mansur',
 'Marsa Matruh',
 'Kafr asch-Schaich',
 'Alexandria',
 'Al-Minya',
 'Alexandria',
 'Luxor',
 'Damanhur',
 'Konterrevolution',
 'Flughafen al-Arisch',
 'Gouvernement as-Suwais',
 'Gouvernement Dschanub Sina',
 'Hosni Mubarak',
 'Kopten',
 'Kreuzzug',
 'Scharia',
 'Mohammed el-Baradei',
 'Partei des Lichts',
 'Ägyptische Sozialdemokratische Partei',
 'Siad Bahaa El-Din (Seite nicht vorhanden)',
 'Koptische Kirche',
 'Al-Arisch',
 'Verfassung der Republik Ägypten',
 'Voice of America',
 'Afrikanische Union',
 'Republikanische Partei',
 'John McCain',
 'Partei des Lichts',
 'Hasim al-Beblawi',
 'Weltbank',
 'Recep Tayyip Erdoğan',
 'Abdullah Gül',
 'Blutbad in Kairo und Gizeh vom 14. August 2013',
 'Cairo Institute for Human Rights Studies (Seite nicht vorhanden)',
 'Kosovo',
 'Libyen',
 'Syrien',
 'Libanon',
 'Ukraine',
 'Europäische Union',
 'Militärputsch in Ägypten 1952',
 'Gamal Abdel Nasser',
 'Tawadros II.',
 'Ahmad Mohammad al-Tayyeb',
 'Mohammed el-Baradei',
 'Verfassung der Republik Ägypten',
 'Oberstes Verfassungsgericht Ägyptens',
 'Adli Mansur',
 'Übergangsregierung',
 'Technokratie',
 'British Broadcasting Corporation',
 'Nationale Heilsfront',
 'Freiheits- und Gerechtigkeitspartei',
 'Joachim Schroedel',
 'Deutsche Bischofskonferenz',
 'The European',
 'Afrikanische Union',
 'Afrikanische Union',
 'Afrikanische Union',
 'Addis Abeba',
 'Nkosazana Dlamini-Zuma',
 'Revolution in Ägypten 2011',
 'Hosni Mubarak',
 'Mohammed Edrees (Seite nicht vorhanden)',
 'Dekolonisation Afrikas',
 'Deutschland',
 'Deutschland',
 'Guido Westerwelle',
 'Dänemark',
 'Dänemark',
 'Iran',
 'Iran',
 'Jordanien',
 'Jordanien',
 'Katar',
 'Katar',
 'Kuwait',
 'Kuwait',
 'Russland',
 'Russland',
 'Alexej Puchow (Seite nicht vorhanden)',
 'Saudi-Arabien',
 'Saudi-Arabien',
 'Abdullah ibn Abd al-Aziz',
 'Somalia',
 'Somalia',
 'Al-Shabaab (Somalia)',
 'Twitter',
 'Rosarote Brille',
 'Syrien',
 'Syrien',
 'Baschar al-Assad',
 'Türkei',
 'Türkei',
 'Ahmet Davutoğlu',
 'Recep Tayyip Erdoğan',
 'Israel',
 'Tunesien',
 'Tunesien',
 'Moncef Marzouki',
 'Kongress für die Republik (Tunesien)',
 'Vereinigte Arabische Emirate',
 'Vereinigte Arabische Emirate',
 'Chalifa bin Zayid Al Nahyan',
 'Vereinigtes Königreich',
 'Vereinigtes Königreich',
 'William Hague',
 'Vereinigte Staaten',
 'Vereinigte Staaten',
 'Vereinigte Staaten',
 'Tansania',
 'Barack Obama',
 'Tunesien',
 'Algerien',
 'L’Orient-Le Jour',
 'Libanon',
 'The Daily Star (Libanon)',
 'Baschar al-Assad',
 'Syrien',
 'Iran',
 'Bahrain',
 'Gulf News',
 'Vereinigte Arabische Emirate',
 'Israel',
 'Israel HaYom',
 'Jedi’ot Acharonot',
 'Haaretz',
 'Frankreich',
 'Paris',
 'Le Figaro',
 'Ouest-France',
 'The New York Times',
 'Handelsblatt',
 'Die Welt',
 'Süddeutsche Zeitung',
 'Frankfurter Allgemeine Zeitung',
 'Jen Psaki',
 'George Orwell',
 'Neusprech',
 'Federal Reserve Bank of New York',
 'Rüstungsindustrie',
 'General Dynamics F-16',
 'Mehrzweckkampfflugzeug',
 'Hughes AH-64',
 'Kampfhubschrauber',
 'M1 Abrams',
 'Kampfpanzer',
 'Fregatte',
 'Krieg in Afghanistan',
 'Krieg gegen den Terror',
 'Naher Osten',
 'Ostafrika',
 'Freiheiten der Luft',
 'Luftraum',
 'Sueskanal',
 'Ölvorkommen',
 'Naher Osten',
 'Brookings Institution',
 'Demokratiemessung',
 'Demokratiemessung',
 'Husni Mubarak',
 'Anwar as-Sadat',
 'Blutbad in Kairo und Gizeh 2013',
 'Staatskrise in Ägypten 2013/2014 (Kabinett Beblawi)',
 'Chile',
 'Argentinien',
 'Algerien',
 'Martin Gehlen',
 'James Franklin Jeffrey',
 'American Council on Germany',
 'Council on Foreign Relations',
 'George W. Bush']
def get_outlink_translations(outlinks_per_lang):
    """Look up the interlanguage titles of every outlink in every language.

    For each language in ``outlinks_per_lang`` the distinct outlink titles are
    sent, one request per title, to that language edition's ``langlinks``
    query (redirects followed, up to 500 links per request).

    Parameters
    ----------
    outlinks_per_lang : dict
        ``{language code: list of page titles}``.

    Returns
    -------
    dict
        ``{language code: {page title: {other language code: title}}}``. The
        innermost dict is empty when the response lists no language links for
        the page.
    """
    translation_dict = dict()

    for lang,links in outlinks_per_lang.items():
        api_url = 'https://{0}.wikipedia.org/w/api.php'.format(lang)
        translation_dict[lang] = {}

        # Query each distinct title only once
        for _page_title in set(links):
            # Let requests build the query string so that titles containing
            # characters such as '&', '+' or '#' are escaped properly
            params = {
                'action': 'query',
                'format': 'json',
                'prop': 'langlinks',
                'titles': _page_title,
                'redirects': 1,
                'lllimit': 500,
                'formatversion': 2,
            }
            response = requests.get(api_url, params=params).json()

            # Tolerate payloads without 'query' or 'pages' instead of raising;
            # such titles simply get an empty translation dict
            pages = response.get('query', {}).get('pages', [])
            langlinks = pages[0].get('langlinks', []) if pages else []

            translation_dict[lang][_page_title] = {
                _ll['lang']: _ll['title'] for _ll in langlinks
            }

    return translation_dict
# Resolve the interlanguage titles of every outlink in every language edition
translation_dict = get_outlink_translations(outlinks_per_lang)

# Count the outlinks of the 'zh' edition that have an 'en' counterpart
sum(1 for lang_dict in translation_dict['zh'].values() if 'en' in lang_dict)
78
# Total number of outlink titles recorded for the 'zh' edition
len(translation_dict['zh'])
91
# Tabulate the records in _langlink_list as a DataFrame and display it.
# NOTE(review): _langlink_list is not assigned at top level anywhere in view -
# presumably a list of langlink dicts kept from an earlier API call; confirm.
df1 = pd.DataFrame(_langlink_list)
df1
* autonym lang langname
0 Albert Einstein Afrikaans af Afrikaans
1 Albert Einstein Alemannisch als Alemannisch
2 አልበርት አይንስታይን አማርኛ am Amharic
3 Albert Einstein aragonés an Aragonese
4 Albert Einstein Ænglisc ang Old English
5 ألبرت أينشتاين العربية ar Arabic
6 البرت اينشتاين مصرى arz Egyptian Arabic
7 এলবাৰ্ট আইনষ্টাইন অসমীয়া as Assamese
8 Albert Einstein asturianu ast Asturian
9 Albert Einstein Aymar aru ay Aymara
10 Albert Eynşteyn azərbaycanca az Azerbaijani
11 آلبرت آینشتاین تۆرکجه azb تۆرکجه
12 Альберт Эйнштейн башҡортса ba Bashkir
13 Alberts Einšteins žemaitėška bat-smg Samogitian
14 Albert Einstein Bikol Central bcl Bikol Central
15 Альберт Эйнштэйн беларуская be Belarusian
16 Альбэрт Айнштайн беларуская (тарашкевіца)‎ be-x-old беларуская (тарашкевіца)‎
17 Алберт Айнщайн български bg Bulgarian
18 Albert Einstein bamanankan bm Bambara
19 আলবার্ট আইনস্টাইন বাংলা bn Bangla
20 আলবার্ট আইনস্টাইন বিষ্ণুপ্রিয়া মণিপুরী bpy Bishnupriya
21 Albert Einstein brezhoneg br Breton
22 Albert Einstein bosanski bs Bosnian
23 Альберт Айнштайн буряад bxr буряад
24 Albert Einstein català ca Catalan
25 Albert Einstein Chavacano de Zamboanga cbk-zam Chavacano de Zamboanga
26 Albert Einstein Mìng-dĕ̤ng-ngṳ̄ cdo Min Dong Chinese
27 Эйнштейн, Альберт нохчийн ce Chechen
28 Albert Einstein Cebuano ceb Cebuano
29 ئەلبەرت ئاینشتاین کوردیی ناوەندی ckb Central Kurdish
30 Albert Einstein corsu co Corsican
31 Albert Einstein čeština cs Czech
32 Эйнштейн Альберт Чӑвашла cv Chuvash
33 Albert Einstein Cymraeg cy Welsh
34 Albert Einstein dansk da Danish
35 Albert Einstein Deutsch de German
36 Albert Einstein Zazaki diq Zazaki
37 Άλμπερτ Αϊνστάιν Ελληνικά el Greek
38 Albert Einstein Esperanto eo Esperanto
39 Albert Einstein español es Spanish
40 Albert Einstein eesti et Estonian
41 Albert Einstein euskara eu Basque
42 Albert Einstein estremeñu ext Extremaduran
43 آلبرت اینشتین فارسی fa Persian
44 Albert Einstein suomi fi Finnish
45 Einsteini Albert Võro fiu-vro Võro
46 Albert Einstein føroyskt fo Faroese
47 Albert Einstein français fr French
48 Albert Einstein arpetan frp Arpitan
49 Albert Einstein Nordfriisk frr Northern Frisian
50 Albert Einstein Frysk fy Western Frisian
51 Albert Einstein Gaeilge ga Irish
52 愛因斯坦 贛語 gan Gan Chinese
53 Albert Einstein Gàidhlig gd Scottish Gaelic
54 Albert Einstein galego gl Galician
... ... ... ... ...
135 البرټ آينسټاين پښتو ps Pashto
136 Albert Einstein português pt Portuguese
137 Albert Einstein Runa Simi qu Quechua
138 Albert Einstein română ro Romanian
139 Albert Einstein armãneashti roa-rup Aromanian
140 Эйнштейн, Альберт русский ru Russian
141 Алберт Айнштайн русиньскый rue Rusyn
142 अल्बर्ट् ऐन्स्टैन् संस्कृतम् sa Sanskrit
143 Альберт Эйнштейн саха тыла sah Sakha
144 Albert Einstein sardu sc Sardinian
145 Albert Einstein sicilianu scn Sicilian
146 Albert Einstein Scots sco Scots
147 البرٽ آئنسٽائن سنڌي sd Sindhi
148 Albert Einstein sámegiella se Northern Sami
149 Albert Einstein srpskohrvatski / српскохрватски sh Serbo-Croatian
150 ඇල්බට්‍ අයින්ස්ටයින් සිංහල si Sinhala
151 Albert Einstein Simple English simple Simple English
152 Albert Einstein slovenčina sk Slovak
153 Albert Einstein slovenščina sl Slovenian
154 Albert Einstein Soomaaliga so Somali
155 Albert Einstein shqip sq Albanian
156 Алберт Ајнштајн српски / srpski sr Serbian
157 Albert Einstein Basa Sunda su Sundanese
158 Albert Einstein svenska sv Swedish
159 Albert Einstein Kiswahili sw Swahili
160 Albert Einstein ślůnski szl Silesian
161 ஆல்பர்ட் ஐன்ஸ்டைன் தமிழ் ta Tamil
162 ఆల్బర్ట్ ఐన్‌స్టీన్ తెలుగు te Telugu
163 อัลเบิร์ต ไอน์สไตน์ ไทย th Thai
164 Albert Eýnşteýn Türkmençe tk Turkmen
165 Albert Einstein Tagalog tl Tagalog
166 Albert Einstein Tok Pisin tpi Tok Pisin
167 Albert Einstein Türkçe tr Turkish
168 Альберт Эйнштейн татарча/tatarça tt Tatar
169 Эйнштейн, Альберт тыва дыл tyv Tuvinian
170 ئالبېرىت ئېينىشتېين ئۇيغۇرچە / Uyghurche ug Uyghur
171 Альберт Ейнштейн українська uk Ukrainian
172 البرٹ آئنسٹائن اردو ur Urdu
173 Albert Einstein oʻzbekcha/ўзбекча uz Uzbek
174 Albert Einstein vèneto vec Venetian
175 Einštein Al'bert vepsän kel’ vep Veps
176 Albert Einstein Tiếng Việt vi Vietnamese
177 Albert Einstein Volapük vo Volapük
178 Albert Einstein walon wa Walloon
179 Albert Einstein Winaray war Waray
180 Albert Einstein Wolof wo Wolof
181 阿爾伯特·愛因斯坦 吴语 wuu Wu Chinese
182 ალბერტ აინშტაინი მარგალური xmf Mingrelian
183 אלבערט איינשטיין ייִדיש yi Yiddish
184 Albert Einstein Yorùbá yo Yoruba
185 Albert Einstein Vahcuengh za Zhuang
186 Albert Einstein Zeêuws zea Zeelandic
187 阿尔伯特·爱因斯坦 中文 zh Chinese
188 Albert Einstein Bân-lâm-gú zh-min-nan Chinese (Min Nan)
189 愛因斯坦 粵語 zh-yue Cantonese

190 rows × 4 columns

# Build a DataFrame whose index is the keys of _langlink_dict and display it.
# NOTE(review): _langlink_dict is only visible as a local inside link_getter -
# presumably a module-level copy exists from an earlier cell; confirm.
df = pd.DataFrame.from_dict(_langlink_dict, orient='index') 
df
0
hak Albert Einstein
km អាល់បឺត អែងស្តែង
wa Albert Einstein
diq Albert Einstein
cbk-zam Albert Einstein
yo Albert Einstein
ne अल्वर्ट आइन्स्टाइन
ps البرټ آينسټاين
bn আলবার্ট আইনস্টাইন
be Альберт Эйнштэйн
my အဲလ်ဘတ် အိုင်းစတိုင်း
he אלברט איינשטיין
nv Hastiin Albert Einstein
ilo Albert Einstein
io Albert Einstein
eo Albert Einstein
lrc آلورت انیشتین
hu Albert Einstein
scn Albert Einstein
th อัลเบิร์ต ไอน์สไตน์
cv Эйнштейн Альберт
war Albert Einstein
nl Albert Einstein
ml ആൽബർട്ട് ഐൻസ്റ്റൈൻ
ug ئالبېرىت ئېينىشتېين
li Albert Einstein
zh 阿尔伯特·爱因斯坦
am አልበርት አይንስታይን
gan 愛因斯坦
kw Albert Einstein
bpy আলবার্ট আইনস্টাইন
si ඇල්බට්‍ අයින්ස්ටයින්
ca Albert Einstein
gn Albert Einstein
jbo albert. ainctain
mwl Albert Einstein
ur البرٹ آئنسٹائن
ast Albert Einstein
wuu 阿爾伯特·愛因斯坦
gv Albert Einstein
fiu-vro Einsteini Albert
za Albert Einstein
om Alberti Anistaayin
an Albert Einstein
ia Albert Einstein
frp Albert Einstein
be-x-old Альбэрт Айнштайн
so Albert Einstein
fi Albert Einstein
yi אלבערט איינשטיין
it Albert Einstein
lad Albert Einstein
sw Albert Einstein
arz البرت اينشتاين
vo Albert Einstein
... ...
mn Альберт Эйнштейн
ay Albert Einstein
roa-rup Albert Einstein
lt Albert Einstein
pl Albert Einstein
ku Albert Einstein
mzn آلبرت اینشتین
ckb ئەلبەرت ئاینشتاین
hr Albert Einstein
lg Alubbaati Ansitayini
fr Albert Einstein
bm Albert Einstein
fa آلبرت اینشتین
szl Albert Einstein
tt Альберт Эйнштейн
ht Albert Einstein
mr अॅल्बर्ट आइन्स्टाइन
simple Albert Einstein
na Albert Einstein
ba Альберт Эйнштейн
sh Albert Einstein
ar ألبرت أينشتاين
zea Albert Einstein
gl Albert Einstein
pms Albert Einstein
gu આલ્બર્ટ આઇન્સ્ટાઇન
os Эйнштейн, Альберт
tl Albert Einstein
sl Albert Einstein
nds-nl Albert Einstein
ky Эйнштейн, Алберт
mai अल्बर्ट आइनस्टाइन
hy Ալբերտ Այնշտայն
or ଆଲବର୍ଟ ଆଇନଷ୍ଟାଇନ
sc Albert Einstein
zh-min-nan Albert Einstein
mg Albert Einstein
ga Albert Einstein
nds Albert Einstein
nah Albert Einstein
haw Albert Einstein
als Albert Einstein
fy Albert Einstein
de Albert Einstein
es Albert Einstein
tyv Эйнштейн, Альберт
lez Альберт Эйнштейн
ms Albert Einstein
tk Albert Eýnşteýn
cs Albert Einstein
ie Albert Einstein
sk Albert Einstein
id Albert Einstein
as এলবাৰ্ট আইনষ্টাইন
gd Albert Einstein

190 rows × 1 columns

# English title of every outlink, grouped by the language edition that links to it.
# translation_dict is nested as {language: {article: {language: title}}}, so the
# 'en' lookup has to happen on the innermost dict; testing it one level up
# compares 'en' against article titles and always yields an empty result.
{
    lang: {
        article: ll_dict['en']
        for article, ll_dict in articles.items()
        if 'en' in ll_dict
    }
    for lang, articles in translation_dict.items()
}
{}
# Display the outlinks stored in german_outlinks
german_outlinks
['Putsch',
 'Streitkräfte Ägyptens',
 'Oberster Rat der Streitkräfte',
 'Abd al-Fattah as-Sisi',
 'Ägypten',
 'Mohammed Mursi',
 'Ultimatum',
 'Islamismus',
 'Muslimbrüder',
 'Staatskrise in Ägypten 2013',
 'Vereinigte Staaten von Amerika',
 'Europäische Union',
 'Terrorismus',
 'Sinai-Halbinsel',
 'Gotteskrieger',
 'Menschenrechtsorganisation',
 'Husni Mubarak',
 'Putsch',
 'Koptische Kirche',
 'Patriarch',
 'Tawadros II.',
 'Imam',
 'Al-Azhar-Universität',
 'Ahmed Tayeb',
 'Tamarod',
 'Linksliberalismus',
 'Nationale Heilsfront',
 'Mohammed el-Baradei',
 'Salafisten',
 'Partei des Lichts',
 'Hasim al-Beblawi',
 'Staatskrise in Ägypten 2013',
 'Christentum',
 'Bischof',
 'Tawadros II.',
 'Sanktion',
 'Römisch-katholische Kirche',
 'Deutsche Bischofskonferenz',
 'Muslimbrüder',
 'Tiefer Staat',
 'Husni Mubarak',
 'Revolution in Ägypten 2011',
 'Restauration (Geschichte)',
 'Konterrevolution',
 'Parlament',
 'Verfassunggebende Versammlung',
 'Gremium',
 'Husni Mubarak',
 'Nachrichtendienst',
 'Streitkräfte Ägyptens',
 'Demonstration',
 'Protest',
 'Interessengruppe',
 'Unterschriftenaktion',
 'Tamarod',
 'Naguib Sawiris',
 'Infrastruktur',
 'Partei der Freien Ägypter',
 'Verfassungsgerichtsbarkeit',
 'Tahani al-Gebali',
 'Elektrizitätsversorgung',
 'Kraftstoff',
 'Erdgas',
 'Menschenrechte',
 'Folter',
 'Pressefreiheit',
 'Lebensmittelteuerung (Seite nicht vorhanden)',
 'Arbeitslosigkeit',
 'Tankstelle',
 'Stromausfall',
 'Boykott',
 'Kriminalität',
 'Nachrichtendienst',
 'Tawadros II.',
 'Ahmad Mohammad al-Tayyeb',
 'Mohammed el-Baradei',
 'Islam',
 'Al-Azhar-Universität',
 'Linksliberalismus',
 'Mohammed el-Baradei',
 'Salafismus',
 'Partei des Lichts',
 'Elite',
 'Übergangsregierung',
 'Technokratie',
 'Großunternehmen',
 'Gouvernement',
 'Gouverneur',
 'Abd al-Fattah as-Sisi',
 'Mohammed Hussein Tantawi',
 'Gremium',
 'Arbeitslosigkeit',
 'Inflation',
 'Revolution in Ägypten 2011',
 'Verfassung der Republik Ägypten',
 'Gouvernements in Ägypten',
 'Adel al-Chajat',
 'Gamaa Islamija',
 'Al-Azhar-Moschee',
 'Tamarod',
 'Mohammed Mursi',
 'Tahrir-Platz',
 'Al-Wasat-Partei',
 'Streitkräfte Ägyptens',
 'Barack Obama',
 'Mohamed Kamel Amr',
 'Middle East News Agency (Seite nicht vorhanden)',
 'Abd al-Fattah as-Sisi',
 'Hescham Kandil',
 'Abdel Meguid Mahmud',
 'Koalition (Politik)',
 'Universität Kairo',
 'Al-Dschamāʿa al-islāmiyya',
 'Assem Abdel-Maged (Seite nicht vorhanden)',
 'Anne W. Patterson',
 'Mitteleuropäische Sommerzeit',
 'Heliopolis',
 'Greenwich Mean Time',
 'Übergangsregierung',
 'Abd al-Fattah as-Sisi',
 'Vorgezogene Neuwahl',
 'Mahmoud Badr (Seite nicht vorhanden)',
 'Entführung',
 'Wiki Thawra (Seite nicht vorhanden)',
 'Washington Post',
 'Revolution',
 'Stiftung Wissenschaft und Politik',
 'Volker Perthes',
 'Die Zeit',
 'Kurier (Tageszeitung)',
 'Der Spiegel',
 'Putsch',
 'Politikwissenschaft',
 'NDR Info',
 'Putsch',
 'John Kerry',
 'Martin E. Dempsey',
 'Sedki Sobhi (Seite nicht vorhanden)',
 'Gunter Mulack',
 'Türkei',
 'Recep Tayyip Erdoğan',
 'Tunesien',
 'Ennahda',
 'The Daily Beast',
 'Philip J. Crowley',
 'Außenminister der Vereinigten Staaten',
 'Zentrum für Forschung zur Arabischen Welt (Seite nicht vorhanden)',
 'Deutsche Gesellschaft für Auswärtige Politik',
 'Europäische Union',
 'Krieg gegen den Terror',
 'Israelisch-ägyptischer Friedensvertrag',
 'Menschenrechte',
 'Dirk Emmerich (Seite nicht vorhanden)',
 'N-tv',
 'Tiefer Staat',
 'Judikative',
 'Exekutive',
 'Administrative',
 'Revolution in Ägypten 2011',
 'Wirtschaft',
 'Militär',
 'Staat im Staate',
 'Konterrevolution',
 'Elite',
 'Militärparade',
 'Akademischer Grad',
 'Militärakademie',
 'Vierte Gewalt',
 'The Guardian',
 'The Washington Post',
 'Al-Arabiya',
 'Al Jazeera',
 'Deutsche Welle',
 'BBC Arabic (Seite nicht vorhanden)',
 'Analphabetismus',
 'Kinderschänder',
 'Michael Thumann',
 'Bürgerrecht',
 'Arabic Network for Human Rights Information (Seite nicht vorhanden)',
 'Muhammad Badi’e',
 'Rābiʿa-al-ʿAdawiyya-Moschee',
 'Chairat al-Schater',
 'Saad al-Katatni',
 'Freiheits- und Gerechtigkeitspartei',
 'Rashad Bajumi (Seite nicht vorhanden)',
 'Human Rights Watch',
 'Adli Mansur',
 'Marsa Matruh',
 'Kafr asch-Schaich',
 'Alexandria',
 'Al-Minya',
 'Alexandria',
 'Luxor',
 'Damanhur',
 'Konterrevolution',
 'Flughafen al-Arisch',
 'Gouvernement as-Suwais',
 'Gouvernement Dschanub Sina',
 'Hosni Mubarak',
 'Kopten',
 'Kreuzzug',
 'Scharia',
 'Mohammed el-Baradei',
 'Partei des Lichts',
 'Ägyptische Sozialdemokratische Partei',
 'Siad Bahaa El-Din (Seite nicht vorhanden)',
 'Koptische Kirche',
 'Al-Arisch',
 'Verfassung der Republik Ägypten',
 'Voice of America',
 'Afrikanische Union',
 'Republikanische Partei',
 'John McCain',
 'Partei des Lichts',
 'Hasim al-Beblawi',
 'Weltbank',
 'Recep Tayyip Erdoğan',
 'Abdullah Gül',
 'Blutbad in Kairo und Gizeh vom 14. August 2013',
 'Cairo Institute for Human Rights Studies (Seite nicht vorhanden)',
 'Kosovo',
 'Libyen',
 'Syrien',
 'Libanon',
 'Ukraine',
 'Europäische Union',
 'Militärputsch in Ägypten 1952',
 'Gamal Abdel Nasser',
 'Tawadros II.',
 'Ahmad Mohammad al-Tayyeb',
 'Mohammed el-Baradei',
 'Verfassung der Republik Ägypten',
 'Oberstes Verfassungsgericht Ägyptens',
 'Adli Mansur',
 'Übergangsregierung',
 'Technokratie',
 'British Broadcasting Corporation',
 'Nationale Heilsfront',
 'Freiheits- und Gerechtigkeitspartei',
 'Joachim Schroedel',
 'Deutsche Bischofskonferenz',
 'The European',
 'Afrikanische Union',
 'Afrikanische Union',
 'Afrikanische Union',
 'Addis Abeba',
 'Nkosazana Dlamini-Zuma',
 'Revolution in Ägypten 2011',
 'Hosni Mubarak',
 'Mohammed Edrees (Seite nicht vorhanden)',
 'Dekolonisation Afrikas',
 'Deutschland',
 'Deutschland',
 'Guido Westerwelle',
 'Dänemark',
 'Dänemark',
 'Iran',
 'Iran',
 'Jordanien',
 'Jordanien',
 'Katar',
 'Katar',
 'Kuwait',
 'Kuwait',
 'Russland',
 'Russland',
 'Alexej Puchow (Seite nicht vorhanden)',
 'Saudi-Arabien',
 'Saudi-Arabien',
 'Abdullah ibn Abd al-Aziz',
 'Somalia',
 'Somalia',
 'Al-Shabaab (Somalia)',
 'Twitter',
 'Rosarote Brille',
 'Syrien',
 'Syrien',
 'Baschar al-Assad',
 'Türkei',
 'Türkei',
 'Ahmet Davutoğlu',
 'Recep Tayyip Erdoğan',
 'Israel',
 'Tunesien',
 'Tunesien',
 'Moncef Marzouki',
 'Kongress für die Republik (Tunesien)',
 'Vereinigte Arabische Emirate',
 'Vereinigte Arabische Emirate',
 'Chalifa bin Zayid Al Nahyan',
 'Vereinigtes Königreich',
 'Vereinigtes Königreich',
 'William Hague',
 'Vereinigte Staaten',
 'Vereinigte Staaten',
 'Vereinigte Staaten',
 'Tansania',
 'Barack Obama',
 'Tunesien',
 'Algerien',
 'L’Orient-Le Jour',
 'Libanon',
 'The Daily Star (Libanon)',
 'Baschar al-Assad',
 'Syrien',
 'Iran',
 'Bahrain',
 'Gulf News',
 'Vereinigte Arabische Emirate',
 'Israel',
 'Israel HaYom',
 'Jedi’ot Acharonot',
 'Haaretz',
 'Frankreich',
 'Paris',
 'Le Figaro',
 'Ouest-France',
 'The New York Times',
 'Handelsblatt',
 'Die Welt',
 'Süddeutsche Zeitung',
 'Frankfurter Allgemeine Zeitung',
 'Jen Psaki',
 'George Orwell',
 'Neusprech',
 'Federal Reserve Bank of New York',
 'Rüstungsindustrie',
 'General Dynamics F-16',
 'Mehrzweckkampfflugzeug',
 'Hughes AH-64',
 'Kampfhubschrauber',
 'M1 Abrams',
 'Kampfpanzer',
 'Fregatte',
 'Krieg in Afghanistan',
 'Krieg gegen den Terror',
 'Naher Osten',
 'Ostafrika',
 'Freiheiten der Luft',
 'Luftraum',
 'Sueskanal',
 'Ölvorkommen',
 'Naher Osten',
 'Brookings Institution',
 'Demokratiemessung',
 'Demokratiemessung',
 'Husni Mubarak',
 'Anwar as-Sadat',
 'Blutbad in Kairo und Gizeh 2013',
 'Staatskrise in Ägypten 2013/2014 (Kabinett Beblawi)',
 'Chile',
 'Argentinien',
 'Algerien',
 'Martin Gehlen',
 'James Franklin Jeffrey',
 'American Council on Germany',
 'Council on Foreign Relations',
 'George W. Bush']

Run an example article: this shows all the outlinks of the article in each language edition.

# Build a mapping from language code to the list of outlinks found on that
# language's version of the article. link_getter(page_title) supplies the
# (language code, localized title) pairs to iterate over.
_langlink_AllList_dict = {}

for lang, title in link_getter(page_title).items():
    # Fetch this language edition's outlinks and file them under its code.
    LangLinksAll = _langlink_AllList_dict[lang] = get_page_outlinks(
        page_title=title,
        lang=lang,
    )

# Bare expression so the notebook displays the finished dictionary.
_langlink_AllList_dict
{'af': ['Egipte',
  'Arabiese Lente',
  'Abdul Fatah al-Sisi (bladsy bestaan nie)',
  'Hamdin Sabahi (bladsy bestaan nie)'],
 'ar': ['3 يوليو',
  '2013',
  'الجيش المصري',
  'عبد الفتاح السيسي',
  'الانتخابات الرئاسية المصرية 2012',
  'محمد مرسي',
  'دستور مصر 2012',
  'المحكمة الدستورية العليا المصرية',
  'عدلي منصور',
  'محمد مرسي',
  'الإخوان المسلمون في مصر',
  'مظاهرات 30 يونيو 2013 في مصر',
  'محمد مرسي',
  'المجلس الأعلى للقوات المسلحة',
  'محمد حسني مبارك',
  'ثورة 25 يناير',
  'حركة تمرد',
  'محمد البرادعي',
  'جبهة الإنقاذ الوطني (مصر)',
  'أحمد الطيب',
  'تواضروس الثاني',
  'محمد مرسي',
  'مدينة نصر',
  'القاهرة',
  '1 يوليو',
  'الإخوان المسلمين',
  'المقطم (حي)',
  '1 يوليو',
  'القوات المسلحة المصرية',
  'عبد الفتاح السيسي',
  'حزب النور',
  'سامي عنان',
  'وكالة أنباء الشرق الأوسط',
  'محمد كامل عمرو',
  'وزارة الداخلية المصرية',
  '2 يوليو',
  'طلعت عبد الله',
  'محمد مرسي',
  'عبد المجيد محمود',
  'كفر الشيخ',
  '2 يوليو',
  'الإثنين',
  'يوتيوب',
  'عبد الفتاح السيسي',
  'المحكمة الدستورية العليا',
  'أحمد الطيب',
  'تواضروس الثاني',
  'محمد البرادعي',
  'قناة الناس',
  'قناة الرحمة الفضائية',
  'قناة الحافظ',
  'مصر 25',
  'محمد جمال هلال (الصفحة غير موجودة)',
  'الجزيرة مباشر مصر',
  'قناة الجزيرة',
  'الجزيرة الإنجليزية',
  'الجزيرة مباشر مصر',
  'قناة الجزيرة',
  'القاهرة',
  'الجزيرة مباشر مصر',
  'ميدان التحرير',
  'قناة الجزيرة',
  'الجزيرة مباشر مصر',
  'نايل سات',
  'الإخوان المسلمين',
  'محمد مرسي',
  'سعد الكتاتني',
  'خيرت الشاطر',
  'رشاد البيومي',
  'مهدي عاكف',
  'حلمي الجزار',
  'حازم صلاح أبو إسماعيل',
  'محمد بديع',
  'مصر',
  'محافظة الشرقية',
  'محمد مرسي',
  'الخميس',
  '4 يوليو',
  'شبه جزيرة سيناء',
  'معبر رفح',
  'قطاع غزة',
  'قوات الأمن المركزي',
  'مطار العريش (الصفحة غير موجودة)',
  '4 يوليو',
  'عدلي منصور',
  'المحكمة الدستورية العليا',
  'علي عوض محمد صالح (الصفحة غير موجودة)',
  'مصطفى حجازي',
  'عبد المجيد محمود',
  'النيابة العامة (مصر)',
  'طلعت عبد الله',
  'مجلس القضاء الأعلى',
  '8 يوليو',
  'محمد مرسي',
  'الجيش المصري',
  'صلاة الفجر',
  'انقلاب 3 يوليو 2013 في مصر',
  'مظاهرات 30 يونيو 2013 في مصر',
  'ما بعد انقلاب 2013 في مصر',
  'أحداث دار الحرس الجمهوري 2013',
  'محمد مرسي',
  'فيسبوك',
  'وزارة الداخلية المصرية',
  'حزب الحرية والعدالة',
  'حزب الوسط المصري',
  'التحالف الوطني لدعم الشرعية',
  'اعتصام رابعة العدوية',
  'شارة رابعة',
  'الأمم المتحدة',
  'بان كي مون',
  'الاتحاد الأوروبي',
  'كاترين أشتون',
  'الاتحاد الأفريقي',
  'الولايات المتحدة',
  'باراك أوباما',
  'المملكة المتحدة',
  'ويليام هيغ',
  'ألمانيا',
  'الصين',
  'فرنسا',
  'تركيا',
  'أحمد داود أوغلو',
  'السعودية',
  'عبد الله بن عبد العزيز آل سعود',
  'الإمارات العربية المتحدة',
  'السودان',
  'سوريا',
  'بشار الأسد',
  'تونس',
  'المنصف المرزوقي',
  'الأردن',
  'اليمن',
  'عبد ربه منصور هادي',
  'بوابة:مصر',
  'بوابة:مصر',
  'بوابة:ثورات الربيع العربي',
  'بوابة:ثورات الربيع العربي',
  'بوابة:القوات المسلحة المصرية',
  'بوابة:القوات المسلحة المصرية',
  'بوابة:الحرب',
  'بوابة:الحرب',
  'بوابة:السياسة',
  'بوابة:السياسة',
  'بوابة:القانون',
  'بوابة:القانون',
  'بوابة:عقد 2010',
  'بوابة:عقد 2010'],
 'arz': ['الجيش المصرى',
  'محمد مرسى',
  'محمد البرادعى',
  'الإمام الأكبر',
  'احمد الطيب',
  'تواضروس التانى بابا اسكندريه',
  'حركة تمرد',
  'حزب النور',
  'مدينة نصر',
  'محمد حسنى مبارك',
  'دستور مصر 2012',
  'وول ستريت جورنال (الصفحه مالهاش وجود)',
  'محمد البرادعى',
  'عمرو موسى',
  'حمدين صباحى',
  'محمد حسين طنطاوى',
  'دستور مصر',
  'المعادى',
  'اسكندريه',
  'محافظة الدقهليه',
  'محافظة الغربيه',
  'أسوان',
  'حركة تمرد',
  'عبد الفتاح السيسى',
  'عدلى منصور',
  '3 يوليه',
  '2013',
  'الجيش المصرى',
  'محمد مرسى',
  'الاتحاد الافريقى',
  'انقلاب',
  'الإخوان المسلمين (الصفحه مالهاش وجود)'],
 'az': [],
 'bg': ['3 юли',
  '2013',
  'Абдел Фатах ел-Сиси (страницата не съществува)',
  'Президент на Египет',
  'Мохамед Морси',
  'Власт',
  'Египетска конституция (страницата не съществува)',
  'Ултиматум',
  'Протест',
  'Мюсюлманско братство',
  'Адли Мансур (страницата не съществува)',
  'Демонстрация',
  'Велик шейх на ал-Ажар (страницата не съществува)',
  'Ахмед ел-Тайеб (страницата не съществува)',
  'Копти',
  'Мохамед ЕлБарадей (страницата не съществува)',
  'Араби',
  'Катар',
  'Тунис',
  'САЩ',
  'Африкански съюз',
  'Регулация',
  'Запад',
  '14 август',
  '2013'],
 'ca': ['Mohamed Mursi',
  'Germans Musulmans',
  'Egipte',
  'Mariscal de camp',
  'Govern',
  'Abdelfatah Al-Sisi',
  'Germans Musulmans',
  'Mohamed al-Baradei',
  "Teodor II d'Alexandria",
  'Xeic',
  'Mesquita',
  'Exèrcit',
  'Plaça Tahrir',
  'El Caire',
  'Mohamed Mursi',
  'Constitució',
  'Adly Mansur',
  'Egipte',
  'Primer ministre',
  'Ultimàtum',
  "Cop d'Estat",
  'Exèrcit',
  'Exèrcit',
  'Policia',
  'Germans Musulmans',
  'Drets humans',
  'Autocràcia'],
 'ckb': ['ئیخوان موسلمین',
  'محەممەد مورسی',
  'عەدلی مەنسوور',
  'عەبدولفەتاح سیسی',
  'محەممەد مورسی',
  'ژن',
  'دەستدرێژیی سێکسی',
  'گۆڕەپانی تەحریر',
  'ئاڵمانیا',
  'ئیماڕات (پەڕە بوونی نییە)',
  'سوریا',
  'سعودیا (پەڕە بوونی نییە)'],
 'de': ['Putsch',
  'Streitkräfte Ägyptens',
  'Oberster Rat der Streitkräfte',
  'Abd al-Fattah as-Sisi',
  'Ägypten',
  'Mohammed Mursi',
  'Ultimatum',
  'Islamismus',
  'Muslimbrüder',
  'Staatskrise in Ägypten 2013',
  'Vereinigte Staaten von Amerika',
  'Europäische Union',
  'Terrorismus',
  'Sinai-Halbinsel',
  'Gotteskrieger',
  'Menschenrechtsorganisation',
  'Husni Mubarak',
  'Putsch',
  'Koptische Kirche',
  'Patriarch',
  'Tawadros II.',
  'Imam',
  'Al-Azhar-Universität',
  'Ahmed Tayeb',
  'Tamarod',
  'Linksliberalismus',
  'Nationale Heilsfront',
  'Mohammed el-Baradei',
  'Salafisten',
  'Partei des Lichts',
  'Hasim al-Beblawi',
  'Staatskrise in Ägypten 2013',
  'Christentum',
  'Bischof',
  'Tawadros II.',
  'Sanktion',
  'Römisch-katholische Kirche',
  'Deutsche Bischofskonferenz',
  'Muslimbrüder',
  'Tiefer Staat',
  'Husni Mubarak',
  'Revolution in Ägypten 2011',
  'Restauration (Geschichte)',
  'Konterrevolution',
  'Parlament',
  'Verfassunggebende Versammlung',
  'Gremium',
  'Husni Mubarak',
  'Nachrichtendienst',
  'Streitkräfte Ägyptens',
  'Demonstration',
  'Protest',
  'Interessengruppe',
  'Unterschriftenaktion',
  'Tamarod',
  'Naguib Sawiris',
  'Infrastruktur',
  'Partei der Freien Ägypter',
  'Verfassungsgerichtsbarkeit',
  'Tahani al-Gebali',
  'Elektrizitätsversorgung',
  'Kraftstoff',
  'Erdgas',
  'Menschenrechte',
  'Folter',
  'Pressefreiheit',
  'Lebensmittelteuerung (Seite nicht vorhanden)',
  'Arbeitslosigkeit',
  'Tankstelle',
  'Stromausfall',
  'Boykott',
  'Kriminalität',
  'Nachrichtendienst',
  'Tawadros II.',
  'Ahmad Mohammad al-Tayyeb',
  'Mohammed el-Baradei',
  'Islam',
  'Al-Azhar-Universität',
  'Linksliberalismus',
  'Mohammed el-Baradei',
  'Salafismus',
  'Partei des Lichts',
  'Elite',
  'Übergangsregierung',
  'Technokratie',
  'Großunternehmen',
  'Gouvernement',
  'Gouverneur',
  'Abd al-Fattah as-Sisi',
  'Mohammed Hussein Tantawi',
  'Gremium',
  'Arbeitslosigkeit',
  'Inflation',
  'Revolution in Ägypten 2011',
  'Verfassung der Republik Ägypten',
  'Gouvernements in Ägypten',
  'Adel al-Chajat',
  'Gamaa Islamija',
  'Al-Azhar-Moschee',
  'Tamarod',
  'Mohammed Mursi',
  'Tahrir-Platz',
  'Al-Wasat-Partei',
  'Streitkräfte Ägyptens',
  'Barack Obama',
  'Mohamed Kamel Amr',
  'Middle East News Agency (Seite nicht vorhanden)',
  'Abd al-Fattah as-Sisi',
  'Hescham Kandil',
  'Abdel Meguid Mahmud',
  'Koalition (Politik)',
  'Universität Kairo',
  'Al-Dschamāʿa al-islāmiyya',
  'Assem Abdel-Maged (Seite nicht vorhanden)',
  'Anne W. Patterson',
  'Mitteleuropäische Sommerzeit',
  'Heliopolis',
  'Greenwich Mean Time',
  'Übergangsregierung',
  'Abd al-Fattah as-Sisi',
  'Vorgezogene Neuwahl',
  'Mahmoud Badr (Seite nicht vorhanden)',
  'Entführung',
  'Wiki Thawra (Seite nicht vorhanden)',
  'Washington Post',
  'Revolution',
  'Stiftung Wissenschaft und Politik',
  'Volker Perthes',
  'Die Zeit',
  'Kurier (Tageszeitung)',
  'Der Spiegel',
  'Putsch',
  'Politikwissenschaft',
  'NDR Info',
  'Putsch',
  'John Kerry',
  'Martin E. Dempsey',
  'Sedki Sobhi (Seite nicht vorhanden)',
  'Gunter Mulack',
  'Türkei',
  'Recep Tayyip Erdoğan',
  'Tunesien',
  'Ennahda',
  'The Daily Beast',
  'Philip J. Crowley',
  'Außenminister der Vereinigten Staaten',
  'Zentrum für Forschung zur Arabischen Welt (Seite nicht vorhanden)',
  'Deutsche Gesellschaft für Auswärtige Politik',
  'Europäische Union',
  'Krieg gegen den Terror',
  'Israelisch-ägyptischer Friedensvertrag',
  'Menschenrechte',
  'Dirk Emmerich (Seite nicht vorhanden)',
  'N-tv',
  'Tiefer Staat',
  'Judikative',
  'Exekutive',
  'Administrative',
  'Revolution in Ägypten 2011',
  'Wirtschaft',
  'Militär',
  'Staat im Staate',
  'Konterrevolution',
  'Elite',
  'Militärparade',
  'Akademischer Grad',
  'Militärakademie',
  'Vierte Gewalt',
  'The Guardian',
  'The Washington Post',
  'Al-Arabiya',
  'Al Jazeera',
  'Deutsche Welle',
  'BBC Arabic (Seite nicht vorhanden)',
  'Analphabetismus',
  'Kinderschänder',
  'Michael Thumann',
  'Bürgerrecht',
  'Arabic Network for Human Rights Information (Seite nicht vorhanden)',
  'Muhammad Badi’e',
  'Rābiʿa-al-ʿAdawiyya-Moschee',
  'Chairat al-Schater',
  'Saad al-Katatni',
  'Freiheits- und Gerechtigkeitspartei',
  'Rashad Bajumi (Seite nicht vorhanden)',
  'Human Rights Watch',
  'Adli Mansur',
  'Marsa Matruh',
  'Kafr asch-Schaich',
  'Alexandria',
  'Al-Minya',
  'Alexandria',
  'Luxor',
  'Damanhur',
  'Konterrevolution',
  'Flughafen al-Arisch',
  'Gouvernement as-Suwais',
  'Gouvernement Dschanub Sina',
  'Hosni Mubarak',
  'Kopten',
  'Kreuzzug',
  'Scharia',
  'Mohammed el-Baradei',
  'Partei des Lichts',
  'Ägyptische Sozialdemokratische Partei',
  'Siad Bahaa El-Din (Seite nicht vorhanden)',
  'Koptische Kirche',
  'Al-Arisch',
  'Verfassung der Republik Ägypten',
  'Voice of America',
  'Afrikanische Union',
  'Republikanische Partei',
  'John McCain',
  'Partei des Lichts',
  'Hasim al-Beblawi',
  'Weltbank',
  'Recep Tayyip Erdoğan',
  'Abdullah Gül',
  'Blutbad in Kairo und Gizeh vom 14. August 2013',
  'Cairo Institute for Human Rights Studies (Seite nicht vorhanden)',
  'Kosovo',
  'Libyen',
  'Syrien',
  'Libanon',
  'Ukraine',
  'Europäische Union',
  'Militärputsch in Ägypten 1952',
  'Gamal Abdel Nasser',
  'Tawadros II.',
  'Ahmad Mohammad al-Tayyeb',
  'Mohammed el-Baradei',
  'Verfassung der Republik Ägypten',
  'Oberstes Verfassungsgericht Ägyptens',
  'Adli Mansur',
  'Übergangsregierung',
  'Technokratie',
  'British Broadcasting Corporation',
  'Nationale Heilsfront',
  'Freiheits- und Gerechtigkeitspartei',
  'Joachim Schroedel',
  'Deutsche Bischofskonferenz',
  'The European',
  'Afrikanische Union',
  'Afrikanische Union',
  'Afrikanische Union',
  'Addis Abeba',
  'Nkosazana Dlamini-Zuma',
  'Revolution in Ägypten 2011',
  'Hosni Mubarak',
  'Mohammed Edrees (Seite nicht vorhanden)',
  'Dekolonisation Afrikas',
  'Deutschland',
  'Deutschland',
  'Guido Westerwelle',
  'Dänemark',
  'Dänemark',
  'Iran',
  'Iran',
  'Jordanien',
  'Jordanien',
  'Katar',
  'Katar',
  'Kuwait',
  'Kuwait',
  'Russland',
  'Russland',
  'Alexej Puchow (Seite nicht vorhanden)',
  'Saudi-Arabien',
  'Saudi-Arabien',
  'Abdullah ibn Abd al-Aziz',
  'Somalia',
  'Somalia',
  'Al-Shabaab (Somalia)',
  'Twitter',
  'Rosarote Brille',
  'Syrien',
  'Syrien',
  'Baschar al-Assad',
  'Türkei',
  'Türkei',
  'Ahmet Davutoğlu',
  'Recep Tayyip Erdoğan',
  'Israel',
  'Tunesien',
  'Tunesien',
  'Moncef Marzouki',
  'Kongress für die Republik (Tunesien)',
  'Vereinigte Arabische Emirate',
  'Vereinigte Arabische Emirate',
  'Chalifa bin Zayid Al Nahyan',
  'Vereinigtes Königreich',
  'Vereinigtes Königreich',
  'William Hague',
  'Vereinigte Staaten',
  'Vereinigte Staaten',
  'Vereinigte Staaten',
  'Tansania',
  'Barack Obama',
  'Tunesien',
  'Algerien',
  'L’Orient-Le Jour',
  'Libanon',
  'The Daily Star (Libanon)',
  'Baschar al-Assad',
  'Syrien',
  'Iran',
  'Bahrain',
  'Gulf News',
  'Vereinigte Arabische Emirate',
  'Israel',
  'Israel HaYom',
  'Jedi’ot Acharonot',
  'Haaretz',
  'Frankreich',
  'Paris',
  'Le Figaro',
  'Ouest-France',
  'The New York Times',
  'Handelsblatt',
  'Die Welt',
  'Süddeutsche Zeitung',
  'Frankfurter Allgemeine Zeitung',
  'Jen Psaki',
  'George Orwell',
  'Neusprech',
  'Federal Reserve Bank of New York',
  'Rüstungsindustrie',
  'General Dynamics F-16',
  'Mehrzweckkampfflugzeug',
  'Hughes AH-64',
  'Kampfhubschrauber',
  'M1 Abrams',
  'Kampfpanzer',
  'Fregatte',
  'Krieg in Afghanistan',
  'Krieg gegen den Terror',
  'Naher Osten',
  'Ostafrika',
  'Freiheiten der Luft',
  'Luftraum',
  'Sueskanal',
  'Ölvorkommen',
  'Naher Osten',
  'Brookings Institution',
  'Demokratiemessung',
  'Demokratiemessung',
  'Husni Mubarak',
  'Anwar as-Sadat',
  'Blutbad in Kairo und Gizeh 2013',
  'Staatskrise in Ägypten 2013/2014 (Kabinett Beblawi)',
  'Chile',
  'Argentinien',
  'Algerien',
  'Martin Gehlen',
  'James Franklin Jeffrey',
  'American Council on Germany',
  'Council on Foreign Relations',
  'George W. Bush'],
 'el': ['Αμπντούλ Φατάχ αλ Σίσι',
  'Μοχάμεντ Μόρσι',
  'Τελεσίγραφο',
  'Μοχάμεντ Μόρσι',
  'Ρεπουμπλικανική Φρουρά (Αίγυπτος) (δεν έχει γραφτεί ακόμα)',
  'Πρόεδρος της Αιγύπτου',
  'Ανώτατο Συμβούλιο Ενόπλων Δυνάμεων (δεν έχει γραφτεί ακόμα)',
  'Συνταγματικό Δικαστήριο της Αιγύπτου (δεν έχει γραφτεί ακόμα)',
  'Αντλί Μανσούρ',
  'Αφρικανική Ένωση'],
 'en': ['Abdel Fattah el-Sisi',
  'Mohamed Morsi',
  'Egyptian Constitution of 2012',
  'June 2013 Egyptian protests',
  'Muslim Brotherhood',
  'Supreme Constitutional Court of Egypt',
  'Adly Mansour',
  'Grand Imam of al-Azhar',
  'Ahmed el-Tayeb',
  'Pope of the Coptic Orthodox Church of Alexandria',
  'Pope Tawadros II of Alexandria',
  'Mohamed ElBaradei',
  'Tunisia',
  'African Union',
  'Revolution',
  'August 2013 Rabaa massacre',
  'Post-coup unrest in Egypt (2013–14)',
  'Hosni Mubarak',
  'Egyptian Revolution of 2011',
  'History of Egypt under Hosni Mubarak',
  'Egyptian presidential election, 2012',
  'Muslim Brotherhood in post-Mubarak electoral politics of Egypt',
  'Mohamed ElBaradei',
  'Amr Moussa',
  'Hamdeen Sabahi',
  'The Wall Street Journal',
  'Tamarod',
  'National Salvation Front (Egypt)',
  'April 6 Youth Movement',
  'Strong Egypt Party',
  'The Gallup Organization',
  'Foreign involvement in the Syrian civil war',
  'International Crisis Group',
  'Egyptian constitution',
  'Anti-Coup Alliance',
  'El-Hossari Mosque (page does not exist)',
  'El-Nahda Square (page does not exist)',
  'Cairo University',
  'Ain Shams',
  "Coup d'état",
  'Tamarod',
  'Politics of the United Arab Emirates',
  'Cairo',
  'Alexandria',
  'Dakahlia Governorate',
  'Gharbiya',
  'Aswan',
  'Rabia Al-Adawiya Mosque',
  'Egyptian Presidential Palace',
  'El-Quba Palace (page does not exist)',
  'Damietta',
  'Tahrir Square',
  'Heliopolis Palace',
  'Port Said',
  'Suez',
  'Mokatam (page does not exist)',
  'Egyptian Armed Forces',
  'Ministry of Tourism (Egypt)',
  'Hisham Zazou',
  "Al-Gama'a al-Islamiyya",
  'Luxor massacre',
  'Luxor',
  'Ministry of Communications and Information Technology (Egypt)',
  'Atef Helmi',
  'Hatem Bagato (page does not exist)',
  'Khaled Abdel Aal (page does not exist)',
  'Freedom and Justice Party (Egypt)',
  'Barack Obama',
  'United States',
  'Minister of Foreign Affairs (Egypt)',
  'Mohamed Kamel Amr',
  'Egyptian Army',
  'List of Ministers of Defence of Egypt',
  'Abdel Fattah el-Sisi',
  'Court of Cassation (Egypt) (page does not exist)',
  'Abdel Meguid Mahmoud',
  'Talaat Abdallah (page does not exist)',
  'Al-Ahram',
  'Constitution of Egypt',
  'Sami Hafez Anan',
  'Egyptian Armed Forces',
  'Egyptian Armed Forces',
  'Mohamed El-Baradei',
  'National Salvation Front (Egypt)',
  'Abdel Fattah el-Sisi',
  'Waleed al-Haddad (page does not exist)',
  'Mohammed Zaki (page does not exist)',
  'Yahya Hamed (page does not exist)',
  "Talk:2013 Egyptian coup d'état",
  'Abdel Fattah el-Sisi',
  'Adli Mansour',
  'Technocracy',
  'Republican Guard (Egypt)',
  'Adli Mansour',
  'Shura Council',
  'Allahu akbar',
  'Pope of the Coptic Orthodox Church of Alexandria',
  'Tawadros II',
  'Grand Imam of al-Azhar',
  'Ahmed el-Tayeb',
  'Mohamed ElBaradei',
  'Tamarod',
  'Mahmoud Badr',
  'Al-Nour party',
  'Galal Murra (page does not exist)',
  'National Salvation Front (Egypt)',
  'Egyptian Armed Forces',
  'Republican Guard (Egypt)',
  'Egyptian Armed Forces',
  'Colonel',
  'Ahmed Mohammed Ali',
  'Egyptian Armed Forces',
  'Catherine Ashton',
  'European Union',
  'African Union',
  'Freedom and Justice Party (Egypt)',
  'Saad El-Katatni',
  'Rashad al-Bayoumi',
  'Muslim Brotherhood',
  'Al-Ahram',
  'Mohammed Badie',
  'Khairat El-Shater',
  'Mahdi Akef',
  'Mohamed Beltagy',
  'Safwat Hegazi',
  'Al-Wasat Party',
  'Abou Elela Mady',
  'Essam Sultan (page does not exist)',
  'Al Jazeera English',
  'Misr 25',
  'Al Hafez (page does not exist)',
  'Al Nas (page does not exist)',
  'Al Jazeera',
  'Mubasher Misr (page does not exist)',
  'Associated Press Television News',
  'Cairo News Company (page does not exist)',
  'Committee to Protect Journalists',
  'BBC News',
  'Jeremy Bowen',
  'Al-Ahram',
  'Friday prayers',
  '2013 Republican Guard headquarters clashes',
  'BBC News',
  'Jeremy Bowen',
  'Qena',
  '6th October Bridge',
  'Gaza Strip',
  'Rafah border crossing',
  'Prime Minister of the Gaza Strip',
  'Ismail Haniyeh',
  '2013 Republican Guard headquarters clashes',
  'Mohamed Beltagy',
  'Al-Dustour (Egypt)',
  'Foreign rebel fighters in the Syrian civil war',
  'University of California at Berkeley',
  'Qalyoub (page does not exist)',
  'Rabaa al-Adawiya mosque',
  "Talk:2013 Egyptian coup d'état",
  'Coptic Christian',
  'Christians Against the Coup',
  'Anti-Coup Alliance',
  'Al-Arish',
  'Treason',
  "2005 Mauritanian coup d'état",
  "2012 Malian coup d'état",
  '2009 Malagasy political crisis',
  "1999 Pakistani coup d'état",
  'Egyptian American',
  'Michigan',
  'Amnesty International',
  'Muslim Brotherhood',
  'Freedom and Justice Party (Egypt)',
  'Egyptian Army',
  'United Arab Emirates',
  'Tamarod',
  'African Union',
  'Nkosazana Dlamini-Zuma',
  'European Union',
  'High Representative of the Union for Foreign Affairs and Security Policy',
  'Catherine Ashton',
  'United Nations',
  'Ban Ki-moon',
  'Nabil Fahmy',
  'Navi Pillay',
  'Argentina',
  'Australia',
  'Kevin Rudd',
  'Bahrain',
  'Hamad bin Isa Al-Khalifa',
  'Canada',
  'John Baird (Canadian politician)',
  'China',
  'Colombia',
  'France',
  'Francois Hollande',
  'Tunisian revolution',
  'Aftermath of the Libyan civil war',
  'Syrian civil war',
  'Laurent Fabius',
  'Germany',
  'Guido Westerwelle',
  'Iran',
  'Ali Akbar Salehi',
  'Iraq',
  'Nouri al-Maliki',
  'Israel',
  'Benjamin Netanyahu',
  'Haaretz',
  'Yisrael Katz (politician born 1955)',
  'Israeli Army Radio',
  'Eli Shaked (page does not exist)',
  'Eli Shaked (page does not exist)',
  'Jordan',
  'Kuwait',
  'Kuwait News Agency',
  'Sabah Al-Ahmad Al-Jaber Al-Sabah',
  'Lebanon',
  'Tammam Salam',
  'Libya',
  'Rome',
  'Ali Zidan',
  'Malaysia',
  'Najib Razak',
  'Ministry of Youth and Sports (Malaysia)',
  'Khairy Jamaluddin',
  'Pan-Malaysian Islamic Party',
  'Nik Abdul Aziz Nik Mat',
  'Anwar Ibrahim',
  'Pan-Malaysian Islamic Party',
  'Nik Abdul Aziz Nik Mat',
  'Anwar Ibrahim',
  'Norway',
  'Espen Barth Eide',
  'Netherlands',
  'Pakistan',
  'Nawaz Sharif',
  'State of Palestine',
  'President of the State of Palestine',
  'Mahmoud Abbas',
  'Hanan Ashrawi',
  'Gaza Strip',
  'Hamas',
  'Governance of the Gaza Strip',
  'Yahia Moussa (page does not exist)',
  'Hamas',
  'Ahmad Yousef (page does not exist)',
  'Sic',
  'Gaza Strip',
  'Hamas',
  'Governance of the Gaza Strip',
  'Yahia Moussa (page does not exist)',
  'Hamas',
  'Ahmad Yousef (page does not exist)',
  'Sic',
  'Eli Shaked (page does not exist)',
  'Pan-Malaysian Islamic Party',
  'Nik Abdul Aziz Nik Mat',
  'Anwar Ibrahim',
  'Gaza Strip',
  'Hamas',
  'Governance of the Gaza Strip',
  'Yahia Moussa (page does not exist)',
  'Hamas',
  'Ahmad Yousef (page does not exist)',
  'Sic',
  'Philippines',
  'Benigno Aquino III',
  'Edwin Lacierda',
  'Department of Foreign Affairs (Philippines)',
  'Poland',
  'Qatar',
  'Al Jazeera',
  'Tamim bin Hamad Al Thani',
  'Khaled al-Attiya (page does not exist)',
  'Russia',
  'Saudi Arabia',
  'Abdullah of Saudi Arabia',
  'Somalia',
  'Al-Shabaab (militant group)',
  'Twitter',
  'Al-Shabaab (militant group)',
  'Twitter',
  'Sudan',
  'Ali Karti (page does not exist)',
  'Mohamed Kamel Amr',
  'Egypt-Sudan relations',
  'Hassan al-Turabi',
  'Hassan al-Turabi',
  'Sweden',
  'Carl Bildt',
  'Switzerland',
  'Syria',
  'Bashar al-Assad',
  'Tunisia',
  'Arab Spring',
  'Ennahda Movement',
  'Rachid Ghannouchi',
  'Turkey',
  'Recep Tayyip Erdogan',
  'Ahmet Davutoglu',
  'Hüseyin Çelik',
  'Justice and Development Party (Turkey)',
  'Cabinet Erdoğan II',
  "Republican People's Party (Turkey)",
  'Kemal Kılıçdaroğlu',
  "Republican People's Party (Turkey)",
  'Kemal Kılıçdaroğlu',
  'United Arab Emirates',
  'Abdullah bin Zayed Al Nahyan',
  'United Kingdom',
  'William Hague',
  'United States',
  'William Joseph Burns',
  'John McCain',
  'Senate Foreign Relations Committee',
  'Ed Royce',
  'House Foreign Affairs Committee',
  'Eliot Engel',
  'Dan Shapiro',
  'Tel Aviv',
  'Frank G. Wisner',
  'United States Secretary of State',
  'John Kerry',
  'William Joseph Burns',
  'John McCain',
  'Senate Foreign Relations Committee',
  'Ed Royce',
  'House Foreign Affairs Committee',
  'Eliot Engel',
  'Dan Shapiro',
  'Tel Aviv',
  'Frank G. Wisner',
  'United States Secretary of State',
  'John Kerry',
  'Yemen',
  'Abd Rabbuh Mansur Hadi',
  'Hamid al-Ahmar',
  'Al-Islah (Yemen)',
  'Muslim Brotherhood',
  'Hamid al-Ahmar',
  'Al-Islah (Yemen)',
  'Muslim Brotherhood',
  'Al-Shabaab (militant group)',
  'Twitter',
  'Hassan al-Turabi',
  "Republican People's Party (Turkey)",
  'Kemal Kılıçdaroğlu',
  'William Joseph Burns',
  'John McCain',
  'Senate Foreign Relations Committee',
  'Ed Royce',
  'House Foreign Affairs Committee',
  'Eliot Engel',
  'Dan Shapiro',
  'Tel Aviv',
  'Frank G. Wisner',
  'United States Secretary of State',
  'John Kerry',
  'Hamid al-Ahmar',
  'Al-Islah (Yemen)',
  'Muslim Brotherhood',
  'Al-Qaeda',
  'Ayman al-Zawahiri',
  'Sharia',
  'Post-coup unrest in Egypt (2013–14)',
  'August 2013 Rabaa massacre',
  'Egyptian Revolution of 2011',
  'Egyptian Revolution of 1952',
  'Egyptian Revolution of 1919',
  'Digital object identifier',
  'Digital object identifier'],
 'es': ['3 de julio',
  '2013',
  'Consejo Supremo de las Fuerzas Armadas',
  'Abdul Fatah al-Sisi',
  'Fuerzas Armadas de Egipto',
  'Presidente de Egipto',
  'Mohamed Morsi',
  'Jefe de Estado',
  'Egipto',
  '29 de junio',
  'Plaza de la Liberación',
  'El Cairo',
  'Egipto',
  'Tamarod',
  'Referéndum revocatorio',
  'Adli Mansur',
  'Partido Libertad y Justicia',
  'Revolución egipcia de 2011',
  'Hosni Mubarak',
  'Elecciones presidenciales de Egipto de 2012',
  'Partido Libertad y Justicia',
  'Hermanos Musulmanes',
  'Islamista',
  'Ahmed Shafik',
  'Islam',
  'Protestas en Egipto en noviembre y diciembre de 2012',
  '23 de noviembre',
  '2013',
  'Tamarod',
  'Lengua árabe',
  'Recogida de firmas (aún no redactado)',
  'Kifaya',
  'Hosni Mubarak',
  'Tamarod',
  'Alejandría',
  'Puerto Saíd',
  'Suez',
  'Ciudad Nasr (aún no redactado)',
  'Hermanos Musulmanes',
  'Mokattam',
  'Fuerzas Armadas de Egipto',
  'Abdul Fatah al-Sisi',
  'Frente de Salvación Nacional (Egipto)',
  'Mohamed Kamel Amr',
  '3 de julio',
  'Hermandad musulmana',
  'Frente de Salvación Nacional (Egipto)',
  'Mohamed el-Baradei',
  'Tamarod',
  'Partido al-Nour',
  'Gran Imán de al-Azhar',
  'Ahmed el-Tayeb',
  'Iglesia copta',
  'Teodoro II (papa copto)',
  'Egipto',
  'Plaza Tahrir',
  'El Cairo',
  'Ministerio de Defensa',
  'ERTU',
  'Fuerzas Armadas de Egipto',
  'Fuerzas Armadas de Egipto',
  'Mohamed el-Baradei',
  'Adli Mansur',
  'Adli Mansur',
  'Hazem el Beblaui',
  '16 de julio',
  'Islamista',
  'Mohamed el-Baradei',
  'Mohamed Badía',
  'Mahmoud Ezzat',
  'Hamás',
  '23 de septiembre',
  'Copta',
  'Guerra civil',
  'Frente de Salvación Nacional (Egipto)',
  'Adli Mansour',
  '8 de julio',
  'Hermanos Musulmanes',
  'Intifada',
  'Partido al-Nour',
  '14 de agosto',
  'Estados Unidos',
  'Barack Obama',
  'Tanzania',
  'Rusia',
  'Rusia',
  'Vladímir Putin',
  'Egipto',
  'Guerra Civil Siria',
  'Siria',
  'Libia',
  'Partido Justicia y Construcción (aún no redactado)',
  'Trípoli',
  'Bengasi',
  'Mohamed Badía',
  'Protestas en Egipto de 2012'],
 'fa': ['محمد مرسی',
  'انقلاب ۲۰۱۱ مصر',
  'محمد البرادعی',
  'میدان تحریر',
  'ارتش مصر',
  'عبدالفتاح السیسی',
  'عدلی منصور',
  'حازم الببلاوی',
  'محمد البرادعی',
  'اخوان\u200cالمسلمین',
  'حبس ابد',
  'کلیسا',
  'سوئز',
  'رده:اعتراضات در جهان عرب (۲۰۱۰–۲۰۱۱)',
  'اخوان\u200cالمسلمین',
  'قاهره',
  'میدان التحریر',
  'زره پوش',
  'اسکندریه',
  'سوئز (شهر)',
  'عریش',
  'عبدالله بن عبدالعزیز',
  'عدلی منصور',
  'سعود الفیصل',
  'بشار اسد',
  'جهان عرب',
  'پرچم ایالات متحده آمریکا',
  'باراک اوباما',
  'جان مک\u200cکین',
  'لیندزی گراهام (صفحه وجود ندارد)',
  'مجلس سنای آمریکا',
  'محمد البرادعی',
  'حازم ببلاوی',
  'عبدالفتاح السیسی',
  'نوری مالکی',
  'عدلی منصور',
  'پرچم ایران',
  'کاترین اشتون'],
 'fi': ['Mohammed Mursi',
  'Muslimiveljeskunta',
  'Arabikevät',
  'Egyptin vallankumous 2011',
  'Hosni Mubarak',
  'Mohammed Mursi',
  'Muslimiveljeskunta',
  'Aleksandria',
  'Egyptin armeija (sivua ei ole)',
  'Arabikevät',
  'Abdel Fattah al-Sisi',
  'Adli Mansur',
  'Ansar al-Shariah (sivua ei ole)',
  'Al-Nour',
  'Mohamed ElBaradei',
  'Salafistit (sivua ei ole)',
  'Al-Nour',
  'Hazem Al-Beblawi (sivua ei ole)'],
 'fr': ['Abdelfatah Khalil al-Sisi',
  'Chuck Hagel',
  'Mohamed Morsi',
  'Consultez la documentation du modèle',
  'Forces armées égyptiennes',
  'Mohamed Morsi',
  'Élection présidentielle égyptienne de 2012',
  'Révolution égyptienne de 2011',
  'Mohamed Morsi',
  'Hosni Moubarak',
  'Révolution Égyptienne de 2011',
  'Égypte',
  'Novembre 2011',
  '2011',
  'Élections législatives égyptiennes de 2011-2012',
  '17 juin',
  'Juin 2012',
  '2012',
  'Mohamed Morsi',
  'Élection présidentielle égyptienne de 2012',
  'Ahmed Chafik',
  'Mohamed Morsi',
  'Mouvement protestataire de juin-juillet 2013 en Égypte',
  'Tamarod (page inexistante)',
  'Frères musulmans',
  'Abdelfatah Khalil al-Sisi',
  'Université du Michigan',
  'Constitution égyptienne',
  'Le Caire',
  'Alexandrie',
  'Daqahliyah (page inexistante)',
  'Gouvernorat de Gharbeya',
  'Assouan',
  'Nasr City (page inexistante)',
  'en:Nasr City',
  'Al-Masri Al-Youm',
  'Haute Cour constitutionnelle (Égypte)',
  'Maher el-Beheiry (page inexistante)',
  'Parti Nour',
  'Mouvement de la Jeunesse du 6-Avril',
  'Jihad islamique égyptien',
  'Abdel Moneim Abul Fotouh',
  'Mohamed El Baradei',
  'Parti national démocratique (Égypte)',
  'Frères musulmans',
  '30 juin',
  'Place Tahrir',
  'Caire',
  "Palais d'el-Orouba",
  'Alexandrie',
  'Port-Saïd',
  'Suez (ville)',
  '1er juillet',
  'Frères musulmans',
  'Mohamed Morsi',
  '3 juillet',
  'Adly Mansour',
  'Le Caire',
  'Abdelfatah Khalil al-Sisi',
  'Salafisme',
  'Mohamed El Baradei',
  "Théodore II d'Alexandrie (pape copte)",
  'Mohamed Morsi',
  'Haute Cour constitutionnelle (Égypte)',
  'Adli Mansour',
  'Tony Blair',
  'Palestine',
  'Mohamed Morsi',
  'Sinaï',
  'Mohamed Morsi',
  'Al-Nour',
  'Al-Nour',
  'Hazem el-Beblawi',
  'Mohammed Badie',
  'Allemagne',
  'États-Unis',
  'Bande de Gaza',
  'Blocus de la bande de Gaza',
  'Parti de la Liberté et de la Justice',
  'Mohamed Morsi',
  'Hamas',
  'Frères musulmans',
  'Amnesty International',
  'Sit-in',
  'Parti Liberté et Justice',
  'Frères musulmans',
  'Human Rights Watch',
  'Coptes',
  'Collège de France',
  'Amnesty International',
  'Human Rights Watch',
  'François Hollande',
  'Laurent Fabius',
  'Qatar',
  "L'Elysée",
  'Catherine Ashton',
  'Coptes',
  'Frères musulmans',
  'General Dynamics F-16 Fighting Falcon',
  'n:Dossier:Crise politique de juin-juillet 2013 en Égypte',
  'Événements de janvier 1992 en Algérie',
  'Alain Gresh',
  'Portail de la politique',
  'Portail:Politique',
  'Portail du droit',
  'Portail:Droit',
  'Portail de l’histoire militaire',
  'Portail:Histoire militaire',
  'Portail de l’Égypte',
  'Portail:Égypte',
  'Portail des années 2010',
  'Portail:Années 2010'],
 'he': ['הפיכה',
  'צבא מצרים',
  'נשיא מצרים',
  'מוחמד מורסי',
  'עדלי מנסור',
  'מחאה',
  'הפגנה',
  'האחים המוסלמים',
  'עבד אל-פתאח א-סיסי',
  'מצרים',
  '22 בנובמבר',
  '2012',
  '29 ביוני',
  '2013',
  'כיכר תחריר',
  'קהיר',
  '1 ביולי',
  'צבא מצרים',
  'עבד אל-פתאח א-סיסי',
  'עדלי מנסור',
  'עדלי מנסור',
  'בית המשפט החוקתי העליון של מצרים (הדף אינו קיים)',
  'האחים המוסלמים',
  '5 ביולי',
  '7 ביולי',
  '9 ביולי',
  '2013',
  'חאזם אל-בבלאווי',
  '2014',
  'חמאס',
  'ארגון טרור',
  'פלסטינים',
  'האחים המוסלמים',
  'מנהרות ההברחה של רצועת עזה',
  'חצי האי סיני',
  'טרור',
  'גרילה',
  'צבא מצרים',
  'טבח 14 באוגוסט במצרים',
  'מסגד ראבעה אל-עדוויה',
  'קהיר',
  'מוחמד אל-בראדעי',
  'מוחמד בדיע',
  'ירדן',
  'ערב הסעודית',
  'אש"ף',
  'מחמוד עבאס',
  'ראש ממשלת טורקיה',
  "רג'פ טאיפ ארדואן",
  'האומות המאוחדות',
  'עבד אל-פתאח א-סיסי',
  'הבחירות לנשיאות מצרים (2014)',
  '8 ביוני',
  '2014',
  'ההפיכה במצרים (2011)',
  'מהומות הלחם',
  'תנועת הנוער של ה-6 באפריל',
  'נשים באביב הערבי במצרים',
  'Ynet'],
 'hi': ['मुहम्मद मुर्सी', 'मुहम्मद मोरसी', 'बराक ओबामा'],
 'id': ['Mesir',
  'Mohamed Morsi',
  'Abdul Fatah al-Sisi',
  'Adly Mansour',
  'Ikhwanul Muslimin',
  'Ahmed el-Thayeb',
  'Paus Theodoros II dari Aleksandria',
  'Mohamed ElBaradei'],
 'it': ['Colpo di Stato',
  '2013',
  "Presidenti dell'Egitto",
  'Egitto',
  'Mohamed Morsi',
  'Tamarrud',
  '2013',
  'Fratelli Musulmani',
  'Forze armate egiziane',
  'Generale',
  'Abd al-Fattah al-Sisi',
  'Il Cairo',
  'Twitter',
  'Governo di coalizione',
  'Misure cautelari personali coercitive',
  'Abd al-Fattah al-Sisi',
  'Ad interim',
  'Adli Mansur',
  "Sa'd al-Katatni",
  'Rashad al-Bayumi',
  "Mohammed Badi'",
  'Adli Mansur',
  'Consiglio della Shūra',
  '2012',
  'Mohamed El Baradei',
  'Fratelli Musulmani',
  'Salafiti',
  'Partito al-Nur',
  'Oxford',
  "Ziyad Baha' al-Din",
  'Partito Socialdemocratico Egiziano',
  'Ad interim',
  'Ad interim',
  'Salafiti',
  'Liberalismo',
  'Al Jazeera',
  'Qatar',
  'Gasdotto',
  'Giordania',
  'Moschea',
  'Rābiʿa al-ʿAdawiyya',
  'Nasr City (la pagina non esiste)',
  'Fratelli Musulmani',
  'Al Jazeera',
  'Cecchini',
  "Muhammad al-Barade'i",
  'Twitter',
  'Agence France Presse',
  'Partito Libertà e Giustizia',
  'Il Cairo',
  'Primo ministro',
  'Ad interim',
  'Liberalismo',
  'Hazem al-Beblawi',
  'Ad interim',
  'Tamàrrud',
  'Twitter',
  'Penisola del Sinai',
  'Hazem al-Beblawi',
  "Mohammed Badi'",
  'Professore',
  'Medio Oriente',
  'Università di Harvard',
  'Il manifesto',
  'Huffington Post',
  'Massimo Campanini',
  'Le Figaro',
  'The Guardian',
  'Internazionale (rivista)',
  'Referendum',
  'Tamarrud',
  'Primavera araba',
  'Fratelli Musulmani',
  'Elezioni presidenziali in Egitto del 2012',
  'Mohamed Morsi',
  'Rivoluzione egiziana del 2011',
  'Collabora a Wikimedia Commons'],
 'ja': ['エジプト',
  '2013年',
  '7月3日',
  'エジプト軍',
  'ムハンマド・ムルシー',
  'クーデター',
  'エジプト革命 (2011年)',
  'ホスニー・ムバーラク',
  'ムスリム同胞団',
  'アブドルモネイム・アブールフトゥーフ',
  'ハイラト・シャーテル',
  '自由と公正党',
  '文民統制',
  'カタール',
  'レンティア国家',
  '国際通貨基金',
  'エジプト・ポンド',
  'エチオピア',
  'スーダン',
  '新自由主義',
  '労働組合',
  '国際労働機関',
  '裁判所',
  '警察国家',
  '国境なき記者団',
  '2012年エジプト大統領選挙',
  'アフマド・シャフィーク',
  'エジプト革命 (1952年) (存在しないページ)',
  'イスラエル',
  '中東戦争',
  'エジプト軍',
  '軍産複合体',
  '民主主義',
  'アブドルファッターフ・アッ=シーシー',
  'エジプト軍最高評議会',
  'タマッルド(抵抗) (存在しないページ)',
  'en:Tamarod',
  '中央治安部隊 (存在しないページ)',
  'en:Central Security Forces',
  'タハリール広場',
  'アレクサンドリア',
  'マンスーラ',
  'ミヌーフィーヤ県',
  'タンタ',
  'エル=マハッラ・エル=コブラ',
  'スエズ',
  'ポートサイド',
  'ザガジグ',
  'アブドルファッターフ・アッ=シーシー',
  'ロードマップ',
  '政党連合',
  '国民救済戦線 (存在しないページ)',
  'en:National Salvation Front (Egypt)',
  'アドリー・マンスール',
  'モハメド・エルバラダイ',
  'コプト正教会',
  '教皇',
  'スンナ派',
  'アル=アズハル大学',
  'ハーゼム・エル=ベブラーウィー',
  'ムスリム同胞団',
  '自由と公正党',
  'ヌール党',
  '外交官',
  'ナビール・ファフミー (存在しないページ)',
  'en:Nabil Fahmy',
  '世界銀行',
  'エコノミスト',
  '外貨準備',
  'エジプト革命 (2011年)',
  '潘基文',
  'ムハンマド・バディーウ',
  'ハイラト・シャーテル',
  'ワサト党',
  'イエメン',
  'ノーベル平和賞',
  'タワックル・カルマン',
  'カイロ大学',
  'ラーバア・アダウィーヤ広場 (存在しないページ)',
  'en:Rabaa Al-Adawiya Mosque',
  'モハメド・エルバラダイ',
  '民主主義',
  'エジプト革命 (2011年)',
  'アスユート',
  'シュプレヒコール',
  'カトリック教会',
  'バラク・オバマ',
  'トルコ',
  'ムスタファ・ケマル・アタテュルク',
  'トルコ軍',
  '2013年トルコ反政府運動',
  'レジェップ・タイイップ・エルドアン',
  'カタール',
  'シリア騒乱',
  'アルジャジーラ',
  'シリア騒乱',
  'アルジャジーラ',
  'アル・ハヤト',
  'CNN',
  '機関銃',
  '自動小銃',
  'カービン',
  'ナイフ',
  '火炎瓶',
  'サウジアラビア',
  'アブドゥッラー・ビン・アブドゥルアズィーズ',
  'サウード・アル=ファイサル',
  'ヨルダン',
  'アラブ首長国連邦',
  'クウェート',
  'ニューヨーク・タイムズ',
  'チャック・ヘーゲル',
  'ジョン・マケイン',
  'ファタハ',
  'モスク',
  'カイロ県',
  '紀元前14世紀',
  'アマルナ',
  'ミニヤー県',
  'マラウィ国立博物館 (エジプト)',
  '古代エジプト',
  '逮捕',
  'アラブの春',
  '2011年リビア内戦',
  'ソーシャル・ネットワーキング・サービス',
  'AK-47',
  'コプト正教会',
  'シナイ半島',
  'ジハード主義',
  'イスラエル',
  '潘基文',
  '国際連合事務総長',
  'アルカーイダ',
  'ギーザ',
  'アルカーイダ',
  'アイマン・ザワーヒリー',
  'サラフィー主義',
  'ハマース',
  'ガザ地区',
  '4月6日運動',
  'ギーザ県',
  'ムハンマド・ベルタギー',
  'ディムヤート',
  'アレクサンドリア',
  'ラファフ',
  'ベニスエフ県',
  'アリーシュ',
  'タハリール広場',
  '第4次中東戦争',
  'イスマイリア',
  'カイロ大学',
  'ポートサイド',
  'アズハル大学',
  'ザガジグ大学',
  'ナイル川デルタ',
  'マンスーラ',
  '自由公正党',
  'イサーム・エル=エリヤーン',
  'ザガジグ',
  'CAFチャンピオンズリーグ',
  'アル・アハリ',
  'FIFAクラブワールドカップ2013',
  '非常事態宣言',
  '夜間外出禁止令',
  'タンタ',
  'ヘルワーン',
  'ペルソナ・ノン・グラータ',
  '4月6日運動',
  'ダカリーヤ県',
  'マンスーラ',
  'en:Ansar Bait al-Maqdis',
  'ディムヤート',
  'イブラヒーム・メフレブ',
  '催涙弾',
  'モハメド・バディア (存在しないページ)',
  '大ムフティー',
  '6月2日',
  'ISIL',
  'ロシア',
  'イラン',
  'カナダ人',
  'ギザの大ピラミッド',
  '8月31日',
  'アブ・シンベル神殿',
  'タバ',
  'シャルム・エル・シェイク',
  'アメリカ合衆国ドル',
  '失業率',
  'クーデター',
  'ジョン・ケリー',
  '2012年エジプト大統領選挙',
  'アムル・ムーサ',
  'ジョン・マケイン',
  'イギリス',
  '日本',
  '国際連合の旗',
  '国際連合',
  '潘基文',
  'イスラム協力機構',
  'アフリカ連合',
  '日本の旗',
  '日本',
  '岸田文雄',
  'トヨタ自動車',
  'スズキ (企業)',
  '大塚製薬',
  '住友電気工業',
  'ソニー',
  '東京海上日動火災',
  '住友商事',
  '伊藤忠商事',
  '丸紅',
  '豊田通商',
  '双日',
  '三菱東京UFJ銀行',
  '三井住友銀行',
  '三井物産',
  '青年海外協力隊',
  '国際協力機構',
  '中華人民共和国の旗',
  '中華人民共和国',
  'ロシアの旗',
  'ロシア',
  'イランの旗',
  'イラン',
  'モハンマド・ジャヴァード・ザリーフ',
  'カタールの旗',
  'カタール',
  'タミーム・ビン・ハマド・アール=サーニー',
  'アラブ首長国連邦の旗',
  'アラブ首長国連邦',
  'サウジアラビアの旗',
  'サウジアラビア',
  'アブドゥッラー・ビン・アブドゥルアズィーズ',
  'シリアの旗',
  'シリア',
  'バッシャール・アル=アサド',
  'ヨルダンの旗',
  'ヨルダン',
  'イスラエルの旗',
  'イスラエル',
  'エイラート',
  'エジプト・イスラエル平和条約',
  'ベンヤミン・ネタニヤフ',
  'トルコの旗',
  'トルコ',
  'レジェップ・タイイップ・エルドアン',
  'アフメト・ダウトオール',
  'リビアの旗',
  'リビア',
  'アリー・ゼイダーン',
  'チュニジアの旗',
  'チュニジア',
  'モンセフ・マルズーキ',
  'ナフダ',
  'アメリカ合衆国の旗',
  'アメリカ合衆国',
  'バラク・オバマ',
  '10月9日',
  'イギリスの旗',
  'イギリス',
  'ウィリアム・ヘイグ',
  'スペインの旗',
  'スペイン',
  'フランスの旗',
  'フランス',
  'イタリアの旗',
  'イタリア',
  'エンマ・ボニーノ',
  'ドイツの旗',
  'ドイツ',
  'デンマークの旗',
  'デンマーク',
  'スウェーデンの旗',
  'スウェーデン',
  'カール・ビルト',
  'アルゼンチンの旗',
  'アルゼンチン'],
 'ko': ['2013년',
  '7월 3일',
  '이집트',
  '대통령',
  '무함마드 모르시',
  '헌법',
  '가택 연금',
  '무슬림 형제단',
  '6월 30일'],
 'nl': ['Moslimbroederschap',
  'Mohamed Morsi',
  'Hosni Moebarak',
  'Egyptische Revolutie (2011)',
  'Egyptische Revolutie (2011)',
  'Arabische Lente',
  'Mohamed Morsi',
  'Egypte (land)',
  'Economie (systeem)',
  'Egyptisch pond',
  'Soennisme',
  'Kopten',
  'Sjiisme',
  'Caïro (stad)',
  'Tamarod',
  'Moslimbroederschap',
  'Krijgsmacht',
  'Abdul Fatah al-Sisi',
  'Staatsgreep',
  'Adly Mansour',
  'Egyptische presidentsverkiezingen in 2014',
  'Hamdin Sabahi',
  'Sit-in',
  'Moslimbroederschap',
  'Noodtoestand',
  'Avondklok',
  'Kopten'],
 'pl': ['Zamach stanu',
  'Muhammad Mursi',
  'Kryzys polityczny w Egipcie (2012–2014)',
  'Abd al-Fattah as-Sisi',
  'Tamarrud',
  'Adli Mansur',
  'Bracia Muzułmańscy',
  'Al-Dżazira',
  'Unia Afrykańska',
  'Zamieszki na Madagaskarze (2009)',
  'Zamach stanu w Nigrze (2010)',
  'II wojna domowa na Wybrzeżu Kości Słoniowej',
  'Zamach stanu w Mali (2012)',
  'Zamach stanu w Gwinei Bissau (2012)',
  'Konflikt w Republice Środkowoafrykańskiej',
  'Kair',
  'Stan wyjątkowy'],
 'pt': ['Egito',
  'Mohamed Mursi',
  'Abdul Fatah Khalil Al-Sisi',
  '30 de junho',
  '2013',
  'Cairo',
  '1 de julho',
  'Irmandade Muçulmana',
  '3 de julho',
  'Adly Mansour',
  'Prisão domiciliar',
  'Ahmed el-Tayeb',
  'Papa da Igreja Ortodoxa Copta de Alexandria',
  'Papa Teodoro II de Alexandria',
  'Mohamed ElBaradei',
  'Qatar',
  'Tunísia',
  'Estados Unidos',
  'União Africana',
  'Protesto sentado',
  'Violência política no Egito (2013–presente)',
  'Revolução Egípcia de 2011',
  'Violência política no Egito (2013–2014)',
  'Portal do Egito',
  'Portal da história',
  'Portal da política'],
 'ro': ['Mohamed Morsi',
  'Egipt',
  'Cairo',
  'Piața Tahrir',
  'Alexandria, Egipt',
  'Port Said',
  'Suez',
  'Cairo',
  'Frăția Musulmană',
  'Cairo',
  'Limba arabă',
  'Kefaya — pagină inexistentă',
  'Hosni Mubarak',
  'Revoluția egipteană din 2011',
  'Siria',
  'Armata egipteană — pagină inexistentă',
  'Hisham Zazou — pagină inexistentă',
  'Luxor',
  'Atef Helmi — pagină inexistentă',
  'Hatem Bagato — pagină inexistentă',
  'Khaled Abdel Aal — pagină inexistentă',
  'Al-Ahram — pagină inexistentă',
  'Sami Hafez Anan — pagină inexistentă',
  'Mohamed Kamel Amr — pagină inexistentă',
  'BBC',
  'Qatar',
  'Al Jezeera — pagină inexistentă',
  'Organizația Națiunilor Unite',
  'Statele Unite',
  'Barack Obama',
  'Tanzania'],
 'ru': ['Ас-Сиси, Абдул-Фаттах Халил',
  'Мурси, Мухаммед',
  'Братья-мусульмане',
  'Коптская церковь',
  'Феодор II (коптский Патриарх Александрийский)',
  'Эль-Барадеи, Мохаммед',
  'Мансур, Адли',
  'Википедия:Ссылки на источники',
  'Братья-мусульмане',
  'Каир',
  'Александрия',
  'Асьют',
  'Братья-мусульмане',
  'Аль-Баблауи, Хазем',
  'Эль-Барадеи, Мохаммед',
  'Хамас',
  'Президентские выборы в Египте (2014)',
  'Ас-Сиси, Абдул-Фаттах Халил',
  'Каир',
  'Гиза',
  'Копты',
  '4 октября',
  'Каир',
  'Reuters',
  'Пшеница',
  'Саудовская Аравия',
  'Саудовская Аравия',
  'Абдалла ибн Абдель Азиз Аль Сауд',
  'Сирия',
  'Сирия',
  'Асад, Башар',
  'Турция',
  'Турция',
  'Эрдоган, Реджеп Тайип',
  'Европейский союз',
  'Палестинская национальная администрация',
  'Хамас',
  'Хания, Исмаил',
  'ФАТХ',
  'Африканский союз',
  'Египетская революция 2011 года',
  'R4BIA',
  'Густерин, Павел Вячеславович'],
 'sr': ['3. јул',
  '2013',
  'Мухамед Мурси',
  'Јун',
  '1. јул',
  '3. јул',
  'Хосни Мубарак',
  'Фебруар',
  '2011',
  'Април',
  'Тамарод (страница не постоји)',
  '2004',
  '28. јун',
  'Каиро',
  'Генерал',
  'Абдел Фатах ел Сиси',
  'Устав',
  'Адли Мансур',
  '4. јул',
  'Трг Тахрир',
  'Мухамед ел Барадеи (страница не постоји)',
  'Папа Теодор II од Александрије (страница не постоји)',
  'Синајско полуострво',
  '10. јул',
  'Тунис',
  'Сједињене Америчке Државе',
  'Афричка унија',
  'Протести у Египту (2011)',
  'Демонстрације у Египту (2012—2013) (страница не постоји)'],
 'tg': ['2013',
  'Муҳаммад Мурсӣ',
  'Инқилоби 2011 Миср (саҳифа вуҷуд надорад)',
  'Муҳаммад Албародай (саҳифа вуҷуд надорад)',
  'Майдони Таҳрир (саҳифа вуҷуд надорад)',
  'Қоҳира',
  'Абду Фатаҳ Халил Ал-Сийсӣ'],
 'tr': ['Abdülfettah el Sisi',
  'Mısır Silahlı Kuvvetleri',
  '2012-13 Mısır protestoları',
  'Muhammed Mursi',
  '2012 Mısır cumhurbaşkanlığı seçimleri',
  'İslamcılık',
  'Kahire',
  'Müslüman Kardeşler',
  'Sağlık ve Nüfus Bakanlığı (Mısır) (sayfa mevcut değil)',
  'Mukattam (sayfa mevcut değil)',
  'Mısır Anayasası (sayfa mevcut değil)',
  'Mısır Yüksek Anayasa Mahkemesi',
  'Adli Mansur',
  'Teknokrasi',
  'El Ezher Baş İmamı (sayfa mevcut değil)',
  'Ahmed El Tayyip (sayfa mevcut değil)',
  'II. Tavadros (sayfa mevcut değil)',
  'Muhammed El Baradey',
  'Arap Baharı',
  'Tunus',
  'Afrika Birliği',
  'Darbe',
  'Devrim',
  '2013 Mısır katliamı',
  '2011 Mısır Devrimi',
  'Hüsnü Mübarek',
  '2012 Mısır cumhurbaşkanlığı seçimleri',
  'The Wall Street Journal',
  'Amr Musa',
  'Hamdin Sabahi (sayfa mevcut değil)',
  'Temerrud (sayfa mevcut değil)',
  'Ulusal Kurtuluş Cephesi (Mısır) (sayfa mevcut değil)',
  '6 Nisan Gençlik Hareketi (sayfa mevcut değil)',
  'Güçlü Mısır Partisi (sayfa mevcut değil)',
  'Rabiatül Adeviyye Camii (sayfa mevcut değil)',
  'Mermi',
  'Barack Obama',
  'Türkiye Cumhuriyeti',
  'Abdullah Gül',
  'Birleşmiş Milletler'],
 'uk': ['3 липня',
  '2013',
  'Мухаммед Мурсі',
  'Абдул Фатах Аль-Сісі',
  'Брати-мусульмани',
  'Коптська церква',
  'Федір ІІ (Тавадрос ІІ) (ще не написана)',
  'Мухаммед аль-Барадаї',
  'Адлі Мансур',
  'Каїр',
  'Александрія',
  'Асьют',
  'Хазем Аль-Баблаї (2013) (ще не написана)',
  'Мухаммед аль-Барадаї',
  'Reuters',
  'Пшениця',
  'Ісламський світ',
  'Туреччина',
  'Катар',
  'Саудівська Аравія',
  'Кувейт',
  'ОАЕ',
  'Туреччина',
  'Реджеп Таїп Ердоган',
  'Туреччина',
  'Партія справедливості та розвитку (Туреччина)',
  'Європейський союз',
  'Ізраїль',
  'Хосні Мубарак',
  'Саудівська Аравія',
  'Саудівська Аравія',
  'Абдалла ібн Абдель Азіз Аль Сауд',
  'Сирія',
  'Сирія',
  'Башар Асад',
  'Палестинська держава',
  'Хамас',
  'Ісмаїл Ханія',
  'ФАТХ',
  'Революція в Єгипті (2011)'],
 'ur': ['محمد مرسی', 'نواز شریف'],
 'vi': ['Mohamed Morsi',
  'Mohamed Morsi',
  'Cairo',
  'Alexandria',
  'Port Said',
  'Suez',
  'Anh em Hồi giáo',
  'Hosni Mubarak',
  'Human Rights Watch',
  'Hasim al-Beblawi (trang chưa được viết)',
  'Liên Hiệp Quốc',
  'Syria',
  'Omran al-Zoubi (trang chưa được viết)',
  'Anh',
  'David Cameron',
  'Viện Thứ dân của Anh (trang chưa được viết)',
  'Hoa Kỳ',
  'Barack Obama',
  'Tanzania',
  'Tổ chức Theo dõi Nhân quyền',
  'Liên minh châu Phi',
  'Nkosazana Dlamini-Zuma (trang chưa được viết)',
  'Liên minh châu Âu',
  'Đại diện cấp cao của Liên minh về Vấn đề Ngoại giao và Chính sách An ninh (trang chưa được viết)',
  'Catherine Ashton (trang chưa được viết)',
  'Liên Hiệp Quốc',
  'Ban Ki-moon',
  'Nabil Fahmy (trang chưa được viết)',
  'Navi Pillay (trang chưa được viết)',
  'Argentina',
  'Úc',
  'Kevin Rudd',
  'Bahrain',
  'Hamad bin Isa Al-Khalifa (trang chưa được viết)',
  'Canada',
  'John Baird (chính trị gia Canada) (trang chưa được viết)',
  'Trung Quốc',
  'Đức',
  'Guido Westerwelle',
  'Tunisia',
  'Tunisia',
  'Al-Qaeda',
  'Ayman al-Zawahiri',
  'Sharia',
  'Mùa xuân Ả Rập',
  'Biểu tình Ai Cập 2012-13',
  'From Dictatorship to Democracy (sách)',
  'Tự do chính trị'],
 'zh': ['埃及',
  '阿卜杜勒-法塔赫·塞西',
  '穆罕默德·穆尔西',
  '阿德里·曼苏尔',
  '穆斯林兄弟会',
  '政变',
  '納賽爾',
  '萨达特',
  '穆巴拉克',
  '世俗主義',
  '持不同政見者',
  '穆斯林兄弟會',
  '2012年埃及总统选举',
  '世俗國家',
  '艾哈迈德·沙菲克',
  '空軍',
  '穆巴拉克',
  '埃及总理',
  '穆罕默德·侯赛因·坦塔维',
  '萨米·阿南',
  '阿卜杜勒-法塔赫·塞西',
  '西德基·苏卜希',
  '萨拉菲派(页面不存在)',
  '女性',
  '少數族群',
  '公民國家(页面不存在)',
  '世俗主義',
  '阿卜杜勒-法塔赫·塞西',
  '以色列',
  '内塔尼亚胡',
  '塔里克·拉曼丹',
  '半岛电视台',
  '软禁',
  '逮捕',
  '财产',
  '冻结(页面不存在)',
  '间谍',
  '拘留',
  '民意',
  '2013年共和國衛隊總部衝突',
  '半岛电视台',
  '路透社',
  'BBC',
  '反人类罪',
  '終身監禁',
  '亚历山大',
  '法医',
  '清场(页面不存在)',
  '恐怖组织',
  '明亚',
  '死刑',
  '終身監禁',
  '警察',
  '国际特赦组织',
  '联合国',
  '穆斯林兄弟会',
  '恐怖组织',
  '美国',
  '巴沙尔·阿萨德',
  '威廉·海格',
  '參議員',
  '格雷厄姆(页面不存在)',
  '麦凯恩',
  '加利福尼亚大学',
  '半岛电视台',
  '美国国务院',
  '民主局(页面不存在)',
  '人权与劳工局(页面不存在)',
  '中东伙伴计划(页面不存在)',
  '国际开发署(页面不存在)',
  '正义与发展党',
  '共和人民黨 (土耳其)',
  '内战',
  '潘基文',
  'Catherine Ashton',
  '非洲联盟',
  '報復',
  '电视频道',
  '言论自由',
  '于时语(页面不存在)',
  '纽约时报',
  '利益',
  '世袭',
  '土耳其',
  '巴基斯坦',
  '宪政',
  '阿拉伯世界',
  '以色列',
  '伊朗',
  '巴列维',
  '摩薩台',
  '反美',
  '极端主义',
  '何清涟',
  '联合早报',
  '社论',
  '泰国',
  '2011年埃及革命',
  '2012年埃及总统选举',
  '2013年土耳其反政府抗议运动',
  '自由与正义党',
  '2013年共和國衛隊總部衝突',
  '2013年埃及政變事後影響']}
# Fetch the outlinks of the seed article using get_page_outlinks' default arguments
page_outlinks = get_page_outlinks(page_title)
# Bare expression as the last line of the cell so the notebook displays the result
page_outlinks
['Abdel Fattah el-Sisi',
 'Mohamed Morsi',
 'Egyptian Constitution of 2012',
 'June 2013 Egyptian protests',
 'Muslim Brotherhood',
 'Supreme Constitutional Court of Egypt',
 'Adly Mansour',
 'Grand Imam of al-Azhar',
 'Ahmed el-Tayeb',
 'Pope of the Coptic Orthodox Church of Alexandria',
 'Pope Tawadros II of Alexandria',
 'Mohamed ElBaradei',
 'Tunisia',
 'African Union',
 'Revolution',
 'August 2013 Rabaa massacre',
 'Post-coup unrest in Egypt (2013–14)',
 'Hosni Mubarak',
 'Egyptian Revolution of 2011',
 'History of Egypt under Hosni Mubarak',
 'Egyptian presidential election, 2012',
 'Muslim Brotherhood in post-Mubarak electoral politics of Egypt',
 'Mohamed ElBaradei',
 'Amr Moussa',
 'Hamdeen Sabahi',
 'The Wall Street Journal',
 'Tamarod',
 'National Salvation Front (Egypt)',
 'April 6 Youth Movement',
 'Strong Egypt Party',
 'The Gallup Organization',
 'Foreign involvement in the Syrian civil war',
 'International Crisis Group',
 'Egyptian constitution',
 'Anti-Coup Alliance',
 'El-Hossari Mosque (page does not exist)',
 'El-Nahda Square (page does not exist)',
 'Cairo University',
 'Ain Shams',
 "Coup d'état",
 'Tamarod',
 'Politics of the United Arab Emirates',
 'Cairo',
 'Alexandria',
 'Dakahlia Governorate',
 'Gharbiya',
 'Aswan',
 'Rabia Al-Adawiya Mosque',
 'Egyptian Presidential Palace',
 'El-Quba Palace (page does not exist)',
 'Damietta',
 'Tahrir Square',
 'Heliopolis Palace',
 'Port Said',
 'Suez',
 'Mokatam (page does not exist)',
 'Egyptian Armed Forces',
 'Ministry of Tourism (Egypt)',
 'Hisham Zazou',
 "Al-Gama'a al-Islamiyya",
 'Luxor massacre',
 'Luxor',
 'Ministry of Communications and Information Technology (Egypt)',
 'Atef Helmi',
 'Hatem Bagato (page does not exist)',
 'Khaled Abdel Aal (page does not exist)',
 'Freedom and Justice Party (Egypt)',
 'Barack Obama',
 'United States',
 'Minister of Foreign Affairs (Egypt)',
 'Mohamed Kamel Amr',
 'Egyptian Army',
 'List of Ministers of Defence of Egypt',
 'Abdel Fattah el-Sisi',
 'Court of Cassation (Egypt) (page does not exist)',
 'Abdel Meguid Mahmoud',
 'Talaat Abdallah (page does not exist)',
 'Al-Ahram',
 'Constitution of Egypt',
 'Sami Hafez Anan',
 'Egyptian Armed Forces',
 'Egyptian Armed Forces',
 'Mohamed El-Baradei',
 'National Salvation Front (Egypt)',
 'Abdel Fattah el-Sisi',
 'Waleed al-Haddad (page does not exist)',
 'Mohammed Zaki (page does not exist)',
 'Yahya Hamed (page does not exist)',
 "Talk:2013 Egyptian coup d'état",
 'Abdel Fattah el-Sisi',
 'Adli Mansour',
 'Technocracy',
 'Republican Guard (Egypt)',
 'Adli Mansour',
 'Shura Council',
 'Allahu akbar',
 'Pope of the Coptic Orthodox Church of Alexandria',
 'Tawadros II',
 'Grand Imam of al-Azhar',
 'Ahmed el-Tayeb',
 'Mohamed ElBaradei',
 'Tamarod',
 'Mahmoud Badr',
 'Al-Nour party',
 'Galal Murra (page does not exist)',
 'National Salvation Front (Egypt)',
 'Egyptian Armed Forces',
 'Republican Guard (Egypt)',
 'Egyptian Armed Forces',
 'Colonel',
 'Ahmed Mohammed Ali',
 'Egyptian Armed Forces',
 'Catherine Ashton',
 'European Union',
 'African Union',
 'Freedom and Justice Party (Egypt)',
 'Saad El-Katatni',
 'Rashad al-Bayoumi',
 'Muslim Brotherhood',
 'Al-Ahram',
 'Mohammed Badie',
 'Khairat El-Shater',
 'Mahdi Akef',
 'Mohamed Beltagy',
 'Safwat Hegazi',
 'Al-Wasat Party',
 'Abou Elela Mady',
 'Essam Sultan (page does not exist)',
 'Al Jazeera English',
 'Misr 25',
 'Al Hafez (page does not exist)',
 'Al Nas (page does not exist)',
 'Al Jazeera',
 'Mubasher Misr (page does not exist)',
 'Associated Press Television News',
 'Cairo News Company (page does not exist)',
 'Committee to Protect Journalists',
 'BBC News',
 'Jeremy Bowen',
 'Al-Ahram',
 'Friday prayers',
 '2013 Republican Guard headquarters clashes',
 'BBC News',
 'Jeremy Bowen',
 'Qena',
 '6th October Bridge',
 'Gaza Strip',
 'Rafah border crossing',
 'Prime Minister of the Gaza Strip',
 'Ismail Haniyeh',
 '2013 Republican Guard headquarters clashes',
 'Mohamed Beltagy',
 'Al-Dustour (Egypt)',
 'Foreign rebel fighters in the Syrian civil war',
 'University of California at Berkeley',
 'Qalyoub (page does not exist)',
 'Rabaa al-Adawiya mosque',
 "Talk:2013 Egyptian coup d'état",
 'Coptic Christian',
 'Christians Against the Coup',
 'Anti-Coup Alliance',
 'Al-Arish',
 'Treason',
 "2005 Mauritanian coup d'état",
 "2012 Malian coup d'état",
 '2009 Malagasy political crisis',
 "1999 Pakistani coup d'état",
 'Egyptian American',
 'Michigan',
 'Amnesty International',
 'Muslim Brotherhood',
 'Freedom and Justice Party (Egypt)',
 'Egyptian Army',
 'United Arab Emirates',
 'Tamarod',
 'African Union',
 'Nkosazana Dlamini-Zuma',
 'European Union',
 'High Representative of the Union for Foreign Affairs and Security Policy',
 'Catherine Ashton',
 'United Nations',
 'Ban Ki-moon',
 'Nabil Fahmy',
 'Navi Pillay',
 'Argentina',
 'Australia',
 'Kevin Rudd',
 'Bahrain',
 'Hamad bin Isa Al-Khalifa',
 'Canada',
 'John Baird (Canadian politician)',
 'China',
 'Colombia',
 'France',
 'Francois Hollande',
 'Tunisian revolution',
 'Aftermath of the Libyan civil war',
 'Syrian civil war',
 'Laurent Fabius',
 'Germany',
 'Guido Westerwelle',
 'Iran',
 'Ali Akbar Salehi',
 'Iraq',
 'Nouri al-Maliki',
 'Israel',
 'Benjamin Netanyahu',
 'Haaretz',
 'Yisrael Katz (politician born 1955)',
 'Israeli Army Radio',
 'Eli Shaked (page does not exist)',
 'Eli Shaked (page does not exist)',
 'Jordan',
 'Kuwait',
 'Kuwait News Agency',
 'Sabah Al-Ahmad Al-Jaber Al-Sabah',
 'Lebanon',
 'Tammam Salam',
 'Libya',
 'Rome',
 'Ali Zidan',
 'Malaysia',
 'Najib Razak',
 'Ministry of Youth and Sports (Malaysia)',
 'Khairy Jamaluddin',
 'Pan-Malaysian Islamic Party',
 'Nik Abdul Aziz Nik Mat',
 'Anwar Ibrahim',
 'Pan-Malaysian Islamic Party',
 'Nik Abdul Aziz Nik Mat',
 'Anwar Ibrahim',
 'Norway',
 'Espen Barth Eide',
 'Netherlands',
 'Pakistan',
 'Nawaz Sharif',
 'State of Palestine',
 'President of the State of Palestine',
 'Mahmoud Abbas',
 'Hanan Ashrawi',
 'Gaza Strip',
 'Hamas',
 'Governance of the Gaza Strip',
 'Yahia Moussa (page does not exist)',
 'Hamas',
 'Ahmad Yousef (page does not exist)',
 'Sic',
 'Gaza Strip',
 'Hamas',
 'Governance of the Gaza Strip',
 'Yahia Moussa (page does not exist)',
 'Hamas',
 'Ahmad Yousef (page does not exist)',
 'Sic',
 'Eli Shaked (page does not exist)',
 'Pan-Malaysian Islamic Party',
 'Nik Abdul Aziz Nik Mat',
 'Anwar Ibrahim',
 'Gaza Strip',
 'Hamas',
 'Governance of the Gaza Strip',
 'Yahia Moussa (page does not exist)',
 'Hamas',
 'Ahmad Yousef (page does not exist)',
 'Sic',
 'Philippines',
 'Benigno Aquino III',
 'Edwin Lacierda',
 'Department of Foreign Affairs (Philippines)',
 'Poland',
 'Qatar',
 'Al Jazeera',
 'Tamim bin Hamad Al Thani',
 'Khaled al-Attiya (page does not exist)',
 'Russia',
 'Saudi Arabia',
 'Abdullah of Saudi Arabia',
 'Somalia',
 'Al-Shabaab (militant group)',
 'Twitter',
 'Al-Shabaab (militant group)',
 'Twitter',
 'Sudan',
 'Ali Karti (page does not exist)',
 'Mohamed Kamel Amr',
 'Egypt-Sudan relations',
 'Hassan al-Turabi',
 'Hassan al-Turabi',
 'Sweden',
 'Carl Bildt',
 'Switzerland',
 'Syria',
 'Bashar al-Assad',
 'Tunisia',
 'Arab Spring',
 'Ennahda Movement',
 'Rachid Ghannouchi',
 'Turkey',
 'Recep Tayyip Erdogan',
 'Ahmet Davutoglu',
 'Hüseyin Çelik',
 'Justice and Development Party (Turkey)',
 'Cabinet Erdoğan II',
 "Republican People's Party (Turkey)",
 'Kemal Kılıçdaroğlu',
 "Republican People's Party (Turkey)",
 'Kemal Kılıçdaroğlu',
 'United Arab Emirates',
 'Abdullah bin Zayed Al Nahyan',
 'United Kingdom',
 'William Hague',
 'United States',
 'William Joseph Burns',
 'John McCain',
 'Senate Foreign Relations Committee',
 'Ed Royce',
 'House Foreign Affairs Committee',
 'Eliot Engel',
 'Dan Shapiro',
 'Tel Aviv',
 'Frank G. Wisner',
 'United States Secretary of State',
 'John Kerry',
 'William Joseph Burns',
 'John McCain',
 'Senate Foreign Relations Committee',
 'Ed Royce',
 'House Foreign Affairs Committee',
 'Eliot Engel',
 'Dan Shapiro',
 'Tel Aviv',
 'Frank G. Wisner',
 'United States Secretary of State',
 'John Kerry',
 'Yemen',
 'Abd Rabbuh Mansur Hadi',
 'Hamid al-Ahmar',
 'Al-Islah (Yemen)',
 'Muslim Brotherhood',
 'Hamid al-Ahmar',
 'Al-Islah (Yemen)',
 'Muslim Brotherhood',
 'Al-Shabaab (militant group)',
 'Twitter',
 'Hassan al-Turabi',
 "Republican People's Party (Turkey)",
 'Kemal Kılıçdaroğlu',
 'William Joseph Burns',
 'John McCain',
 'Senate Foreign Relations Committee',
 'Ed Royce',
 'House Foreign Affairs Committee',
 'Eliot Engel',
 'Dan Shapiro',
 'Tel Aviv',
 'Frank G. Wisner',
 'United States Secretary of State',
 'John Kerry',
 'Hamid al-Ahmar',
 'Al-Islah (Yemen)',
 'Muslim Brotherhood',
 'Al-Qaeda',
 'Ayman al-Zawahiri',
 'Sharia',
 'Post-coup unrest in Egypt (2013–14)',
 'August 2013 Rabaa massacre',
 'Egyptian Revolution of 2011',
 'Egyptian Revolution of 1952',
 'Egyptian Revolution of 1919',
 'Digital object identifier',
 'Digital object identifier']
 

You could write a recursive function like recursively_get_hyperlink_network that would crawl the hyperlink network out to an arbitrary distance, but this becomes exorbitantly expensive at any depth greater than 1.

Here's an example function, but it is deliberately not executable, to prevent you from harming yourself. :)

def recursively_get_hyperlink_network(seed_page,depth):
    """Recursively crawl the outlinks of seed_page down to the given depth.

    WARNING: every level multiplies the number of API requests by the number
    of links on each page, so this is only practical for very small depths.

    seed_page - title of the page to start crawling from
    depth - how many further hops to follow; a negative depth stops the crawl

    Returns a dictionary. The seed page maps to the list returned by
    get_page_outlinks, and each linked page maps to the (possibly empty)
    dictionary produced by crawling that page with depth-1.
    """
    neighbors = {}

    # Base case: we have gone past the requested depth, so crawl nothing
    if depth < 0:
        return neighbors

    outlinks = get_page_outlinks(seed_page)
    neighbors[seed_page] = outlinks

    # Recurse into each distinct outlink; repeated links would only repeat the
    # same expensive crawl and overwrite the same key
    for neighbor in set(outlinks):
        # Call this function itself (the original called an undefined name)
        neighbors[neighbor] = recursively_get_hyperlink_network(neighbor,depth-1)

    return neighbors

Instead, define a simple function to get the 1.5-step ego hyperlink network. The "ego" is the seed page you start from, and the "alters" are the neighbors that the ego links out to. We also get the alters of the alters (2nd-order alters), but only include these 2nd-order connections if they link to 1st-order alters. In other words, the 1.5-step ego hyperlink network consists of all the pages linked from the seed page and the connections among this set of articles.

def get_hyperlink_alters(seed_page):
    """Build the weighted 1.5-step ego hyperlink network around seed_page.

    The network contains the seed page (the "ego"), the pages it links to
    (the "alters"), and the links among that set of pages. Links from an
    alter to a page outside that set are dropped.

    seed_page - title of the article to start from

    Returns a networkx DiGraph whose edges carry a 'weight' attribute counting
    how many times the source page links to the target page.
    """
    # Get all the alters for the seed page, following redirects.
    # NOTE: redirects must be passed by keyword. The second positional parameter
    # of get_page_outlinks is the language code, so a positional 1 ends up
    # requesting the non-existent host '1.wikipedia.org'.
    seed_alters = get_page_outlinks(seed_page,redirects=1)

    # Initialize the adjacency "list" with the seed page's alters
    neighbors = {seed_page: seed_alters}

    # For each of the alters, get their alters and store in the adjacency dictionary
    for neighbor in set(seed_alters): # Don't recrawl duplicates
        # A page that links to itself is already stored; don't overwrite it
        if neighbor != seed_page:
            neighbors[neighbor] = get_page_outlinks(neighbor,redirects=0)

    # The only pages allowed in the graph: the ego plus its 1st order alters.
    # Built once as a set so each membership check below is cheap.
    ego_network = set(seed_alters)
    ego_network.add(seed_page)

    # Initialize an empty graph that we will add nodes and edges into
    g = nx.DiGraph()

    # For each entry in the adjacency dictionary, keep only links that point inside the ego network
    # If the edge is already in the graph, increment the edge weight by one
    # If the edge is not already in the graph, add the edge with a weight of one
    for article,neighbor_list in neighbors.items():
        for neighbor in neighbor_list:
            if neighbor not in ego_network:
                continue
            if g.has_edge(article,neighbor):
                g[article][neighbor]['weight'] += 1
            else:
                g.add_edge(article,neighbor,weight=1)

    # Return the weighted graph
    return g

Run this on an example article and save the resulting graph object to disk.

This step could take more than a minute depending on the number of links and size of the neighboring pages.

# Crawl the seed article and its alters to build the 1.5-step ego network
hyperlink_g = get_hyperlink_alters(page_title)

# File names are friendlier without spaces, so swap them for underscores
graphml_filename = 'hyperlink_{0}.graphml'.format(page_title.replace(' ','_'))

# Write the graph out as GraphML so it can be opened in Gephi
nx.write_graphml(hyperlink_g,graphml_filename)
---------------------------------------------------------------------------
gaierror                                  Traceback (most recent call last)
/srv/paws/lib/python3.4/site-packages/requests/packages/urllib3/connection.py in _new_conn(self)
    137             conn = connection.create_connection(
--> 138                 (self.host, self.port), self.timeout, **extra_kw)
    139 

/srv/paws/lib/python3.4/site-packages/requests/packages/urllib3/util/connection.py in create_connection(address, timeout, source_address, socket_options)
     74 
---> 75     for res in socket.getaddrinfo(host, port, family, socket.SOCK_STREAM):
     76         af, socktype, proto, canonname, sa = res

/usr/lib/python3.4/socket.py in getaddrinfo(host, port, family, type, proto, flags)
    529     addrlist = []
--> 530     for res in _socket.getaddrinfo(host, port, family, type, proto, flags):
    531         af, socktype, proto, canonname, sa = res

gaierror: [Errno -2] Name or service not known

During handling of the above exception, another exception occurred:

NewConnectionError                        Traceback (most recent call last)
/srv/paws/lib/python3.4/site-packages/requests/packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, **response_kw)
    593                                                   body=body, headers=headers,
--> 594                                                   chunked=chunked)
    595 

/srv/paws/lib/python3.4/site-packages/requests/packages/urllib3/connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
    349         try:
--> 350             self._validate_conn(conn)
    351         except (SocketTimeout, BaseSSLError) as e:

/srv/paws/lib/python3.4/site-packages/requests/packages/urllib3/connectionpool.py in _validate_conn(self, conn)
    834         if not getattr(conn, 'sock', None):  # AppEngine might not have  `.sock`
--> 835             conn.connect()
    836 

/srv/paws/lib/python3.4/site-packages/requests/packages/urllib3/connection.py in connect(self)
    280         # Add certificate verification
--> 281         conn = self._new_conn()
    282 

/srv/paws/lib/python3.4/site-packages/requests/packages/urllib3/connection.py in _new_conn(self)
    146             raise NewConnectionError(
--> 147                 self, "Failed to establish a new connection: %s" % e)
    148 

NewConnectionError: <requests.packages.urllib3.connection.VerifiedHTTPSConnection object at 0x7f3a967c7c18>: Failed to establish a new connection: [Errno -2] Name or service not known

During handling of the above exception, another exception occurred:

MaxRetryError                             Traceback (most recent call last)
/srv/paws/lib/python3.4/site-packages/requests/adapters.py in send(self, request, stream, timeout, verify, cert, proxies)
    422                     retries=self.max_retries,
--> 423                     timeout=timeout
    424                 )

/srv/paws/lib/python3.4/site-packages/requests/packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, **response_kw)
    642             retries = retries.increment(method, url, error=e, _pool=self,
--> 643                                         _stacktrace=sys.exc_info()[2])
    644             retries.sleep()

/srv/paws/lib/python3.4/site-packages/requests/packages/urllib3/util/retry.py in increment(self, method, url, response, error, _pool, _stacktrace)
    362         if new_retry.is_exhausted():
--> 363             raise MaxRetryError(_pool, url, error or ResponseError(cause))
    364 

MaxRetryError: HTTPSConnectionPool(host='1.wikipedia.org', port=443): Max retries exceeded with url: /w/api.php?action=parse&format=json&page=2013_Egyptian_coup_d'%C3%A9tat&redirects=1&prop=text&disableeditsection=1&disabletoc=1 (Caused by NewConnectionError('<requests.packages.urllib3.connection.VerifiedHTTPSConnection object at 0x7f3a967c7c18>: Failed to establish a new connection: [Errno -2] Name or service not known',))

During handling of the above exception, another exception occurred:

ConnectionError                           Traceback (most recent call last)
<ipython-input-26-293154fb2210> in <module>()
      1 # Create the hyperlink network
----> 2 hyperlink_g = get_hyperlink_alters(page_title)
      3 
      4 # Save the graph to disk to visualize in Gephi
      5 nx.write_graphml(hyperlink_g,'hyperlink_{0}.graphml'.format(page_title.replace(' ','_')))

<ipython-input-25-2ce5bf0a6cd8> in get_hyperlink_alters(seed_page)
      4 
      5     # Get all the alters for the seed page and store them in the adjacency dictionary
----> 6     neighbors[seed_page] = get_page_outlinks(seed_page,1)
      7 
      8     # For each of the alters, get their alters and store in the adjacency dictionary

<ipython-input-12-26905e8b645d> in get_page_outlinks(page_title, lang, redirects)
      7     # Get the response from the API for a query
      8     # After passing a page title, the API returns the HTML markup of the current article version within a JSON payload
----> 9     req = requests.get('https://{2}.wikipedia.org/w/api.php?action=parse&format=json&page={0}&redirects={1}&prop=text&disableeditsection=1&disabletoc=1'.format(page_title,redirects,lang))
     10 
     11     # Read the response into JSON to parse and extract the HTML

/srv/paws/lib/python3.4/site-packages/requests/api.py in get(url, params, **kwargs)
     68 
     69     kwargs.setdefault('allow_redirects', True)
---> 70     return request('get', url, params=params, **kwargs)
     71 
     72 

/srv/paws/lib/python3.4/site-packages/requests/api.py in request(method, url, **kwargs)
     54     # cases, and look like a memory leak in others.
     55     with sessions.Session() as session:
---> 56         return session.request(method=method, url=url, **kwargs)
     57 
     58 

/srv/paws/lib/python3.4/site-packages/requests/sessions.py in request(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)
    486         }
    487         send_kwargs.update(settings)
--> 488         resp = self.send(prep, **send_kwargs)
    489 
    490         return resp

/srv/paws/lib/python3.4/site-packages/requests/sessions.py in send(self, request, **kwargs)
    607 
    608         # Send the request
--> 609         r = adapter.send(request, **kwargs)
    610 
    611         # Total elapsed time of the request (approximately)

/srv/paws/lib/python3.4/site-packages/requests/adapters.py in send(self, request, stream, timeout, verify, cert, proxies)
    485                 raise ProxyError(e, request=request)
    486 
--> 487             raise ConnectionError(e, request=request)
    488 
    489         except ClosedPoolError as e:

ConnectionError: HTTPSConnectionPool(host='1.wikipedia.org', port=443): Max retries exceeded with url: /w/api.php?action=parse&format=json&page=2013_Egyptian_coup_d'%C3%A9tat&redirects=1&prop=text&disableeditsection=1&disabletoc=1 (Caused by NewConnectionError('<requests.packages.urllib3.connection.VerifiedHTTPSConnection object at 0x7f3a967c7c18>: Failed to establish a new connection: [Errno -2] Name or service not known',))
# Basic size of the network: how many pages and how many links between them
hg_nodes,hg_edges = hyperlink_g.number_of_nodes(),hyperlink_g.number_of_edges()

# Density as computed by networkx for this graph
hg_density = nx.density(hyperlink_g)

print("There are {0} nodes and {1} edges in the hyperlink network.".format(hg_nodes,hg_edges))
print('{0:.2%} of the possible edges actually exist.'.format(hg_density))
def reciprocity(g):
    """Return the fraction of edges in ``g`` whose reverse edge also exists.

    An edge (i, j) counts as reciprocated when (j, i) is an edge of the same
    graph; each direction of a mutual pair is counted separately. A self-loop
    (i, i) is its own reverse and therefore counts as reciprocated.

    Args:
        g: A graph object exposing ``edges()``, ``has_edge(u, v)`` and
            ``number_of_edges()`` (for example a networkx ``DiGraph``).

    Returns:
        A float between 0 and 1. Returns 0.0 for a graph without edges
        instead of dividing by zero.
    """
    total_edges = g.number_of_edges()
    if total_edges == 0:
        return 0.0

    # Check the reverse edge on the graph that was passed in, not on a global graph
    reciprocated_count = sum(1 for (i, j) in g.edges() if g.has_edge(j, i))

    return reciprocated_count / float(total_edges)

# Fraction of edges in the hyperlink network whose reverse edge also exists
hg_reciprocity = reciprocity(hyperlink_g)

reciprocity_message = '{0:.2%} of the edges in the hyperlink network are reciprocated.'
print(reciprocity_message.format(hg_reciprocity))

Play the Wikipedia Game!

Using only the hyperlinks on the article, try to get from the first article to the second article.

# Draw two *distinct* pages for the game. replace=False prevents the same page from
# being selected as both the start and the destination (the default samples with
# replacement). It raises ValueError if the network has fewer than two nodes.
page1,page2 = np.random.choice(list(hyperlink_g.nodes()),2,replace=False)
print("Try to navigate from \"{0}\" to \"{1}\" using only hyperlinks.\n".format(page1,page2))

# Wikipedia URLs use underscores where the page title has spaces
print("Start at: https://en.wikipedia.org/wiki/{0}".format(page1.replace(' ','_')))

No cheating!

After you've played the game a few times, see what an optimal shortest path is. You may get an error indicating there is no shortest path, in which case, try a new pair of nodes.

# Show one optimal route from the start page to the destination page
nx.shortest_path(hyperlink_g, source=page1, target=page2)

The shortest path is the path connecting two nodes in the fewest steps, and the shortest path length is the number of steps on that path. This is related to the "small world" effect where everyone in the world is just a few handshakes from each other. It's rare to find complex networks where the longest shortest path is above 5. Nodes that are this far from each other are likely about very unrelated topics.

If there are no shortest paths longer than the threshold, lower the path_length_threshold by one (for example, from 5 to 4) and run the cell again.

The long_path_lengths dictionary below is populated by computing all the shortest path lengths between nodes in the network and only keeping those paths that are longer than path_length_threshold steps. In a directed graph like our hyperlink network, it's important to follow the direction of the arrows: if page A links to page B but page B doesn't link to page A, then we can't make a shortest path from B to A, we have to find another path.

# A shortest path must have more than this many steps to be kept as a "long" path
path_length_threshold = 4

# Maps a source page to the list of pages whose shortest path from that source is
# longer than path_length_threshold
long_path_lengths = {}

# dict(...) accepts both return styles of all_pairs_shortest_path_length: older
# networkx releases return a dict keyed by source, newer ones return an iterator of
# (source, lengths) pairs that has no .items() method
for k,d in dict(nx.all_pairs_shortest_path_length(hyperlink_g)).items():
    long_paths = [v for v,l in d.items() if l > path_length_threshold]
    if long_paths:
        long_path_lengths[k] = long_paths

long_path_lengths.keys()

The shortest path between the articles can be identified using the shortest_path function and supplying the graph and the names of two nodes.

# Randomly choose a starting article that has at least one long shortest path.
# Raises ValueError when long_path_lengths is empty (lower the threshold in that case).
page1 = str(np.random.choice(list(long_path_lengths.keys())))

# Choose the destination from the pages recorded as far away from page1. Picking two
# random keys instead would only give two starting pages, with no guarantee that a
# path exists between them or that it is long.
page2 = str(np.random.choice(long_path_lengths[page1]))
print("The two pages randomly selected are: \"{0}\" and \"{1}\"".format(page1,page2))

# Display the path between these articles
nx.shortest_path(hyperlink_g,page1,page2)

Test out different combinations of articles from the long_path_lengths to find the articles that are farthest apart by entering different article names for page1 and page2.

# Hand-picked start and destination articles; replace the titles to try other pairs
page1, page2 = 'National Association for Business Economics', 'NATO'

# Shortest route from the first article to the second
nx.shortest_path(hyperlink_g, source=page1, target=page2)
# Read the degrees directly from the graph. Converting degree centrality back with
# int(centrality*(n-1)) truncates floating-point round-off (for example
# 1/49*49 == 0.9999999999999999, which int() turns into 0), so some nodes would be
# reported with one link fewer than they have.
# dict(...) handles both a dict return value and an iterable of (node, degree) pairs.
hg_in_degree_d = dict(hyperlink_g.in_degree())
hg_out_degree_d = dict(hyperlink_g.out_degree())

Look at the nodes with the highest in-degree: other pages in the network point to this page.

# One row per node, with its in-degree and out-degree side by side
degree_columns = {'In':hg_in_degree_d,'Out':hg_out_degree_d}
degree_df = pd.DataFrame(degree_columns)

# Rank nodes from most to fewest incoming links and show the first ten
in_degree_ranked = degree_df['In'].sort_values(ascending=False)
in_degree_ranked.head(10)

Look at the nodes with the highest out-degree: these pages point to many other pages.

# Rank nodes from most to fewest outgoing links and show the first ten
out_degree_ranked = degree_df['Out'].sort_values(ascending=False)
out_degree_ranked.head(10)

Look at the nodes that have no links out.

# Rows whose out-degree is zero, shown as just the 'Out' column
no_out_links_df = degree_df.query('Out == 0')
no_out_links_df['Out']

Look at the nodes that have a single link in. These are also known as (in-)pendants. If there are none, it should appear as an empty series.

# Rows whose in-degree is exactly one, shown as just the 'In' column
in_pendants_df = degree_df.query('In == 1')
in_pendants_df['In']

Look at the nodes with a single link out. These are also known as (out-)pendants. If there are none, it should appear as an empty series.

# Rows whose out-degree is exactly one, shown as just the 'Out' column
out_pendants_df = degree_df.query('Out == 1')
out_pendants_df['Out']

Given a page, what are the neighbors that link in to it and that it links out to? To look at a specific article, assign its title to the page1 variable in place of the np.random.choice(degree_df.index) call.

# Pick a random node; replace this with a specific article title to inspect that page
page1 = np.random.choice(degree_df.index)

# list(...) makes the neighbors printable whether networkx returns a list (older
# releases) or an iterator (newer releases, which would otherwise print only the
# iterator object rather than the page names)
in_connections = list(hyperlink_g.predecessors(page1))
print("The links into node \"{0}\" are:\n{1}".format(page1,in_connections))
out_connections = list(hyperlink_g.successors(page1))
print("The links out from node \"{0}\" are:\n{1}".format(page1,out_connections))
# Degree distributions: for every degree value, how many nodes have that degree.
# The columns are named explicitly ('index' = degree value, 'In'/'Out' = node count)
# instead of relying on the names value_counts().reset_index() happens to produce,
# because those implicit names differ between pandas versions.
in_degree_counts = degree_df['In'].value_counts()
in_degree_dist_df = pd.DataFrame({'index':in_degree_counts.index.values,
                                  'In':in_degree_counts.values},
                                 columns=['index','In'])

out_degree_counts = degree_df['Out'].value_counts()
out_degree_dist_df = pd.DataFrame({'index':out_degree_counts.index.values,
                                   'Out':out_degree_counts.values},
                                  columns=['index','Out'])

# Plot both distributions on the same axes
f,ax = plt.subplots(1,1)
in_degree_dist_df.plot.scatter(x='index',y='In',ax=ax,c='blue',label='In')
out_degree_dist_df.plot.scatter(x='index',y='Out',ax=ax,c='red',label='Out')

# symlog keeps zero-valued points on the plot while scaling logarithmically elsewhere
ax.set_xscale('symlog')
ax.set_yscale('symlog')
ax.set_xlim((0,1e3))
ax.set_ylim((0,1e3))

ax.set_xlabel('Connections')
ax.set_ylabel('Count')

Calculate communities within the network

Define a function to compute node community memberships for multiple community detection algorithms within igraph. The output is a dictionary of dictionaries where the top-level key is the name of the algorithm and returns a second-level dictionary keyed by the page name with values being the community membership value. Documentation and details about these algorithms can be found under the igraph graph-class documentation.

def comparative_community_detector(igraph):
    """Run several community detection algorithms and label nodes with the results.

    Four algorithms (edge betweenness, infomap, spinglass, walktrap) are run on
    the graph as passed in; three more (fastgreedy, leading eigenvector,
    multilevel) are run on the result of ``igraph.as_undirected()``.

    Args:
        igraph: A graph object providing the ``community_*`` methods used
            below, ``as_undirected()``, and a ``vs['id']`` sequence of node
            identifiers.

    Returns:
        A dict keyed by algorithm name. Each value is a dict that pairs the
        entries of ``igraph.vs['id']`` with that algorithm's membership values.
    """
    # Algorithms applied to the graph exactly as given
    raw_memberships = {
        'betweenness': igraph.community_edge_betweenness().as_clustering().membership,
        'infomap': igraph.community_infomap().membership,
        'spinglass': igraph.community_spinglass().membership,
        'walktrap': igraph.community_walktrap().as_clustering().membership,
    }

    # Algorithms applied to the undirected version of the graph
    undirected_graph = igraph.as_undirected()
    raw_memberships['fastgreedy'] = undirected_graph.community_fastgreedy().as_clustering().membership
    raw_memberships['leading_eigenvector'] = undirected_graph.community_leading_eigenvector().membership
    raw_memberships['multilevel'] = undirected_graph.community_multilevel().membership

    # Pair every node identifier with its community number, per algorithm
    node_ids = igraph.vs['id']
    return {
        algorithm: dict(zip(node_ids, community_numbers))
        for algorithm, community_numbers in raw_memberships.items()
    }

Not included in the comparative_community_detector function are two additional community detection algorithms that are too intensive or are not working properly. They're documented below if you ever care to explore in the future.

# Uses up a ton of memory and crashes kernel immediately ig_hg_optimal_modularity = hyperlink_g.community_optimal_modularity().membership ig_hg_optimal_modularity_labels = dict(zip(ig_hg.vs['id'],ig_hg_optimal_modularity)) pd.Series(ig_hg_optimal_modularity_labels).value_counts().head(10) # Lumps everyone into a single community ig_hg_label_propagation = hyperlink_g.community_label_propagation(initial=range(ig_hg_d.vcount()),fixed=[False]*ig_hg_d.vcount()).membership ig_hg_label_propagation_labels = dict(zip(ig_hg_d.vs['id'],ig_hg_label_propagation)) pd.Series(ig_hg_label_propagation_labels).value_counts().head(10)

Here we need to shift from using the networkx library to using the igraph library. The former is built purely in Python which makes it easier-to-use but somewhat slower while the latter is a "wrapper" that lets us write in Python but does the calculations in much-faster C code behind-the-scenes.

# Both libraries read the same GraphML file, whose name is built from page_title
hyperlink_graphml_file = 'hyperlink_{0}.graphml'.format(page_title.replace(' ','_'))

# networkx graph object loaded from disk
nx_hg = nx.read_graphml(hyperlink_graphml_file)

# igraph graph object loaded from the same file
ig_hg = ig.read(hyperlink_graphml_file)

# Summarize the igraph graph object
ig.summary(ig_hg)

Run the function on the igraph version of the hyperlink network.

This may take a minute or more since these are intensive calculations

# Compute the community memberships for every algorithm on the igraph graph object
comparative_community_labels = comparative_community_detector(ig_hg)

# Flip the algorithm -> {page: community} mapping into page -> {algorithm: community}
comparative_community_labels_df = pd.DataFrame(comparative_community_labels)
comparative_community_labels_transposed = comparative_community_labels_df.to_dict('index')

# Copy the community labels onto the matching nodes of the networkx graph object
for _node in nx_hg.nodes():
    _node_attributes = nx_hg.node[_node]
    _node_attributes['label'] = _node

    try:
        _node_memberships = comparative_community_labels_transposed[_node]
    except KeyError: # Concerning that some labels aren't present, but skip them for now
        print("Error in assigning \"{0}\" to a community.".format(_node))
        continue

    for (label,membership) in _node_memberships.items():
        _node_attributes[label] = int(membership)

# Save the labeled graph to disk so it can be visualized in Gephi
communities_graphml_file = 'hyperlink_communities_{0}.graphml'.format(page_title.replace(' ','_'))
nx.write_graphml(nx_hg,communities_graphml_file)
SCRAPS


def translation_getter(page_title, lang='de', target_lang=None):
    """Fetch the interlanguage links of an English Wikipedia article.

    Args:
        page_title: Title of the article on en.wikipedia.org.
        lang: Accepted so existing calls keep working, but currently unused:
            the request always goes to the English Wikipedia.
            NOTE(review): confirm whether this was meant to select the wiki.
        target_lang: Optional language code (for example ``'de'``). When given,
            only the link for that language is returned.

    Returns:
        A dict mapping language code to the article title in that language.
        Empty when the API response lists no language links for the page.
    """
    _T = "https://en.wikipedia.org/w/api.php"
    _params = {
        'action': 'query',
        'format': 'json',
        'prop': 'langlinks',
        'titles': page_title,
        'llprop': 'autonym|langname',
        'lllimit': 500,
    }

    # Passing params lets requests URL-encode titles containing '&', '?', accents, etc.
    req = requests.get(_T, params=_params)

    json_string = json.loads(req.text)

    # The response is keyed by page ID; a single title was requested, so take the first
    _pageID = list(json_string['query']['pages'].keys())[0]

    # Fall back to an empty list when the page entry carries no 'langlinks' key
    _translation_list = json_string['query']['pages'][_pageID].get('langlinks', [])

    _translation_dict = dict()

    for translate in _translation_list:
        # Each entry stores its language code under 'lang' and the title under '*'
        translate_lang = translate['lang']
        if target_lang is not None and translate_lang != target_lang:
            continue
        _translation_dict[translate_lang] = translate['*']

    return _translation_dict



# Look up the language links for the article being analyzed
translation_getter(page_title=page_title)

# NOTE(review): german_outlinks and the initial value of lang are not defined in this
# part of the notebook -- confirm they exist before running this scrap cell.
translate_links_en_dict=dict()

for lang,title in german_outlinks(page_title,lang).items():
    translate_links_en=german_outlinks(page_title=title,lang='en')

    # Store the result under the language being processed; a constant 'en' key would
    # be overwritten on every iteration and keep only the last result
    translate_links_en_dict[lang]=translate_links_en
    
    
    
#check language links for translation in other pages
#def outlink_translater(page_title,lang,page_outlinks):
    
translated_links_dict = dict()
    
    for lang,title,outlinks in _langlink_AllList_dict.items():
        translatedLinks=lan