Import the required modules and explore their usage

import re
import json
import mwapi

Pandas is an extensively used data manipulation tool built on the NumPy package. Its key data structure, the DataFrame, lets you store and manipulate tabular data in rows of observations and columns of variables.

import pandas as pd

Seaborn and Matplotlib are imported for visualization: analysis results are often communicated better graphically, because infographics have a quicker and stronger impact than paragraphs of text describing the same results.

import seaborn as sn
import matplotlib as mtlb
# Open an API session against English Wikipedia, identifying this client
# through the user agent string.
session = mwapi.Session(
    host='https://en.wikipedia.org',
    user_agent='Miriiyala Pujitha Jaji',
)

# articles translated from English -> Spanish
parameters = {
    'action': 'query',
    'format': 'json',
    'list': 'cxpublishedtranslations',
    'from': 'en',
    'to': 'es',
    'limit': 500,
    'offset': 200,
}

# NOTE(review): the dict is passed positionally here; mwapi's examples pass
# query parameters as keyword arguments -- confirm this form is intended.
res = session.get(parameters)

# Peek at the first translation record to see which fields come back.
res['result']['translations'][:1]
[{'translationId': '36063',
  'sourceTitle': 'Saki Nakajima (singer)',
  'targetTitle': 'Saki Nakajima (cantante)',
  'sourceLanguage': 'en',
  'sourceRevisionId': '0',
  'targetRevisionId': '0',
  'targetLanguage': 'es',
  'sourceURL': '//en.wikipedia.org/wiki/Saki Nakajima (singer)',
  'targetURL': '//es.wikipedia.org/wiki/Saki Nakajima (cantante)',
  'publishedDate': '20150417174007',
  'stats': {'any': 0.080306789984209,
   'human': 0.080306789984209,
   'mt': 0,
   'mtSectionsCount': 0}}]
# Tabulate the translation records returned by the query, then preview the
# first ten rows.
Data = pd.DataFrame(data=res['result']['translations'])
Data.head(n=10)
publishedDate sourceLanguage sourceRevisionId sourceTitle sourceURL stats targetLanguage targetRevisionId targetTitle targetURL translationId
0 20150417174007 en 0 Saki Nakajima (singer) //en.wikipedia.org/wiki/Saki Nakajima (singer) {'any': 0.080306789984209, 'human': 0.08030678... es 0 Saki Nakajima (cantante) //es.wikipedia.org/wiki/Saki Nakajima (cantante) 36063
1 20150417180504 en 0 Bad Hair //en.wikipedia.org/wiki/Bad Hair {'any': 0.81668946648427, 'human': 0.604651162... es 0 Pelo Malo //es.wikipedia.org/wiki/Pelo Malo 36066
2 20150417185854 en 0 The Color of Fame //en.wikipedia.org/wiki/The Color of Fame {'any': 0.75752212389381, 'human': 0.685840707... es 0 El tinte de la fama //es.wikipedia.org/wiki/El tinte de la fama 36077
3 20150418042642 en 0 XHJMA-TV //en.wikipedia.org/wiki/XHJMA-TV {'any': 1.0625467231498, 'human': 0.8786444056... es 0 XHJMA-TV //es.wikipedia.org/wiki/XHJMA-TV 36111
4 20150418234617 en 0 Steelo Brim //en.wikipedia.org/wiki/Steelo Brim {'any': 0.98945660989457, 'human': 0.655312246... es 0 Steelo Brim //es.wikipedia.org/wiki/Steelo Brim 36138
5 20150418164417 en 0 Rómulo Pico Adobe //en.wikipedia.org/wiki/Rómulo Pico Adobe {'any': 1.0345241669168, 'human': 0.8682077454... es 0 Rómulo Pico Adobe //es.wikipedia.org/wiki/Rómulo Pico Adobe 36143
6 20150418174259 en 0 Hedwig of Kalisz //en.wikipedia.org/wiki/Hedwig of Kalisz {'any': 1.027838033261, 'human': 0.95263919016... es 0 Eduviges de Kalisz //es.wikipedia.org/wiki/Eduviges de Kalisz 36151
7 20150418193258 en 0 Citadel LLC //en.wikipedia.org/wiki/Citadel LLC {'any': 0.41940928270042, 'human': 0, 'mt': 0.... es 0 Citadel LLC //es.wikipedia.org/wiki/Citadel LLC 36161
8 20150418213840 en 0 Luis María Pérez de Onraíta //en.wikipedia.org/wiki/Luis María Pérez de On... {'any': 1.0336448598131, 'human': 0.5327102803... es 0 Luis María Pérez de Onraíta //es.wikipedia.org/wiki/Luis María Pérez de On... 36172
9 20150418215144 en 0 Stephan G. Stephansson //en.wikipedia.org/wiki/Stephan G. Stephansson {'any': 0.72252747252747, 'human': 0.714972527... es 0 Stephan G. Stephansson //es.wikipedia.org/wiki/Stephan G. Stephansson 36173
# Dimensions of the table as (rows, columns).
Data.shape
(500, 11)
# Inspect the 'stats' value of the row with index label 1, printing both its
# string form and the object itself.
a = Data['stats'][1]
c = str(a)
print(c)
{'any': 0.81668946648427, 'human': 0.6046511627907, 'mt': 0.21203830369357, 'mtSectionsCount': 4}
print(a)
{'any': 0.81668946648427, 'human': 0.6046511627907, 'mt': 0.21203830369357, 'mtSectionsCount': 4}
"stats is a python dictionary"
'stats is a python dictionary'
# Presumably left disabled because `a` is already a dict (see the note above),
# whereas json.loads expects JSON text -- confirm before re-enabling.
#b = json.loads(a)
# Print a summary of the frame: index range, per-column non-null counts and
# dtypes, and memory usage.
Data.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 500 entries, 0 to 499
Data columns (total 11 columns):
publishedDate       500 non-null object
sourceLanguage      500 non-null object
sourceRevisionId    500 non-null object
sourceTitle         500 non-null object
sourceURL           500 non-null object
stats               500 non-null object
targetLanguage      500 non-null object
targetRevisionId    500 non-null object
targetTitle         500 non-null object
targetURL           500 non-null object
translationId       500 non-null object
dtypes: object(11)
memory usage: 43.0+ KB
# Count the distinct source URLs; a result equal to the row count means no
# source URL repeats in this batch.
Data['sourceURL'].nunique()
500
# Select the row(s) whose source title is exactly 'Bad Hair'.
Data[Data['sourceTitle'] == 'Bad Hair']
publishedDate sourceLanguage sourceRevisionId sourceTitle sourceURL stats targetLanguage targetRevisionId targetTitle targetURL translationId
1 20150417180504 en 0 Bad Hair //en.wikipedia.org/wiki/Bad Hair {'any': 0.81668946648427, 'human': 0.604651162... es 0 Pelo Malo //es.wikipedia.org/wiki/Pelo Malo 36066