# Imports
import gzip
import json
import re

import mwapi
import pandas as pd
# mwapi session pointed at English Wikipedia; the API queries in this
# notebook are sent through it with the user agent given below.
session = mwapi.Session(
    host="https://en.wikipedia.org",
    user_agent="mwapi (python) -- outreachy content translation",
)

# Published Content Translation articles translated from English
# ('from': 'en') into Arabic ('to': 'ar').
# NOTE(review): 'limit' is presumably the page size and 'offset' the position
# in the full result list where this page starts -- confirm against the API docs.
parameters = {
    'action': 'query',
    'format': 'json',
    'list': 'cxpublishedtranslations',
    'from': 'en',
    'to': 'ar',
    'limit': 500,
    'offset': 10000,
}
res = session.get(parameters)
# Preview the first ten translation records of the response.
res['result']['translations'][:10]
[{'translationId': '471104',
  'sourceTitle': 'Bristol Type 143',
  'targetTitle': 'بريستول طراز 143',
  'sourceLanguage': 'en',
  'sourceRevisionId': '817866765',
  'targetRevisionId': '28507961',
  'targetLanguage': 'ar',
  'sourceURL': '//en.wikipedia.org/wiki/Bristol Type 143',
  'targetURL': '//ar.wikipedia.org/wiki/بريستول طراز 143',
  'publishedDate': '20180507040239',
  'stats': {'any': 0.060072815533981,
   'human': 0.051577669902913,
   'mt': 0.008495145631068,
   'mtSectionsCount': 3}},
 {'translationId': '471106',
  'sourceTitle': 'Boeing Model 95',
  'targetTitle': 'بوينغ طراز 95',
  'sourceLanguage': 'en',
  'sourceRevisionId': '822001321',
  'targetRevisionId': '28508130',
  'targetLanguage': 'ar',
  'sourceURL': '//en.wikipedia.org/wiki/Boeing Model 95',
  'targetURL': '//ar.wikipedia.org/wiki/بوينغ طراز 95',
  'publishedDate': '20180605032507',
  'stats': {'any': 0.2223885064352,
   'human': 0.094582460341215,
   'mt': 0.12780604609398,
   'mtSectionsCount': 9}},
 {'translationId': '471110',
  'sourceTitle': 'Boeing Model 81',
  'targetTitle': 'بوينغ طراز 81',
  'sourceLanguage': 'en',
  'sourceRevisionId': '806818773',
  'targetRevisionId': '28508233',
  'targetLanguage': 'ar',
  'sourceURL': '//en.wikipedia.org/wiki/Boeing Model 81',
  'targetURL': '//ar.wikipedia.org/wiki/بوينغ طراز 81',
  'publishedDate': '20180507042651',
  'stats': {'any': 0.21227929373997,
   'human': 0.20947030497592,
   'mt': 0.0028089887640449,
   'mtSectionsCount': 1}},
 {'translationId': '471114',
  'sourceTitle': 'Boeing XP-7',
  'targetTitle': 'بوينغ أكس بي-7',
  'sourceLanguage': 'en',
  'sourceRevisionId': '782952416',
  'targetRevisionId': '28508242',
  'targetLanguage': 'ar',
  'sourceURL': '//en.wikipedia.org/wiki/Boeing XP-7',
  'targetURL': '//ar.wikipedia.org/wiki/بوينغ أكس بي-7',
  'publishedDate': '20180605032505',
  'stats': {'any': 0.19368421052632,
   'human': 0.058421052631579,
   'mt': 0.13526315789474,
   'mtSectionsCount': 6}},
 {'translationId': '471116',
  'sourceTitle': 'Boeing 929',
  'targetTitle': 'بوينغ 929',
  'sourceLanguage': 'en',
  'sourceRevisionId': '832530583',
  'targetRevisionId': '28508290',
  'targetLanguage': 'ar',
  'sourceURL': '//en.wikipedia.org/wiki/Boeing 929',
  'targetURL': '//ar.wikipedia.org/wiki/بوينغ 929',
  'publishedDate': '20180605032503',
  'stats': {'any': 0.40936421882701,
   'human': 0.39339576145885,
   'mt': 0.015968457368162,
   'mtSectionsCount': 8}},
 {'translationId': '471126',
  'sourceTitle': 'Boeing Model 203',
  'targetTitle': 'بوينغ طراز 203',
  'sourceLanguage': 'en',
  'sourceRevisionId': '806818822',
  'targetRevisionId': '28508360',
  'targetLanguage': 'ar',
  'sourceURL': '//en.wikipedia.org/wiki/Boeing Model 203',
  'targetURL': '//ar.wikipedia.org/wiki/بوينغ طراز 203',
  'publishedDate': '20180605032500',
  'stats': {'any': 0.089250297500992,
   'human': 0.061880206267354,
   'mt': 0.027370091233637,
   'mtSectionsCount': 4}},
 {'translationId': '471180',
  'sourceTitle': 'Khash County',
  'targetTitle': 'مستخدم:أبو هشام السوعان/م',
  'sourceLanguage': 'en',
  'sourceRevisionId': '778801526',
  'targetRevisionId': '28509114',
  'targetLanguage': 'ar',
  'sourceURL': '//en.wikipedia.org/wiki/Khash County',
  'targetURL': '//ar.wikipedia.org/wiki/مستخدم:أبو هشام السوعان/مسودة',
  'publishedDate': '20180507084220',
  'stats': {'any': 0.9297385620915,
   'human': 0.74509803921569,
   'mt': 0.18464052287582,
   'mtSectionsCount': 2}},
 {'translationId': '471186',
  'sourceTitle': 'Altab Ali',
  'targetTitle': 'أطلب علي',
  'sourceLanguage': 'en',
  'sourceRevisionId': '839161567',
  'targetRevisionId': '28509190',
  'targetLanguage': 'ar',
  'sourceURL': '//en.wikipedia.org/wiki/Altab Ali',
  'targetURL': '//ar.wikipedia.org/wiki/أطلب علي',
  'publishedDate': '20180507090033',
  'stats': {'any': 0.12379853902345,
   'human': 0.12290144816096,
   'mt': 0.00089709086248879,
   'mtSectionsCount': 1}},
 {'translationId': '471210',
  'sourceTitle': 'United Arab Emirates takeover of Socotra',
  'targetTitle': 'استيلاء الإمارات على سقطرى',
  'sourceLanguage': 'en',
  'sourceRevisionId': '840018869',
  'targetRevisionId': '28509443',
  'targetLanguage': 'ar',
  'sourceURL': '//en.wikipedia.org/wiki/United Arab Emirates takeover of Socotra',
  'targetURL': '//ar.wikipedia.org/wiki/استيلاء الإمارات على سقطرى',
  'publishedDate': '20180507100129',
  'stats': {'any': 0.3146913937989,
   'human': 0.3146913937989,
   'mt': 0,
   'mtSectionsCount': 0}},
 {'translationId': '471277',
  'sourceTitle': 'Macaroni',
  'targetTitle': 'ماكاروني',
  'sourceLanguage': 'en',
  'sourceRevisionId': '838716372',
  'targetRevisionId': '28510927',
  'targetLanguage': 'ar',
  'sourceURL': '//en.wikipedia.org/wiki/Macaroni',
  'targetURL': '//ar.wikipedia.org/wiki/ماكاروني',
  'publishedDate': '20180507150317',
  'stats': {'any': 0.9611886976291,
   'human': 0.94462487820721,
   'mt': 0.01656381942189,
   'mtSectionsCount': 4}}]
# Put the translation records returned by the API into a DataFrame
# (one row per record) and preview the first ten rows.
translation_records = res['result']['translations']
df = pd.DataFrame(translation_records)
df.head(10)
publishedDate sourceLanguage sourceRevisionId sourceTitle sourceURL stats targetLanguage targetRevisionId targetTitle targetURL translationId
0 20180507040239 en 817866765 Bristol Type 143 //en.wikipedia.org/wiki/Bristol Type 143 {'any': 0.060072815533981, 'human': 0.05157766... ar 28507961 بريستول طراز 143 //ar.wikipedia.org/wiki/بريستول طراز 143 471104
1 20180605032507 en 822001321 Boeing Model 95 //en.wikipedia.org/wiki/Boeing Model 95 {'any': 0.2223885064352, 'human': 0.0945824603... ar 28508130 بوينغ طراز 95 //ar.wikipedia.org/wiki/بوينغ طراز 95 471106
2 20180507042651 en 806818773 Boeing Model 81 //en.wikipedia.org/wiki/Boeing Model 81 {'any': 0.21227929373997, 'human': 0.209470304... ar 28508233 بوينغ طراز 81 //ar.wikipedia.org/wiki/بوينغ طراز 81 471110
3 20180605032505 en 782952416 Boeing XP-7 //en.wikipedia.org/wiki/Boeing XP-7 {'any': 0.19368421052632, 'human': 0.058421052... ar 28508242 بوينغ أكس بي-7 //ar.wikipedia.org/wiki/بوينغ أكس بي-7 471114
4 20180605032503 en 832530583 Boeing 929 //en.wikipedia.org/wiki/Boeing 929 {'any': 0.40936421882701, 'human': 0.393395761... ar 28508290 بوينغ 929 //ar.wikipedia.org/wiki/بوينغ 929 471116
5 20180605032500 en 806818822 Boeing Model 203 //en.wikipedia.org/wiki/Boeing Model 203 {'any': 0.089250297500992, 'human': 0.06188020... ar 28508360 بوينغ طراز 203 //ar.wikipedia.org/wiki/بوينغ طراز 203 471126
6 20180507084220 en 778801526 Khash County //en.wikipedia.org/wiki/Khash County {'any': 0.9297385620915, 'human': 0.7450980392... ar 28509114 مستخدم:أبو هشام السوعان/م //ar.wikipedia.org/wiki/مستخدم:أبو هشام السوعا... 471180
7 20180507090033 en 839161567 Altab Ali //en.wikipedia.org/wiki/Altab Ali {'any': 0.12379853902345, 'human': 0.122901448... ar 28509190 أطلب علي //ar.wikipedia.org/wiki/أطلب علي 471186
8 20180507100129 en 840018869 United Arab Emirates takeover of Socotra //en.wikipedia.org/wiki/United Arab Emirates t... {'any': 0.3146913937989, 'human': 0.3146913937... ar 28509443 استيلاء الإمارات على سقطرى //ar.wikipedia.org/wiki/استيلاء الإمارات على س... 471210
9 20180507150317 en 838716372 Macaroni //en.wikipedia.org/wiki/Macaroni {'any': 0.9611886976291, 'human': 0.9446248782... ar 28510927 ماكاروني //ar.wikipedia.org/wiki/ماكاروني 471277

From the data above, I can make a few observations:

  • The translated titles of some articles do not convey the true meaning of the article, for example: Khash County, Altab Ali, and Perdita Weeks. I think this is because a literal translation is not useful here; for Arabic, the title should be translated by meaning.

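  • In most of the articles, human translation was used more than machine translation (among the ten rows shown, Boeing Model 95 and Boeing XP-7 are the exceptions, with `mt` higher than `human`).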

  • More than six articles have low `stats` values, which tells us that only a small part of each of these articles was translated into Arabic.
# Show every translation record whose English source title is the Socotra article.
socotra_rows = df['sourceTitle'] == 'United Arab Emirates takeover of Socotra'
df[socotra_rows]
publishedDate sourceLanguage sourceRevisionId sourceTitle sourceURL stats targetLanguage targetRevisionId targetTitle targetURL translationId
8 20180507100129 en 840018869 United Arab Emirates takeover of Socotra //en.wikipedia.org/wiki/United Arab Emirates t... {'any': 0.3146913937989, 'human': 0.3146913937... ar 28509443 استيلاء الإمارات على سقطرى //ar.wikipedia.org/wiki/استيلاء الإمارات على س... 471210
419 20180523175900 en 842026648 United Arab Emirates takeover of Socotra //en.wikipedia.org/wiki/United Arab Emirates t... {'any': 0.83350550964187, 'human': 0.306129476... ar 28749608 مستخدم:علاء فحصي/ملعب5 //ar.wikipedia.org/wiki/مستخدم:علاء فحصي/ملعب5 479112

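There are two published dates here for the same article. I think this is because the original article was modified between the two translations (the source revision IDs differ), and there is more translated content in the later version (`any` is about 0.83 versus 0.31, while the `human` share is about the same).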

# Show every translation record whose English source title is "Boeing Model 203".
boeing_203_rows = df['sourceTitle'] == 'Boeing Model 203'
df[boeing_203_rows]
publishedDate sourceLanguage sourceRevisionId sourceTitle sourceURL stats targetLanguage targetRevisionId targetTitle targetURL translationId
5 20180605032500 en 806818822 Boeing Model 203 //en.wikipedia.org/wiki/Boeing Model 203 {'any': 0.089250297500992, 'human': 0.06188020... ar 28508360 بوينغ طراز 203 //ar.wikipedia.org/wiki/بوينغ طراز 203 471126

The amount of human translation for some articles is very small, as in the Boeing Model 203 article. The Arabic translation of this article covers only the general description.

# Show every translation record whose English source title is "Altab Ali".
altab_ali_rows = df['sourceTitle'] == 'Altab Ali'
df[altab_ali_rows]
publishedDate sourceLanguage sourceRevisionId sourceTitle sourceURL stats targetLanguage targetRevisionId targetTitle targetURL translationId
7 20180507090033 en 839161567 Altab Ali //en.wikipedia.org/wiki/Altab Ali {'any': 0.12379853902345, 'human': 0.122901448... ar 28509190 أطلب علي //ar.wikipedia.org/wiki/أطلب علي 471186

Likewise, for the article named Altab Ali (this is not the title of the article; the article is about British Bangladeshis, and Altab Ali is one of its subjects), the amount of human translation is very small. The Arabic translation of this article covers only the general description.

# Pick row 7 of the translations table and keep the fields the later cells
# use: both titles, the translation ID and the source revision ID.
selected_row = df.loc[7]
gboost_source_title = selected_row['sourceTitle']
gboost_target_title = selected_row['targetTitle']
gboost_tid = selected_row['translationId']
gboost_source_revid = selected_row['sourceRevisionId']
# Echo the selection, one value per line.
for selected_value in (
    gboost_source_title,
    gboost_target_title,
    gboost_tid,
    gboost_source_revid,
):
    print(selected_value)
Altab Ali
أطلب علي
471186
839161567
# Read the gzip-compressed parallel corpus as text. The encoding is given
# explicitly because the corpus contains Arabic and the platform default
# encoding is not guaranteed to be UTF-8. Each line is stripped and the pieces
# are glued with a single join instead of repeated string concatenation.
with gzip.open('cx-corpora.en2ar.text.json.gz', 'rt', encoding='utf-8') as fin:
    json_str = ''.join(line.strip() for line in fin)
# remove repetitive commas
# NOTE(review): this also collapses runs of commas that occur inside string
# values, not only between JSON elements -- confirm that is acceptable.
json_str = re.sub(r',{2,}', ',', json_str)
parallel_corpus = json.loads(json_str)
# id: a string composed of <translationID>/<sectionID>
# <translationID> is in the data accessed from the first API
# each section in the article that was translated gets its own <sectionID>
# mt indicates whether machine translation was used in the interface
print("Descriptive statistics:")
print("{0} translated sections.".format(len(parallel_corpus)))
# Distinct translation IDs = number of translated articles in the corpus.
translation_ids = {sec['id'].split('/')[0] for sec in parallel_corpus}
print("{0} translated articles.".format(len(translation_ids)))
# Count sections per machine-translation engine; sections whose 'mt' entry is
# empty or None are counted under 'no-mt'.
mt_counts = {}
for sec in parallel_corpus:
    service = sec['mt']['engine'] if sec['mt'] else 'no-mt'
    mt_counts[service] = mt_counts.get(service, 0) + 1
print("Machine translation services used: {0}".format(mt_counts))
print("\nExample:\n", parallel_corpus[0])
Descriptive statistics:
135640 translated sections.
12138 translated articles.
Machine translation services used: {'no-mt': 97454, 'Yandex': 35769, 'Google': 1335, 'source-mt': 2, 'scratch': 1080}

Example:
 {'id': '48947/mwAQ', 'sourceLanguage': 'en', 'targetLanguage': 'ar', 'source': {'content': 'Picture shows that Twitter is available for free via Ncell.'}, 'mt': None, 'target': {'content': ''}}
# Translated sections for the article selected in gboost_tid ("Altab Ali").
# Corpus ids look like '<translationID>/<sectionID>', so the text before the
# first '/' is compared with the selected translation ID.
matching_sections = [
    sec for sec in parallel_corpus
    if sec['id'].partition('/')[0] == gboost_tid
]
for translated_section in matching_sections:
    print(translated_section, '\n')
{'id': '471186/ea2b8a878841df5955c78edc6cae47', 'sourceLanguage': 'en', 'targetLanguage': 'ar', 'source': {'content': 'Background'}, 'mt': {'engine': 'Yandex', 'content': 'الخلفية'}, 'target': {'content': 'الضحية'}} 

{'id': '471186/fcd6fea72de7e6c09ca2567674befd', 'sourceLanguage': 'en', 'targetLanguage': 'ar', 'source': {'content': 'Death'}, 'mt': {'engine': 'Yandex', 'content': 'الموت'}, 'target': {'content': 'وفاته'}} 

{'id': '471186/mwBw', 'sourceLanguage': 'en', 'targetLanguage': 'ar', 'source': {'content': "Altab Ali (1953 – 4 May 1978) was a Bangladeshi textile worker who was murdered by three teenagers on 4 May 1978 in a racist attack as he walked home after work. His murder took place at St. Mary's Gardens by St Mary's Churchyard, and near the corner of Adler Street and Whitechapel Road in London. It provoked the mass mobilisation of the Bengali community locally and came to represent the self-organisation of the community. Ali became a symbol of resistance against racism and is associated with the struggle for human rights in defence of British Bangladeshis. The churchyard he was murdered in was later renamed Altab Ali Park in his memory."}, 'mt': {'engine': 'Yandex', 'content': 'Altab لعبة البوكر علي (1953 – 4 أيار / مايو 1978) كان البنغلاديشية النسيج العامل الذي اغتيل قبل ثلاثة مراهقين في 4 أيار / مايو 1978 في العنصرية الهجوم بينما كان يسير إلى المنزل بعد العمل. الجريمة وقعت في سانت ماري حدائق سانت ماري الكنيسة ، بالقرب من أدلر شارع وايت تشابل الطريق في لندن. وأثار القداس حشد من البنغالية المجتمع محليا و جاء لتمثيل الذات منظمة من المجتمع. علي أصبحت رمزا للمقاومة ضد العنصرية و يترافق مع النضال من أجل حقوق الإنسان في الدفاع البريطانية بنجلادش. الكنيسة اغتيل في وقت لاحق أعيدت تسميته altab لعبة البوكر علي بارك في ذاكرته.'}, 'target': {'content': 'أطلب علي\xa0(1953 – 4 أيار / مايو 1978) كان عامل نسيج من\xa0بنغلاديش\xa0وتم اغتياله من قبل ثلاثة مراهقين في 4 أيار / مايو 1978 خلال\xa0هجوم عنصرية بينما كان يسير إلى منزله بعد العمل. الجريمة وقعت بالقرب من شارع أدلر\xa0وايت تشابل\xa0في لندن. تمت إعادة تسمية حديقة\xa0الكنيسة التي اغتيل فيها لاحقا لإسم حديقة أطلب علي.'}} 

{'id': '471186/mwEQ', 'sourceLanguage': 'en', 'targetLanguage': 'ar', 'source': {'content': 'Ali was a 25-year-old mechanic who had recently arrived[1] from Sylhet District, Sylhet Division,[2] Bangladesh in 1969,[3] and lived in the East End of London.[4]'}, 'mt': {'engine': 'Yandex', 'content': 'علي كان يبلغ من العمر 25 عاما ميكانيكي الذين وصلوا مؤخرا[1] من سيلهيت District, سيلهيت شعبة,[2] بنغلاديش في عام 1969 ، [3] و عاش في الطرف الشرقي من لندن.[4]'}, 'target': {'content': 'كان علي يبلغ من العمر 25 عاما وكان ميكانيكيا قبل وصلوه إلى لندن[1] من\xa0سيلهيت,[2] بنغلاديش في عام 1969 ، [3] و عاش في الطرف الشرقي من لندن.[4]'}} 

{'id': '471186/mwFw', 'sourceLanguage': 'en', 'targetLanguage': 'ar', 'source': {'content': "On 4 May 1978, on local borough election night and against a background of agitation by National Front,[1] Ali was making his way to the bus stop after finishing work as a garment worker[5] at a sweatshop in Brick Lane.[1] He was chased along Brick Lane and stabbed to death near Aldgate Station[6] in a racially motivated attack[7] at St. Mary's Gardens, the site of the church of St Mary Matfelon[8] by St Mary's Churchyard, and the corner of Adler Street and Whitechapel Road.[4][9] Ali was stabbed in the neck, then staggered a few metres before collapsing.[3]"}, 'mt': {'engine': 'Yandex', 'content': 'في 4 أيار / مايو 1978 بشأن المحلية البلدة الانتخابات الليل على خلفية التحريض من قبل الجبهة الوطنية,[1] علي كان في طريقه إلى الحافلة تتوقف بعد الانتهاء من العمل مثل عمال الملابس[5] في سخرة في بريك لين.[1] كان مطاردا على طول بريك لين و طعن حتى الموت بالقرب من Aldgate محطة[6] في بدوافع عنصرية الهجوم[7] في سانت ماري حدائق موقع كنيسة سانت ماري Matfelon[8] من سانت ماري الكنيسة ، ركن من أدلر شارع وايت تشابل الطريق.[4][9] علي طعن في الرقبة ، ثم ترنحت على بعد بضعة أمتار قبل أن ينهار.[3]'}, 'target': {'content': 'في 4 أيار / مايو 1978 كان علي في طريقه إلى موقف الحافلة بعد الانتهاء من العمل مثل سائر عمال الملابس[5] في\xa0بريك لين.[1]\xa0 وكان مطاردا على طول بريك لين ثم طعن حتى الموت بالقرب من\xa0محطة\xa0ألط كات.[6]\xa0كان الهجوم بدوافع عنصرية\xa0[7]قرب حدائق كنيسة سانت ماري ، شارع وايت تشابل.[4][9] طعن\xa0علي\xa0 في الرقبة ، ثم ترنح على بعد بضعة أمتار قبل أن ينهار.[3]'}} 

{'id': '471186/mwBQ', 'sourceLanguage': 'en', 'targetLanguage': 'ar', 'source': {'content': 'Altab AliBorn 1953 Sylhet District, Sylhet Division, East Bengal (now Bangladesh)Died (1978-05-04)4 May 1978 (aged 25) Whitechapel, Tower Hamlets, London, EnglandResting place Tower Hamlets, London, EnglandNationality Bangladeshi'}, 'mt': None, 'target': {'content': 'Altab Ali معلومات شخصية تعديل\xa0'}} 

{'id': '471186/mwRA', 'sourceLanguage': 'en', 'targetLanguage': 'ar', 'source': {'content': '1 2 3 4 Riaz, Ali (2014). "Being Bengali abroad: identity politics among the Bengali community in Britain". In Chakraborty, Mridula Nath. Being Bengali: At Home and in the World. Routledge. p.\xa0178. ISBN\xa0978-0-415-62588-3. Altab Ali, a 25-year-old mechanic, who had recently arrived in the country from Bangladesh, was murdered ... on 4 May 1978. He was returning home from his job at a sweatshop in nearby Brick Lane ... took place on election night and against a background of agitation by the racist National Front.\xa0 1 2 Tatari, Eren (2015). Muslims in British Local Government: Representing Minority Interests in Hackney, Newham, and Tower Hamlets. Brill Academic Publishers. p.\xa0116. ISBN\xa0978-9004269699.\xa0 1 2 3 Nye, Catrin; Bright, Sam (4 May 2016). "Altab Ali: The racist murder that mobilised the East End". BBC News. Retrieved 1 August 2017.\xa0 1 2 3 4 5 "Indymedia: Altab Ali". Indymedia. Retrieved 19 September 2007.\xa0 ↑ Kibria, Nazli (2011). Muslims in Motion: Islam and National Identity in the Bangladeshi Diaspora. Rutgers University Press. p.\xa086. ISBN\xa0978-0-8135-5056-5. Altab Ali was a ... garment worker who ... was making his way to a bus stop after work, on a day when local borough elections were taking place.\xa0 ↑ Glinert, Ed (2006). East End Chronicles. Routledge. ISBN\xa0978-0141017181.\xa0 1 2 3 Troyna, Barry; Carrington, Bruce (1990). Education, Racism and Reform. Taylor & Francis. p.\xa030. ISBN\xa0978-0-415-03826-3.\xa0 1 2 3 Keith, Michael (2005). After the Cosmopolitan?: Multicultural Cities and the Future of Racism. Routledge. p.\xa0144. ISBN\xa0978-0-415-34169-1.\xa0 ↑ "Bangladeshi London". Exploring 20th century London. Retrieved 1 November 2015.\xa0 1 2 3 "New park life: Whitechapel\'s Altab Ali Park". London: London Evening Standard. 16 November 2011. Retrieved 1 November 2015.\xa0 ↑ Sandhu, Sukhdev (9 October 2003). 
"Come hungry, leave edgy". 25 (19). London: London Review of Books. pp.\xa010–13. Retrieved 19 September 2007.\xa0 ↑ Panayi, Panikos (1996). Racial violence in Britain in the nineteenth and twentieth centuries. Leicester University Press. p.\xa0200. ISBN\xa0978-0-7185-1397-9.\xa0 ↑ Leech, Kenneth (1988). Struggle in Babylon. Sheldon. p.\xa086. ISBN\xa0978-0-85969-577-0.\xa0 ↑ Bowling, Benjamin (1988). Violent Racism: Victimization, Policing, and Social Context. Clarendon Press. p.\xa048. ISBN\xa0978-0-19-826252-7.\xa0 1 2 Gilman, Sander L. (2015). Judaism, Christianity, and Islam: Collaboration and Conflict in the Age of Diaspora. Hong Kong University Press. p.\xa0171. ISBN\xa0978-9888208272.\xa0 ↑ German, Lindsey; Rees, John (1988). A People\'s History of London. Verso Books. p.\xa0248. ISBN\xa0978-1844678556.\xa0 ↑ Clarke, Hilary (23 May 1998). "Altab Ali: The racist murder that mobilised the East End". The Independent. Retrieved 1 August 2017.\xa0 1 2 Brooke, Mike (6 May 2015). "Altab Ali\'s racist murder in Whitechapel remembered 37 years on". London: East London Advertiser. Retrieved 1 November 2015.\xa0 ↑ Barnett, Adam (7 May 2014). "Anti-racists and Bangladeshis mark Altab Ali Day in Whitechapel". London: East London Advertiser. Retrieved 1 November 2015.\xa0'}, 'mt': None, 'target': {'content': '1 2 3 4 Riaz، Ali (2014). "Being Bengali abroad: identity politics among the Bengali community in Britain". Being Bengali: At Home and in the World. Routledge. صفحة\xa0178. ISBN\xa0978-0-415-62588-3. Altab Ali, a 25-year-old mechanic, who had recently arrived in the country from Bangladesh, was murdered ... on 4 May 1978. He was returning home from his job at a sweatshop in nearby Brick Lane ... took place on election night and against a background of agitation by the racist National Front.\xa0 1 2 Tatari، Eren (2015). Muslims in British Local Government: Representing Minority Interests in Hackney, Newham, and Tower Hamlets. Brill Academic Publishers. صفحة\xa0116. 
ISBN\xa0978-9004269699.\xa0 1 2 3 Nye، Catrin؛ Bright، Sam (4 May 2016). "Altab Ali: The racist murder that mobilised the East End". BBC News. BBC News. اطلع عليه بتاريخ 01 أغسطس 2017.\xa0 1 2 3 4 5 "Indymedia: Altab Ali". Indymedia. اطلع عليه بتاريخ 19 سبتمبر 2007.\xa0 ↑ Kibria، Nazli (2011). Muslims in Motion: Islam and National Identity in the Bangladeshi Diaspora. Rutgers University Press. صفحة\xa086. ISBN\xa0978-0-8135-5056-5. Altab Ali was a ... garment worker who ... was making his way to a bus stop after work, on a day when local borough elections were taking place.\xa0 ↑ Glinert، Ed (2006). East End Chronicles. Routledge. ISBN\xa0978-0141017181.\xa0 1 2 3 Troyna, Barry; Carrington, Bruce (1990). Education, Racism and Reform. Taylor & Francis. p.\xa030. ISBN\xa0978-0-415-03826-3.\xa0 1 2 3 Keith, Michael (2005). After the Cosmopolitan?: Multicultural Cities and the Future of Racism. Routledge. p.\xa0144. ISBN\xa0978-0-415-34169-1.\xa0 ↑ "Bangladeshi London". Exploring 20th century London. Retrieved 1 November 2015.\xa0 1 2 3 "New park life: Whitechapel\'s Altab Ali Park". London: London Evening Standard. 16 November 2011. Retrieved 1 November 2015.\xa0 ↑ Sandhu, Sukhdev (9 October 2003). "Come hungry, leave edgy". 25 (19). London: London Review of Books. pp.\xa010–13. Retrieved 19 September 2007.\xa0 ↑ Panayi, Panikos (1996). Racial violence in Britain in the nineteenth and twentieth centuries. Leicester University Press. p.\xa0200. ISBN\xa0978-0-7185-1397-9.\xa0 ↑ Leech, Kenneth (1988). Struggle in Babylon. Sheldon. p.\xa086. ISBN\xa0978-0-85969-577-0.\xa0 ↑ Bowling, Benjamin (1988). Violent Racism: Victimization, Policing, and Social Context. Clarendon Press. p.\xa048. ISBN\xa0978-0-19-826252-7.\xa0 1 2 Gilman, Sander L. (2015). Judaism, Christianity, and Islam: Collaboration and Conflict in the Age of Diaspora. Hong Kong University Press. p.\xa0171. ISBN\xa0978-9888208272.\xa0 ↑ German, Lindsey; Rees, John (1988). A People\'s History of London. 
Verso Books. p.\xa0248. ISBN\xa0978-1844678556.\xa0 ↑ Clarke, Hilary (23 May 1998). "Altab Ali: The racist murder that mobilised the East End". The Independent. Retrieved 1 August 2017.\xa0 1 2 Brooke, Mike (6 May 2015). "Altab Ali\'s racist murder in Whitechapel remembered 37 years on". London: East London Advertiser. Retrieved 1 November 2015.\xa0 ↑ Barnett, Adam (7 May 2014). "Anti-racists and Bangladeshis mark Altab Ali Day in Whitechapel". London: East London Advertiser. Retrieved 1 November 2015.\xa0'}} 

{'id': '471186/mwcx-source-title', 'sourceLanguage': 'en', 'targetLanguage': 'ar', 'source': {'content': 'Altab Ali'}, 'mt': None, 'target': {'content': 'أطلب علي'}} 

Arabic translation accuracy is low in some sections.

# Page revision history for the selected source article (Altab Ali):
# starts at the revision that was translated (rvstartid) and walks towards
# newer revisions (rvdir), asking for at most 100 entries with the
# timestamp, user and edit comment of each.
revision_parameters = dict(
    action="query",
    prop="revisions",
    titles=gboost_source_title,
    rvprop="timestamp|user|comment",
    rvlimit=100,
    rvstartid=gboost_source_revid,
    rvslots="main",
    formatversion="2",
    format="json",
    rvdir="newer",
)
gboost_revisions = session.get(revision_parameters)
gboost_revisions
{'batchcomplete': True,
 'query': {'pages': [{'pageid': 18380322,
    'ns': 0,
    'title': 'Altab Ali',
    'revisions': [{'user': 'Benwwk',
      'timestamp': '2018-05-01T17:34:44Z',
      'comment': 'Death (/murder) date in information box was wrong, and inconsistent with the rest of the article, due to misformatting from U.S. dating system.'},
     {'user': '2A00:23C5:D10E:A400:7479:70EB:DAB5:DD9C',
      'anon': True,
      'timestamp': '2018-05-16T20:19:55Z',
      'comment': '/* Legacy */'},
     {'user': 'Tanbircdq',
      'timestamp': '2018-06-01T01:00:20Z',
      'comment': 'Further content & sources added'},
     {'user': 'Tanbircdq',
      'timestamp': '2018-06-04T10:17:34Z',
      'comment': '+[[Category:Altab Ali]]'},
     {'user': 'KolbertBot',
      'timestamp': '2018-07-20T21:37:19Z',
      'comment': 'Bot: [[User:KolbertBot|HTTP→HTTPS]] (v485)'},
     {'user': 'Firstorm',
      'timestamp': '2018-08-21T22:18:47Z',
      'comment': '/* Legacy */'},
     {'user': 'Justlettersandnumbers',
      'timestamp': '2018-08-23T14:57:23Z',
      'comment': 'Reverted to revision 658757156 by [[Special:Contributions/Invadibot|Invadibot]] ([[User talk:Invadibot|talk]]): Revert to last clean version before copyvio from http://www.indymedia.org.uk/en/2007/09/381365.html. ([[WP:TW|TW]])'},
     {'user': 'Justlettersandnumbers',
      'timestamp': '2018-08-23T15:01:02Z',
      'comment': 'copyvio-revdel request'},
     {'user': 'MER-C',
      'timestamp': '2018-08-25T19:21:54Z',
      'comment': 'done'}]}]}}
# Keep the revision history in its own DataFrame so that `df` continues to
# hold the translations table, which the lookups below index by row number.
# With formatversion 2 the response lists pages under ['query']['pages'];
# a page without a 'revisions' entry yields an empty frame.
revisions_df = pd.DataFrame(
    gboost_revisions['query']['pages'][0].get('revisions', [])
)
# Select row 8 of the translations table and keep its titles, translation ID
# and source revision ID for the cells that follow.
gboost_source_title = df.loc[8, 'sourceTitle']
gboost_target_title = df.loc[8, 'targetTitle']
gboost_tid = df.loc[8, 'translationId']
gboost_source_revid = df.loc[8, 'sourceRevisionId']
print(gboost_source_title)
print(gboost_target_title)
print(gboost_tid)
print(gboost_source_revid)
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-16-f82a86710881> in <module>()
----> 1 gboost_source_title = df.loc[8, 'sourceTitle']
      2 gboost_target_title = df.loc[8, 'targetTitle']
      3 gboost_tid = df.loc[8,'translationId']
      4 gboost_source_revid = df.loc[8, 'sourceRevisionId']
      5 print(gboost_source_title)

/srv/paws/lib/python3.6/site-packages/pandas/core/indexing.py in __getitem__(self, key)
   1470             except (KeyError, IndexError):
   1471                 pass
-> 1472             return self._getitem_tuple(key)
   1473         else:
   1474             # we by definition only have the 0th axis

/srv/paws/lib/python3.6/site-packages/pandas/core/indexing.py in _getitem_tuple(self, tup)
    868     def _getitem_tuple(self, tup):
    869         try:
--> 870             return self._getitem_lowerdim(tup)
    871         except IndexingError:
    872             pass

/srv/paws/lib/python3.6/site-packages/pandas/core/indexing.py in _getitem_lowerdim(self, tup)
    996         for i, key in enumerate(tup):
    997             if is_label_like(key) or isinstance(key, tuple):
--> 998                 section = self._getitem_axis(key, axis=i)
    999 
   1000                 # we have yielded a scalar ?

/srv/paws/lib/python3.6/site-packages/pandas/core/indexing.py in _getitem_axis(self, key, axis)
   1909 
   1910         # fall thru to straight lookup
-> 1911         self._validate_key(key, axis)
   1912         return self._get_label(key, axis=axis)
   1913 

/srv/paws/lib/python3.6/site-packages/pandas/core/indexing.py in _validate_key(self, key, axis)
   1786 
   1787             try:
-> 1788                 key = self._convert_scalar_indexer(key, axis)
   1789                 if not ax.contains(key):
   1790                     error()

/srv/paws/lib/python3.6/site-packages/pandas/core/indexing.py in _convert_scalar_indexer(self, key, axis)
    259         ax = self.obj._get_axis(min(axis, self.ndim - 1))
    260         # a scalar
--> 261         return ax._convert_scalar_indexer(key, kind=self.name)
    262 
    263     def _convert_slice_indexer(self, key, axis):

/srv/paws/lib/python3.6/site-packages/pandas/core/indexes/base.py in _convert_scalar_indexer(self, key, kind)
   1663             elif kind in ['loc'] and is_integer(key):
   1664                 if not self.holds_integer():
-> 1665                     return self._invalid_indexer('label', key)
   1666 
   1667         return key

/srv/paws/lib/python3.6/site-packages/pandas/core/indexes/base.py in _invalid_indexer(self, form, key)
   1847                         "indexers [{key}] of {kind}".format(
   1848                             form=form, klass=type(self), key=key,
-> 1849                             kind=type(key)))
   1850 
   1851     def get_duplicates(self):

TypeError: cannot do label indexing on <class 'pandas.core.indexes.base.Index'> with these indexers [8] of <class 'int'>
# Translated sections for the article currently selected in gboost_tid
# (intended: "United Arab Emirates takeover of Socotra").
for translated_section in parallel_corpus:
    section_translation_id = translated_section['id'].partition('/')[0]
    if section_translation_id != gboost_tid:
        continue
    print(translated_section, '\n')
# Page revision history for the article currently selected in
# gboost_source_title (intended: "United Arab Emirates takeover of Socotra"),
# from the translated revision towards newer ones, at most 100 entries.
revision_parameters = dict(
    action="query",
    prop="revisions",
    titles=gboost_source_title,
    rvprop="timestamp|user|comment",
    rvlimit=100,
    rvstartid=gboost_source_revid,
    rvslots="main",
    formatversion="2",
    format="json",
    rvdir="newer",
)
gboost_revisions = session.get(revision_parameters)
gboost_revisions
# Rebuild the translations table from the API response held in `res`
# and preview its first ten rows.
translation_records = res['result']['translations']
df = pd.DataFrame(translation_records)
df.head(10)
# Published English -> Arabic translations again, this time with offset 5000.
parameters = {
    "action": "query",
    "format": "json",
    "list": "cxpublishedtranslations",
    "from": "en",
    "to": "ar",
    "limit": 500,
    "offset": 5000,
}
res = session.get(parameters)
# Replace the translations table with this response and preview ten rows.
translation_records = res["result"]["translations"]
df = pd.DataFrame(translation_records)
df.head(10)
# Pick row 0 of the translations table and keep its titles, translation ID
# and source revision ID for the cells that follow.
selected_row = df.loc[0]
gboost_source_title = selected_row['sourceTitle']
gboost_target_title = selected_row['targetTitle']
gboost_tid = selected_row['translationId']
gboost_source_revid = selected_row['sourceRevisionId']
# Echo the selection, one value per line.
for selected_value in (
    gboost_source_title,
    gboost_target_title,
    gboost_tid,
    gboost_source_revid,
):
    print(selected_value)
# Read the gzip-compressed parallel corpus as text. The encoding is given
# explicitly because the corpus contains Arabic and the platform default
# encoding is not guaranteed to be UTF-8. Each line is stripped and the pieces
# are glued with a single join instead of repeated string concatenation.
with gzip.open('cx-corpora.en2ar.text.json.gz', 'rt', encoding='utf-8') as fin:
    json_str = ''.join(line.strip() for line in fin)
# remove repetitive commas
# NOTE(review): this also collapses runs of commas that occur inside string
# values, not only between JSON elements -- confirm that is acceptable.
json_str = re.sub(r',{2,}', ',', json_str)
parallel_corpus = json.loads(json_str)
# id: a string composed of <translationID>/<sectionID>
# <translationID> is in the data accessed from the first API
# each section in the article that was translated gets its own <sectionID>
# mt indicates whether machine translation was used in the interface
print("Descriptive statistics:")
print("{0} translated sections.".format(len(parallel_corpus)))
# Distinct translation IDs = number of translated articles in the corpus.
translation_ids = {sec['id'].split('/')[0] for sec in parallel_corpus}
print("{0} translated articles.".format(len(translation_ids)))
# Count sections per machine-translation engine; sections whose 'mt' entry is
# empty or None are counted under 'no-mt'.
mt_counts = {}
for sec in parallel_corpus:
    service = sec['mt']['engine'] if sec['mt'] else 'no-mt'
    mt_counts[service] = mt_counts.get(service, 0) + 1
print("Machine translation services used: {0}".format(mt_counts))
print("\nExample:\n", parallel_corpus[0])
# Translated sections for the article currently selected in gboost_tid
# (per the original note: "Abdel Hamid Al_Gazzawi").
matching_sections = [
    sec for sec in parallel_corpus
    if sec['id'].partition('/')[0] == gboost_tid
]
for translated_section in matching_sections:
    print(translated_section, '\n')
# Pick row 8 of the translations table and keep its titles, translation ID
# and source revision ID for the cells that follow.
selected_row = df.loc[8]
gboost_source_title = selected_row['sourceTitle']
gboost_target_title = selected_row['targetTitle']
gboost_tid = selected_row['translationId']
gboost_source_revid = selected_row['sourceRevisionId']
# Echo the selection, one value per line.
for selected_value in (
    gboost_source_title,
    gboost_target_title,
    gboost_tid,
    gboost_source_revid,
):
    print(selected_value)
# Translated sections for the article currently selected in gboost_tid
# (per the original note: "Discovery and exploration of the Solar System").
for translated_section in parallel_corpus:
    section_translation_id = translated_section['id'].partition('/')[0]
    if section_translation_id != gboost_tid:
        continue
    print(translated_section, '\n')
# Page revision history for the article currently selected in
# gboost_source_title (per the original note: "Discovery and exploration of
# the Solar System"), from the translated revision towards newer ones,
# at most 100 entries with timestamp, user and edit comment.
revision_parameters = dict(
    action="query",
    prop="revisions",
    titles=gboost_source_title,
    rvprop="timestamp|user|comment",
    rvlimit=100,
    rvstartid=gboost_source_revid,
    rvslots="main",
    formatversion="2",
    format="json",
    rvdir="newer",
)
gboost_revisions = session.get(revision_parameters)
gboost_revisions