Calculate daily rate of app description edits and editors

# cf. http://paws-public.wmflabs.org/paws-public/User:Jtmorgan/ds4ux/paws-cheatsheet.ipynb
import os 
"""
Your db login credentials are stored in os.environ. 
DO NOT print or run os.environ, or it will expose your credentials in the Notebook
"""
import pymysql
import pandas as pd
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
from collections import defaultdict
import collections

# cf. http://paws-public.wmflabs.org/paws-public/User:EpochFail/editquality/ipython/reverted_detection_demo.ipynb :
import sys, traceback
import mwreverts.api
import mwapi
%%time

# Daily number of app edits and of distinct app editors on Wikidata.
# App edits are identified via the "mobile app edit" change tag.
conn = pymysql.connect(
    host=os.environ['MYSQL_HOST'],
    user=os.environ['MYSQL_USERNAME'],
    password=os.environ['MYSQL_PASSWORD'],
    database='wikidatawiki_p',
    charset='utf8'
)
try:
    with conn.cursor() as cur:
        cur.execute("""
    SELECT LEFT(rev_timestamp, 8) AS date, COUNT(*) AS edit_count, COUNT(DISTINCT rev_user_text) AS user_count FROM (
    SELECT rev_id, rev_timestamp, rev_user_text FROM wikidatawiki_p.revision AS rev 
    JOIN wikidatawiki_p.change_tag AS ct 
    ON rev.rev_id = ct.ct_rev_id AND ct.ct_tag = "mobile app edit") AS app_edits
    GROUP BY date
    ORDER BY date;""")
        appeditusersperday = cur.fetchall()
        # Column names as reported by the cursor (first item of each DB-API
        # description entry), i.e. the aliases used in the SELECT above.
        labels = [column[0] for column in cur.description]
finally:
    # Release the database connection even if the query fails.
    conn.close()


def convertfetchalltstodt(x):
    """Decode a bytes date value from the query (first 8 chars of rev_timestamp) into a pandas Timestamp."""
    return pd.to_datetime(x.decode())


# convert into Pandas dataframe
df = pd.DataFrame(list(appeditusersperday), columns=labels)
df['date'] = df['date'].apply(convertfetchalltstodt)
CPU times: user 52 ms, sys: 4 ms, total: 56 ms
Wall time: 46.5 s
# check result: show the last 20 rows of the per-day table
# (rows are ordered by date in the query, so these are the most recent days)
df.tail(20)
date edit_count user_count
406 2017-12-30 448 201
407 2017-12-31 651 213
408 2018-01-01 756 212
409 2018-01-02 857 226
410 2018-01-03 533 227
411 2018-01-04 538 201
412 2018-01-05 413 192
413 2018-01-06 675 206
414 2018-01-07 513 234
415 2018-01-08 389 201
416 2018-01-09 357 214
417 2018-01-10 328 177
418 2018-01-11 322 188
419 2018-01-12 302 168
420 2018-01-13 370 221
421 2018-01-14 275 194
422 2018-01-15 244 173
423 2018-01-16 395 157
424 2018-01-17 508 174
425 2018-01-18 120 83
# further check result: summary statistics of the per-day table
df.describe()
edit_count user_count
count 426.000000 426.000000
mean 290.889671 129.359155
std 230.775724 101.880392
min 1.000000 1.000000
25% 63.500000 26.000000
50% 277.500000 110.000000
75% 475.750000 228.000000
max 1089.000000 354.000000
# Plot the daily app edit/editor counts for the selected period and mark rollout milestones.
daterange = pd.date_range(start='2016-12-31', end='2017-10-17', freq='D')
fig, ax = plt.subplots(dpi=300)
in_range = df['date'].isin(daterange)
df.loc[in_range].plot(x="date", ax=ax)
ax.set_title('Wikidata description edits from the Android app') # currently all app edits are description edits


def _annotate_milestone(label, xy, xytext, shrink=0.04):
    """Add one centered, small-font annotation with a black arrow; returns the Annotation."""
    arrow = dict(facecolor='black', shrink=shrink, width=2, headwidth=10)
    return plt.annotate(label, xycoords='data', xy=xy, xytext=xytext,
                        ha='center', fontsize=7, arrowprops=arrow)


_annotate_milestone("description\nediting\navailable in\nalpha only",
                    ('2017-01-15', 45), ('2017-01-15', 210))
_annotate_milestone("rolled out\nwith beta app\n(ru, he, ca)",
                    ('2017-02-09', 50), ('2017-01-15', 430), shrink=0.05)
_annotate_milestone("rolled out\nin production\n(ru, he, ca)",
                    ('2017-02-27', 50), ('2017-02-10', 550), shrink=0.05)
# cf. https://www.wikidata.org/w/index.php?title=Special:Contributions/MathKnight-at-TAU&offset=&limit=1000&target=MathKnight-at-TAU
_annotate_milestone("March 24:\n371 he edits\nby a single user",
                    ('2017-03-24', 371), ('2017-02-25', 730))
_annotate_milestone("rolled out\nin production\nfor more\nlanguages",
                    ('2017-04-24', 120), ('2017-04-06', 750))
# kept as the final expression so the cell still displays the last Annotation
_annotate_milestone("rolled out\nin production\nfor all\nexcept English",
                    ('2017-07-05', 220), ('2017-06-10', 900))
<matplotlib.text.Annotation at 0x7f0281544ba8>
# Number of edits since the launch with the beta app on February 10:
launch_day = pd.to_datetime('20170210')
last_day = pd.to_datetime('20171017')
since_launch = (df['date'] >= launch_day) & (df['date'] <= last_day)  # both bounds inclusive
df.loc[since_launch, 'edit_count'].sum()
78637

Calculate daily rate of app description edits and reverted edits

# moved to https://paws.wmflabs.org/paws/user/HaeB/notebooks/Mobile%20app%20edits%20on%20Wikidata%20-%20reverts%20analysis.ipynb#

Calculate revert rate for all description edits (including non-app edits, excluding bots)

# For comparison, calculate revert rate for all description edits (including non-app edits, excluding bots)
# check edit tag table for some edit types

# Fetch every change_tag row attached to four sample revisions.
conn3 = pymysql.connect(
    host=os.environ['MYSQL_HOST'],
    user=os.environ['MYSQL_USERNAME'],
    password=os.environ['MYSQL_PASSWORD'],
    database='wikidatawiki_p',
    charset='utf8'
)
try:
    with conn3.cursor() as cur:
        cur.execute("""SELECT * FROM wikidatawiki_p.change_tag 
    WHERE ct_rev_id = 468304660 OR ct_rev_id = 457815520 OR ct_rev_id = 468758591 OR ct_rev_id = 467990957;""")
        alltags = cur.fetchall()
finally:
    # Close the connection even if the query fails.
    conn3.close()
alltags
((485606401, None, 457815520, b'OAuth CID: 378', None),
 (496366368, None, 467990957, b'mobile edit', None),
 (496366368, None, 467990957, b'mobile web edit', None),
 (496692027, None, 468304660, b'OAuth CID: 408', None),
 (497177935, None, 468758591, b'OAuth CID: 378', None))
%%time
# (track how long this cell takes to execute)

# get all (non-bot, non-OAuth) description edits for a period of time
    
# excludes:
# edits that change description together with other fields 
#     (e.g. https://www.wikidata.org/w/index.php?diff=468744965:  "wbsetlabeldescriptionaliases")
# edits from "autoEdit Update Descriptions" 
#     (e.g. https://www.wikidata.org/w/index.php?diff=468745165 : "wbeditentity-update")
# bot edits like https://www.wikidata.org/w/index.php?diff=468757331 by Emijrpbot 
#     or https://www.wikidata.org/w/index.php?diff=468758634 by Mr.Ibrahembot 
#     (these use "wbeditentity-update" instead of "wbsetdescription")
# "#quickstatements" edits (e.g. https://www.wikidata.org/w/index.php?diff=468756428 , 
#      or https://www.wikidata.org/w/index.php?diff=468758591 by Poulpybot)
#      looks like these may have a tag anyway ("Tag: Widar [1.4]" or "Tag: QuickStatements [1.1]")
#      but check edit summary too)
# edits made using an OAuth tool such as:
#    reCH (e.g. https://www.wikidata.org/w/index.php?limit=50&title=Special%3AContributions&contribs=user&target=%D4%B1%D5%B7%D5%A2%D5%B8%D5%BF&namespace=&tagfilter=&year=2017&month=2 )
#    distributed Wikidata game and other Widar edits 
#       (e.g https://www.wikidata.org/w/index.php?diff=457815520 :
#       "The Distributed Game (23): Kaspar's Persondata game: Descriptions #distributed-game")
# edits from users whose names end in -bot or -Bot

# should exclude: 
# bot edits in general (use bot flag from RC table?)


# Fetch all February 2017 "wbsetdescription" revisions that carry neither an
# OAuth nor a QuickStatements change tag and whose user name does not end in
# "bot"/"Bot". Each row is (rev_id, rev_timestamp, rev_comment, rev_user_text, ct_tag).
conn3 = pymysql.connect(
    host=os.environ['MYSQL_HOST'],
    user=os.environ['MYSQL_USERNAME'],
    password=os.environ['MYSQL_PASSWORD'],
    database='wikidatawiki_p',
    charset='utf8')

try:
    with conn3.cursor() as cur:
        # NB: The change_tag table can contain several rows for the same edit,
        #     hence the GROUP BY rev_id after the LEFT JOIN.
        # NB: crude bot check - exclude user names ending in -bot or Bot
        # NOTE(review): '%#quickstatements' only matches comments that END with
        #     "#quickstatements" - confirm that this is the intended filter.
        cur.execute("""
    SELECT rev_id, rev_timestamp, rev_comment, rev_user_text, ct_tag
    FROM (
        SELECT rev_id, rev_timestamp, rev_comment, rev_user_text
        FROM wikidatawiki_p.revision 
        WHERE rev_timestamp LIKE '201702%'    
        AND rev_comment LIKE '/* wbsetdescription%'
        AND rev_comment NOT LIKE '%#quickstatements') AS rev
    LEFT JOIN (
        SELECT ct_rev_id, ct_tag FROM wikidatawiki_p.change_tag
        WHERE ct_tag LIKE 'OAuth%'
        OR ct_tag LIKE 'QuickStatements%') AS ct
    ON rev.rev_id = ct.ct_rev_id  
    GROUP BY rev_id
    HAVING ct_tag IS NULL
    AND rev_user_text NOT RLIKE '[Bb]ot$'
    ORDER BY rev_timestamp;""")
        alldescedits = cur.fetchall()
finally:
    # Close the connection even if the (long-running) query fails.
    conn3.close()
CPU times: user 1.18 s, sys: 84 ms, total: 1.26 s
Wall time: 3min 41s
 
# check result: row count, first row, last row, then the first ten rows
row_count = len(alldescedits)
print(row_count)
first_row, last_row = alldescedits[0], alldescedits[-1]
print(first_row)
print(last_row)
print(alldescedits[:10])
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-9-e6fddc99f800> in <module>()
      1 #check result
----> 2 print(len(alldescedits))
      3 print(alldescedits[0])
      4 print(alldescedits[-1])
      5 print(alldescedits[0:10])

NameError: name 'alldescedits' is not defined
# check result: print a sample of the description edits (rows 5000..5098) as
# "diff link, edit summary, user name" lines.
# Slicing (rather than indexing by position) means a result with fewer than
# 5099 rows prints whatever is available instead of raising IndexError.
for rev in alldescedits[5000:5099]:
    rev_id = rev[0]
    rev_comment = rev[2].decode()
    rev_user_text = rev[3].decode()
    print('https://www.wikidata.org/w/index.php?diff=' + str(rev_id) + ' ' + rev_comment + ' by ' + rev_user_text)
https://www.wikidata.org/w/index.php?diff=441054616 /* wbsetdescription-add:1|en */ Canadian philosopher by Missvain
https://www.wikidata.org/w/index.php?diff=441054668 /* wbsetdescription-add:1|en */ Ukrainian-born artist, poet and author by Jheald
https://www.wikidata.org/w/index.php?diff=441054886 /* wbsetdescription-set:1|en */ Spanish Chuli by 90.173.1.146
https://www.wikidata.org/w/index.php?diff=441055190 /* wbsetdescription-add:1|ru */ российский врач, рентгенолог by Ann Cane
https://www.wikidata.org/w/index.php?diff=441055275 /* wbsetdescription-add:1|en */ actor, born 1932 by Valentina.Anitnelav
https://www.wikidata.org/w/index.php?diff=441055327 /* wbsetdescription-add:1|en */ Army Reserve unit based in Tulse Hill, London by Cjkotecki
https://www.wikidata.org/w/index.php?diff=441055360 /* wbsetdescription-add:1|en */ painting by anonymous artit by Missvain
https://www.wikidata.org/w/index.php?diff=441055381 /* wbsetdescription-set:1|en */ painting by anonymous artist by Missvain
https://www.wikidata.org/w/index.php?diff=441055421 /* wbsetdescription-add:1|en */ scientific publication by Amb sib
https://www.wikidata.org/w/index.php?diff=441055435 /* wbsetdescription-add:1|fr */ article scientifique by Amb sib
https://www.wikidata.org/w/index.php?diff=441055475 /* wbsetdescription-add:1|en */ formation of the Parachute Regiment, part of the British Army by Danrok
https://www.wikidata.org/w/index.php?diff=441055522 /* wbsetdescription-add:1|it */ album degli Skálmöld del 2014 by Fringio
https://www.wikidata.org/w/index.php?diff=441055691 /* wbsetdescription-set:1|en */ scientific journal by Amb sib
https://www.wikidata.org/w/index.php?diff=441055694 /* wbsetdescription-add:1|fr */ revue scientifique by Amb sib
https://www.wikidata.org/w/index.php?diff=441055771 /* wbsetdescription-add:1|it */ album degli Skálmöld del 2016 by Fringio
https://www.wikidata.org/w/index.php?diff=441056262 /* wbsetdescription-add:1|ca */ Compositor by Papapep
https://www.wikidata.org/w/index.php?diff=441056513 /* wbsetdescription-set:1|en */ scientific journal by Amb sib
https://www.wikidata.org/w/index.php?diff=441056517 /* wbsetdescription-set:1|fr */ revue scientifique by Amb sib
https://www.wikidata.org/w/index.php?diff=441056548 /* wbsetdescription-set:1|es */ diácono anglicano, lógico, matemático, fotógrafo  y escritor británico by 181.226.45.39
https://www.wikidata.org/w/index.php?diff=441056742 /* wbsetdescription-add:1|fr */ revue scientifique by Amb sib
https://www.wikidata.org/w/index.php?diff=441057199 /* wbsetdescription-set:1|en */ war in Afghanistan from 2001 to 2014 by Danrok
https://www.wikidata.org/w/index.php?diff=441057376 /* wbsetdescription-set:1|es */ francés by 189.237.120.91
https://www.wikidata.org/w/index.php?diff=441057382 /* wbsetdescription-set:1|en */ French god by 189.237.120.91
https://www.wikidata.org/w/index.php?diff=441057383 /* wbsetdescription-add:1|en */ church in Minderhout (Hoogstraten), Belgium by Funkyxian
https://www.wikidata.org/w/index.php?diff=441057461 /* wbsetdescription-set:1|es */ dios francés by 189.237.120.91
https://www.wikidata.org/w/index.php?diff=441057471 /* wbsetdescription-add:1|fr */ revue scientifique by Amb sib
https://www.wikidata.org/w/index.php?diff=441057517 /* wbsetdescription-add:1|de */ US-amerikanische Archäologin, spezialisiert in Felsmalerei und Moais by Emeritus
https://www.wikidata.org/w/index.php?diff=441057586 /* wbsetdescription-set:1|en */ scientific journal by Amb sib
https://www.wikidata.org/w/index.php?diff=441057603 /* wbsetdescription-add:1|fr */ graveur slovène by Daehan
https://www.wikidata.org/w/index.php?diff=441057793 /* wbsetdescription-add:1|fr */ famille de véhicules de transport de troupes by Metamorforme42
https://www.wikidata.org/w/index.php?diff=441057812 /* wbsetdescription-set:1|en */ scientific journal by Amb sib
https://www.wikidata.org/w/index.php?diff=441057821 /* wbsetdescription-add:1|fr */ revue scientifique by Amb sib
https://www.wikidata.org/w/index.php?diff=441058239 /* wbsetdescription-add:1|en */ infantry battalion of the Royal Regiment of Scotland by Danrok
https://www.wikidata.org/w/index.php?diff=441058241 /* wbsetdescription-set:1|en */ district in the London Borough of Lambeth in south London, England by Cjkotecki
https://www.wikidata.org/w/index.php?diff=441058307 /* wbsetdescription-add:1|fr */ revue scientifique by Amb sib
https://www.wikidata.org/w/index.php?diff=441058855 /* wbsetdescription-add:1|en */ scientific journal by Amb sib
https://www.wikidata.org/w/index.php?diff=441058860 /* wbsetdescription-add:1|fr */ revue scientifique by Amb sib
https://www.wikidata.org/w/index.php?diff=441059179 /* wbsetdescription-add:1|ru */ размер программного обеспечения, а также и отдельных файлов, если у нас в будущем будут элементы для них by ArthurPSmith
https://www.wikidata.org/w/index.php?diff=441059230 /* wbsetdescription-add:1|en */ scientific journal by Amb sib
https://www.wikidata.org/w/index.php?diff=441059235 /* wbsetdescription-add:1|fr */ revue scientifique by Amb sib
https://www.wikidata.org/w/index.php?diff=441059677 /* wbsetdescription-add:1|en */ Village in Velyka Mykhailivka Raion (district), Odesa Oblast, Ukraine by Kloflins
https://www.wikidata.org/w/index.php?diff=441059689 /* wbsetdescription-add:1|lv */ Ciems Ukrainā Odesas apgabala Velika Mihajļivkas rajonā by Kloflins
https://www.wikidata.org/w/index.php?diff=441059731 /* wbsetdescription-add:1|de */ Transportpanzer by Metamorforme42
https://www.wikidata.org/w/index.php?diff=441060140 /* wbsetdescription-set:1|en */ scientific journal by Amb sib
https://www.wikidata.org/w/index.php?diff=441060145 /* wbsetdescription-add:1|fr */ revue scientifique by Amb sib
https://www.wikidata.org/w/index.php?diff=441060628 /* wbsetdescription-add:1|en */ training centre for the Service Police of the British Armed Forces by Cjkotecki
https://www.wikidata.org/w/index.php?diff=441060653 /* wbsetdescription-add:1|hu */ szobor by Csigabi
https://www.wikidata.org/w/index.php?diff=441060686 /* wbsetdescription-add:1|en */ scientific journal by Amb sib
https://www.wikidata.org/w/index.php?diff=441060687 /* wbsetdescription-add:1|fr */ revue scientifique by Amb sib
https://www.wikidata.org/w/index.php?diff=441060819 /* wbsetdescription-add:1|fr */ graveur autrichien by Daehan
https://www.wikidata.org/w/index.php?diff=441060886 /* wbsetdescription-add:1|lv */ Norvēģijas filke by Treisijs
https://www.wikidata.org/w/index.php?diff=441061335 /* wbsetdescription-add:1|lv */ karpu dzimtas zivju ģints by Treisijs
https://www.wikidata.org/w/index.php?diff=441061862 /* wbsetdescription-add:1|fr */ identifiant d'un handballeur par la Fédération européenne de handball by Thierry Caro
https://www.wikidata.org/w/index.php?diff=441061907 /* wbsetdescription-add:1|fr */ peintre et graveur français by Daehan
https://www.wikidata.org/w/index.php?diff=441062035 /* wbsetdescription-add:1|de */ US-amerikanische Ethnologin, Buddhistin und Menschenrechtsaktivistin by Emeritus
https://www.wikidata.org/w/index.php?diff=441062047 /* wbsetdescription-add:1|en */ film series by Valentina.Anitnelav
https://www.wikidata.org/w/index.php?diff=441062368 /* wbsetdescription-add:1|en */ service police branch of the Royal Air Force by Cjkotecki
https://www.wikidata.org/w/index.php?diff=441063038 /* wbsetdescription-add:1|qu */ Italya mama llaqtayuq llimphiq by Miguel Chong
https://www.wikidata.org/w/index.php?diff=441063044 /* wbsetdescription-add:1|ay */ Italya jacha marka llimphiri by Miguel Chong
https://www.wikidata.org/w/index.php?diff=441063151 /* wbsetdescription-add:1|fr */ identifiant par ESPN d'un joueur de baseball en Ligue majeure de baseball by Thierry Caro
https://www.wikidata.org/w/index.php?diff=441063268 /* wbsetdescription-add:1|de */ kanadische Linguistin by Emeritus
https://www.wikidata.org/w/index.php?diff=441063544 /* wbsetdescription-add:1|en */ Nigerian screen writer by Elishagh1
https://www.wikidata.org/w/index.php?diff=441063989 /* wbsetdescription-add:1|fr */ bioinformaticien Suisse by Amb sib
https://www.wikidata.org/w/index.php?diff=441064173 /* wbsetdescription-set:1|en */ unit of the Royal Navy Police and the military police of the British Royal Marines by Cjkotecki
https://www.wikidata.org/w/index.php?diff=441064755 /* wbsetdescription-add:1|de */ neuseeländische Sozialanthropologin, spezialisiert in Māori by Emeritus
https://www.wikidata.org/w/index.php?diff=441064884 /* wbsetdescription-add:1|it */ gruppo musicale giapponese by ValterVB
https://www.wikidata.org/w/index.php?diff=441065538 /* wbsetdescription-add:1|hu */ boszniai horvát író, politikai aktivista by Csurla
https://www.wikidata.org/w/index.php?diff=441065644 /* wbsetdescription-add:1|en */ grocery store chain by Nsydgn
https://www.wikidata.org/w/index.php?diff=441065902 /* wbsetdescription-set:1|nl */ deel van Würzburg, Duitsland by Holger1959
https://www.wikidata.org/w/index.php?diff=441066066 /* wbsetdescription-add:1|en */ hamlet of San Casciano dei Bagni by LigaDue
https://www.wikidata.org/w/index.php?diff=441066310 /* wbsetdescription-add:1|de */ US-amerikanische Anthropologin und Musikethnologin by Emeritus
https://www.wikidata.org/w/index.php?diff=441066361 /* wbsetdescription-add:1|en */ corps of the British Army responsible for the policing of service personnel by Cjkotecki
https://www.wikidata.org/w/index.php?diff=441066411 /* wbsetdescription-set:1|en */ house in Ham, London by Ham II
https://www.wikidata.org/w/index.php?diff=441066484 /* wbsetdescription-add:1|es */ Pico situado en el Valle de Arán by Urisole
https://www.wikidata.org/w/index.php?diff=441066622 /* wbsetdescription-set:1|en */ French-English medical practitioners (c.1600–c.1730) by Charles Matthews
https://www.wikidata.org/w/index.php?diff=441067105 /* wbsetdescription-add:1|de */ US-amerikanische Sozial- und Kulturanthropologin by Emeritus
https://www.wikidata.org/w/index.php?diff=441067391 /* wbsetdescription-set:1|en */ ideogramus that conveyssus its meaningnus throughus its pictorialun resemblanceun to a physicalus objectus eretus by 177.138.128.27
https://www.wikidata.org/w/index.php?diff=441067511 /* wbsetdescription-add:1|en */ the title song from the 1961 Disney film, The Parent Trap by Valentina.Anitnelav
https://www.wikidata.org/w/index.php?diff=441067636 /* wbsetdescription-add:1|en */ service police branch of the Royal Navy and Royal Marines by Cjkotecki
https://www.wikidata.org/w/index.php?diff=441067704 /* wbsetdescription-add:1|cs */ světová skupina týmové tenisové soutěže mužů v roce 2017 by Kacir
https://www.wikidata.org/w/index.php?diff=441068065 /* wbsetdescription-add:1|cs */ týmová tenisová soutěže mužů v roce 2017 by Kacir
https://www.wikidata.org/w/index.php?diff=441068196 /* wbsetdescription-add:1|es */ darandan by 201.234.190.166
https://www.wikidata.org/w/index.php?diff=441068260 /* wbsetdescription-add:1|de */ ungarische Ethnographin by Emeritus
https://www.wikidata.org/w/index.php?diff=441068326 /* wbsetdescription-add:1|en */ an athletics stadium in Abingdon-on-Thames by JcortezNTENT
https://www.wikidata.org/w/index.php?diff=441068475 /* wbsetdescription-add:1|cs */ obec na Slovensku by Pistal
https://www.wikidata.org/w/index.php?diff=441068484 /* wbsetdescription-add:1|sk */ obec na Slovensku by Pistal
https://www.wikidata.org/w/index.php?diff=441068966 /* wbsetdescription-set:1|es */ unidad de medida de potencia by Andreasmperu
https://www.wikidata.org/w/index.php?diff=441069618 /* wbsetdescription-add:1|en */ a group of around 150 regular and 50 reservist Royal Air Force Police by Cjkotecki
https://www.wikidata.org/w/index.php?diff=441069990 /* wbsetdescription-add:1|en */ a centrally located space used as the principal recreation ground in the town by JcortezNTENT
https://www.wikidata.org/w/index.php?diff=441070094 /* wbsetdescription-add:1|en */ kid clothing store chain in Turkey by Nsydgn
https://www.wikidata.org/w/index.php?diff=441070098 /* wbsetdescription-add:1|tr */ Türkiye'de bebek ve çocuk giyim mağazaları zinciri by Nsydgn
https://www.wikidata.org/w/index.php?diff=441070791 /* wbsetdescription-add:1|en */ a football ground in Wantage by JcortezNTENT
https://www.wikidata.org/w/index.php?diff=441070839 /* wbsetdescription-add:1|en */ cyclist by Anthony59999
https://www.wikidata.org/w/index.php?diff=441070918 /* wbsetdescription-add:1|lv */ Wikimedia projekta kategorija by Voll
https://www.wikidata.org/w/index.php?diff=441071139 /* wbsetdescription-add:1|de */ norwegische Sozialanthropologin by Emeritus
https://www.wikidata.org/w/index.php?diff=441071700 /* wbsetdescription-add:1|en */ territorial division of the Russian Academy of Sciences based in Novosibirsk by ArthurPSmith
https://www.wikidata.org/w/index.php?diff=441071718 /* wbsetdescription-add:1|en */ volunteer reserve force used to augment the regular Royal Marines in times of war or national crisis. by Cjkotecki
https://www.wikidata.org/w/index.php?diff=441071795 /* wbsetdescription-add:1|es */ unidad de medida de potencia utilizada en el sistema anglosajón by Andreasmperu
https://www.wikidata.org/w/index.php?diff=441071803 /* wbsetdescription-add:1|fr */ cheval-vapeur en système anglo-saxon by Andreasmperu
%%time

# calculate revert rate for all (non-bot, non-OAuth) description edits

# initialize dataframe to count reverts per day:
daterange = pd.date_range(start='2017-02-01', end='2017-02-07', freq='D')
columns = ['all', 'reverted', 'revert_rate']  # all refer to number of edits. could add: number of revertees and reverters

# Integer zero-filled frame, one row per day. Re-running the cell simply
# rebinds the name, so no explicit `del` of an earlier result is needed.
alldescrevperday = pd.DataFrame(0, index=daterange, columns=columns)

# count reverts per day:

# initialize API session again, to be safe:
session = mwapi.Session("https://www.wikidata.org",
                        user_agent="Revert detection using mwapi (tbayer@wikimedia.org)")

for rev in alldescedits:
    # rev is (rev_id, rev_timestamp, ...); the first 8 bytes of the timestamp are the date
    date = pd.to_datetime(rev[1][:8].decode())

    if date not in daterange:
        continue

    # Single-step scalar access: a chained `.loc[date]['all'] += 1` may update
    # a temporary copy of the row and silently lose the increment.
    # NOTE: counted before the API check, so an edit whose check fails below
    # still counts in 'all' (as not reverted).
    alldescrevperday.at[date, 'all'] += 1

    rev_id = rev[0]
    # from http://paws-public.wmflabs.org/paws-public/User:EpochFail/editquality/ipython/reverted_detection_demo.ipynb :
    try:
        _, reverted, reverted_to = mwreverts.api.check(
            session, rev_id, radius=5,  # most reverts within 5 edits
            window=48*60*60,  # 2 days
            rvprop={'user', 'ids'})  # Some properties we may make use of
    except RuntimeError as e:
        # best effort: report the failure and move on to the next edit
        sys.stderr.write(str(e))
        continue

    # special cases (self-reverts, reverts that were re-reverted) are not
    # treated separately here; any detected revert counts
    if reverted:
        alldescrevperday.at[date, 'reverted'] += 1


alldescrevperday['revert_rate'] = alldescrevperday['reverted'] / alldescrevperday['all']
CPU times: user 5min 58s, sys: 17.8 s, total: 6min 16s
Wall time: 51min 14s
# check result: display the per-day table of edit counts, reverted counts and revert rate
alldescrevperday
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-8-73f2efad468b> in <module>()
      1 # check result
----> 2 alldescrevperday

NameError: name 'alldescrevperday' is not defined
# overall revert rate: reverted edits divided by all edits, summed over every day in the table
period_totals = alldescrevperday.sum()
overallrevrate = period_totals['reverted'] / period_totals['all']
print(overallrevrate)
0.0115961800819
# save result: pickle the per-day table under a relative path named after the 2017-02-01..07 period
alldescrevperday.to_pickle('alldescrevperday_2017-02-01..07.pickle')
# earlier result for different timespan (file name refers to 2017-03-01..07)
# NOTE(review): unpickling can execute code embedded in the file - only load pickles you created yourself
march_alldescrevperday = pd.read_pickle('alldescrevperday_2017-03-01..07.pickle')
march_alldescrevperday
all reverted revert_rate
2017-03-01 3645 40 0.010974
2017-03-02 3657 32 0.008750
2017-03-03 5041 22 0.004364
2017-03-04 3453 39 0.011295
2017-03-05 3478 17 0.004888
2017-03-06 3145 47 0.014944
2017-03-07 2957 51 0.017247
# overall revert rate in the first week of March (reverted edits / all edits)
march_totals = march_alldescrevperday.sum()
march_totals['reverted'] / march_totals['all']
0.0097730138713745269
# plot edits vs. reverts in absolute terms
cols = ['all', 'reverted']
colm2colr = {'reverted': 'red'}  # column name -> line color; unlisted columns map to None
fig, ax = plt.subplots(dpi=300)
line_colors = map(colm2colr.get, cols)
alldescrevperday[cols].plot(ax=ax, color=line_colors)
ax.set_title('description edits and reverts on Wikidata')
<matplotlib.text.Text at 0x7f5c8ba05dd8>
/srv/paws/lib/python3.4/site-packages/matplotlib/font_manager.py:1297: UserWarning: findfont: Font family ['sans-serif'] not found. Falling back to DejaVu Sans
  (prop.get_family(), self.defaultFamily[fontext]))
# test revert detection on a single revision
rev = alldescedits[0]
rev_id = rev[0]
# override with a hard-coded revision ID; the ID taken from alldescedits above is not used
rev_id = 457840896 
print(rev_id)
# `reverted` is printed below; the other two returned values are not used in this cell
_, reverted, reverted_to = mwreverts.api.check(
                session, rev_id, radius=5,  # most reverts within 5 edits
                window=48*60*60,  # 2 days
                rvprop={'user', 'ids'})
print(reverted)
# todo: use db instead of API
# after https://github.com/mediawiki-utilities/python-mwreverts/issues/8 has been fixed
457840896
Revert(reverting={'parentid': 457841389, 'sha1': '60355c45351034ae4749597eda3a3bc71696a1ff', 'revid': 457941698, 'user': 'YMS', 'page': {'pageid': 2783371, 'ns': 0, 'title': 'Q2907745'}, 'timestamp': '2017-02-28T06:46:47Z'}, reverteds=[{'parentid': 457840896, 'sha1': 'ca5dd45c095b84b106f8b90438986849854aaa3b', 'revid': 457841389, 'user': 'Shamil0557', 'page': {'pageid': 2783371, 'ns': 0, 'title': 'Q2907745'}, 'timestamp': '2017-02-28T03:29:36Z'}, {'parentid': 455820100, 'sha1': '3033a67da25d34a4f68e8bdc4e38177c2cac5eab', 'revid': 457840896, 'user': 'Shamil0557', 'page': {'pageid': 2783371, 'ns': 0, 'title': 'Q2907745'}, 'timestamp': '2017-02-28T03:27:46Z'}], reverted_to={'parentid': 437515849, 'sha1': '60355c45351034ae4749597eda3a3bc71696a1ff', 'revid': 455820100, 'user': 'Deinocheirus', 'page': {'pageid': 2783371, 'ns': 0, 'title': 'Q2907745'}, 'timestamp': '2017-02-24T20:06:16Z'})
# check result: print the current value of `daterange`, then show the last 20 rows of the per-day table
print(daterange)
alldescrevperday.tail(20)
DatetimeIndex(['2016-12-31', '2017-01-01', '2017-01-02', '2017-01-03',
               '2017-01-04', '2017-01-05', '2017-01-06', '2017-01-07',
               '2017-01-08', '2017-01-09', '2017-01-10', '2017-01-11',
               '2017-01-12', '2017-01-13', '2017-01-14', '2017-01-15',
               '2017-01-16', '2017-01-17', '2017-01-18', '2017-01-19',
               '2017-01-20', '2017-01-21', '2017-01-22', '2017-01-23',
               '2017-01-24', '2017-01-25', '2017-01-26', '2017-01-27',
               '2017-01-28', '2017-01-29', '2017-01-30', '2017-01-31',
               '2017-02-01', '2017-02-02', '2017-02-03', '2017-02-04',
               '2017-02-05', '2017-02-06', '2017-02-07', '2017-02-08',
               '2017-02-09', '2017-02-10', '2017-02-11', '2017-02-12',
               '2017-02-13', '2017-02-14', '2017-02-15', '2017-02-16',
               '2017-02-17', '2017-02-18', '2017-02-19', '2017-02-20',
               '2017-02-21', '2017-02-22', '2017-02-23', '2017-02-24',
               '2017-02-25', '2017-02-26', '2017-02-27', '2017-02-28',
               '2017-03-01', '2017-03-02', '2017-03-03', '2017-03-04',
               '2017-03-05', '2017-03-06', '2017-03-07', '2017-03-08',
               '2017-03-09', '2017-03-10', '2017-03-11', '2017-03-12',
               '2017-03-13', '2017-03-14', '2017-03-15', '2017-03-16',
               '2017-03-17', '2017-03-18', '2017-03-19', '2017-03-20',
               '2017-03-21', '2017-03-22', '2017-03-23', '2017-03-24',
               '2017-03-25', '2017-03-26', '2017-03-27', '2017-03-28',
               '2017-03-29', '2017-03-30', '2017-03-31', '2017-04-01',
               '2017-04-02', '2017-04-03', '2017-04-04', '2017-04-05',
               '2017-04-06', '2017-04-07', '2017-04-08', '2017-04-09'],
              dtype='datetime64[ns]', freq='D')
all reverted revert_rate
2017-02-01 2798 37 0.013224
2017-02-02 2893 39 0.013481
2017-02-03 2734 26 0.009510
2017-02-04 2878 19 0.006602
2017-02-05 3520 23 0.006534
2017-02-06 2647 43 0.016245
2017-02-07 3054 51 0.016699

[WIP] Compare with revert rate for all edits on Wikipedia

# For comparison, calculate revert rate for all edits on various Wikipedias
# using Wikistats data from November 2015
# Caveats: not 100% comparable because e.g.:
# - counting ratio of reverts vs. counting ratio of reverted edits 
#   (one revert occasionally reverts more than one edit, e.g. 22% of reverts on enwiki per https://stats.wikimedia.org/EN/EditsRevertsEN.htm ) 
# - WP numbers include non-mainspace (non-content) edits
# - WP numbers appear to include bot edits (for which revert rate is lower)
# Revert rate per Wikipedia language edition (all edits), keyed by language code.
WPrevertrate = {
    'en': 0.081,  # https://stats.wikimedia.org/EN/EditsRevertsEN.htm
    'ru': 0.049,  # https://stats.wikimedia.org/EN/EditsRevertsRU.htm
    'he': 0.059,  # https://stats.wikimedia.org/EN/EditsRevertsHE.htm
    'ca': 0.020,  # https://stats.wikimedia.org/EN/EditsRevertsCA.htm
}

# Revert rate per Wikipedia language edition for anonymous edits, same sources.
WPanonrevertrate = {
    'en': 0.290,  # https://stats.wikimedia.org/EN/EditsRevertsEN.htm
    'ru': 0.149,  # https://stats.wikimedia.org/EN/EditsRevertsRU.htm
    'he': 0.292,  # https://stats.wikimedia.org/EN/EditsRevertsHE.htm
    'ca': 0.195,  # https://stats.wikimedia.org/EN/EditsRevertsCA.htm
}