# Session setup: connect to the Wikidata site and its data repository,
# and bring in the page-generator helpers used by humans() below.
import pywikibot as pwb
site = pwb.Site("wikidata", "wikidata")
repo = site.data_repository()
from pywikibot import pagegenerators as pg
def humans(limit=30000000):
    """Yield Wikidata items that are instances of human (P31 = Q5).

    Results are fetched from the SPARQL endpoint in batches so that the
    (very large) full result set never has to be materialized at once.

    :param limit: maximum number of items to yield (default: 30 million,
        i.e. effectively "all humans").
    """
    batchsize = 1000
    # Named format placeholders instead of bare-word .replace()
    # substitution, which was fragile (any other occurrence of the
    # placeholder words in the query would have been rewritten too).
    query_template = """\
SELECT ?item WHERE {{
  ?item wdt:P31 wd:Q5.
}}
LIMIT {limit}
OFFSET {offset}"""
    for offset in range(0, limit, batchsize):
        # Never request more than the caller asked for: previously the
        # query always used LIMIT batchsize, so e.g. humans(4) yielded
        # up to 1000 items (the "VERY IMPORTANT NOTE" wart).
        current_limit = min(batchsize, limit - offset)
        current_query = query_template.format(limit=current_limit, offset=offset)
        count = 0
        for item in pg.WikidataSPARQLPageGenerator(current_query, site):
            yield item
            count += 1
        if count != current_limit:
            # A short batch means the endpoint has no more results.
            break
def fix_eo_label(human, actually_edit=False):
    """Fix an Esperanto label of the form "Given FAMILY" to "Given Family".

    Only the most trivial case is handled: the item must have exactly one
    given name (P735) and exactly one family name (P734), and its current
    eo label must be exactly the English given-name label followed by the
    upper-cased English family-name label.

    :param human: pywikibot.ItemPage of the person to check.
    :param actually_edit: if True, save the corrected label; otherwise
        only print what would be changed.
    """
    data = human.get()
    claims = data["claims"]
    labels = data["labels"]
    if not ("eo" in labels
            and len(claims.get("P735", ())) == 1
            and len(claims.get("P734", ())) == 1):
        return
    given_item = claims["P735"][0].getTarget()
    family_item = claims["P734"][0].getTarget()
    # getTarget() returns None for "unknown value"/"no value" snaks;
    # the original code crashed with AttributeError on such claims
    # (see the traceback at the end of this session).
    if given_item is None or family_item is None:
        return
    givenLabels = given_item.get()["labels"]
    familyLabels = family_item.get()["labels"]
    if "en" in givenLabels and "en" in familyLabels:     # use en because eo labels don’t always exist, and it shouldn’t make a difference
        given = givenLabels["en"]
        family = familyLabels["en"]
        if given + " " + family.upper() == labels["eo"]: # and if it does, this should fail, so no harm done
            newlabel = given + " " + family
            print("{}: {} => {}".format(human.getID(), labels["eo"], newlabel))
            if actually_edit:
                human.editLabels(labels={"eo": newlabel}, summary="Fix uppercase family name in eo label: see https://www.wikidata.org/wiki/User:TweetsFactsAndQueries/Esperanto-Family-Labels")
# Smoke test on a single known-broken item (Q935, Isaac Newton), with editing enabled.
fix_eo_label(pwb.ItemPage(repo, "Q935"), actually_edit=True)
Q935: Isaac NEWTON => Isaac Newton
# Dry run over the first few humans (print only, no edits saved).
for person in humans(4):
    fix_eo_label(person, actually_edit=False)
Q80: Tim BERNERS-LEE => Tim Berners-Lee
# Same small batch again, this time actually saving the edits.
for person in humans(4):
    fix_eo_label(person, actually_edit=True)
Q80: Tim BERNERS-LEE => Tim Berners-Lee
# this should work to fix all humans:
# for human in humans():
#     fix_eo_label(human, actually_edit=True)
# however, it would take ages to run on an account without the bot flag,
# and it’s also really really scary, so some extra eyes on the code would be good
# let’s try a limit of 20
# VERY IMPORTANT NOTE: humans() will always return at least batchsize items,
# so make sure to tweak that to a small value when doing small test runs
# Live run over the first twenty humans.
for person in humans(20):
    fix_eo_label(person, actually_edit=True)
# hrm, apparently no broken labels in the first twenty humans. Let’s try forty
# Widen the live run to the first forty humans.
for person in humans(40):
    fix_eo_label(person, actually_edit=True)
Q400: Jenna JAMESON => Jenna Jameson
# still not many results. How about 100?
# (By the way, I checked my user contributions to verify that no edits apart from the printed ones were made)
# Widen further to the first hundred humans.
for person in humans(100):
    fix_eo_label(person, actually_edit=True)
Q440: Salvador ALLENDE => Salvador Allende
Q448: Denis DIDEROT => Denis Diderot
Sleeping for 8.7 seconds, 2016-09-26 17:23:19
Q535: Victor HUGO => Victor Hugo
Sleeping for 3.6 seconds, 2016-09-26 17:23:34
# By the way: this only fixes the most trivial of cases: exactly one given and family name.
# George W. BUSH, for example, isn’t fixed, because he has multiple given names.
# Let’s run the first big batch. batchsize changed back to 1000, and run for 10000 humans.
# First big batch: ten thousand humans.
for person in humans(10000):
    fix_eo_label(person, actually_edit=True)
Q1511: Richard WAGNER => Richard Wagner
Q1868: Paul OTLET => Paul Otlet
Sleeping for 6.7 seconds, 2016-09-28 18:08:51
Q2492: Konrad ADENAUER => Konrad Adenauer
Sleeping for 6.3 seconds, 2016-09-28 18:09:01
Q10218: Rahul GANDHI => Rahul Gandhi
Q7199: Marcel PROUST => Marcel Proust
Sleeping for 4.0 seconds, 2016-09-28 18:11:13
Q8312: Nicholas MEYER => Nicholas Meyer
Sleeping for 4.9 seconds, 2016-09-28 18:11:22
Q7604: Leonhard EULER => Leonhard Euler
Q11812: Thomas JEFFERSON => Thomas Jefferson
Sleeping for 5.8 seconds, 2016-09-28 18:13:29
Q19504: Fritz LANG => Fritz Lang
Q12906: Helmar FRANK => Helmar Frank
Q24036: Sten JOHANSSON => Sten Johansson
Q33760: Bertrand RUSSELL => Bertrand Russell
Q28480: Max BROD => Max Brod
Q34969: Benjamin FRANKLIN => Benjamin Franklin
Q37103: Florence NIGHTINGALE => Florence Nightingale
Q38203: Helen KELLER => Helen Keller
Q41233: Timothy DALTON => Timothy Dalton
Q42544: Thomas MORE => Thomas More
Q38049: Herta MÜLLER => Herta Müller
Q36290: Diana ROSS => Diana Ross
Q44519: Selma LAGERLÖF => Selma Lagerlöf
Q49325: Albert SCHWEITZER => Albert Schweitzer
Q47426: Milton FRIEDMAN => Milton Friedman
Q55282: Peter GREENAWAY => Peter Greenaway
Q55123: Alan COX => Alan Cox
# Alright, results still look good, let’s run the whole batch.
# The full run over all humans (default limit).
for person in humans():
    fix_eo_label(person, actually_edit=True)
Q61627: Uwe JOHNSON => Uwe Johnson
WARNING: API error badtoken: Invalid token
Sleeping for 9.8 seconds, 2016-10-04 18:06:37
Q60166: Ernst KRETSCHMER => Ernst Kretschmer
Q60955: Botho STRAUSS => Botho Strauß
Q62559: Richard WAGNER => Richard Wagner
Q63926: Lothar SCHMID => Lothar Schmid
Q66001: Otto JAHN => Otto Jahn
Q69108: Christiane PAUL => Christiane Paul
Q74815: Busso THOMA => Busso Thoma
Q76601: Franz FISCHER => Franz Fischer
Q77342: Hardy KRÜGER => Hardy Krüger
Q77389: Robert HÜBNER => Robert Hübner
Q77369: Karl HARRER => Karl Harrer
Q77669: Johann Jakob BERNHARDI => Johann Jakob Bernhardi
Q78732: Emil HOLUB => Emil Holub
Q78348: Erich BAUER => Erich Bauer
Q84211: Heinrich HARRER => Heinrich Harrer
Q87564: Valentin ROSE => Valentin Rose
Q89847: Gerd MÜLLER => Gerd Müller
Q92650: Paul GRAHAM => Paul Graham
Q92507: Christian GABRIEL => Christian Gabriel
Q93514: Ilse AICHINGER => Ilse Aichinger
Q95194: Karl BLEIBTREU => Karl Bleibtreu
Q96346: Eva ZELLER => Eva Zeller
Q103651: Glenn MILLER => Glenn Miller
Q106465: John GRISHAM => John Grisham
Q106807: Magnus CARLSEN => Magnus Carlsen
Q108807: Bent LARSEN => Bent Larsen
Q110714: Lester YOUNG => Lester Young
Q117072: Franz WEBER => Franz Weber
Q123174: Vanessa HUDGENS => Vanessa Hudgens
Q123190: Jean PIAGET => Jean Piaget
WARNING: Http response status 503
WARNING: Non-JSON response received from server wikidata:wikidata; the server may be down.
WARNING: Waiting 5 seconds before retrying.
Q125686: Jo JONES => Jo Jones
WARNING: Http response status 503
WARNING: Non-JSON response received from server wikidata:wikidata; the server may be down.
WARNING: Waiting 5 seconds before retrying.
Q125249: William JAMES => William James
WARNING: Http response status 503
WARNING: Non-JSON response received from server wikidata:wikidata; the server may be down.
WARNING: Waiting 5 seconds before retrying.
Q134333: Roger MOORE => Roger Moore
Q129130: John GRAY => John Gray
Q131805: John DEWEY => John Dewey
Q138627: David WILKIE => David Wilkie
Q152850: Raoul WALLENBERG => Raoul Wallenberg
ERROR: Traceback (most recent call last):
  File "/srv/paws/lib/python3.4/site-packages/requests/packages/urllib3/connectionpool.py", line 595, in urlopen
    chunked=chunked)
  File "/srv/paws/lib/python3.4/site-packages/requests/packages/urllib3/connectionpool.py", line 393, in _make_request
    six.raise_from(e, None)
  File "<string>", line 2, in raise_from
  File "/srv/paws/lib/python3.4/site-packages/requests/packages/urllib3/connectionpool.py", line 389, in _make_request
    httplib_response = conn.getresponse()
  File "/usr/lib/python3.4/http/client.py", line 1172, in getresponse
    response.begin()
  File "/usr/lib/python3.4/http/client.py", line 351, in begin
    version, status, reason = self._read_status()
  File "/usr/lib/python3.4/http/client.py", line 321, in _read_status
    raise BadStatusLine(line)
http.client.BadStatusLine: ''

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/srv/paws/lib/python3.4/site-packages/requests/adapters.py", line 423, in send
    timeout=timeout
  File "/srv/paws/lib/python3.4/site-packages/requests/packages/urllib3/connectionpool.py", line 640, in urlopen
    _stacktrace=sys.exc_info()[2])
  File "/srv/paws/lib/python3.4/site-packages/requests/packages/urllib3/util/retry.py", line 261, in increment
    raise six.reraise(type(error), error, _stacktrace)
  File "/srv/paws/lib/python3.4/site-packages/requests/packages/urllib3/packages/six.py", line 685, in reraise
    raise value.with_traceback(tb)
  File "/srv/paws/lib/python3.4/site-packages/requests/packages/urllib3/connectionpool.py", line 595, in urlopen
    chunked=chunked)
  File "/srv/paws/lib/python3.4/site-packages/requests/packages/urllib3/connectionpool.py", line 393, in _make_request
    six.raise_from(e, None)
  File "<string>", line 2, in raise_from
  File "/srv/paws/lib/python3.4/site-packages/requests/packages/urllib3/connectionpool.py", line 389, in _make_request
    httplib_response = conn.getresponse()
  File "/usr/lib/python3.4/http/client.py", line 1172, in getresponse
    response.begin()
  File "/usr/lib/python3.4/http/client.py", line 351, in begin
    version, status, reason = self._read_status()
  File "/usr/lib/python3.4/http/client.py", line 321, in _read_status
    raise BadStatusLine(line)
requests.packages.urllib3.exceptions.ProtocolError: ('Connection aborted.', BadStatusLine("''",))

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/srv/paws/lib/python3.4/site-packages/pywikibot/data/api.py", line 1953, in submit
    body=body, headers=headers)
  File "/srv/paws/lib/python3.4/site-packages/pywikibot/tools/__init__.py", line 1417, in wrapper
    return obj(*__args, **__kw)
  File "/srv/paws/lib/python3.4/site-packages/pywikibot/comms/http.py", line 279, in request
    r = fetch(baseuri, method, body, headers, **kwargs)
  File "/srv/paws/lib/python3.4/site-packages/pywikibot/comms/http.py", line 434, in fetch
    error_handling_callback(request)
  File "/srv/paws/lib/python3.4/site-packages/pywikibot/comms/http.py", line 352, in error_handling_callback
    raise request.data
  File "/srv/paws/lib/python3.4/site-packages/pywikibot/comms/http.py", line 331, in _http_process
    verify=not ignore_validation)
  File "/srv/paws/lib/python3.4/site-packages/requests/sessions.py", line 475, in request
    resp = self.send(prep, **send_kwargs)
  File "/srv/paws/lib/python3.4/site-packages/requests/sessions.py", line 596, in send
    r = adapter.send(request, **kwargs)
  File "/srv/paws/lib/python3.4/site-packages/requests/adapters.py", line 473, in send
    raise ConnectionError(err, request=request)
requests.exceptions.ConnectionError: ('Connection aborted.', BadStatusLine("''",))

WARNING: Waiting 5 seconds before retrying.
Q151278: Holger BADSTUBER => Holger Badstuber
Q145132: Robert TAYLOR => Robert Taylor
Q155764: Robert BROWN => Robert Brown
Q156585: Marc BLOCH => Marc Bloch
Q162492: Colin FIRTH => Colin Firth
Q164683: Max BECKMANN => Max Beckmann
Q171826: Alfred DREYFUS => Alfred Dreyfus
Q179497: Oliver STONE => Oliver Stone
Q184103: Christina RICCI => Christina Ricci
Q188929: Arthur EVANS => Arthur Evans
Q191004: Carl ROGERS => Carl Rogers
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-13-e80b1a07591c> in <module>()
      1 # Alright, results still look good, let’s run the whole batch.
      2 for human in humans():
----> 3     fix_eo_label(human, actually_edit=True)

<ipython-input-9-260766d965d1> in fix_eo_label(human, actually_edit)
      5     if "eo" in labels and "P735" in claims and len(claims["P735"]) == 1 and "P734" in claims and len(claims["P734"]) == 1:
      6         givenLabels = claims["P735"][0].getTarget().get()["labels"]
----> 7         familyLabels = claims["P734"][0].getTarget().get()["labels"]
      8         if "en" in givenLabels and "en" in familyLabels:     # use en because eo labels don’t always exist, and it shouldn’t make a difference
      9             given = givenLabels["en"]

AttributeError: 'NoneType' object has no attribute 'get'