import json
import locale
import pywikibot
import requests
import urllib.parse

# --- configuration -----------------------------------------------------------

# Switch between printing the generated infoboxes (True) and the wiki branch (False).
simulate = True

# Locale used by locale.format_string() when numbers are formatted below.
# NOTE(review): this was originally commented as giving German thousands and
# decimal separators, but the locale that is actually set is en_US (',' as
# thousands separator, '.' as decimal point) -- confirm which one is intended.
locale.setlocale(locale.LC_ALL, 'en_US.utf8')

# Wikidata Query Service access. The User-Agent is not publicly visible, but it
# is sent to the servers and therefore visible to the query service operators.
sparql_endpoint = 'https://query.wikidata.org/sparql?query={}'
sparql_groupconcat_separator = ', '
sparql_headers = {}
sparql_headers['Accept'] = 'application/sparql-results+json'
sparql_headers['User-Agent'] = (
    requests.utils.default_headers()['User-Agent']
    + ' (Wikidata bot by User:MisterSynergy; mailto:mister.synergy@yahoo.com)'
)

# URI prefixes that are stripped from SPARQL results.
wikidata_uri_prefix = 'http://www.wikidata.org/entity/'
commons_uri_prefix = 'http://commons.wikimedia.org/wiki/Special:FilePath/'

# Wiki used for page lookups, plus page title and edit summary patterns.
wiki_site = pywikibot.Site(fam='wikipedia', code='de')
infobox_target = 'Vorlage:Infobox Land/{}'
edit_summary = 'BOT: aktualisiere Infobox mit Daten aus dem Wikidata-Objekt http://www.wikidata.org/entity/{}'

# read infobox wikitext template
with open('infobox_template.txt', mode='r', encoding='utf8') as fh:
    infobox_template = fh.read()

# read SPARQL query from file and load the results from the endpoint into a Python dictionary
with open('infobox_sparql.rq', mode='r', encoding='utf8') as fh:
    sparql_query = fh.read()

sparql_response = requests.get(
    sparql_endpoint.format(urllib.parse.quote_plus(sparql_query)),
    headers=sparql_headers,
    timeout=90,  # never wait forever on a stalled connection
)
# fail with a clear HTTP error instead of a JSON decoding error on an error page
sparql_response.raise_for_status()
sparql_raw_results = sparql_response.json()

# function definitions
def format_value(sparql_var, sparql_value):
    """Convert one SPARQL result binding into the text used in the infobox.

    Args:
        sparql_var: Name of the SPARQL variable the binding belongs to.
        sparql_value: Binding dictionary from the SPARQL JSON results; its
            'type' and 'value' entries are read.

    Returns:
        The formatted string. Wikidata entity URIs are reduced to the part
        after the entity prefix, Commons file URIs to the unquoted file name,
        selected numeric variables are formatted with the current locale, and
        selected list variables are split and wikilinked element by element.
        Everything else is returned unchanged.
    """
    value_type = sparql_value['type']
    value = sparql_value['value']

    if value_type == 'uri':
        if value.startswith(wikidata_uri_prefix):
            return value[len(wikidata_uri_prefix):]
        if value.startswith(commons_uri_prefix):
            return urllib.parse.unquote(value[len(commons_uri_prefix):], 'utf8')
        return value

    if value_type != 'literal':
        return value

    if sparql_var == 'max_flaeche_rund':
        return locale.format_string('%.0f', float(value), grouping=True)
    if sparql_var == 'max_ew_in_mio':
        return locale.format_string('%.1f', float(value))
    if sparql_var in ('amtssprachen', 'hauptstaedte', 'oberhaupt_bezeichnung', 'oberhaeupter'):
        # An empty concatenated value would otherwise yield an empty title,
        # which cannot be looked up as a page; such elements are dropped.
        # NOTE(review): a title that itself contains the separator is split
        # into several elements here -- confirm the separator used in the query.
        titles = [elem for elem in value.split(sparql_groupconcat_separator) if elem.strip()]
        return ', '.join(add_wikilink(elem) for elem in titles)
    if sparql_var == 'titel_im_klexikon':
        return value.replace('_', ' ')
    return value

# Remembers per title whether the page exists on wiki_site, so that titles
# occurring in many result rows are only looked up once per run.
_wikilink_exists_cache = {}

def add_wikilink(title):
    """Wrap title in [[...]] if a page with that title exists on wiki_site.

    Args:
        title: Page title to check.

    Returns:
        '[[title]]' if the page exists, otherwise the title unchanged.
    """
    if title not in _wikilink_exists_cache:
        # page.exists() queries the wiki, so only ask once per title
        _wikilink_exists_cache[title] = pywikibot.Page(wiki_site, title).exists()
    if _wikilink_exists_cache[title]:
        return '[[{}]]'.format(title)
    return title

# loop over results from the query service
for i, sparql_raw_row in enumerate(sparql_raw_results['results']['bindings']):
    # format every variable of the result head; variables that are not
    # defined in this row are replaced by an empty string
    sparql_formatted_row = {
        sparql_var: format_value(sparql_var, sparql_raw_row[sparql_var]) if sparql_var in sparql_raw_row else ''
        for sparql_var in sparql_raw_results['head']['vars']
    }

    # the 1-based row number is the only positional field of the template
    infobox_filled = infobox_template.format(i + 1, **sparql_formatted_row)

    if simulate:
        print(infobox_filled)
        print()
    else:
        infobox_page = pywikibot.Page(wiki_site, infobox_target.format(sparql_formatted_row['titel_im_klexikon']))
        # Saving is still disabled, so this branch currently does not edit the wiki.
        # NOTE(review): the summary presumably needs the SPARQL variable holding
        # the item ID, assumed here to be named 'item' -- confirm against the query.
        #infobox_page.text = infobox_filled
        #infobox_page.save(summary=edit_summary.format(sparql_formatted_row['item']), quiet=True)
{{Infobox Land
| Name = Afghanistan
| Flagge = Flag of Afghanistan.svg
| Hauptstadt = [[Kabul]]
| Amtssprache = [[Dari]], [[Paschtu]]
| Staatsoberhaupt = [[Präsident]] [[Aschraf Ghani Ahmadsai]]
| Einwohnerzahl = etwa 34.9 Millionen
| Fläche = etwa 652,000 [[Meter|Quadratkilometer]] 
| Lagekarte = LocationAfghanistan.svg
| Lagebeschreibung = Wo Afghanistan in [[Asien]] liegt
}}<!-- aus dem Wikidata-Objekt Q889 -->

{{Infobox Land
| Name = Albanien
| Flagge = Flag of Albania.svg
| Hauptstadt = [[Tirana]]
| Amtssprache = [[Albanisch]]
| Staatsoberhaupt = [[Präsident]] [[Ilir Meta]]
| Einwohnerzahl = etwa 3.0 Millionen
| Fläche = etwa 29,000 [[Meter|Quadratkilometer]] 
| Lagekarte = LocationAlbania.svg
| Lagebeschreibung = Wo Albanien in [[Europa]] liegt
}}<!-- aus dem Wikidata-Objekt Q222 -->

{{Infobox Land
| Name = Algerien
| Flagge = Flag of Algeria.svg
| Hauptstadt = [[Algier]]
| Amtssprache = Algerische Berbersprache, [[Arabisch]]
| Staatsoberhaupt = [[Präsident]] [[Abdelkader Bensalah]]
| Einwohnerzahl = etwa 35.6 Millionen
| Fläche = etwa 2,382,000 [[Meter|Quadratkilometer]] 
| Lagekarte = LocationAlgeria.svg
| Lagebeschreibung = Wo Algerien in [[Afrika]] liegt
}}<!-- aus dem Wikidata-Objekt Q262 -->

{{Infobox Land
| Name = Andorra
| Flagge = Flag of Andorra.svg
| Hauptstadt = [[Andorra la Vella]]
| Amtssprache = [[Katalanisch]]
| Staatsoberhaupt = [[Kofürst von Andorra]] [[Joan Enric Vives i Sicília]], [[Emmanuel Macron]]
| Einwohnerzahl = etwa 0.1 Millionen
| Fläche = etwa 468 [[Meter|Quadratkilometer]] 
| Lagekarte = LocationAndorra.svg
| Lagebeschreibung = Wo Andorra in [[Europa]] liegt
}}<!-- aus dem Wikidata-Objekt Q228 -->

{{Infobox Land
| Name = Angola
| Flagge = Flag of Angola.svg
| Hauptstadt = [[Luanda]]
| Amtssprache = [[Portugiesisch]]
| Staatsoberhaupt = [[Präsident]] [[João Lourenço]]
| Einwohnerzahl = etwa 21.5 Millionen
| Fläche = etwa 1,247,000 [[Meter|Quadratkilometer]] 
| Lagekarte = LocationAngola.svg
| Lagebeschreibung = Wo Angola in [[Afrika]] liegt
}}<!-- aus dem Wikidata-Objekt Q916 -->

{{Infobox Land
| Name = Antigua und Barbuda
| Flagge = Flag of Antigua and Barbuda.svg
| Hauptstadt = [[Saint John’s]]
| Amtssprache = [[Englisch]]
| Staatsoberhaupt = [[Königin]] [[Elisabeth II.]]
| Einwohnerzahl = etwa 0.1 Millionen
| Fläche = etwa 440 [[Meter|Quadratkilometer]] 
| Lagekarte = 
| Lagebeschreibung = Wo Antigua und Barbuda in [[Nordamerika]] liegt
}}<!-- aus dem Wikidata-Objekt Q781 -->

{{Infobox Land
| Name = Argentinien
| Flagge = Flag of Argentina.svg
| Hauptstadt = [[Buenos Aires]]
| Amtssprache = [[Spanisch]]
| Staatsoberhaupt = [[Präsident]] [[Mauricio Macri]]
| Einwohnerzahl = etwa 44.9 Millionen
| Fläche = etwa 2,780,000 [[Meter|Quadratkilometer]] 
| Lagekarte = LocationArgentina2.svg
| Lagebeschreibung = Wo Argentinien in [[Südamerika]] liegt
}}<!-- aus dem Wikidata-Objekt Q414 -->

{{Infobox Land
| Name = Armenien
| Flagge = Flag of Armenia.svg
| Hauptstadt = [[Jerewan]]
| Amtssprache = [[Armenisch]]
| Staatsoberhaupt = [[Präsident]] [[Armen Sarkissjan]]
| Einwohnerzahl = etwa 3.0 Millionen
| Fläche = etwa 30,000 [[Meter|Quadratkilometer]] 
| Lagekarte = LocationArmenia.svg
| Lagebeschreibung = Wo Armenien in [[Asien]] liegt
}}<!-- aus dem Wikidata-Objekt Q399 -->

{{Infobox Land
| Name = Aserbaidschan
| Flagge = Flag of Azerbaijan.svg
| Hauptstadt = [[Baku]]
| Amtssprache = [[Aserbaidschanisch]]
| Staatsoberhaupt = [[Präsident]] [[İlham Əliyev]]
| Einwohnerzahl = etwa 10.0 Millionen
| Fläche = etwa 87,000 [[Meter|Quadratkilometer]] 
| Lagekarte = LocationAzerbaijan.svg
| Lagebeschreibung = Wo Aserbaidschan in [[Asien]] liegt
}}<!-- aus dem Wikidata-Objekt Q227 -->

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
/srv/paws/lib/python3.6/site-packages/urllib3/connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
    378                 # Python 2.7, use buffering of HTTP responses
--> 379                 httplib_response = conn.getresponse(buffering=True)
    380             except TypeError:

TypeError: getresponse() got an unexpected keyword argument 'buffering'

During handling of the above exception, another exception occurred:

KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-1-7e91dac864e6> in <module>
     74             sparql_formatted_row[sparql_var] = ''
     75         else:
---> 76             sparql_formatted_row[sparql_var] = format_value(sparql_var, sparql_raw_row[sparql_var])
     77 
     78     infobox_filled = infobox_template.format(i+1, **sparql_formatted_row)

<ipython-input-1-7e91dac864e6> in format_value(sparql_var, sparql_value)
     50             elems_wikilinked = []
     51             for elem in elems:
---> 52                 elems_wikilinked.append(add_wikilink(elem))
     53             return ', '.join(elems_wikilinked)
     54         elif sparql_var == 'titel_im_klexikon':

<ipython-input-1-7e91dac864e6> in add_wikilink(title)
     62 def add_wikilink(title):
     63     page = pywikibot.Page(wiki_site, title)
---> 64     if page.exists():
     65         return '[[{}]]'.format(title)
     66     else:

/srv/paws/pwb/pywikibot/page.py in exists(self)
    802         @rtype: bool
    803         """
--> 804         return self.pageid > 0
    805 
    806     @property

/srv/paws/pwb/pywikibot/page.py in pageid(self)
    283         """
    284         if not hasattr(self, '_pageid'):
--> 285             self.site.loadpageinfo(self)
    286         return self._pageid
    287 

/srv/paws/pwb/pywikibot/site.py in loadpageinfo(self, page, preload)
   2993                                 titles=title.encode(self.encoding()),
   2994                                 inprop=inprop)
-> 2995         self._update_page(page, query)
   2996 
   2997     @need_extension('GeoData')

/srv/paws/pwb/pywikibot/site.py in _update_page(self, page, query)
   2973 
   2974     def _update_page(self, page, query):
-> 2975         for pageitem in query:
   2976             if not self.sametitle(pageitem['title'],
   2977                                   page.title(with_section=False)):

/srv/paws/pwb/pywikibot/data/api.py in __iter__(self)
   2981         """Yield results."""
   2982         self._previous_dicts = {}
-> 2983         for result in super(PropertyGenerator, self).__iter__():
   2984             yield result
   2985         for result in self._previous_dicts.values():

/srv/paws/pwb/pywikibot/data/api.py in __iter__(self)
   2805                 prev_limit, new_limit, previous_result_had_data)
   2806             if not hasattr(self, 'data'):
-> 2807                 self.data = self.request.submit()
   2808             if not self.data or not isinstance(self.data, dict):
   2809                 pywikibot.debug(

/srv/paws/pwb/pywikibot/data/api.py in submit(self)
   1983                                                                    paramstring)
   1984             rawdata, use_get = self._http_request(use_get, uri, body, headers,
-> 1985                                                   paramstring)
   1986             if rawdata is None:
   1987                 continue

/srv/paws/pwb/pywikibot/data/api.py in _http_request(self, use_get, uri, body, headers, paramstring)
   1742                 site=self.site, uri=uri,
   1743                 method='GET' if use_get else 'POST',
-> 1744                 body=body, headers=headers)
   1745         except Server504Error:
   1746             pywikibot.log('Caught HTTP 504 error; retrying')

/srv/paws/pwb/pywikibot/tools/__init__.py in wrapper(*__args, **__kw)
   1736                              cls, depth)
   1737                     del __kw[old_arg]
-> 1738             return obj(*__args, **__kw)
   1739 
   1740         if not __debug__:

/srv/paws/pwb/pywikibot/comms/http.py in request(site, uri, method, params, body, headers, data, **kwargs)
    321 
    322     baseuri = site.base_url(uri)
--> 323     r = fetch(baseuri, method, params, body, headers, **kwargs)
    324     site.throttle.retry_after = int(r.response_headers.get('retry-after', 0))
    325     return r.text

/srv/paws/pwb/pywikibot/comms/http.py in fetch(uri, method, params, body, headers, default_error_handling, use_fake_user_agent, data, **kwargs)
    519             headers['user-agent'] = fake_user_agent()
    520 
--> 521     request = _enqueue(uri, method, params, body, headers, **kwargs)
    522     # if there's no data in the answer we're in trouble
    523     assert request._data is not None

/srv/paws/pwb/pywikibot/comms/http.py in _enqueue(uri, method, params, body, headers, data, **kwargs)
    475     request = threadedhttp.HttpRequest(
    476         uri, method, params, body, all_headers, callbacks, **kwargs)
--> 477     _http_process(session, request)
    478     return request
    479 

/srv/paws/pwb/pywikibot/comms/http.py in _http_process(session, http_request)
    388                                    headers=headers, auth=auth, timeout=timeout,
    389                                    verify=not ignore_validation,
--> 390                                    **http_request.kwargs)
    391     except Exception as e:
    392         http_request.data = e

/srv/paws/lib/python3.6/site-packages/requests/sessions.py in request(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)
    531         }
    532         send_kwargs.update(settings)
--> 533         resp = self.send(prep, **send_kwargs)
    534 
    535         return resp

/srv/paws/lib/python3.6/site-packages/requests/sessions.py in send(self, request, **kwargs)
    644 
    645         # Send the request
--> 646         r = adapter.send(request, **kwargs)
    647 
    648         # Total elapsed time of the request (approximately)

/srv/paws/lib/python3.6/site-packages/requests/adapters.py in send(self, request, stream, timeout, verify, cert, proxies)
    447                     decode_content=False,
    448                     retries=self.max_retries,
--> 449                     timeout=timeout
    450                 )
    451 

/srv/paws/lib/python3.6/site-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
    601                                                   timeout=timeout_obj,
    602                                                   body=body, headers=headers,
--> 603                                                   chunked=chunked)
    604 
    605             # If we're going to release the connection in ``finally:``, then

/srv/paws/lib/python3.6/site-packages/urllib3/connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
    381                 # Python 3
    382                 try:
--> 383                     httplib_response = conn.getresponse()
    384                 except Exception as e:
    385                     # Remove the TypeError from the exception chain in Python 3;

/usr/lib/python3.6/http/client.py in getresponse(self)
   1329         try:
   1330             try:
-> 1331                 response.begin()
   1332             except ConnectionError:
   1333                 self.close()

/usr/lib/python3.6/http/client.py in begin(self)
    295         # read until we get a non-100 response
    296         while True:
--> 297             version, status, reason = self._read_status()
    298             if status != CONTINUE:
    299                 break

/usr/lib/python3.6/http/client.py in _read_status(self)
    256 
    257     def _read_status(self):
--> 258         line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
    259         if len(line) > _MAXLINE:
    260             raise LineTooLong("status line")

/usr/lib/python3.6/socket.py in readinto(self, b)
    584         while True:
    585             try:
--> 586                 return self._sock.recv_into(b)
    587             except timeout:
    588                 self._timeout_occurred = True

/usr/lib/python3.6/ssl.py in recv_into(self, buffer, nbytes, flags)
   1010                   "non-zero flags not allowed in calls to recv_into() on %s" %
   1011                   self.__class__)
-> 1012             return self.read(nbytes, buffer)
   1013         else:
   1014             return socket.recv_into(self, buffer, nbytes, flags)

/usr/lib/python3.6/ssl.py in read(self, len, buffer)
    872             raise ValueError("Read on closed or unwrapped SSL socket.")
    873         try:
--> 874             return self._sslobj.read(len, buffer)
    875         except SSLError as x:
    876             if x.args[0] == SSL_ERROR_EOF and self.suppress_ragged_eofs:

/usr/lib/python3.6/ssl.py in read(self, len, buffer)
    629         """
    630         if buffer is not None:
--> 631             v = self._sslobj.read(len, buffer)
    632         else:
    633             v = self._sslobj.read(len)

KeyboardInterrupt: