# Imports modules almost always used
from pprint import pprint

# Third-party: HTTP client for the query, pandas to build/print the table
import pandas
import requests

# Imports modules needed for a SPARQL query
import helper

# Optional plotting modules (currently unused)
# import matplotlib as mpl
# import matplotlib.pyplot as plt
# Define a SPARQL query.
# It selects every US president that has a recorded cause of death, together
# with the dates of birth and death.  The projected variable names must match
# the keys read from each result binding further down ('president', 'cause',
# 'dob'), so the SELECT list uses ?president rather than an unbound variable.
query = '''PREFIX wikibase: <http://wikiba.se/ontology#>
PREFIX wd: <http://www.wikidata.org/entity/>
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

SELECT ?president ?cause ?dob ?date_of_death WHERE {
    ?president_id wdt:P39 wd:Q11696 .           # ?president_id has role: President of United States
    ?president_id wdt:P509 ?cause_id .          # ?president_id has cause of death: ?cause_id
    ?president_id wdt:P569 ?dob .               # ?president_id has date of birth ?dob
    ?president_id wdt:P570 ?date_of_death .     # ?president_id has date of death ?date_of_death

    OPTIONAL {
        ?president_id rdfs:label ?president filter (lang(?president) = "it") . # ?president_id has Italian label ?president
    }
    OPTIONAL {
        ?cause_id rdfs:label ?cause filter (lang(?cause) = "en") .         # ?cause_id has english label ?cause
    }
}'''

# SPARQL endpoint
url = 'https://query.wikidata.org/bigdata/namespace/wdq/sparql'
# Get the data in JSON.
# A timeout keeps the script from hanging indefinitely on a stalled
# connection, and raise_for_status() reports HTTP errors directly instead of
# letting them surface later as a confusing JSON decoding failure.
response = requests.get(
    url,
    params={'query': query, 'format': 'json'},
    timeout=60,
)
response.raise_for_status()
data = response.json()

# The JSON response is structured this way:
#   {'head':    {'vars': ['SPARQL variable', ...]},
#    'results': {'bindings': [{'SPARQL variable': {'type': 'literal',
#                                                  'value': "SPARQL variable value for this result",
#                                                  'xml:lang': 'en'},
#                              ...},
#                             ...]}}

# Build one table row per result binding.
# Both labels are requested inside OPTIONAL clauses of the query, so a binding
# may come back without 'president' or 'cause'; .get() stores None for a
# missing label instead of aborting the whole run with a KeyError.
# 'dob' comes from a mandatory triple pattern, so it is indexed directly.
results = [
    {
        'Name': item.get('president', {}).get('value'),
        'Cause of death': item.get('cause', {}).get('value'),
        'Date of birth': item['dob']['value'],
        #'date of death': item['date_of_death']['value']
    }
    for item in data['results']['bindings']
]
# Print the table
infosize = 12  # NOTE(review): not referenced anywhere in the visible script — confirm before removing
dataframe = pandas.DataFrame(results)
print(len(dataframe))
# Outside a notebook a bare expression displays nothing, so print the first
# rows explicitly.
print(dataframe.head())
# Example output captured from an earlier run (kept as a comment; left as bare
# text it made the file unparseable):
# 42
# Cause of death Date of birth Name
# 0 Alzheimer's disease 1911-02-06T00:00:00Z Ronald Reagan
# 1 cholera 1784-11-24T00:00:00Z Zachary Taylor
# 2 cholera 1795-11-02T00:00:00Z James Knox Polk
# 3 myocardial infarction 1872-07-04T00:00:00Z Calvin Coolidge
# 4 myocardial infarction 1822-10-04T00:00:00Z Rutherford Hayes
# Print the data: pretty-print the full decoded JSON response held in `data`
pprint(data)