# Predicate-usage statistics queried from the Wikidata SPARQL endpoint: http://query.wikidata.org/sparql
# Follows the gastrodon DBpedia example: https://github.com/paulhoule/gastrodon/blob/master/notebooks/remote/Querying%20DBpedia.ipynb
import matplotlib.pyplot as plt
import sys
from gastrodon import RemoteEndpoint,QName,ttl,URIRef,inline
import pandas as pd
# Widen pandas' console rendering (total width and per-column width).
pd.set_option("display.width", 120)
pd.set_option("display.max_colwidth", 100)

# Namespace prefixes handed to the DBpedia endpoint.
prefixes = inline("""
    @prefix : <http://dbpedia.org/resource/> .
    @prefix on: <http://dbpedia.org/ontology/> .
    @prefix pr: <http://dbpedia.org/property/> .
""").graph

# Public DBpedia SPARQL service, restricted to the DBpedia default graph.
endpoint = RemoteEndpoint(
    "http://dbpedia.org/sparql/",
    base_uri="http://dbpedia.org/resource/",
    default_graph="http://dbpedia.org",
    prefixes=prefixes,
)

# Total number of triples visible through the endpoint; the query yields a
# single row whose "count" column holds the figure.
dbpedia_total = endpoint.select("""
    SELECT (COUNT(*) AS ?count) { ?s ?p ?o .}
""")
count = dbpedia_total.at[0, "count"]
count
438336517
# -------------------------
predicates["dist"]=predicates["count"].cumsum()/count
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-4-649277d3bda6> in <module>()
----> 1 predicates["dist"]=predicates["count"].cumsum()/count

NameError: name 'predicates' is not defined
%matplotlib inline
predicates["dist"].plot()
# ------------------------------------------------
# Namespace prefixes handed to the Wikidata endpoint.
prefixes = inline("""
    @prefix wd: <http://www.wikidata.org/entity/> .
    @prefix wdt: <http://www.wikidata.org/prop/direct/> .
    @prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
""").graph

# From here on `endpoint` refers to the Wikidata query service instead of DBpedia.
endpoint = RemoteEndpoint(
    "http://query.wikidata.org/sparql",
    prefixes=prefixes,
)

# Total number of triples visible through the endpoint; the query yields a
# single row whose "count" column holds the figure.
wikidata_total = endpoint.select("""
    SELECT (COUNT(*) AS ?count) { ?s ?p ?o .}
""")
count = wikidata_total.at[0, "count"]
count
4376568932
# Count how often each predicate occurs on subjects that carry a
# `wdt:P131 wd:Q72` statement, ordered from most to least used.
q72_predicate_query = """
    SELECT ?p (COUNT(*) AS ?count) { ?s ?p ?o . ?s wdt:P131 wd:Q72 } GROUP BY ?p ORDER BY DESC(?count)
"""
predicates = endpoint.select(q72_predicate_query)
predicates
count
p
rdfs:label 15450
http://schema.org/description 14197
http://www.w3.org/2004/02/skos/core#altLabel 5806
wdt:P131 4380
http://www.wikidata.org/prop/P131 4380
wdt:P31 3778
http://www.wikidata.org/prop/P31 3778
http://wikiba.se/ontology#identifiers 3675
http://schema.org/version 3675
http://schema.org/dateModified 3675
http://wikiba.se/ontology#statements 3675
http://wikiba.se/ontology#sitelinks 3675
wdt:P17 3648
http://www.wikidata.org/prop/P17 3648
wdt:P625 3153
http://www.wikidata.org/prop/P625 3153
wdt:P1705 2510
http://www.wikidata.org/prop/P1705 2510
wdt:P1945 2509
http://www.wikidata.org/prop/P1945 2509
http://www.wikidata.org/prop/P1082 1501
wdt:P186 541
http://www.wikidata.org/prop/P186 541
wdt:P170 484
http://www.wikidata.org/prop/P170 484
http://www.wikidata.org/prop/P571 435
wdt:P571 433
http://www.wikidata.org/prop/P18 405
wdt:P18 403
wdt:P136 396
... ...
wdt:P631 1
http://www.wikidata.org/prop/P631 1
wdt:P38 1
http://www.wikidata.org/prop/P38 1
wdt:P3219 1
http://www.wikidata.org/prop/P3219 1
wdt:P1174 1
http://www.wikidata.org/prop/P1174 1
wdt:P1684 1
http://www.wikidata.org/prop/P1684 1
wdt:P1621 1
http://www.wikidata.org/prop/P1621 1
wdt:P1454 1
http://www.wikidata.org/prop/P1454 1
wdt:P452 1
http://www.wikidata.org/prop/P452 1
wdt:P729 1
http://www.wikidata.org/prop/P729 1
wdt:P950 1
wdt:P1005 1
wdt:P4146 1
wdt:P4702 1
http://www.wikidata.org/prop/P950 1
http://www.wikidata.org/prop/P1005 1
http://www.wikidata.org/prop/P4146 1
http://www.wikidata.org/prop/P4702 1
wdt:P3450 1
http://www.wikidata.org/prop/P3450 1
wdt:P51 1
http://www.wikidata.org/prop/P51 1

356 rows × 1 columns

predicates_ocount= predicates.sort_values(by=['count'],ascending=False)
mostused_50predicates = predicates_ocount.head(50)
%matplotlib inline
 
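# Out: array([[<matplotlib.axes._subplots.AxesSubplot object at 0x7f308a506c88>]], dtype=object)
# NOTE(review): this recorded output looks like the return value of a DataFrame.hist() call,
# but no plotting statement for mostused_50predicates is present above; confirm and restore it.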
 
 
# ---------------------------