PAWS demo of qwikidata

`qwikidata` is a Python package for interacting with Wikidata entities.

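The important links are:

- Source code: https://github.com/kensho-technologies/qwikidata
- Documentation: https://qwikidata.readthedocs.io
- PyPI: https://pypi.org/project/qwikidata/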

Let's pip install it and have some fun.

In [1]:
!pip install qwikidata
Collecting qwikidata
  Downloading https://files.pythonhosted.org/packages/a9/40/4273aaaacd7269f80d8ce475aff7115ab8fce31488ba08f3eaca776d110a/qwikidata-0.4.0-py3-none-any.whl
Requirement already satisfied: requests in /srv/paws/lib/python3.6/site-packages (from qwikidata)
Collecting mypy-extensions (from qwikidata)
  Downloading https://files.pythonhosted.org/packages/5c/eb/975c7c080f3223a5cdaff09612f3a5221e4ba534f7039db34c35d95fa6a5/mypy_extensions-0.4.3-py2.py3-none-any.whl
Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /srv/paws/lib/python3.6/site-packages (from requests->qwikidata)
Requirement already satisfied: certifi>=2017.4.17 in /srv/paws/lib/python3.6/site-packages (from requests->qwikidata)
Requirement already satisfied: chardet<3.1.0,>=3.0.2 in /srv/paws/lib/python3.6/site-packages (from requests->qwikidata)
Requirement already satisfied: idna<2.9,>=2.5 in /srv/paws/lib/python3.6/site-packages (from requests->qwikidata)
Installing collected packages: mypy-extensions, qwikidata
Successfully installed mypy-extensions-0.4.3 qwikidata-0.4.0
In [2]:
import qwikidata 
In [38]:
from qwikidata.linked_data_interface import get_entity_dict_from_api
from qwikidata.entity import WikidataItem, WikidataProperty, WikidataLexeme
from qwikidata.sparql import get_subclasses_of_item, return_sparql_query_results

Linked Data Interface

Items

In [26]:
# Create an item representing "Douglas Adams".
Q_DOUGLAS_ADAMS = "Q42"  # Wikidata item id used for the lookup
# Fetch the entity as a plain dict through the Linked Data Interface helper.
q42_dict = get_entity_dict_from_api(Q_DOUGLAS_ADAMS)
# Wrap the dict in a WikidataItem object.
q42 = WikidataItem(q42_dict)
# Bare last expression so the notebook displays the item's repr.
q42
Out[26]:
WikidataItem(label=Douglas Adams, id=Q42, description=English writer and humorist, aliases=['Douglas Noel Adams', 'Douglas Noël Adams', 'Douglas N. Adams'], enwiki_title=Douglas Adams)
In [7]:
# Collect the item's truthy claim groups; the result is indexed by property id
# (e.g. 'P31') in the following cell.
claims = q42.get_truthy_claim_groups()
In [11]:
# Select the claim group for property P31 ("instance of").
p31_claims = claims['P31']
In [17]:
# Drill into the first P31 claim: main snak -> datavalue -> raw value.
# For Q42 the displayed value is a dict identifying item Q5.
p31_claims[0].mainsnak.datavalue.value
Out[17]:
{'entity-type': 'item', 'numeric-id': 5, 'id': 'Q5'}

Properties

In [27]:
# Create a property representing "subclass of".
P_SUBCLASS_OF = "P279"  # Wikidata property id used for the lookup
# The same helper used for items also fetches property entities.
p279_dict = get_entity_dict_from_api(P_SUBCLASS_OF)
# Wrap the dict in a WikidataProperty object.
p279 = WikidataProperty(p279_dict)
# Bare last expression so the notebook displays the property's repr.
p279
Out[27]:
WikidataProperty(label=subclass of, id=P279, description=all instances of these items are instances of those items; this item is a class (subset) of that item. Not to be confused with P31 (instance of), aliases=['rdfs:subClassOf', 'hyponym of', 'has superclass', 'is also a', 'subtype of', 'is a subtype of', 'subcategory of', 'is a category of', 'is thereby also a', 'is necessarily also a', 'whose instances are among', 'whose instances ⊆ those of', '⊆', '⊂', 'is a type of', 'is a class of', 'subset of', 'type of', 'form of'])

Lexemes

In [30]:
# Create a lexeme representing the English noun "bank".
L_BANK = "L3354"  # Wikidata lexeme id used for the lookup
# The same helper used for items and properties also fetches lexeme entities.
l3354_dict = get_entity_dict_from_api(L_BANK)
# Wrap the dict in a WikidataLexeme object.
l3354 = WikidataLexeme(l3354_dict)
# Bare last expression so the notebook displays the lexeme's repr
# (lemma, language, lexical category, forms and senses).
l3354
Out[30]:
WikidataLexeme(lemma=bank, id=L3354, language=Q1860, lexical_category=Q1084, forms=[WikidataForm(form_id=L3354-F1, representation=bank, grammatical_features=['Q110786']), WikidataForm(form_id=L3354-F2, representation=banks, grammatical_features=['Q146786'])], senses=[WikidataSense(sense_id=L3354-S1, gloss=financial institution), WikidataSense(sense_id=L3354-S2, gloss=dry ground next to a river), WikidataSense(sense_id=L3354-S3, gloss=shallow ocean area)])
In [32]:
# Show the lexeme's lemma (displayed as the string 'bank').
l3354.get_lemma()
Out[32]:
'bank'
In [36]:
# List the senses recorded for this lexeme; each is shown with its id and gloss.
l3354.get_senses()
Out[36]:
[WikidataSense(sense_id=L3354-S1, gloss=financial institution),
 WikidataSense(sense_id=L3354-S2, gloss=dry ground next to a river),
 WikidataSense(sense_id=L3354-S3, gloss=shallow ocean area)]

Wikidata SPARQL Query Service

In [39]:
# Send any SPARQL query to the Wikidata Query Service and get the full result back.
# Here we use an example that counts the number of humans: every ?item that is
# an instance of (P31) Q5, or of anything reachable from Q5 through zero or
# more "subclass of" (P279) links.
# NOTE: this query can take a few minutes.
sparql_query = """
SELECT (COUNT(?item) AS ?count)
WHERE {
        ?item wdt:P31/wdt:P279* wd:Q5 .
}
"""
# The response is kept in `res` and inspected in the next cell.
res = return_sparql_query_results(sparql_query)
In [40]:
# Inspect the raw response dict. The count is found under
# results -> bindings[0] -> count -> value and is delivered as a string.
res
Out[40]:
{'head': {'vars': ['count']},
 'results': {'bindings': [{'count': {'datatype': 'http://www.w3.org/2001/XMLSchema#integer',
     'type': 'literal',
     'value': '5507432'}}]}}
In [42]:
# use convenience function to get subclasses of an item as a list of item ids
Q_RIVER = "Q4022"
subclasses_of_river = get_subclasses_of_item(Q_RIVER)
In [43]:
# Display the returned ids; note that the queried item itself ('Q4022') is
# the first entry of the list.
subclasses_of_river
Out[43]:
['Q4022',
 'Q100649',
 'Q159675',
 'Q591942',
 'Q653247',
 'Q794428',
 'Q1074069',
 'Q1140845',
 'Q1299258',
 'Q1531228',
 'Q1720683',
 'Q1746748',
 'Q1986504',
 'Q2048585',
 'Q2230783',
 'Q2375084',
 'Q2557367',
 'Q3073652',
 'Q3196604',
 'Q4176368',
 'Q4366834',
 'Q4392849',
 'Q4392856',
 'Q4733366',
 'Q4879406',
 'Q7580767',
 'Q8026426',
 'Q10670417',
 'Q10875106',
 'Q11553102',
 'Q12099220',
 'Q12149663',
 'Q16465938',
 'Q16638716',
 'Q18915937',
 'Q20529834',
 'Q21079327',
 'Q21573828',
 'Q27067659',
 'Q42122394',
 'Q51205815',
 'Q60061427',
 'Q60823417',
 'Q71184245',
 'Q12048869',
 'Q3058945',
 'Q21504956']
In [ ]: