#!pip install wikitables

import io

import mwparserfromhell as mwp
import numpy as np
import pandas as pd
import requests
from wikitables import import_tables, WikiTable
from wikitables.util import ftag
Collecting wikitables
  Using cached https://files.pythonhosted.org/packages/1a/ad/19f04ac31b80295dd39234196245c475005fb3d76ad182ccf16e0578d6d5/wikitables-0.5.4-py2.py3-none-any.whl
Requirement already satisfied: mwparserfromhell>=0.4.3 in /srv/paws/lib/python3.6/site-packages (from wikitables)
Requirement already satisfied: requests>=2.9.1 in /srv/paws/lib/python3.6/site-packages (from wikitables)
Requirement already satisfied: idna<2.9,>=2.5 in /srv/paws/lib/python3.6/site-packages (from requests>=2.9.1->wikitables)
Requirement already satisfied: certifi>=2017.4.17 in /srv/paws/lib/python3.6/site-packages (from requests>=2.9.1->wikitables)
Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /srv/paws/lib/python3.6/site-packages (from requests>=2.9.1->wikitables)
Requirement already satisfied: chardet<3.1.0,>=3.0.2 in /srv/paws/lib/python3.6/site-packages (from requests>=2.9.1->wikitables)
Installing collected packages: wikitables
Successfully installed wikitables-0.5.4
# Modified version of the wikitables import function that reads from a MediaWiki API URL and page name

# Title of the wiki page whose tables are imported below.
crosswalk_page = 'User:Fuzheado/Met/glamingest/objectName'
# Wikidata's MediaWiki API endpoint; the page above is requested from it.
wikidata_api_url = 'https://www.wikidata.org/w/api.php'

def import_tables_from_url(api_url, title, timeout=30):
    """Fetch a page through a MediaWiki API endpoint and parse its tables.

    Adapted from the wikitables importer so that the API endpoint is chosen
    by the caller instead of being derived from a language code.

    Args:
        api_url: URL of the MediaWiki ``api.php`` endpoint to query.
        title: Title of the page whose wikitext should be scanned for tables.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        A list of ``WikiTable`` objects, one per table tag found in the
        page's wikitext, each named ``"<page title>[<index>]"``.

    Raises:
        requests.HTTPError: If the API answers with an error status code.
        ArticleNotFound: If the API returns no page for ``title``.
    """
    params = {
        'action': 'query',
        'format': 'json',
        'prop': 'revisions',
        'rvprop': 'content',
        'titles': title,
    }

    # A timeout keeps an unresponsive server from blocking the caller forever.
    response = requests.get(api_url, params=params, timeout=timeout)
    response.raise_for_status()
    pages = response.json()['query']['pages']

    # Results are keyed by page id; a single title was requested, so use the
    # first key. An empty mapping is treated the same as a missing page.
    pageid = next(iter(pages), '-1')
    if pageid == '-1':
        # Imported here because the file's top-level imports do not bring
        # this exception class into scope.
        from wikitables.client import ArticleNotFound
        raise ArticleNotFound('no matching articles returned')

    page = pages[pageid]
    body = page['revisions'][0]['*']

    # Parse the wikitext and keep only the table tags.
    raw_tables = mwp.parse(body).filter_tags(matches=ftag('table'))

    return [
        WikiTable('%s[%s]' % (page['title'], idx), table)
        for idx, table in enumerate(raw_tables)
    ]
# Download the crosswalk page and parse every table in its wikitext.
tables = import_tables_from_url(wikidata_api_url, crosswalk_page)
# Load the first table into a DataFrame. The JSON text is wrapped in a
# StringIO buffer because newer pandas releases deprecate passing a literal
# JSON string straight to read_json.
df = pd.read_json(io.StringIO(tables[0].json()))
# Show the column headers of the first table.
print(tables[0].head)
['Object Name', 'QID', 'extrastatement', 'extraqualifier', 'notes']
# Preview 10 randomly selected rows of the DataFrame.
df.sample(10)
Object Name QID extraqualifier extrastatement notes
83 Print Q11060274
62 Koto Q289037
60 Dagger (Kindjal) with sheath Q960281
127 "New Year's Bottle" with cow goddess Nebethetepet Q80228
95 Sesando Q1585780
104 Suit Q614806
67 Music stand Q930133
77 Photograph; Drawing Q125191 Q93184
89 Saber with scabbard Q661140 Q49068756
110 Tombak Q3574843
# Cells that are empty or contain only whitespace should count as missing:
# replace every such string with NaN, then preview 10 random rows of the result.
newdf = df.replace(to_replace=r'^\s*$', value=np.nan, regex=True)
newdf.sample(n=10)
Object Name QID extraqualifier extrastatement notes
124 Cinerary urn, fragment Q960844 NaN Q11086567 NaN
114 Vase Q191851 NaN NaN NaN
12 Bottle Q80228 NaN NaN NaN
37 Ennanga Q5375216 NaN NaN NaN
125 Cinerary urn with lid Q960844 NaN Q2488579 NaN
1 Ancestor pole NaN NaN NaN NaN
13 Bust Q17489160 NaN NaN NaN
30 Damaru Q1158229 NaN NaN NaN
3 Apse Q170463 NaN NaN NaN
43 Evening suit Q614806 NaN Q1908472 NaN
# Print each parsed row of the first table, one per line.
crosswalk_rows = tables[0].rows
for row in crosswalk_rows:
    print(row)
{'Object Name': Ancestor figure, 'QID': , 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Ancestor pole, 'QID': , 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Andiron, 'QID': Q262026, 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Apse, 'QID': Q170463, 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Armor, 'QID': Q50414774, 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Bala, 'QID': Q789141, 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Ball gown, 'QID': Q635784, 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Baseball card, 'QID': Q1190417, 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Blanket, 'QID': Q5852, 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Blouse, 'QID': Q152563, 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Bondjo, 'QID': Q66715406, 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Bookcase, 'QID': Q215857, 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Bottle, 'QID': Q80228, 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Bust, 'QID': Q17489160, 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Cabinet, 'QID': Q2741056, 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Caftan, 'QID': Q837185, 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Canoe prow ornament, 'QID': , 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Cape, 'QID': Q1034198, 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Chadri, 'QID': Q3516569, 'extrastatement': , 'extraqualifier': , 'notes': possible overlap with burka}
{'Object Name': Chair, 'QID': Q15026, 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Chegah-Skah-Hdah, 'QID': Q78916699, 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Chongak Kayagum, 'QID': Q717407, 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Clock, 'QID': Q376, 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Coat, 'QID': Q152574, 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Coatdress, 'QID': Q16949531, 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Container, 'QID': Q987767, 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Corset, 'QID': Q180225, 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Courting Flute (siyotanka), 'QID': , 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Dadabuan, 'QID': Q1156506, 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Dado panel, 'QID': , 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Damaru, 'QID': Q1158229, 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Densho, 'QID': , 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Dining room, 'QID': Q661199, 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Drawing, 'QID': Q93184, 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Dress, 'QID': Q200539, 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Drum, 'QID': Q11404, 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Electric clock, 'QID': Q2607895, 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Ennanga, 'QID': Q5375216, 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Ensemble, 'QID': Q1497375, 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Eroro, 'QID': Q11337289, 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Evening blouse, 'QID': Q152563, 'extrastatement': Q1908472, 'extraqualifier': , 'notes': }
{'Object Name': Evening dress, 'QID': Q2144456, 'extrastatement': Q1908472, 'extraqualifier': , 'notes': }
{'Object Name': Evening ensemble, 'QID': Q29353484, 'extrastatement': Q1908472, 'extraqualifier': , 'notes': }
{'Object Name': Evening suit, 'QID': Q614806, 'extrastatement': Q1908472, 'extraqualifier': , 'notes': }
{'Object Name': Ewer, 'QID': Q404785, 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Female figure, 'QID': , 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Figure, 'QID': , 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Finial, 'QID': Q1406758, 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Gable figure, 'QID': , 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Ghanti (bell), 'QID': , 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Goqing, 'QID': , 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Guitar, 'QID': Q6607, 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Head, 'QID': Q23640, 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Headdress, 'QID': Q28972621, 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': House post, 'QID': , 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': House post figure, 'QID': , 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Inkwell, 'QID': Q1160546, 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Jacket, 'QID': Q849964, 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Jar, 'QID': Q1207302, 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Jumpsuit, 'QID': Q16472452, 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Dagger (Kindjal) with sheath, 'QID': Q960281, 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Kora, 'QID': Q585969, 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Koto, 'QID': Q289037, 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Male figure, 'QID': , 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Mask, 'QID': Q161524, 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Mi-gyaung, 'QID': Q1789755, 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Mother and child figure, 'QID': , 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Music stand, 'QID': Q930133, 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Necklace, 'QID': Q189299, 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Nyonganyonga, 'QID': , 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Opera coat, 'QID': Q7096642, 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Painting, 'QID': Q3305213, 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Painting, miniature, 'QID': Q3305213, 'extrastatement': Q282129, 'extraqualifier': , 'notes': }
{'Object Name': Pair of figures, 'QID': , 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Pantsuit, 'QID': Q1630337, 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Photogram, 'QID': Q841954, 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Photograph, 'QID': , 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Photograph; Drawing, 'QID': Q125191, 'extrastatement': Q93184, 'extraqualifier': , 'notes': }
{'Object Name': Polaroid, 'QID': , 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Porringer, 'QID': Q7230377, 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Postcard, 'QID': Q192425, 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Power figure, 'QID': , 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Prestige stool, 'QID': Q33163, 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Print, 'QID': Q11060274, 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Prototype tea service, 'QID': Q3408351, 'extrastatement': Q207977, 'extraqualifier': , 'notes': }
{'Object Name': Raven rattle, 'QID': Q78915916, 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Rayograph; Photogram, 'QID': Q841954, 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Ritual board, 'QID': , 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Ritual vessel, 'QID': Q17379796, 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Saber with scabbard, 'QID': , 'extrastatement': Q49068756, 'extraqualifier': Q661140, 'notes': }
{'Object Name': Sandals, 'QID': Q131704, 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Śankh, 'QID': Q2479786, 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Sarangi, 'QID': Q608650, 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Saùng-Gauk, 'QID': Q339306, 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Sculpture, 'QID': Q860861, 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Sesando, 'QID': Q1585780, 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Shield, 'QID': Q131559, 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Sho, 'QID': , 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Shō, 'QID': Q1975355, 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Side-blown Trumpet, 'QID': Q55738913, 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Sitar, 'QID': Q229205, 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Skull hook, 'QID': , 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Sō Sām Sāi, 'QID': Q6577044, 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Statue, 'QID': Q179700, 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Suit, 'QID': Q614806, 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Tánggǔ, 'QID': Q7682928, 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Taūs (mayuri), 'QID': Q1440075, 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Tea infuser and strainer, 'QID': Q2142257, 'extrastatement': Q2472574, 'extraqualifier': , 'notes': }
{'Object Name': Tea service, 'QID': Q3408351, 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Tenor Recorder, 'QID': Q2037451, 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Tombak, 'QID': Q3574843, 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Tsii'edo'a'tl, 'QID': Q4778966, 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Tumbler, 'QID': Q16180001, 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Tunic, 'QID': Q201714, 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Vase, 'QID': Q191851, 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Video; Time-based Media, 'QID': , 'extrastatement': Q34508, 'extraqualifier': Q57206278, 'notes': }
{'Object Name': Wall sculpture, 'QID': Q56696652, 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Watercolor, 'QID': Q18761202, 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Whistle, 'QID': Q204917, 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Window, 'QID': Q35473, 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Windows and Paneling, 'QID': Q35473, 'extrastatement': Q1348059, 'extraqualifier': , 'notes': }
{'Object Name': Woodblock print, 'QID': Q28913685, 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Sword (Kora) with scabbard and belt, 'QID': Q781298, 'extrastatement': Q661140, 'extraqualifier': , 'notes': }
{'Object Name': Cinerary urn, 'QID': Q960844, 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Cinerary urn, fragment, 'QID': Q960844, 'extrastatement': Q11086567, 'extraqualifier': , 'notes': }
{'Object Name': Cinerary urn with lid, 'QID': Q960844, 'extrastatement': Q2488579, 'extraqualifier': , 'notes': }
{'Object Name': Cittern, 'QID': Q1093290, 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': "New Year's Bottle" with cow goddess Nebethetepet, 'QID': Q80228, 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Drawing Ornament &amp; Architecture, 'QID': Q93184, 'extrastatement': , 'extraqualifier': , 'notes': }
{'Object Name': Plaque, 'QID': Q4364339, 'extrastatement': , 'extraqualifier': , 'notes': }
 
out