%%time
import csv
from urllib.request import urlopen
# import codecs
# from tqdm import tqdm
# from tqdm._tqdm_notebook import tqdm_notebook
import numpy as np
import pandas as pd
#### Pandas display defaults: show up to 2000 rows / 500 columns in notebook output
pd.options.display.max_rows = 2000
pd.options.display.max_columns = 500
##### Setup for embedding SPARQL queries in the notebook
import urllib.parse
from IPython.display import IFrame
# Base URL of the Wikidata Query Service embedded-result viewer
baseurl = 'https://query.wikidata.org/embed.html#'
def wdq(query='', width=800, height=500):
    """Return an IFrame embedding the Wikidata Query Service view of *query*."""
    embed_url = baseurl + urllib.parse.quote(query)
    return IFrame(embed_url, width=width, height=height)
# HTTP client used for the REST endpoints below
import requests
# Wikidata SPARQL endpoint (legacy "bigdata/namespace/wdq" URL)
wikidata_api_url = 'https://query.wikidata.org/bigdata/namespace/wdq/sparql'
# Met Museum public collection API; append an Object ID to fetch one object's JSON
met_api_url = 'https://collectionapi.metmuseum.org/public/collection/v1/objects/'
# Google Sheets CSV export of the Met->Wikidata crosswalk sheet.
# NOTE(review): the URL ends with a bare '&id' (no value) -- looks truncated; verify the export link.
crosswalk_sheet_url = 'https://docs.google.com/spreadsheets/d/1WmXW2CjlLidcUXzahQsB3HjUVvECns4xDyIt-Hw-jW8/export?format=csv&id'
# Column dtypes for the Met Museum open-access CSV, grouped by type.
# Passed to pd.read_csv(dtype=...) so every column parses predictably.
_MET_BOOL_COLS = ('Is Highlight', 'Is Public Domain')
_MET_INT_COLS = ('Object ID', 'Object Begin Date', 'Object End Date')
_MET_STR_COLS = (
    'Object Number', 'Department', 'Object Name', 'Title', 'Culture',
    'Period', 'Dynasty', 'Reign', 'Portfolio', 'Artist Role',
    'Artist Prefix', 'Artist Display Name', 'Artist Display Bio',
    'Artist Suffix', 'Artist Alpha Sort', 'Artist Nationality',
    'Artist Begin Date', 'Artist End Date', 'Object Date', 'Medium',
    'Dimensions', 'Credit Line', 'Geography Type', 'City', 'State',
    'County', 'Country', 'Region', 'Subregion', 'Locale', 'Locus',
    'Excavation', 'River', 'Classification', 'Rights and Reproduction',
    'Link Resource', 'Metadata Date', 'Repository', 'Tags',
)
metdtypes = {column: str for column in _MET_STR_COLS}
metdtypes.update(dict.fromkeys(_MET_BOOL_COLS, bool))
metdtypes.update(dict.fromkeys(_MET_INT_COLS, int))
# Use local copy of CSV or HDF file for speed
databasefile = 'metmuseum/MetObjects-20190903'
import os.path
file = databasefile + '.h5'
if os.path.isfile(file):
    # Read the HDF file into a pandas dataframe.
    # HDF5 stores dtypes with the data, so no dtype coercion is needed here;
    # read_hdf does not accept dtype/low_memory kwargs (passing them raises TypeError).
    print('Working on HDF')
    df = pd.read_hdf(file)
else:
    file = databasefile + '.csv'
    if os.path.isfile(file):
        print('Working on CSV')
        df = pd.read_csv(file, dtype=metdtypes, low_memory=False)
    else:
        # Fail fast with a clear message instead of a NameError on `df` later.
        raise FileNotFoundError(
            'Neither ' + databasefile + '.h5 nor ' + databasefile + '.csv found; '
            'download the Met open-access dump first.'
        )
# Make a new shallow/efficient copy of dataframe (data in place) with just highlights
# hdf = df[df['Is Highlight'] == True].copy()