import sys
import json
import csv
import requests 
import pprint
from tqdm import tqdm_notebook
from urllib.parse import quote
from urllib.request import urlopen
from multiprocessing.pool import ThreadPool
from time import time as timer
import time

import pandas as pd
import matplotlib.pyplot as plot
from IPython.display import Image

# Load Azure Custom Vision API endpoint URL and API key
with open('azurecredentials.json') as f:
    data = json.load(f)
PREDICTION_KEY = data['predictionkey']
AZURE_CV_URL   = data['azure_cv_url']
# Set POST request header to JSON and our secret key
headers = {'Content-Type' : 'application/json', 'Prediction-Key' : PREDICTION_KEY }

# Met Museum Collection API object lookup; {} is filled with the Met object ID.
# NOTE(review): endpoint restored from the response field read by metid2image
# ('primaryImageSmall'); the template was previously the bare placeholder '{}'.
metapibaseurl  = 'https://collectionapi.metmuseum.org/public/collection/v1/objects/{}'

# Wikidata API claim lookup; first {} is the QID, second {} the property ID.
# NOTE(review): these two names are used by qid2commonsfile but were not defined
# in this file; values inferred from the claims/mainsnak structure it parses.
WD_API_URL     = 'https://www.wikidata.org/w/api.php?action=wbgetclaims&entity={}&property={}&format=json'
imageproperty  = 'P18'   # Wikidata property holding the Commons image file name

threshold = 0.6   # confidence threshold to exceed for Azure predictions to be returned

maxthreads = 15   # How many parallel threads - WARNING - too many and Azure returns error

# Need to handle Azure error:
# {'statusCode': 429, 'message': 'Rate limit is exceeded. Try again in 1 seconds.'}
# Quotas: see the Azure Custom Vision limits and quotas documentation
maxtries = 20      # maximum number of rate-limited attempts per image
waitinterval = 1   # seconds; the wait grows by this amount after every retry

# Take image URL, feed it to Azure Custom Vision instance
# Return dataframe of results
def image2predictiondf(url):
    """Classify the image at `url` with the Azure Custom Vision endpoint.

    Responses with statusCode 429 (rate limit) are retried up to `maxtries`
    times; the sleep before each retry grows by `waitinterval` seconds.

    Args:
        url: Image URL to submit; surrounding whitespace is ignored.

    Returns:
        A DataFrame with the columns 'tagName' and 'probability', or None
        when `url` is empty, the request or its JSON decoding fails, the
        service answers with anything other than predictions or a 429, or
        the retry limit is reached.
    """
    if not url or not url.strip():
        return None

    # json.dumps escapes quotes/backslashes that would corrupt a hand-built body
    payload = json.dumps({'url': url.strip()})
    # Handle Azure rate limit by adding retry count and wait interval
    tries = 0
    wait  = waitinterval
    while tries < maxtries:
        try:
            # Send POST request to our Azure Custom Vision API
            r = requests.post(AZURE_CV_URL, headers=headers, data=payload).json()
            # Successful predictions returned
            if isinstance(r, dict) and 'predictions' in r:
                return pd.DataFrame(r['predictions'], columns=['tagName', 'probability'])
        except (requests.RequestException, ValueError, TypeError) as e:
            # Best effort: one bad image must not abort the whole batch
            print(url, ' request failed: ', e, file=sys.stderr)
            return None

        # Anything other than a rate-limit answer is a permanent failure
        if not isinstance(r, dict) or r.get('statusCode') != 429:
            return None

        time.sleep(wait)     # wait a bit
        tries += 1           # record another try
        wait += waitinterval # increase wait period by interval in case of retry

    print(url,' retry limit after ',tries ,file=sys.stderr)
    return None

# Given a Commons file name from WD (P18) return full URL
# In: September SAAM-1909.7.13 1.jpg
# Out: the file's full URL as reported by the Commons API, or None
def commonsfile2url (filename):
    """Resolve a Wikimedia Commons file name to the URL of the file itself.

    Args:
        filename: Commons file name without the 'File:' prefix. None or an
            empty string is accepted and yields None.

    Returns:
        The 'url' of the first imageinfo entry of the first returned page
        that has one, or None if the response contains no such entry.

    Raises:
        urllib.error.URLError: if the API request fails.
        ValueError: if the response body is not valid JSON.
    """
    if not filename:
        return None

    # NOTE(review): endpoint prefix restored from the query/pages/imageinfo
    # structure parsed below; the template previously started at '{}'.
    COMMONS_API_URL = ('https://commons.wikimedia.org/w/api.php'
                       '?action=query&titles=File:{}&prop=imageinfo&iiprop=url&format=json')
    cqueryurl = COMMONS_API_URL.format(quote(filename))

    with urlopen(cqueryurl) as newurl:
        response = json.load(newurl)

    # Need to process query►pages►<pageid>►imageinfo►0►url
    pages = (response.get("query") or {}).get("pages") or {}
    for page in pages.values():
        imageinfo = page.get("imageinfo")
        if imageinfo:
            return imageinfo[0].get("url")
    return None

# Convert a Wikidata QID to its Commons image file name via their API
def qid2commonsfile(qid):
    """Return the Commons file name from the first `imageproperty` claim of `qid`.

    Args:
        qid: Wikidata item ID; surrounding whitespace is ignored. None or a
            blank string yields None without issuing a request.

    Returns:
        The file name stored in claims►<imageproperty>►0►mainsnak►datavalue►value
        when that snak's datatype is 'commonsMedia'; otherwise None.

    Raises:
        urllib.error.URLError: if the API request fails.
        ValueError: if the response body is not valid JSON.
    """
    if not qid or not qid.strip():
        return None

    wdurl = WD_API_URL.format(qid.strip(), imageproperty)

    with urlopen(wdurl) as url:
        wd_r = json.load(url)

    # Check if claims has anything in this area
    statements = (wd_r.get("claims") or {}).get(imageproperty) or []
    if not statements:
        return None

    # Check that:
    # claims►P18►0►mainsnak►datatype = 'commonsMedia'
    # claims►P18►0►mainsnak►datavalue►value = <filename>
    mainsnak = statements[0].get("mainsnak") or {}
    if mainsnak.get("datatype") != 'commonsMedia':
        return None

    # Snaks without a concrete value carry no 'datavalue' entry
    filename = (mainsnak.get("datavalue") or {}).get("value")
    return filename or None

def qid2predictionlist(qid):
    """Return `(qid, tags)` for the image attached to a Wikidata item.

    Chains qid2commonsfile -> commonsfile2url -> image2predictiondf and keeps
    the tag names whose probability exceeds `threshold`.

    Args:
        qid: Wikidata item ID.

    Returns:
        A 2-tuple `(qid, tags)`; `tags` is a list of tag names, or None when
        no prediction could be obtained or none exceeds `threshold`. A tuple
        is always returned so callers can unpack every result.
    """
    try:
        df = image2predictiondf(commonsfile2url(qid2commonsfile(qid)))
    except (OSError, ValueError, KeyError, IndexError) as e:
        # Lookup failures are reported, not raised, so a single item cannot
        # abort a whole batch (same convention as the Met helpers)
        print(qid, ": could not get prediction", e, file=sys.stderr)
        return qid, None

    # Anything that is not a DataFrame means "no prediction available"
    if not isinstance(df, pd.DataFrame):
        return qid, None

    tags = df[df['probability'] > threshold]['tagName'].tolist()
    return qid, (tags or None)
# Convert a Met ID to image URL via their API
def metid2image(id):
    """Fetch the 'primaryImageSmall' URL of a Met object.

    Args:
        id: Met object ID, substituted into `metapibaseurl`.

    Returns:
        `(imageurl, None)` on success, or `(None, error)` where `error` is
        the exception raised while requesting or parsing the record. This
        function reports failures through the second element rather than
        raising.
    """
    try:
        url = metapibaseurl.format(id)   # Create request string
        r = requests.get(url).json()
        imageurl = r['primaryImageSmall']
    except Exception as e:
        # The error is handed to the caller, not swallowed
        return None, e
    return imageurl, None
def metid2predictiondf(id):
    """Return the Azure prediction DataFrame for a Met object's image.

    Args:
        id: Met object ID.

    Returns:
        The DataFrame produced by image2predictiondf, or None when the image
        URL lookup or the prediction fails; failures are reported on stderr.
    """
    url, error = metid2image(id)
    if error:
        print (id, ": could not get Met URL", error, file=sys.stderr)
        return None

    try:
        df = image2predictiondf(url)
    except Exception as e:
        # Report and continue: callers expect a DataFrame or None, never an
        # exception object
        print (id, ": could not get Azure prediction", e, file=sys.stderr)
        return None

    # Normalise every non-DataFrame result to None
    return df if isinstance(df, pd.DataFrame) else None

def metid2predictionlist(id):
    """Return the tag names predicted for a Met object's image.

    Args:
        id: Met object ID.

    Returns:
        A list of the tag names whose probability exceeds `threshold`, or
        None when no prediction is available or none exceeds `threshold`.
    """
    df = metid2predictiondf(id)
    # Anything that is not a DataFrame means "no prediction available"
    if not isinstance(df, pd.DataFrame):
        return None

    tags = df[df['probability'] > threshold]['tagName'].tolist()
    return tags or None
# Example result of qid2predictionlist: ('Q42', ['Men', 'Portraits'])
# Batch driver: read QIDs (one per line), predict tags in parallel threads and
# write one "<qid>,<prediction>" line per item to the output file.
objectsfile = 'rijks-qids.txt'
outputfile = 'rijks-predictions.csv'

# Read the whole list up front so the input file is not held open during the run
with open(objectsfile) as f:
    content = [line.strip() for line in f]   # remove whitespace
content = [qid for qid in content if qid]    # blank lines are not QIDs

contentdict = { i : None for i in content }  # create dict from list

start = timer()   # used by the (currently disabled) elapsed-time report
maxitems = len(content)

# Parallel processing using threads.
# Both context managers guarantee cleanup even if a worker raises:
# the pool is shut down and the output file is closed.
# buffering=1 (line buffering) writes each line to the file immediately.
with ThreadPool(maxthreads) as pool, open(outputfile, 'w', buffering=1) as outfile:
    results = pool.imap_unordered(qid2predictionlist, content)
    # Results arrive in completion order, not input order
    for qid, prediction in tqdm_notebook(results, total=maxitems):
        outfile.write("{},{}\n".format(qid, prediction))

#       if error is None:
#        print("%d, %r fetched in %ss" % (int(id), prediction, timer() - start))
#        else:
#           print("error fetching %d: %s" % (int(id), error))

#    print("Elapsed Time (parallel threads): %s" % (timer() - start,))

#    metid2image ()

#    pp = pprint.PrettyPrinter(indent=4)
#    pp.pprint(item)
#        item = json.loads(response)
#        if item.primaryImageSmall:
#            print (item.primaryImageSmall)
# Output of a previous run: Elapsed Time (parallel threads): 766.3970127105713