import sys
import json
import csv
import requests 
import pprint
from tqdm import tqdm_notebook
from urllib.request import urlopen
from multiprocessing.pool import ThreadPool
from time import time as timer
import time

import pandas as pd
import matplotlib.pyplot as plot
from IPython.display import Image

# Load Azure Custom Vision API endpoint URL and API key.
# azurecredentials.json must provide the keys 'predictionkey' and 'azure_cv_url'.
with open('azurecredentials.json') as f:
    data = json.load(f)
PREDICTION_KEY = data['predictionkey']
AZURE_CV_URL   = data['azure_cv_url']

# Set POST request header to JSON and our secret key
headers = {'Content-Type': 'application/json', 'Prediction-Key': PREDICTION_KEY}

# URL template for looking up a Met object; '{}' is replaced by the object ID.
# NOTE(review): the template had been reduced to a bare '{}', which makes
# every lookup fail (the formatted "URL" is just the ID). Restored to the
# public Met Collection API object endpoint, whose response carries the
# 'primaryImageSmall' field read by metid2image - confirm this is the
# endpoint originally intended.
metapibaseurl = 'https://collectionapi.metmuseum.org/public/collection/v1/objects/{}'

threshold = 0.6   # confidence threshold to exceed for Azure predictions to be returned

maxthreads = 15   # How many parallel threads - WARNING - too many and Azure returns error

# Need to handle Azure error:
# {'statusCode': 429, 'message': 'Rate limit is exceeded. Try again in 1 seconds.'}
maxtries = 20      # maximum number of rate-limited attempts per image
waitinterval = 1   # seconds added to the wait before each further retry

# Take image URL, feed it to Azure Custom Vision instance
# Return dataframe of results
def image2predictiondf(url):
    """Ask the Azure Custom Vision endpoint for predictions on an image URL.

    Args:
        url: URL of the image to classify; surrounding whitespace is stripped.

    Returns:
        A ``(df, error)`` tuple:
          * ``(DataFrame, None)`` on success; the frame has the columns
            'tagName' and 'probability'.
          * ``(None, response)`` when the decoded response holds neither
            predictions nor a 429 status code.
          * ``(None, exception)`` when the request or its handling raises.
          * ``(None, None)`` when the service is still rate limiting after
            ``maxtries`` attempts (a message is written to stderr).
    """
    # json.dumps escapes quotes/backslashes that would corrupt a hand-built body
    payload = json.dumps({'url': url.strip()})
    # Handle Azure rate limit by adding retry count and wait interval
    tries = 0
    wait = waitinterval
    while tries < maxtries:
        try:
            # Send POST request to our Azure Custom Vision API
            r = requests.post(AZURE_CV_URL, headers=headers, data=payload).json()
            # Successful predictions returned
            if 'predictions' in r:
                df = pd.DataFrame(r['predictions'], columns=['tagName', 'probability'])
                return df, None
            # Rate limited: sleep, then go round the loop again
            if 'statusCode' in r and r['statusCode'] == 429:
                time.sleep(wait)      # wait a bit
                tries += 1            # record another try
                wait += waitinterval  # increase wait period by interval in case of retry
                continue              # without this the retry never happens
            # Any other response is handed back to the caller as the error
            return None, r
        except Exception as e:
            return None, e
    print(url,' retry limit after ',tries ,file=sys.stderr)
    return None, None

# Convert a Met ID to image URL via their API    
def metid2image(id):
    """Look up the image URL of a Met object.

    Args:
        id: Met object ID, substituted into ``metapibaseurl``.

    Returns:
        ``(imageurl, None)`` where ``imageurl`` is the 'primaryImageSmall'
        value of the JSON response, returned unchanged, or
        ``(None, exception)`` if the request, the JSON decoding or the key
        lookup raises.
    """
    url = metapibaseurl.format(id)   # Create request string
    try:
        r = requests.get(url).json()
        imageurl = r['primaryImageSmall']
        return imageurl, None
    except Exception as e:
        return None, e
def metid2predictiondf(id):
    """Fetch the prediction DataFrame for a Met object ID.

    Resolves the ID to an image URL with ``metid2image`` and passes that URL
    to ``image2predictiondf``.

    Args:
        id: Met object ID.

    Returns:
        The DataFrame produced by ``image2predictiondf``, or ``None`` when the
        image URL lookup fails, the prediction step yields no frame, or an
        unexpected exception is raised.
    """
    try:
        url, error = metid2image(id)
        if error:
            print (id, ": could not get Met URL", error, file=sys.stderr)
            return None
        # Prediction errors are not reported here; df is simply None then
        df, error = image2predictiondf(url)
        return df
    except Exception as e:
        # Callers only test for None, so report the failure and return None
        # rather than handing back the exception object
        print (id, ": could not get prediction", e, file=sys.stderr)
        return None

def metid2predictionlist(id):
    """Return the tag names predicted for a Met object above ``threshold``.

    Args:
        id: Met object ID.

    Returns:
        ``(id, tags)`` where ``tags`` is the list of 'tagName' values whose
        'probability' exceeds ``threshold``, or ``(id, None)`` when no
        prediction frame was obtained or no tag exceeds the threshold.
    """
    df = metid2predictiondf(id)
    # Anything other than a DataFrame (e.g. None) means no prediction was obtained
    if not isinstance(df, pd.DataFrame):
        return id, None
    confident = df[df['probability'] > threshold]   # filter once, reuse below
    if confident.empty:
        return id, None
    return id, confident['tagName'].tolist()
# Sample notebook cell output (apparently from metid2predictionlist calls):
# ('', None)
# (185860, ['Men', 'Inscriptions', 'Flowers', 'Leaves', 'Women'])
metobjectsfile = 'met-new-objects-0205.txt'
metoutputfile = 'met-predictions.csv'

# Read the Met object IDs, one per line, with surrounding whitespace removed.
# Slice the list (content[start:finish]) to process only part of the file.
with open(metobjectsfile) as f:
    content = [x.strip() for x in f]

contentdict = {i: None for i in content}  # dict keyed by object ID; not used below

maxitems = len(content)

# Parallel processing using threads
start = timer()

# Both the pool and the output file are released when the block exits, even
# if a worker raises. buffering=1 flushes each line as it is written, so
# partial results survive an interrupted run.
with ThreadPool(maxthreads) as pool, open(metoutputfile, 'w', buffering=1) as outfile:
    # imap_unordered yields results as they finish, not in input order
    results = pool.imap_unordered(metid2predictionlist, content)
    for id, prediction in tqdm_notebook(results, total=maxitems):
        outfile.write("{},{}\n".format(id, prediction))

print("Elapsed Time (parallel threads): %s" % (timer() - start,))
# Sample notebook output: Elapsed Time (parallel threads): 1088.082231760025