In [22]:
import json
from urllib.parse import urlencode
from urllib.request import Request, urlopen
default_UA = "robot x"


def total_archives(url, UA=default_UA):
    """Return the number of Wayback Machine captures listed for ``url``.

    Sends one request to the Internet Archive CDX search endpoint asking for
    the ``statuscode`` field of every capture in JSON form, then counts the
    rows of the reply. The User-Agent and the final request URL are printed
    for tracing.

    Args:
        url: Address to look up. It is percent-encoded before being placed in
            the query string, so characters such as ``&``, ``?`` or ``#``
            inside it cannot be mistaken for CDX parameters.
        UA: Value sent in the ``User-Agent`` request header.

    Returns:
        int: The number of commas in the raw response body.
        NOTE(review): this equals the capture count only if the JSON reply is
        a header row plus one single-field row per capture, so that commas
        occur solely between rows -- confirm against the CDX API docs.

    Raises:
        urllib.error.URLError: If the request fails (``HTTPError`` for
            non-success HTTP statuses).
    """
    print(UA)
    hdr = {"User-Agent": "%s" % UA}
    # urlencode escapes the target address so it survives as a single value.
    query = urlencode({"url": url, "output": "json", "fl": "statuscode"})
    request_url = "https://web.archive.org/cdx/search/cdx?" + query
    print(request_url)
    req = Request(request_url, headers=hdr)  # nosec
    # The context manager closes the connection even if read() raises.
    with urlopen(req) as response:  # nosec
        # Count on the raw bytes: same result as counting in str(bytes), but
        # without building a second, escaped copy of a potentially huge body.
        return response.read().count(b",")
In [24]:
# Look up how many captures the Wayback Machine lists for google.com.
count1 = total_archives(url="https://google.com")
print(count1)
robot x
https://web.archive.org/cdx/search/cdx?url=https://google.com&output=json&fl=statuscode
592503
In [ ]:
# Scratch cell (never executed): counts the commas in the text form of a response body.
# NOTE(review): in the visible cells `response` is only a local variable inside
# total_archives, so unless another cell defines a module-level `response` this
# raises NameError on a fresh kernel -- confirm, then remove or fix this cell.
str(response.read()).count(",")