In [2]:
!pip install waybackpy
Collecting waybackpy
  Downloading https://files.pythonhosted.org/packages/57/94/42c5bdfe032001c3a5e44171a87d63eb584dba4bb5ed9dc6327041e2b0f9/waybackpy-1.6.tar.gz
Building wheels for collected packages: waybackpy
  Running setup.py bdist_wheel for waybackpy ... error
  Complete output from command /srv/paws/bin/python3.6 -u -c "import setuptools, tokenize;__file__='/tmp/pip-build-xuz7nuk8/waybackpy/setup.py';f=getattr(tokenize, 'open', open)(__file__);code=f.read().replace('\r\n', '\n');f.close();exec(compile(code, __file__, 'exec'))" bdist_wheel -d /tmp/tmpdp84h402pip-wheel- --python-tag cp36:
  /usr/lib/python3.6/distutils/dist.py:261: UserWarning: Unknown distribution option: 'long_description_content_type'
    warnings.warn(msg)
  /srv/paws/lib/python3.6/site-packages/setuptools/dist.py:397: UserWarning: Normalizing 'v1.6' to '1.6'
    normalized_version,
  usage: -c [global_opts] cmd1 [cmd1_opts] [cmd2 [cmd2_opts] ...]
     or: -c --help [cmd1 cmd2 ...]
     or: -c --help-commands
     or: -c cmd --help
  
  error: invalid command 'bdist_wheel'
  
  ----------------------------------------
  Failed building wheel for waybackpy
  Running setup.py clean for waybackpy
Failed to build waybackpy
Installing collected packages: waybackpy
  Running setup.py install for waybackpy ... done
Successfully installed waybackpy-1.6
In [4]:
from waybackpy import *
count = total_archives(" https://www.google.com ", UA="User-Agent")
print(count)
592521
In [14]:
from urllib.request import Request, urlopen
import json
default_UA = "robot x"
def total_archives(url, UA=default_UA):
    print(UA)
    hdr = { 'User-Agent' : '%s' % UA }
    request_url = "https://web.archive.org/cdx/search/cdx?url=%s&output=json&fl=statuscode" % clean_url(url)
    print(request_url)
    req = Request(request_url, headers=hdr) # nosec
    response = urlopen(req) #nosec
    return (len(json.loads(response.read())))
In [15]:
 
robot x
https://web.archive.org/cdx/search/cdx?url=https://www.google.com&output=json&fl=statuscode
592500
In [ ]:
count1 = total_archives("https://www.google.com")
print(count1)