import os
import requests
import pywikibot
!pip install img2pdf
Collecting img2pdf
  Downloading https://files.pythonhosted.org/packages/e0/c6/7cd14232a1b10bf884c12daf3626afb76c4f60b52ae0eb23ce1519542ae4/img2pdf-0.3.3.tar.gz (80kB)
    100% |████████████████████████████████| 81kB 1.1MB/s ta 0:00:011
Requirement already satisfied: Pillow in /srv/paws/lib/python3.6/site-packages (from img2pdf)
Building wheels for collected packages: img2pdf
  Running setup.py bdist_wheel for img2pdf ... error
  Complete output from command /srv/paws/bin/python3.6 -u -c "import setuptools, tokenize;__file__='/tmp/pip-build-lzg05tpa/img2pdf/setup.py';f=getattr(tokenize, 'open', open)(__file__);code=f.read().replace('\r\n', '\n');f.close();exec(compile(code, __file__, 'exec'))" bdist_wheel -d /tmp/tmp74idgd7_pip-wheel- --python-tag cp36:
  /usr/lib/python3.6/distutils/dist.py:261: UserWarning: Unknown distribution option: 'tests_requires'
    warnings.warn(msg)
  usage: -c [global_opts] cmd1 [cmd1_opts] [cmd2 [cmd2_opts] ...]
     or: -c --help [cmd1 cmd2 ...]
     or: -c --help-commands
     or: -c cmd --help
  
  error: invalid command 'bdist_wheel'
  
  ----------------------------------------
  Failed building wheel for img2pdf
  Running setup.py clean for img2pdf
Failed to build img2pdf
Installing collected packages: img2pdf
  Running setup.py install for img2pdf ... done
Successfully installed img2pdf-0.3.3
import img2pdf
import pywikibot
from pywikibot import pagegenerators

# Connect to Wikimedia Commons and select the category whose files will be
# downloaded and merged into one PDF.
site = pywikibot.Site(fam='commons', code='commons')

cat = pywikibot.Category(site,'Category:Institutional Act Number Two at Arquivo Nacional (Rio de Janeiro)')
# Generator over the pages in the category (recurse=True, content=True).
gen = pagegenerators.CategorizedPageGenerator(cat, recurse=True, content=True)
# Work inside $HOME/category/<category title>. makedirs(exist_ok=True) is used
# instead of mkdir so that re-running this cell does not fail with
# FileExistsError, and so that a missing $HOME/category is created as well.
work_dir = os.path.join(os.environ['HOME'], 'category', cat.title())
os.makedirs(work_dir, exist_ok=True)
os.chdir(work_dir)
# Throttle object used below to pause between retries.
throttle = pywikibot.throttle.Throttle(site)
VERBOSE:pywiki:Found 1 commons:commons processes running, including this one.
# First pass over the category: try to download every file and remember the
# pages whose download still fails after one retry.
error = False
error_list = []
for page in gen:
    # Probe the file URL with a HEAD request and show how it was served.
    r = requests.head(page.get_file_url())
    print(r.headers.get('X-Cache-Status'), r.status_code)
    if r.status_code != 200:
        # Probe was not OK: wait on the throttle and probe once more
        # (the second response is not inspected).
        throttle.wait(3)
        requests.head(page.get_file_url())
    if page.download():
        continue
    # download() reported failure: log it, wait, probe again and retry once.
    print('error with page {}'.format(page))
    throttle.wait(3)
    requests.head(page.get_file_url())
    if page.download():
        continue
    # Second attempt failed too; keep the page for the later retry passes.
    error = True
    error_list.append(page)
def get_error(error_list, recurse):
    """Retry downloading the pages in ``error_list`` over several passes.

    In each pass, every pending page is probed with a HEAD request on its
    file URL (the ``X-Cache-Status`` header and status code are printed),
    and the download is attempted up to two times. Pages whose download
    still fails are carried over to the next pass.

    Args:
        error_list: pages to retry; each must provide ``get_file_url()``
            and ``download()``.
        recurse: maximum number of passes. At least one pass is always
            performed, even when this is ``1`` or less.

    Returns:
        A new list with the pages that still could not be downloaded after
        the last pass (empty when everything succeeded).
    """
    pending = error_list
    while True:
        recurse -= 1
        still_failing = []
        for page in pending:
            response = requests.head(page.get_file_url())
            print(response.headers.get('X-Cache-Status'), response.status_code)
            if response.status_code != 200:
                # Not OK: wait on the throttle, then probe a second time.
                throttle.wait(3)
                response = requests.head(page.get_file_url())
                print('2, ', response.headers.get('X-Cache-Status'), response.status_code)
            if page.download():
                continue
            # First download attempt failed. Only when the latest probe was
            # not OK do we wait and probe again before the second attempt.
            if response.status_code != 200:
                throttle.wait(3)
                response = requests.head(page.get_file_url())
                throttle.wait(3)
                print('3, ', response.headers.get('X-Cache-Status'), response.status_code)
            if not page.download():
                still_failing.append(page)
        # Stop when nothing is left to retry or the pass budget is used up.
        if not still_failing or recurse <= 0:
            return still_failing
        pending = still_failing
hit 200
miss 200
hit 200
hit 200
hit 200
miss 200
hit 200
hit 200
hit 200
miss 200
miss 200
miss 200
# Give the pages that failed in the first pass up to 8 more retry passes.
if error_list:
    error_list = get_error(error_list, 8)
# Collect the files in the working directory whose names end in '.jpg' or
# '.tif', in sorted order; these become the PDF pages.
pages_list = sorted(
    name for name in os.listdir() if name.endswith(('.jpg', '.tif'))
)
# pages_list.append(pages_list.pop(1))
pages_list
['AI-2_fl.01.jpg',
 'AI-2_fl.02.jpg',
 'AI-2_fl.03.jpg',
 'AI-2_fl.04.jpg',
 'AI-2_fl.05.jpg',
 'AI-2_fl.06.jpg',
 'AI-2_fl.07.jpg',
 'AI-2_fl.08.jpg',
 'AI-2_fl.09.jpg',
 'AI-2_fl.10.jpg',
 'AI-2_fl.11.jpg',
 'AI-2_fl.12.jpg']
# Name the PDF after the category: a ' pages' part of the title is turned into
# '.pdf'. Titles without ' pages' previously produced a file with no extension,
# so '.pdf' is appended whenever the name does not already end with it.
pdf_name = cat.title().replace(' pages', '.pdf')
if not pdf_name.endswith('.pdf'):
    pdf_name += '.pdf'
# Convert before opening the output file so that a conversion error does not
# leave an empty file behind.
pdf_bytes = img2pdf.convert(pages_list)
with open(pdf_name, 'wb') as pdf_file:
    pdf_file.write(pdf_bytes)
# Bare expression: displays the pages that could not be downloaded when this
# is run as a notebook cell.
error_list
[]
 
'.jpg'