# Import modules that are almost always used
from pprint import pprint
import requests
import re

# Import modules for reading HTML pages
import lxml.html
# Download the search-results page.
# NOTE(review): verify=False disables TLS certificate verification (it is what
# triggers urllib3's InsecureRequestWarning) - confirm it is really required.
page = requests.get('https://www.docenti.unina.it/cercaGoogle.do?query=a&submit1=Avvia+ricerca', verify=False)
# Create a tree for the page
tree = lxml.html.fromstring(page.content)
# Extract teacher page URIs. xpath() returns a list of href values, so the base
# URL must be prepended to each element; adding it to the list itself raises
# "TypeError: must be str, not list".
teacher_page_URIs = [
    'http://docenti.unina.it/' + href
    for href in tree.xpath('//div[@class="docente"]//a/@href')
]
/srv/paws/lib/python3.6/site-packages/urllib3/connectionpool.py:858: InsecureRequestWarning: Unverified HTTPS request is being made. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/latest/advanced-usage.html#ssl-warnings
  InsecureRequestWarning)
/srv/paws/lib/python3.6/site-packages/urllib3/connectionpool.py:858: InsecureRequestWarning: Unverified HTTPS request is being made. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/latest/advanced-usage.html#ssl-warnings
  InsecureRequestWarning)
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-2-77b3e72a89bc> in <module>()
      4 tree = lxml.html.fromstring(page.content)
      5 # Extract teacher page URIs
----> 6 teacher_page_URIs = 'http://docenti.unina.it/' + tree.xpath('//div[@class="docente"]//a/@href')

TypeError: must be str, not list
teachers = []


def _first_match(tree, query):
    """Return the first result of the XPath *query* on *tree*, or None if nothing matches."""
    matches = tree.xpath(query)
    return matches[0] if matches else None


# Create main objects: one dict per teacher page, collected in `teachers`
for URI in teacher_page_URIs:
    page = requests.get(URI, verify=False)
    tree = lxml.html.fromstring(page.content)
    # Extract name: drop CR/LF, non-breaking spaces and spaces, then title-case
    name = _first_match(tree, '//h1[contains(@class, "nomeProf")]/text()')
    if name is not None:
        name = re.sub('[\r\n\xa0 ]', '', name).title()
    # Extract surname
    surname = _first_match(tree, '//span[contains(@class, "cognProf")]/text()')
    # Extract phone: replace dashes with spaces and prepend the +39 prefix
    phone = _first_match(tree, '//div[contains(@class, "rif-item") and contains(.//span, "PHONE")]//div[contains(@class, "item-in fsz14 black")]//text()')
    if phone is not None:
        phone = '+39 ' + re.sub('[-]', ' ', phone)
    # Extract email: drop CR/LF and spaces around the address
    email = _first_match(tree, '//div[contains(@class, "rif-item") and contains(.//span, "EMAIL")]//div[contains(@class, "item-in fsz14")]//a//text()')
    if email is not None:
        email = re.sub('[\r\n ]', '', email)
    # Extract official website
    URL = _first_match(tree, '//div[contains(@class, "rif-item") and contains(.//span, "SHORT URL")]//div[contains(@class, "item-in fsz14")]//a//text()')
    # Store the record; without this the scraped values are discarded on the
    # next iteration and `teachers` stays empty. Fields that are absent from
    # a page are stored as None instead of aborting the whole run.
    teachers.append({
        'name': name,
        'surname': surname,
        'phone': phone,
        'email': email,
        'URL': URL,
    })
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-45-92b1157029be> in <module>()
      1 teachers = []
      2 # Create main objects
----> 3 for URI in teachers_page_URIs:
      4     page = requests.get(URI, verify=False)
      5     tree = lxml.html.fromstring(page.content)

NameError: name 'teachers_page_URIs' is not defined
# Overwrite the 'name' entry of every collected record with a placeholder value
for record in teachers:
    record['name'] = 'cacca'
# Show which container type `teachers` is
type(teachers)
list