"""Add old-computers.com identifiers (P5936) to Wikidata items.

Finds Serbian Wikipedia pages that link to old-computers.com, extracts the
numeric identifier from each link and adds it to the page's Wikidata item,
sourced as imported from Serbian Wikipedia.
"""
import re

import pywikibot
from pywikibot import pagegenerators


def get_id(url_list):
    """Return the numeric old-computers.com ID from the first matching URL.

    Returns None when no URL in url_list matches.
    """
    regex = r"^http://www\.old-computers\.com/museum/computer\.asp\?c=(\d+).*$"
    for url in url_list:
        match = re.search(regex, url)
        if match:
            return match.group(1)
    return None


def get_pages(url, site):
    """Yield pages on the given site whose external links include url."""
    return pagegenerators.LinksearchPageGenerator(url, site=site)


def retrieve_data_from_page(page):
    """Return (Wikidata item, identifier) for the given wiki page.

    Raises pywikibot.exceptions.NoPage when the page has no Wikidata item.
    """
    links = page.extlinks()
    identifier = get_id(links)
    wd_item = page.data_item()
    return (wd_item, identifier)


def make_claims(repo, item, property_id, value, source_item):
    """Add the identifier claim to item, sourced as imported from a wiki."""
    identifier_claim = pywikibot.Claim(repo, property_id)
    identifier_claim.setTarget(value)

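    # P143 ("imported from Wikimedia project") marks the claim as imported
    # from the source wiki rather than backed by an independent reference.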
    imported_from = pywikibot.Claim(repo, 'P143')
    imported_from.setTarget(source_item)

    item.addClaim(identifier_claim,
                  summary='Adding identifier based on Serbian Wikipedia')
    identifier_claim.addSources([imported_from], summary='Adding sources.')


def main():
    """Add P5936 claims for srwiki pages linking to old-computers.com."""
    site = pywikibot.Site('sr', 'wikipedia')
    repo = site.data_repository()

    url = 'www.old-computers.com/museum/computer.asp'
    property_id = 'P5936'        # old-computers.com ID
    source_item_id = 'Q200386'   # Serbian Wikipedia, used as the P143 source

    source_item = pywikibot.ItemPage(repo, source_item_id)

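    # Every srwiki page containing an external link to the URL pattern above.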
    pages = get_pages(url, site)
    for page in pages:
        try:
            (wd_item, identifier) = retrieve_data_from_page(page)
        except pywikibot.exceptions.NoPage:
            print("No Wikidata item for %s" % page)
            continue
        if identifier is None:
            # The page links to the site, but no URL matched the ID regex
            # (e.g. an https link), so there is nothing to add.
            print("No identifier found in links on %s" % page)
            continue
        if property_id not in wd_item.claims:
            make_claims(repo, wd_item, property_id, identifier, source_item)
        else:
            print("%s already set on %s" % (property_id, wd_item.id))


if __name__ == '__main__':
    main()