# Sample matches, one dictionary per target catalog.
# Each one maps a Wikidata QID to the identifier in that catalog,
# i.e. the ``{QID: catalog_identifier}`` shape that ``run_bot`` expects.
discogs = dict(Q18709534="2731973")
musicbrainz = dict(Q17353096="439c33e1-a12b-4b8d-9ffd-9e11beb13e9b")
twitter = dict(Q11621370="mayugamo316")

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""A Wikidata bot that adds referenced identifier statements as in the following example.

Claim = (Joey Ramone, MusicBrainz artist ID, 2f3f8fb1-e5dc-4548-9601-fada0485e561)
Reference = [ (stated in, MusicBrainz), (retrieved, TIMESTAMP) ]
"""

# Module metadata: authorship, release and licensing information
__author__ = "Marco Fossati"
__email__ = "fossati@spaziodati.eu"
__version__ = "1.0"
__license__ = "GPL-3.0"
__copyright__ = "Copyleft 2018, Hjfocs"

import logging
from datetime import date

import pywikibot

# QIDs of the sandbox items living in the production site
SANDBOX_1_QID = 'Q4115189'
SANDBOX_2_QID = 'Q13406268'
SANDBOX_3_QID = 'Q15397819'

# PIDs of the properties that make up a reference node
STATED_IN_PID = 'P248'
RETRIEVED_PID = 'P813'

# QIDs of the items representing each target catalog
BIBSYS_QID = 'Q4584301'
DISCOGS_QID = 'Q504063'
MUSICBRAINZ_QID = 'Q14005'
TWITTER_QID = 'Q918'

# PIDs of the identifier property for each target catalog
BIBSYS_PID = 'P1015'
DISCOGS_ARTIST_PID = 'P1953'
MUSICBRAINZ_ARTIST_PID = 'P434'
TWITTER_USERNAME_PID = 'P2002'

# Lookup table: catalog name -> its catalog item ('qid')
# and its identifier property ('pid')
CATALOG_MAPPING = {
    catalog: {'qid': item_qid, 'pid': identifier_pid}
    for catalog, item_qid, identifier_pid in (
        ('bibsys', BIBSYS_QID, BIBSYS_PID),
        ('discogs', DISCOGS_QID, DISCOGS_ARTIST_PID),
        ('musicbrainz', MUSICBRAINZ_QID, MUSICBRAINZ_ARTIST_PID),
        ('twitter', TWITTER_QID, TWITTER_USERNAME_PID),
    )
}

# Module-wide logger, named after this module
LOGGER = logging.getLogger(__name__)

# Wikidata site and its data repository, shared by every object below
SITE = pywikibot.Site('wikidata', 'wikidata')
REPO = SITE.data_repository()

# Item page objects of the target catalogs
BIBSYS, DISCOGS, MUSICBRAINZ, TWITTER = (
    pywikibot.ItemPage(REPO, catalog_qid)
    for catalog_qid in (BIBSYS_QID, DISCOGS_QID, MUSICBRAINZ_QID, TWITTER_QID)
)

# (stated in, CATALOG) reference claim; its target is left unset here
STATED_IN_REFERENCE = pywikibot.Claim(REPO, STATED_IN_PID, is_reference=True)

# (retrieved, TIMESTAMP) reference claim, stamped with today's date
# at day precision
TODAY = date.today()
TIMESTAMP = pywikibot.WbTime(
    year=TODAY.year,
    month=TODAY.month,
    day=TODAY.day,
    precision='day',
    site=REPO,
)
RETRIEVED_REFERENCE = pywikibot.Claim(REPO, RETRIEVED_PID, is_reference=True)
RETRIEVED_REFERENCE.setTarget(TIMESTAMP)


def run_bot(mapping: dict, catalog_name: str, sandbox=False):
    """Add identifier statements to existing Wikidata items.

    :param mapping: a ``{QID: catalog_identifier}`` dictionary
    :type mapping: dict
    :param catalog_name: the name of the target catalog, e.g., ``musicbrainz``
    :type catalog_name: str
    :param sandbox: whether to write every statement to the first Wikidata
      sandbox item instead of the QID found in ``mapping``.
      Default: ``False``
    :type sandbox: bool
    """
    for qid, catalog_id in mapping.items():
        LOGGER.info('Processing %s match: %s -> %s',
                    catalog_name, qid, catalog_id)
        # In sandbox mode, the matched QID is only logged:
        # the edit itself goes to the sandbox item
        target_qid = SANDBOX_1_QID if sandbox else qid
        _add_identifier(target_qid, catalog_id, catalog_name)


def _add_identifier(qid: str, catalog_id: str, catalog_name: str):
    """Add a referenced identifier statement to a Wikidata item.

    The statement is ``(qid, catalog identifier property, catalog_id)``,
    with a ``[(stated in, CATALOG), (retrieved, TIMESTAMP)]`` reference node.

    :param qid: the QID of the Wikidata item to edit
    :type qid: str
    :param catalog_id: the identifier of the item in the target catalog
    :type catalog_id: str
    :param catalog_name: the name of the target catalog,
      i.e., a key of ``CATALOG_MAPPING``
    :type catalog_name: str
    :raises ValueError: if ``catalog_name`` is not a supported catalog
    """
    catalog_terms = CATALOG_MAPPING.get(catalog_name)
    # Fail early with an explicit message,
    # rather than with a subscript error on None further down
    if catalog_terms is None:
        raise ValueError(
            'Unsupported catalog: %r. Must be one of: %s' % (
                catalog_name, ', '.join(sorted(CATALOG_MAPPING))))

    subject = pywikibot.ItemPage(REPO, qid)
    claim = pywikibot.Claim(REPO, catalog_terms['pid'])
    claim.setTarget(catalog_id)
    subject.addClaim(claim)
    LOGGER.debug('Claim added: %s', claim.toJSON())

    # Build fresh reference claims on every call: sharing module-level
    # claim objects would re-target and re-attach the same instances
    # to every statement processed
    stated_in = pywikibot.Claim(REPO, STATED_IN_PID, is_reference=True)
    stated_in.setTarget(pywikibot.ItemPage(REPO, catalog_terms['qid']))
    retrieved = pywikibot.Claim(REPO, RETRIEVED_PID, is_reference=True)
    retrieved.setTarget(TIMESTAMP)
    claim.addSources([stated_in, retrieved])
    LOGGER.debug('Reference node added: %s, %s',
                 stated_in.toJSON(), retrieved.toJSON())
    LOGGER.info('%s identifier statement added to %s', catalog_name, qid)

    
# Trial run: sandbox mode sends the sample Twitter match to the sandbox item
run_bot(mapping=twitter, catalog_name='twitter', sandbox=True)
# Output: Sleeping for 9.5 seconds, 2018-09-07 11:08:58