Introduction to Jupyter

from pywikibot.data import api
import pywikibot
import pprint

# Population figures keyed by place name.
# The "data" list has the three-element shape that check_claim_and_uncert and
# set_claim unpack as (value, uncertainty, unit); the "source" list has the
# two-element shape that create_source_claim unpacks as (item id, URL).
Populationdata = {
    "Goa": {
        "data": ["14.1", "0.1", "Q1748"],
        "source": ["Q1751", "http://www.wikidata.org/entity/Q1171"],
    },
}
def getItems(site, itemtitle):
    """Search for items whose label or alias matches a title.

    Builds a ``wbsearchentities`` API request (English, item type, JSON
    format) and returns whatever ``Request.submit()`` returns.

    :param site: site object the API request is sent to
    :param itemtitle: text to search for
    :return: the result of submitting the request
    """
    params = {
        'action': 'wbsearchentities',
        'format': 'json',
        'language': 'en',
        'type': 'item',
        'search': itemtitle,
    }
    # Hand the API arguments over through ``parameters``: expanding them as
    # keyword arguments of Request triggers a DeprecationWarning.
    request = api.Request(site=site, parameters=params)
    return request.submit()
def check_claim_and_uncert(item, property, data, tolerance=None):
    """Find an existing quantity claim matching a value and its uncertainty.

    A claim matches when its amount equals the expected value and both
    ``amount - lowerBound`` and ``upperBound - amount`` equal the expected
    uncertainty, each within ``tolerance``. Value and uncertainty are checked
    on the same claim; a claim whose target lacks numeric amount or bounds is
    skipped instead of aborting the search.

    :param item: object whose ``get()`` returns a mapping with a "claims" key
    :param property: property id whose claims are inspected
    :param data: three-element sequence ``(value, uncertainty, unit)``; value
        and uncertainty are converted with ``float``, the unit is not used
    :param tolerance: maximum absolute difference that still counts as equal;
        when omitted, the module-level ``precision`` is used
    :return: the first matching claim, or None when the property is absent
        or no claim matches
    :raises NameError: if ``tolerance`` is omitted and no module-level
        ``precision`` exists
    """
    item_dict = item.get()
    value, uncert, _unit = data
    value, uncert = float(value), float(uncert)
    if tolerance is None:
        # Keep honouring the module-level setting the comparison relied on.
        tolerance = precision

    try:
        claims = item_dict["claims"][property]
    except KeyError:
        return None

    for claim in claims:
        try:
            wb_quant = claim.getTarget()
            # Convert explicitly so non-float numeric types compare cleanly
            # with the float inputs.
            amount = float(wb_quant.amount)
            delta_lower = amount - float(wb_quant.lowerBound)
            delta_upper = float(wb_quant.upperBound) - amount
        except (AttributeError, TypeError, ValueError):
            # Not a quantity with bounds: it cannot match, try the next one.
            continue

        value_matches = abs(amount - value) < tolerance
        uncert_matches = (abs(uncert - delta_lower) < tolerance
                          and abs(delta_upper - uncert) < tolerance)
        if value_matches and uncert_matches:
            return claim
    return None

def check_source_set(claim, property, data):
    """Report whether a claim already cites the expected item as a source.

    Every reference returned by ``claim.getSources()`` is inspected. A
    reference without a ``p_stated_in`` entry is skipped, so a later
    reference can still match.

    :param claim: claim whose references are inspected
    :param property: not used by this check; kept for call compatibility
    :param data: sequence whose first element is the expected item id
    :return: True if any ``p_stated_in`` reference targets ``data[0]``,
        otherwise False
    """
    for source in claim.getSources():
        try:
            stated_in_claims = source[p_stated_in]
        except KeyError:
            # This reference has no such entry; others may.
            continue
        for stated_in in stated_in_claims:
            # A target without an ``id`` (e.g. None) simply does not match.
            if getattr(stated_in.target, "id", None) == data[0]:
                return True
    return False

def set_claim(item, property, data):
    """Create a quantity claim from ``data`` and add it to an item.

    :param item: item page the claim is added to via ``addClaim``
    :param property: property id of the new claim
    :param data: three-element sequence ``(value, uncertainty, unit)``; value
        and uncertainty are converted with ``float``, ``unit`` is the id of
        the unit item
    :return: the claim that was added
    """
    value, uncert, unit = data
    value, uncert = float(value), float(uncert)
    claim = pywikibot.Claim(repo, property)
    # Build the unit's entity URI from the unit id supplied in ``data``
    # instead of a fixed item, so the claim carries the requested unit.
    unit_uri = "http://www.wikidata.org/entity/{}".format(unit)
    wb_quant = pywikibot.WbQuantity(value, unit_uri, uncert)
    claim.setTarget(wb_quant)
    item.addClaim(claim, bot=False, summary="Population Data")
    return claim

def create_source_claim(claim, source_data):
    """Attach a ``p_stated_in`` reference to an existing claim.

    :param claim: claim that receives the reference via ``addSources``
    :param source_data: two-element sequence ``(item id, reference URL)``;
        only the item id is used
    :return: always True
    """
    # Unpacking enforces the two-element shape; the URL itself is not used.
    stated_in_id, _unused_url = source_data
    stated_in_page = pywikibot.ItemPage(repo, stated_in_id)

    reference = pywikibot.Claim(repo, p_stated_in, isReference=True)
    reference.setTarget(stated_in_page)

    claim.addSources([reference])
    return True


def prettyPrint(variable):
    """Pretty-print ``variable`` to standard output with a 4-space indent."""
    pprint.pprint(variable, indent=4)

# Set up the Wikidata site object and its data repository. `repo` is read as
# a module-level name by set_claim and create_source_claim.
site = pywikibot.Site("wikidata", "wikidata")
repo = site.data_repository()

# Search for items matching "goa"; the result is the API response.
wikidataEntries = getItems(site, "goa")
# Print the different Wikidata entries to the screen
prettyPrint(wikidataEntries)

# Print each wikidata entry as an object
# NOTE(review): disabled code; `getItem` and `token` are not defined in the
# visible part of this file.
#for wdEntry in wikidataEntries["search"]:
#   prettyPrint(getItem(site, wdEntry["id"], token))
{   'search': [   {   'concepturi': 'http://www.wikidata.org/entity/Q1171',
                      'description': 'state on the western coast of India',
                      'id': 'Q1171',
                      'label': 'Goa',
                      'match': {   'language': 'en',
                                   'text': 'Goa',
                                   'type': 'label'},
                      'pageid': 1535,
                      'title': 'Q1171',
                      'url': '//www.wikidata.org/wiki/Q1171'},
                  {   'concepturi': 'http://www.wikidata.org/entity/Q853920',
                      'description': 'species of antelope that inhabits '
                                     'the Tibetan plateau',
                      'id': 'Q853920',
                      'label': 'Goa',
                      'match': {   'language': 'en',
                                   'text': 'Goa',
                                   'type': 'label'},
                      'pageid': 805716,
                      'title': 'Q853920',
                      'url': '//www.wikidata.org/wiki/Q853920'},
                  {   'aliases': ['Goa'],
                      'concepturi': 'http://www.wikidata.org/entity/Q312471',
                      'description': 'surface-to-air missile system',
                      'id': 'Q312471',
                      'label': 'S-125',
                      'match': {   'language': 'en',
                                   'text': 'Goa',
                                   'type': 'alias'},
                      'pageid': 300567,
                      'title': 'Q312471',
                      'url': '//www.wikidata.org/wiki/Q312471'},
                  {   'concepturi': 'http://www.wikidata.org/entity/Q241522',
                      'description': 'Wikipedia disambiguation page',
                      'id': 'Q241522',
                      'label': 'Goa',
                      'match': {   'language': 'en',
                                   'text': 'Goa',
                                   'type': 'label'},
                      'pageid': 234810,
                      'title': 'Q241522',
                      'url': '//www.wikidata.org/wiki/Q241522'},
                  {   'concepturi': 'http://www.wikidata.org/entity/Q3471916',
                      'id': 'Q3471916',
                      'label': 'Goa',
                      'match': {   'language': 'en',
                                   'text': 'Goa',
                                   'type': 'label'},
                      'pageid': 3306950,
                      'title': 'Q3471916',
                      'url': '//www.wikidata.org/wiki/Q3471916'},
                  {   'concepturi': 'http://www.wikidata.org/entity/Q12584322',
                      'id': 'Q12584322',
                      'label': 'Goa',
                      'match': {   'language': 'en',
                                   'text': 'Goa',
                                   'type': 'label'},
                      'pageid': 13874524,
                      'title': 'Q12584322',
                      'url': '//www.wikidata.org/wiki/Q12584322'},
                  {   'concepturi': 'http://www.wikidata.org/entity/Q19571970',
                      'id': 'Q19571970',
                      'label': 'Goa',
                      'match': {   'language': 'en',
                                   'text': 'Goa',
                                   'type': 'label'},
                      'pageid': 21179830,
                      'title': 'Q19571970',
                      'url': '//www.wikidata.org/wiki/Q19571970'}],
    'search-continue': 7,
    'searchinfo': {'search': 'goa'},
    'success': 1}
WARNING: /srv/paws/lib/python3.4/site-packages/ipykernel/__main__.py:10: DeprecationWarning: Instead of using kwargs from Request.__init__, parameters for the request to the API should be added via the "parameters" parameter.

Jupyter is a web-based version of the Python shell. You can use it to write text like this, or to write Python code and execute it block by block. It can also be used with other languages like Julia, R, Java, JavaScript, and so on (pretty much anything!). This notebook introduces the basic building blocks that help us use the notebook.

It provides an interactive interface to be able to run blocks of code and also to write documentation to understand the code.

1. Cells

Jupyter's basic building block is a cell. There are two main types of cells in Jupyter:

  • Code cells - Code cells contain code which is executed by the kernel. Hence, they also have an output section right underneath the cell.
  • Markdown cells - Markdown cells are parsed into HTML and rendered using Markdown styling.

Focusing on cells: Cells can be selected by clicking on them. The currently focused cell is shown with a blue border. It is also possible to change the focused cell by using the arrow keys.

2. Editing cells

There are two modes that are available on Jupyter notebooks (Similar to vim or emacs):

  • Edit mode - If a cell is in an edit mode, it is possible to type content into it. When a cell is in the edit mode, it has a **green border**.
  • Command mode - If a cell is in the command mode, it can't be edited. But it's easier to move, copy, cut, run, etc. and perform operations on the cell itself. When a cell is in a command mode, it has a **blue border**.

Creating a new cell

To create a new cell, you can either:

  • Choose the "Insert" > "Insert cell below" or "Insert cell above" option in the menubar on the top.
  • Type b to add a cell below the focused cell. Type a to add a cell above the focused cell. This needs to be done in the command mode.

Deleting a cell

To delete a cell, you can either:

  • Choose the "Edit" > "Delete a cell" option in the menubar on the top.
  • Type dd to delete the cell in the command mode. (2 times d)

3. Running Code

Code cells allow you to enter and run code in them, unlike markdown cells.

Run a code cell by:

  1. Focus on the cell by clicking on it.
  2. Then, press Shift-Enter or press the (run) button in the toolbar above to run the cell.
# This creates a variable a. The assignment on its own produces no output.
a = 10
# Printing the variable writes a line to the output area below the cell.
print("The value of a is =", a)

There are two other keyboard shortcuts for running code:

  • Alt-Enter runs the current cell and inserts a new cell below the one that was executed.
  • Ctrl-Enter runs the current cell and enters the edit mode of the cell to edit it.

You can check if a cell is running by looking to its left. If it shows In [*]:, it means that the cell is still being run. Once the cell has completed running, it is assigned a number. For example: In [4]:. The number denotes the order in which the cells are run. Hence, if 4 is shown, the cell was the fourth to be run in the notebook.

Code cells show the output of the code below the cell. If the code produces no output (as with a = 10), nothing is shown. If there is an error, the error is shown with helpful debugging information instead:

a =

4. The Kernel

All code is run using a kernel. The kernel is a separate process which links the notebook with the compiler/interpreter of the appropriate type (based on the language). The Kernel can be interrupted or restarted.

When you stop the execution of a kernel, Python raises a KeyboardInterrupt exception.

Try running the following cell. It pauses and sleeps for 30 seconds. While it is running, hit the (stop) button in the toolbar above. It should give the KeyboardInterrupt exception:

import time
# Sleep for 30 seconds so there is time to press the stop button.
time.sleep(30)

Restarting the kernels

The kernel maintains the state of a notebook's computations. You can reset this state by restarting the kernel. This is done by clicking on the (restart) button in the toolbar above. On doing this, all your stored variables will be deleted from the session and will need to be redefined.

5. Output formatting

Handling sys.stdout and sys.stderr

The stdout and stderr streams are displayed as text in the output area.

# Written to the standard output stream.
print("hi, stdout")
# NOTE(review): presumably the start of a separate cell; in a plain module a
# __future__ import has to come before other statements.
from __future__ import print_function
import sys
# Written to the standard error stream by passing file=sys.stderr.
print('hi, stderr', file=sys.stderr)

Output is asynchronous

All output is displayed asynchronously as it is generated in the Kernel. If you execute the next cell, you will see the output one piece at a time, not all at the end.

import time, sys
# Print one number, then pause half a second, five times over, so each line
# of output appears as it is produced.
for i in range(5):
    print(i)
    time.sleep(0.5)

Large outputs

Beyond a certain point, output will scroll automatically:

# Print 500 lines (the powers of two from 2**0 to 2**499) to produce an
# output long enough to scroll.
for i in range(500):
    print(2**i)