%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
import datetime as dt  
import matplotlib
import numpy as np
import urllib
from scipy.stats import pearsonr

# load seastar data

def loadalldatasetsmeasurements(spec):
    """Load one 'b'- or 'c'-type measurement TSV for a single data set.

    Parameters
    ----------
    spec : list
        ``[measure, typeofmeasure, dataset, typelink]``, e.g.
        ``['m11', 'b', 'dnb', 'i']``.  ``typeofmeasure`` selects the value
        column: 'b' files carry ``diffratio``, 'c' files ``diffentropy``.

    Returns
    -------
    pandas.DataFrame
        The first three TSV columns plus an added ``dataset`` column.

    Raises
    ------
    ValueError
        If ``typeofmeasure`` is neither 'b' nor 'c' (the original code
        raised an opaque NameError in that case).
    """
    measure, typeofmeasure, dataset, typelink = spec
    # The two file families differ only in the name of the value column.
    value_columns = {'b': 'diffratio', 'c': 'diffentropy'}
    try:
        value_column = value_columns[typeofmeasure]
    except KeyError:
        raise ValueError("typeofmeasure must be 'b' or 'c', got %r" % (typeofmeasure,))
    # NOTE(review): a remote variant of the same files used to be fetched from
    # http://141.26.208.201/datameasures/ — local copies are read instead.
    path = ('data/measurements/' + measure + typeofmeasure + 'File_'
            + dataset + '_' + typelink + '.tsv')
    result = pd.read_csv(path, sep="\t", usecols=[0, 1, 2],
                         dtype={'entity': str, 'typeentity': object,
                                value_column: float})
    result['dataset'] = dataset
    return result

    
def loadalldatasets_ameasures(spec):
    """Load an 'a'-type measurement TSV for one measure and data set.

    Parameters
    ----------
    spec : list
        ``[measure, dataset, typelink]``, e.g. ``['m11', 'dnb', 'i']``.
        ``measure`` must be one of m11, m12, m13, m21, m22, m31.

    Returns
    -------
    pandas.DataFrame
        The first four TSV columns plus an added ``dataset`` column.

    Raises
    ------
    ValueError
        If ``measure`` is not a known measure id (the original code raised
        an opaque UnboundLocalError in that case).
    """
    measure, dataset, typelink = spec
    # The six measure files are identical except for the names of their two
    # numeric (cardinality, prime-cardinality) columns.
    cardinality_columns = {
        'm11': ('classifcardinality', 'classifprimecardinality'),
        'm12': ('desccardinality', 'descprimecardinality'),
        'm13': ('desccardinality', 'descprimecardinality'),
        'm21': ('econncardinality', 'econnprimecardinality'),
        'm22': ('datasetcardinality', 'datasetprimecardinality'),
        'm31': ('vocabdesccardinality', 'vocabdescprimecardinality'),
    }
    try:
        card_col, prime_col = cardinality_columns[measure]
    except KeyError:
        raise ValueError('unknown measure: %r' % (measure,))
    path = ('data/measurements/' + measure + 'aFile_'
            + dataset + '_' + typelink + '.tsv')
    result = pd.read_csv(path, sep="\t", usecols=[0, 1, 2, 3],
                         dtype={'entity': str, 'typeentity': object,
                                card_col: float, prime_col: float})
    result['dataset'] = dataset
    return result

    
    
 

 
# Data sets whose measurement files getdata() will load; the commented
# alternatives below are the full list and a larger test subset.
datasets = ['dnb','aemet']

#datasets = ['l3s','sheffield','bne','bibbase','dwsmannheim','semanticweb','soton','dnb','bibsonomy','eurostat','openei','reegle','govwild','bfs','imf','fao','rodeionet','ndljp','icane','swcompany','korrekt','harth','revyu','eye48','simia','giftree','tomheath','twrpi','torre','linkedgeodata','aemet','geovocab','ontologi','ukpostcodes']

#datasets = ['l3s','sheffield','bne','bibbase','dwsmannheim','semanticweb','soton','dnb'] #tests


# Per-(measure, measure-type, link-type) accumulator lists; each collects one
# DataFrame per data set and is concatenated later ("l2c" = list to
# concatenate, per the comment in getdata()).  Name grid:
#   measures      : m11, m12, m13, m21, m22, m31
#   measure types : a, b, c
#   link types    : i, s, r, o, c
# This creates the same 90 module-level names the hand-written version did
# (l2c_m11ai, ..., l2c_m31cc), each bound to its own fresh empty list.
for _measure in ('m11', 'm12', 'm13', 'm21', 'm22', 'm31'):
    for _mtype in 'abc':
        for _ltype in 'isroc':
            globals()['l2c_' + _measure + _mtype + _ltype] = []
del _measure, _mtype, _ltype


def getdata():
    for dataset in datasets:     
        ##### a 
        try:
            #all measurements for links type='i'
            measurement=loadalldatasets_ameasures(['m11',dataset,'i'])
            l2c_m11ai.append(measurement)
        except urllib.error.HTTPError as err:
            if err.code == 404:
                print('HTTP error when loading all measurements for a measure for data set: '+dataset)    
        try:
            #all measurements for links type='i'
            measurement=loadalldatasets_ameasures(['m12',dataset,'i'])
            l2c_m12ai.append(measurement)
        except urllib.error.HTTPError as err:
            if err.code == 404:
                print('HTTP error when loading all measurements for a measure for data set: '+dataset)    
        try:
            #all measurements for links type='i'
            measurement=loadalldatasets_ameasures(['m13',dataset,'i'])
            l2c_m13ai.append(measurement)
        except urllib.error.HTTPError as err:
            if err.code == 404:
                print('HTTP error when loading all measurements for a measure for data set: '+dataset)    
        try:
            #all measurements for links type='i'
            measurement=loadalldatasets_ameasures(['m21',dataset,'i'])
            l2c_m12ai.append(measurement)
        except urllib.error.HTTPError as err:
            if err.code == 404:
                print('HTTP error when loading all measurements for a measure for data set: '+dataset)    
        try:
            #all measurements for links type='i'
            measurement=loadalldatasets_ameasures(['m22',dataset,'i'])
            l2c_m22ai.append(measurement)
        except urllib.error.HTTPError as err:
            if err.code == 404:
                print('HTTP error when loading all measurements for a measure for data set: '+dataset)    
        try:
            #all measurements for links type='i'
            measurement=loadalldatasets_ameasures(['m31',dataset,'i'])
            l2c_m31ai.append(measurement)
        except urllib.error.HTTPError as err:
            if err.code == 404:
                print('HTTP error when loading all measurements for a measure for data set: '+dataset)    
       
        try:
            #all measurements for links type='i'
            measurement=loadalldatasets_ameasures(['m11',dataset,'r'])
            l2c_m11ar.append(measurement)
        except urllib.error.HTTPError as err:
            if err.code == 404:
                print('HTTP error when loading all measurements for a measure for data set: '+dataset)    
        try:
            #all measurements for links type='i'
            measurement=loadalldatasets_ameasures(['m12',dataset,'r'])
            l2c_m12ar.append(measurement)
        except urllib.error.HTTPError as err:
            if err.code == 404:
                print('HTTP error when loading all measurements for a measure for data set: '+dataset)    
        try:
            #all measurements for links type='i'
            measurement=loadalldatasets_ameasures(['m13',dataset,'r'])
            l2c_m13ar.append(measurement)
        except urllib.error.HTTPError as err:
            if err.code == 404:
                print('HTTP error when loading all measurements for a measure for data set: '+dataset)    
        try:
            #all measurements for links type='i'
            measurement=loadalldatasets_ameasures(['m21',dataset,'r'])
            l2c_m12ar.append(measurement)
        except urllib.error.HTTPError as err:
            if err.code == 404:
                print('HTTP error when loading all measurements for a measure for data set: '+dataset)    
        try:
            #all measurements for links type='i'
            measurement=loadalldatasets_ameasures(['m22',dataset,'r'])
            l2c_m22ar.append(measurement)
        except urllib.error.HTTPError as err:
            if err.code == 404:
                print('HTTP error when loading all measurements for a measure for data set: '+dataset)    
        try:
            #all measurements for links type='i'
            measurement=loadalldatasets_ameasures(['m31',dataset,'r'])
            l2c_m31ar.append(measurement)
        except urllib.error.HTTPError as err:
            if err.code == 404:
                print('HTTP error when loading all measurements for a measure for data set: '+dataset)    
        try:
            #all measurements for links type='i'
            measurement=loadalldatasets_ameasures(['m11',dataset,'s'])
            l2c_m11as.append(measurement)
        except urllib.error.HTTPError as err:
            if err.code == 404:
                print('HTTP error when loading all measurements for a measure for data set: '+dataset)    
        try:
            #all measurements for links type='i'
            measurement=loadalldatasets_ameasures(['m12',dataset,'s'])
            l2c_m12as.append(measurement)
        except urllib.error.HTTPError as err:
            if err.code == 404:
                print('HTTP error when loading all measurements for a measure for data set: '+dataset)    
        try:
            #all measurements for links type='i'
            measurement=loadalldatasets_ameasures(['m13',dataset,'s'])
            l2c_m13as.append(measurement)
        except urllib.error.HTTPError as err:
            if err.code == 404:
                print('HTTP error when loading all measurements for a measure for data set: '+dataset)    
        try:
            #all measurements for links type='i'
            measurement=loadalldatasets_ameasures(['m21',dataset,'s'])
            l2c_m12as.append(measurement)
        except urllib.error.HTTPError as err:
            if err.code == 404:
                print('HTTP error when loading all measurements for a measure for data set: '+dataset)    
        try:
            #all measurements for links type='i'
            measurement=loadalldatasets_ameasures(['m22',dataset,'s'])
            l2c_m22as.append(measurement)
        except urllib.error.HTTPError as err:
            if err.code == 404:
                print('HTTP error when loading all measurements for a measure for data set: '+dataset)    
        try:
            #all measurements for links type='i'
            measurement=loadalldatasets_ameasures(['m31',dataset,'s'])
            l2c_m31as.append(measurement)
        except urllib.error.HTTPError as err:
            if err.code == 404:
                print('HTTP error when loading all measurements for a measure for data set: '+dataset)    
        try:
            #all measurements for links type='i'
            measurement=loadalldatasets_ameasures(['m11',dataset,'o'])
            l2c_m11ao.append(measurement)
        except urllib.error.HTTPError as err:
            if err.code == 404:
                print('HTTP error when loading all measurements for a measure for data set: '+dataset)    
        try:
            #all measurements for links type='i'
            measurement=loadalldatasets_ameasures(['m12',dataset,'o'])
            l2c_m12ao.append(measurement)
        except urllib.error.HTTPError as err:
            if err.code == 404:
                print('HTTP error when loading all measurements for a measure for data set: '+dataset)    
        try:
            #all measurements for links type='i'
            measurement=loadalldatasets_ameasures(['m13',dataset,'o'])
            l2c_m13ao.append(measurement)
        except urllib.error.HTTPError as err:
            if err.code == 404:
                print('HTTP error when loading all measurements for a measure for data set: '+dataset)    
        try:
            #all measurements for links type='i'
            measurement=loadalldatasets_ameasures(['m21',dataset,'o'])
            l2c_m12ao.append(measurement)
        except urllib.error.HTTPError as err:
            if err.code == 404:
                print('HTTP error when loading all measurements for a measure for data set: '+dataset)    
        try:
            #all measurements for links type='i'
            measurement=loadalldatasets_ameasures(['m22',dataset,'o'])
            l2c_m22ao.append(measurement)
        except urllib.error.HTTPError as err:
            if err.code == 404:
                print('HTTP error when loading all measurements for a measure for data set: '+dataset)    
        try:
            #all measurements for links type='i'
            measurement=loadalldatasets_ameasures(['m31',dataset,'o'])
            l2c_m31ao.append(measurement)
        except urllib.error.HTTPError as err:
            if err.code == 404:
                print('HTTP error when loading all measurements for a measure for data set: '+dataset)    
       
        try:
            #all measurements for links type='i'
            measurement=loadalldatasets_ameasures(['m11',dataset,'c'])
            l2c_m11ac.append(measurement)
        except urllib.error.HTTPError as err:
            if err.code == 404:
                print('HTTP error when loading all measurements for a measure for data set: '+dataset)    
        try:
            #all measurements for links type='i'
            measurement=loadalldatasets_ameasures(['m12',dataset,'c'])
            l2c_m12ac.append(measurement)
        except urllib.error.HTTPError as err:
            if err.code == 404:
                print('HTTP error when loading all measurements for a measure for data set: '+dataset)    
        try:
            #all measurements for links type='i'
            measurement=loadalldatasets_ameasures(['m13',dataset,'c'])
            l2c_m13ac.append(measurement)
        except urllib.error.HTTPError as err:
            if err.code == 404:
                print('HTTP error when loading all measurements for a measure for data set: '+dataset)    
        try:
            #all measurements for links type='i'
            measurement=loadalldatasets_ameasures(['m21',dataset,'c'])
            l2c_m12ac.append(measurement)
        except urllib.error.HTTPError as err:
            if err.code == 404:
                print('HTTP error when loading all measurements for a measure for data set: '+dataset)    
        try:
            #all measurements for links type='i'
            measurement=loadalldatasets_ameasures(['m22',dataset,'c'])
            l2c_m22ac.append(measurement)
        except urllib.error.HTTPError as err:
            if err.code == 404:
                print('HTTP error when loading all measurements for a measure for data set: '+dataset)    
        try:
            #all measurements for links type='i'
            measurement=loadalldatasets_ameasures(['m31',dataset,'c'])
            l2c_m31ac.append(measurement)
        except urllib.error.HTTPError as err:
            if err.code == 404:
                print('HTTP error when loading all measurements for a measure for data set: '+dataset)    
       
        ##### b and c
        try:
            #all measurements for links type='i'
            measurement=loadalldatasetsmeasurements(['m11','b',dataset,'i'])
            #print(dataset)
            #print(measurement)
            l2c_m11bi.append(measurement)
        except urllib.error.HTTPError as err:
            if err.code == 404:
                print('HTTP error when loading all measurements for a measure for data set: '+dataset)    
        try:    
            measurement=loadalldatasetsmeasurements(['m12','b',dataset,'i'])
            #print(dataset)
            #print(measurement)
            l2c_m12bi.append(measurement)
        except urllib.error.HTTPError as err:
            if err.code == 404:
                print('HTTP error when loading all measurements for a measure for data set: '+dataset)    
        try:    
            measurement=loadalldatasetsmeasurements(['m13','b',dataset,'i'])
            l2c_m13bi.append(measurement)
        except urllib.error.HTTPError as err:
            if err.code == 404:
                print('HTTP error when loading all measurements for a measure for data set: '+dataset)    
        try:    
            measurement=loadalldatasetsmeasurements(['m21','b',dataset,'i'])
            l2c_m21bi.append(measurement)
        except urllib.error.HTTPError as err:
            if err.code == 404:
                print('HTTP error when loading all measurements for a measure for data set: '+dataset)    
        try:    
            measurement=loadalldatasetsmeasurements(['m22','b',dataset,'i'])
            l2c_m22bi.append(measurement)
        except urllib.error.HTTPError as err:
            if err.code == 404:
                print('HTTP error when loading all measurements for a measure for data set: '+dataset)
        try:        
            measurement=loadalldatasetsmeasurements(['m31','b',dataset,'i'])
            l2c_m31bi.append(measurement) 
        except urllib.error.HTTPError as err:
            if err.code == 404:
                print('HTTP error when loading all measurements for a measure for data set: '+dataset)    
        try:   
            #all measurements for links type='s'
            measurement=loadalldatasetsmeasurements(['m11','b',dataset,'s'])
            l2c_m11bs.append(measurement)
        except urllib.error.HTTPError as err:
            if err.code == 404:
                print('HTTP error when loading all measurements for a measure for data set: '+dataset)    
        try:    
            measurement=loadalldatasetsmeasurements(['m12','b',dataset,'s'])
            l2c_m12bs.append(measurement)
        except urllib.error.HTTPError as err:
            if err.code == 404:
                print('HTTP error when loading all measurements for a measure for data set: '+dataset)    
        try:    
            measurement=loadalldatasetsmeasurements(['m13','b',dataset,'s'])
            l2c_m13bs.append(measurement)
        except urllib.error.HTTPError as err:
            if err.code == 404:
                print('HTTP error when loading all measurements for a measure for data set: '+dataset)    
        try:    
            measurement=loadalldatasetsmeasurements(['m21','b',dataset,'s'])
            l2c_m21bs.append(measurement)
        except urllib.error.HTTPError as err:
            if err.code == 404:
                print('HTTP error when loading all measurements for a measure for data set: '+dataset)    
        try:    
            measurement=loadalldatasetsmeasurements(['m22','b',dataset,'s'])
            l2c_m22bs.append(measurement)
        except urllib.error.HTTPError as err:
            if err.code == 404:
                print('HTTP error when loading all measurements for a measure for data set: '+dataset)    
        try:    
            measurement=loadalldatasetsmeasurements(['m31','b',dataset,'s'])
            l2c_m31bs.append(measurement)
        except urllib.error.HTTPError as err:
            if err.code == 404:
                print('HTTP error when loading all measurements for a measure for data set: '+dataset)    
            #all measurements for links type='r'
            
        try:    
            measurement=loadalldatasetsmeasurements(['m11','b',dataset,'r'])
            l2c_m11br.append(measurement)
        except urllib.error.HTTPError as err:
            if err.code == 404:
                print('HTTP error when loading all measurements for a measure for data set: '+dataset)    
        try:    
            measurement=loadalldatasetsmeasurements(['m12','b',dataset,'r'])
            l2c_m12br.append(measurement)
        except urllib.error.HTTPError as err:
            if err.code == 404:
                print('HTTP error when loading all measurements for a measure for data set: '+dataset)
        try:       
            measurement=loadalldatasetsmeasurements(['m13','b',dataset,'r'])
            l2c_m13br.append(measurement)
        except urllib.error.HTTPError as err:
            if err.code == 404:
                print('HTTP error when loading all measurements for a measure for data set: '+dataset)    
        try: 
            measurement=loadalldatasetsmeasurements(['m21','b',dataset,'r'])
            l2c_m21br.append(measurement)
        except urllib.error.HTTPError as err:
            if err.code == 404:
                print('HTTP error when loading all measurements for a measure for data set: '+dataset)    
        try:    
            measurement=loadalldatasetsmeasurements(['m22','b',dataset,'r'])
            l2c_m22br.append(measurement)
        except urllib.error.HTTPError as err:
            if err.code == 404:
                print('HTTP error when loading all measurements for a measure for data set: '+dataset)    
        try:   
            measurement=loadalldatasetsmeasurements(['m31','b',dataset,'r'])
            l2c_m31br.append(measurement)
        except urllib.error.HTTPError as err:
            if err.code == 404:
                print('HTTP error when loading all measurements for a measure for data set: '+dataset)
        try:        
            #all measurements for links type='o'
            measurement=loadalldatasetsmeasurements(['m11','b',dataset,'o'])
            l2c_m11bo.append(measurement)
        except urllib.error.HTTPError as err:
            if err.code == 404:
                print('HTTP error when loading all measurements for a measure for data set: '+dataset)   
        try:   
            measurement=loadalldatasetsmeasurements(['m12','b',dataset,'o'])
            l2c_m12bo.append(measurement)
        except urllib.error.HTTPError as err:
            if err.code == 404:
                print('HTTP error when loading all measurements for a measure for data set: '+dataset)    
        try:    
            measurement=loadalldatasetsmeasurements(['m13','b',dataset,'o'])
            l2c_m13bo.append(measurement)
        except urllib.error.HTTPError as err:
            if err.code == 404:
                print('HTTP error when loading all measurements for a measure for data set: '+dataset)    
        try:    
            measurement=loadalldatasetsmeasurements(['m21','b',dataset,'o'])
            l2c_m21bo.append(measurement)
        except urllib.error.HTTPError as err:
            if err.code == 404:
                print('HTTP error when loading all measurements for a measure for data set: '+dataset)       
        try:    
            measurement=loadalldatasetsmeasurements(['m22','b',dataset,'o'])
            l2c_m22bo.append(measurement)
        except urllib.error.HTTPError as err:
            if err.code == 404:
                print('HTTP error when loading all measurements for a measure for data set: '+dataset)    
        try:    
            measurement=loadalldatasetsmeasurements(['m31','b',dataset,'o'])
            l2c_m31bo.append(measurement)
        except urllib.error.HTTPError as err:
            if err.code == 404:
                print('HTTP error when loading all measurements for a measure for data set: '+dataset)    
            #all measurements for links type='c'
        try:    
            measurement=loadalldatasetsmeasurements(['m11','b',dataset,'c'])
            l2c_m11bc.append(measurement)
        except urllib.error.HTTPError as err:
            if err.code == 404:
                print('HTTP error when loading all measurements for a measure for data set: '+dataset)    
        try:    
            measurement=loadalldatasetsmeasurements(['m12','b',dataset,'c'])
            l2c_m12bc.append(measurement)
        except urllib.error.HTTPError as err:
            if err.code == 404:
                print('HTTP error when loading all measurements for a measure for data set: '+dataset)    
        try:   
            measurement=loadalldatasetsmeasurements(['m13','b',dataset,'c'])
            l2c_m13bc.append(measurement)
        except urllib.error.HTTPError as err:
            if err.code == 404:
                print('HTTP error when loading all measurements for a measure for data set: '+dataset)    
        try:    
            measurement=loadalldatasetsmeasurements(['m21','b',dataset,'c'])
            l2c_m21bc.append(measurement)
        except urllib.error.HTTPError as err:
            if err.code == 404:
                print('HTTP error when loading all measurements for a measure for data set: '+dataset)    
        try:    
            measurement=loadalldatasetsmeasurements(['m22','b',dataset,'c'])
            l2c_m22bc.append(measurement)
        except urllib.error.HTTPError as err:
            if err.code == 404:
                print('HTTP error when loading all measurements for a measure for data set: '+dataset)    
        try:    
            measurement=loadalldatasetsmeasurements(['m31','b',dataset,'c'])
            l2c_m31bc.append(measurement)
        except urllib.error.HTTPError as err:
            if err.code == 404:
                print('HTTP error when loading all measurements for a measure for data set: '+dataset)    
            #listtoconcatenate l2c -------------------------
        try:   
            measurement=loadalldatasetsmeasurements(['m11','c',dataset,'i'])
            l2c_m11ci.append(measurement)
        except urllib.error.HTTPError as err:
            if err.code == 404:
                print('HTTP error when loading all measurements for a measure for data set: '+dataset)    
        try:    
            measurement=loadalldatasetsmeasurements(['m12','c',dataset,'i'])
            l2c_m12ci.append(measurement)
        except urllib.error.HTTPError as err:
            if err.code == 404:
                print('HTTP error when loading all measurements for a measure for data set: '+dataset)    
        try:    
            measurement=loadalldatasetsmeasurements(['m13','c',dataset,'i'])
            l2c_m13ci.append(measurement)
        except urllib.error.HTTPError as err:
            if err.code == 404:
                print('HTTP error when loading all measurements for a measure for data set: '+dataset)    
        try:    
            measurement=loadalldatasetsmeasurements(['m21','c',dataset,'i'])
            l2c_m21ci.append(measurement)
        except urllib.error.HTTPError as err:
            if err.code == 404:
                print('HTTP error when loading all measurements for a measure for data set: '+dataset)    
        try:        
            measurement=loadalldatasetsmeasurements(['m22','c',dataset,'i'])
            l2c_m22ci.append(measurement)
        except urllib.error.HTTPError as err:
            if err.code == 404:
                print('HTTP error when loading all measurements for a measure for data set: '+dataset)
        try:        
            measurement=loadalldatasetsmeasurements(['m31','c',dataset,'i'])
            l2c_m31ci.append(measurement) 
        except urllib.error.HTTPError as err:
            if err.code == 404:
                print('HTTP error when loading all measurements for a measure for data set: '+dataset)
        try:        
            #all measurements for links type='s'
            measurement=loadalldatasetsmeasurements(['m11','c',dataset,'s'])
            l2c_m11cs.append(measurement)
        except urllib.error.HTTPError as err:
            if err.code == 404:
                print('HTTP error when loading all measurements for a measure for data set: '+dataset)
        try:        
            measurement=loadalldatasetsmeasurements(['m12','c',dataset,'s'])
            l2c_m12cs.append(measurement)
        except urllib.error.HTTPError as err:
            if err.code == 404:
                print('HTTP error when loading all measurements for a measure for data set: '+dataset)    
        try:    
            measurement=loadalldatasetsmeasurements(['m13','c',dataset,'s'])
            l2c_m13cs.append(measurement)
        except urllib.error.HTTPError as err:
            if err.code == 404:
                print('HTTP error when loading all measurements for a measure for data set: '+dataset)
        try:        
            measurement=loadalldatasetsmeasurements(['m21','c',dataset,'s'])
            l2c_m21cs.append(measurement)
        except urllib.error.HTTPError as err:
            if err.code == 404:
                print('HTTP error when loading all measurements for a measure for data set: '+dataset)    
        try:    
            measurement=loadalldatasetsmeasurements(['m22','c',dataset,'s'])
            l2c_m22cs.append(measurement)
        except urllib.error.HTTPError as err:
            if err.code == 404:
                print('HTTP error when loading all measurements for a measure for data set: '+dataset)
        try:        
            measurement=loadalldatasetsmeasurements(['m31','c',dataset,'s'])
            l2c_m31cs.append(measurement)
        except urllib.error.HTTPError as err:
            if err.code == 404:
                print('HTTP error when loading all measurements for a measure for data set: '+dataset)    
            #all measurements for links type='r'
        try:    
            measurement=loadalldatasetsmeasurements(['m11','c',dataset,'r'])
            l2c_m11cr.append(measurement)
        except urllib.error.HTTPError as err:
            if err.code == 404:
                print('HTTP error when loading all measurements for a measure for data set: '+dataset)
        try:        
            measurement=loadalldatasetsmeasurements(['m12','c',dataset,'r'])
            l2c_m12cr.append(measurement)
        except urllib.error.HTTPError as err:
            if err.code == 404:
                print('HTTP error when loading all measurements for a measure for data set: '+dataset)    
        try:    
            measurement=loadalldatasetsmeasurements(['m13','c',dataset,'r'])
            l2c_m13cr.append(measurement)
        except urllib.error.HTTPError as err:
            if err.code == 404:
                print('HTTP error when loading all measurements for a measure for data set: '+dataset)
        try:        
            measurement=loadalldatasetsmeasurements(['m21','c',dataset,'r'])
            l2c_m21cr.append(measurement)
        except urllib.error.HTTPError as err:
            if err.code == 404:
                print('HTTP error when loading all measurements for a measure for data set: '+dataset)
        try:        
            measurement=loadalldatasetsmeasurements(['m22','c',dataset,'r'])
            l2c_m22cr.append(measurement)
        except urllib.error.HTTPError as err:
            if err.code == 404:
                print('HTTP error when loading all measurements for a measure for data set: '+dataset)
        try:        
            measurement=loadalldatasetsmeasurements(['m31','c',dataset,'r'])
            l2c_m31cr.append(measurement)
        except urllib.error.HTTPError as err:
            if err.code == 404:
                print('HTTP error when loading all measurements for a measure for data set: '+dataset)
        try:        
            #all measurements for links type='o'
            measurement=loadalldatasetsmeasurements(['m11','c',dataset,'o'])
            l2c_m11co.append(measurement)
        except urllib.error.HTTPError as err:
            if err.code == 404:
                print('HTTP error when loading all measurements for a measure for data set: '+dataset)
        try:        
            measurement=loadalldatasetsmeasurements(['m12','c',dataset,'o'])
            l2c_m12co.append(measurement)
        except urllib.error.HTTPError as err:
            if err.code == 404:
                print('HTTP error when loading all measurements for a measure for data set: '+dataset)
        try:        
            measurement=loadalldatasetsmeasurements(['m13','c',dataset,'o'])
            l2c_m13co.append(measurement)
        except urllib.error.HTTPError as err:
            if err.code == 404:
                print('HTTP error when loading all measurements for a measure for data set: '+dataset)    
        try:    
            measurement=loadalldatasetsmeasurements(['m21','c',dataset,'o'])
            l2c_m21co.append(measurement)
        except urllib.error.HTTPError as err:
            if err.code == 404:
                print('HTTP error when loading all measurements for a measure for data set: '+dataset)
        try:        
            measurement=loadalldatasetsmeasurements(['m22','c',dataset,'o'])
            l2c_m22co.append(measurement)
        except urllib.error.HTTPError as err:
            if err.code == 404:
                print('HTTP error when loading all measurements for a measure for data set: '+dataset)
        try:        
            measurement=loadalldatasetsmeasurements(['m31','c',dataset,'o'])
            l2c_m31co.append(measurement)
        except urllib.error.HTTPError as err:
            if err.code == 404:
                print('HTTP error when loading all measurements for a measure for data set: '+dataset)
        try:        
            #all measurements for links type='c'
            measurement=loadalldatasetsmeasurements(['m11','c',dataset,'c'])
            l2c_m11cc.append(measurement)
        except urllib.error.HTTPError as err:
            if err.code == 404:
                print('HTTP error when loading all measurements for a measure for data set: '+dataset)
        try:        
            measurement=loadalldatasetsmeasurements(['m12','c',dataset,'c'])
            l2c_m12cc.append(measurement)
        except urllib.error.HTTPError as err:
            if err.code == 404:
                print('HTTP error when loading all measurements for a measure for data set: '+dataset)
        try:        
            measurement=loadalldatasetsmeasurements(['m13','c',dataset,'c'])
            l2c_m13cc.append(measurement)
        except urllib.error.HTTPError as err:
            if err.code == 404:
                print('HTTP error when loading all measurements for a measure for data set: '+dataset)  
        try:        
            measurement=loadalldatasetsmeasurements(['m21','c',dataset,'c'])
            l2c_m21cc.append(measurement)
        except urllib.error.HTTPError as err:
            if err.code == 404:
                print('HTTP error when loading all measurements for a measure for data set: '+dataset)
        try:        
            measurement=loadalldatasetsmeasurements(['m22','c',dataset,'c'])
            l2c_m22cc.append(measurement)
        except urllib.error.HTTPError as err:
            if err.code == 404:
                print('HTTP error when loading all measurements for a measure for data set: '+dataset)
        try:        
            measurement=loadalldatasetsmeasurements(['m31','c',dataset,'c'])
            l2c_m31cc.append(measurement)
            #listtoconcatenate l2c
        except urllib.error.HTTPError as err:
            if err.code == 404:
                print('HTTP error when loading all measurements for a measure for data set: '+dataset)
                          
# getdata() mutates the module-level l2c_* measurement lists in place,
# so they are readable here after the call.
getdata()
print(str(len(l2c_m22br))) # still accessible after being modified by the function (global var)
2
def ratioandentropyinonetable(frames):
    """Concatenate the given pandas objects column-wise (axis=1).

    frames -- sequence of Series/DataFrames sharing an index.  The parameter
    was renamed from ``list`` so the builtin is no longer shadowed; every
    existing call site passes it positionally, so callers are unaffected.
    """
    return pd.concat(frames, axis=1)
 
def inequalities(b, c):
    """Put the ratio frame and the entropy differences side by side and draw
    per-dataset boxplots of 'diffentropy'.

    b -- frame with entity, typeentity, diffratio, dataset columns
    c -- matching frame whose 'diffentropy' column is used; its entity,
         typeentity and dataset columns are assumed identical to b's
         (same loader, same files) -- so only the diffentropy column is
         appended.
    """
    combined = ratioandentropyinonetable([b, c['diffentropy']])
    grouped = combined.groupby('dataset')
    # One boxplot panel per dataset; a large figure keeps the panels legible.
    grouped.boxplot(column='diffentropy', figsize=(20, 20))
    # NOTE: column=['diffratio', 'diffentropy'] would show both measures in
    # one figure (kept from the original exploration comments).
   
# Empty defaults so every concat_* 'a'-measurement frame exists even when no
# files were loaded, then concatenate the per-dataset lists that getdata()
# filled.  Replaces 60 copy-pasted statements with two loops; the generated
# global names (concat_m11ai, ..., concat_m31ac) are identical to before.
_MEASURES = ('m11', 'm12', 'm13', 'm21', 'm22', 'm31')
_LINKTYPES = ('i', 's', 'r', 'o', 'c')
for _t in _LINKTYPES:
    for _m in _MEASURES:
        globals()['concat_' + _m + 'a' + _t] = pd.DataFrame()
for _t in _LINKTYPES:
    for _m in _MEASURES:
        _parts = globals()['l2c_' + _m + 'a' + _t]
        if len(_parts) != 0:
            globals()['concat_' + _m + 'a' + _t] = pd.concat(_parts)


print(concat_m11ai.head(2))
Empty DataFrame
Columns: [entity, typeentity, classifcardinality, classifprimecardinality, dataset]
Index: []
/srv/paws/lib/python3.6/site-packages/ipykernel_launcher.py:4: FutureWarning: Sorting because non-concatenation axis is not aligned. A future version
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.

To retain the current behavior and silence the warning, pass 'sort=True'.

  after removing the cwd from sys.path.
/srv/paws/lib/python3.6/site-packages/ipykernel_launcher.py:17: FutureWarning: Sorting because non-concatenation axis is not aligned. A future version
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.

To retain the current behavior and silence the warning, pass 'sort=True'.

/srv/paws/lib/python3.6/site-packages/ipykernel_launcher.py:30: FutureWarning: Sorting because non-concatenation axis is not aligned. A future version
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.

To retain the current behavior and silence the warning, pass 'sort=True'.

/srv/paws/lib/python3.6/site-packages/ipykernel_launcher.py:43: FutureWarning: Sorting because non-concatenation axis is not aligned. A future version
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.

To retain the current behavior and silence the warning, pass 'sort=True'.

/srv/paws/lib/python3.6/site-packages/ipykernel_launcher.py:56: FutureWarning: Sorting because non-concatenation axis is not aligned. A future version
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.

To retain the current behavior and silence the warning, pass 'sort=True'.

#####

# Empty defaults for every b/c (ratio/entropy) frame across all link types,
# then build the link-type 'i' pairs and plot their inequalities.  This
# replaces 130+ copy-pasted statements; the generated global names and the
# 'mXX i check' progress prints are identical to the original cells.
_MEASURES = ('m11', 'm12', 'm13', 'm21', 'm22', 'm31')
for _m in _MEASURES:
    for _v in ('b', 'c'):
        for _t in ('i', 's', 'r', 'o', 'c'):
            globals()['concat_' + _m + _v + _t] = pd.DataFrame()

for _m in _MEASURES:
    _blist = globals()['l2c_' + _m + 'bi']
    _clist = globals()['l2c_' + _m + 'ci']
    if len(_blist) != 0 and len(_clist) != 0:
        print(_m + ' i check')
        _b = pd.concat(_blist)
        _c = pd.concat(_clist)
        globals()['concat_' + _m + 'bi'] = _b
        globals()['concat_' + _m + 'ci'] = _c
        inequalities(_b, _c)
m11 i check
m12 i check
m13 i check
m21 i check
m22 i check
m31 i check
# Same b/c pairing and inequality boxplots for link types 's', 'r', 'o', 'c'.
# These sections carry no progress prints, matching the original cells; the
# iteration order (all measures per link type) is also unchanged.
for _t in ('s', 'r', 'o', 'c'):
    for _m in ('m11', 'm12', 'm13', 'm21', 'm22', 'm31'):
        _blist = globals()['l2c_' + _m + 'b' + _t]
        _clist = globals()['l2c_' + _m + 'c' + _t]
        if len(_blist) != 0 and len(_clist) != 0:
            _b = pd.concat(_blist)
            _c = pd.concat(_clist)
            globals()['concat_' + _m + 'b' + _t] = _b
            globals()['concat_' + _m + 'c' + _t] = _c
            inequalities(_b, _c)

Discriminative power: per-dataset descriptive statistics for each measure

def describe_discriminative(args):
    """Write per-dataset descriptive statistics of a measurement frame to CSV.

    args -- two-element list: [DataFrame, writable file/handle].  Kept as a
    list for backward compatibility with the existing call sites; the
    parameter was renamed from ``list`` so the builtin is not shadowed.

    Entropy frames (those with a 'diffentropy' column) and ratio frames
    (those with a 'diffratio' column) are summarised on that single column;
    anything else gets a full describe().  The per-dataset mean is also
    printed as a quick console check.  Empty frames are skipped silently.
    """
    df = args[0]
    out = args[1]
    if len(df) != 0:
        if 'diffentropy' in df:
            grouped = df.groupby('dataset').diffentropy
            grouped.describe().to_csv(out, header=True)
            print(grouped.mean())
        elif 'diffratio' in df:
            grouped = df.groupby('dataset').diffratio
            grouped.describe().to_csv(out, header=True)
            print(grouped.mean())
        else:
            grouped = df.groupby('dataset')
            grouped.describe().to_csv(out, header=True)
            print(grouped.mean())
# One describe CSV per measure/variant, combining all five link types.
# This fixes a copy-paste bug in the original: the m11c file was written
# from concat_m11a instead of concat_m11c.  The generated global names
# (concat_m11a, ..., concat_m31c), the output files and the progress prints
# are otherwise identical to the original 26 cells.
for _m in ('m11', 'm12', 'm13', 'm21', 'm22', 'm31'):
    for _v in ('a', 'b', 'c'):
        _name = _m + _v
        globals()['concat_' + _name] = pd.concat(
            [globals()['concat_' + _name + _t] for _t in ('i', 's', 'r', 'o', 'c')])
        with open(_name + '_alltypesoflinks_describe.csv', 'a') as mi:
            describe_discriminative([globals()['concat_' + _name], mi])
            print(_name)
m11a
dataset
aemet    0.0
dnb      0.0
Name: diffratio, dtype: float64
m11b
m11c
m12a
dataset
aemet    1.150376
dnb      0.792945
Name: diffratio, dtype: float64
m12b
m12c
m13a
dataset
aemet    1.052632
dnb      0.000000
Name: diffratio, dtype: float64
m13b
m13c
m21a
dataset
aemet    1.150376
dnb      1.397371
Name: diffratio, dtype: float64
m21b
m21c
m22a
dataset
aemet    1.067669
dnb      1.352545
Name: diffratio, dtype: float64
m22b
m22c
m31a
dataset
aemet    0.977444
dnb      0.606449
Name: diffratio, dtype: float64
m31b
m31c

Correlation between diffratio and diffentropy: do the two really measure something different?

#def correlation_ratio_diffentropy(list):
     #concat = pd.concat(list, axis=1)
     #corr_bc = concat[['diffratio', 'diffentropy']]
         #print(corr_bc.corr(method='spearman')) #pearson normality
        #print(corr_bc.hist(column='diffentropy'))
        #print(concat.head
def correlation_ratio_difference(b, c):
    """Print the Spearman correlation between diffratio (from b) and
    diffentropy (from c), after printing a preview of both inputs.

    Spearman is used rather than Pearson because it does not require the
    measurements to be normally distributed.

    NOTE(review): the cells below call correlation_ratio_diffentropy([b, c]),
    a name not defined in this part of the file -- confirm whether those call
    sites should instead call this function as
    correlation_ratio_difference(b, c).
    """
    pair = ratioandentropyinonetable([b['diffratio'], c['diffentropy']])
    print(b.head())
    print(c.head())
    print(pair.corr(method='spearman'))
    
    
# Spot-check the loaded frames: sample rows of the m22 ratio frame
# (link type 'o') and per-column non-null counts of the m11 ratio frame
# (link type 'i').
print(concat_m22bo.head())
print(str(concat_m11bi.count(axis=0)))
                           entity  \
0  http://d-nb.info/gnd/4002820-3   
1  http://d-nb.info/gnd/4129650-3   
2  http://d-nb.info/gnd/4120506-6   
3  http://d-nb.info/gnd/4233258-8   
4  http://d-nb.info/gnd/4242445-8   

                                          typeentity  diffratio dataset  
0  http://d-nb.info/standards/elementset/gnd#Subj...        1.0     dnb  
1  http://d-nb.info/standards/elementset/gnd#Subj...        1.0     dnb  
2  http://d-nb.info/standards/elementset/gnd#Subj...        1.0     dnb  
3  http://d-nb.info/standards/elementset/gnd#Subj...        1.0     dnb  
4  http://d-nb.info/standards/elementset/gnd#Subj...        1.0     dnb  
entity        3512
typeentity    3512
diffratio     3513
dataset       3513
dtype: int64
print(concat_m22co.head())
Empty DataFrame
Columns: [entity, typeentity, diffentropy, dataset]
Index: []
# Summary statistics for the m11 entropy frame (link type 'i').
print(concat_m11ci.describe())

#concat_m11bi.isnull().values.any()
       diffentropy
count          0.0
mean           NaN
std            NaN
min            NaN
25%            NaN
50%            NaN
75%            NaN
max            NaN

# Spearman correlation between diffratio and diffentropy for every
# measure/link-type pair that has data (.empty checks rows only; use
# len(...) != 0 instead to also count all-NaN rows).
# cor(i,j) = cov(i,j)/[stdev(i)*stdev(j)] will be NaN if a stdev is 0.
# NOTE(review): correlation_ratio_diffentropy is not defined in this part of
# the file; the visible helper is correlation_ratio_difference(b, c) --
# confirm which name/signature is current before running.  The call is kept
# exactly as in the original cells.
for _t in ('i', 's', 'r', 'o', 'c'):
    for _m in ('m11', 'm12', 'm13', 'm21', 'm22', 'm31'):
        _b = globals()['concat_' + _m + 'b' + _t]
        _c = globals()['concat_' + _m + 'c' + _t]
        if not _b.empty and not _c.empty:
            correlation_ratio_diffentropy([_b, _c])
def boxplots_for_alldatasets_entropy(args):
    """Draw a horizontal boxplot of 'diffentropy' grouped by 'dataset' and save it as PNG.

    args is a 3-element list (kept for compatibility with existing call sites):
      args[0] -- DataFrame with at least 'diffentropy' and 'dataset' columns
      args[1] -- measure label, printed and used in the output filename
      args[2] -- index appended to the filename: 'boxplot_<measure><i>.png'

    Returns None. When the frame is empty the plot is skipped: pandas'
    grouped boxplot raises "ValueError: not enough values to unpack" on an
    empty groupby, which is the crash seen when a measure produced no rows.
    """
    # Unpack once instead of repeated indexing; the old parameter name
    # shadowed the builtin `list`.
    df, measure, i = args
    print(measure)
    if df.empty:
        # Nothing to plot for this measure/link-type; avoid the ValueError
        # raised by DataFrame.boxplot on an empty grouped frame.
        return
    df.boxplot(column='diffentropy', by='dataset', rot=15, vert=False)
    plt.savefig('boxplot_' + measure + str(i) + '.png')
# only for the measures with entropy: one boxplot per (measure, link-type),
# link types ordered i, s, r, o, c and numbered 1..5 in the output filename.
_entropy_frames = {
    'm11c': [concat_m11ci, concat_m11cs, concat_m11cr, concat_m11co, concat_m11cc],
    'm12c': [concat_m12ci, concat_m12cs, concat_m12cr, concat_m12co, concat_m12cc],
    'm13c': [concat_m13ci, concat_m13cs, concat_m13cr, concat_m13co, concat_m13cc],
    'm21c': [concat_m21ci, concat_m21cs, concat_m21cr, concat_m21co, concat_m21cc],
    'm22c': [concat_m22ci, concat_m22cs, concat_m22cr, concat_m22co, concat_m22cc],
    'm31c': [concat_m31ci, concat_m31cs, concat_m31cr, concat_m31co, concat_m31cc],
}
for _measure, _frames in _entropy_frames.items():
    print(_measure + ' for all each type of link in all data sets ------------------------------------------')
    for _idx, _frame in enumerate(_frames, start=1):
        boxplots_for_alldatasets_entropy([_frame, _measure, _idx])
m11c for all each type of link in all data sets ------------------------------------------
m11c
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-55-51fc7363ccbb> in <module>()
      1 # only for the measures with entropy
      2 print('m11c for all each type of link in all data sets ------------------------------------------')
----> 3 boxplots_for_alldatasets_entropy([concat_m11ci,'m11c',1])
      4 boxplots_for_alldatasets_entropy([concat_m11cs,'m11c',2])
      5 boxplots_for_alldatasets_entropy([concat_m11cr,'m11c',3])

<ipython-input-54-0f07b1a24eae> in boxplots_for_alldatasets_entropy(list)
      5     i= list[2]
      6     print(measure)
----> 7     df.boxplot(column='diffentropy',by='dataset',rot=15,vert=False)
      8     #date =pd.to_datetime('today')
      9     plt.savefig('boxplot_'+measure+str(i)+'.png')

/srv/paws/lib/python3.6/site-packages/pandas/plotting/_core.py in boxplot_frame(self, column, by, ax, fontsize, rot, grid, figsize, layout, return_type, **kwds)
   2255     ax = boxplot(self, column=column, by=by, ax=ax, fontsize=fontsize,
   2256                  grid=grid, rot=rot, figsize=figsize, layout=layout,
-> 2257                  return_type=return_type, **kwds)
   2258     plt.draw_if_interactive()
   2259     return ax

/srv/paws/lib/python3.6/site-packages/pandas/plotting/_core.py in boxplot(data, column, by, ax, fontsize, rot, grid, figsize, layout, return_type, **kwds)
   2224                                          by=by, grid=grid, figsize=figsize,
   2225                                          ax=ax, layout=layout,
-> 2226                                          return_type=return_type)
   2227     else:
   2228         if return_type is None:

/srv/paws/lib/python3.6/site-packages/pandas/plotting/_core.py in _grouped_plot_by_column(plotf, data, columns, by, numeric_only, grid, figsize, ax, layout, return_type, **kwargs)
   2680         ax = _axes[i]
   2681         gp_col = grouped[col]
-> 2682         keys, values = zip(*gp_col)
   2683         re_plotf = plotf(keys, values, ax, **kwargs)
   2684         ax.set_title(col)

ValueError: not enough values to unpack (expected 2, got 0)
linksdistributions = pd.read_csv('http://141.26.208.201/countlinks_09122016.csv',sep="\t",usecols=[0,1,2],dtype={'dataset':object, 'typelink':object, 'countlinks':float})
linksdistributions.head()
---------------------------------------------------------------------------
OSError                                   Traceback (most recent call last)
/usr/lib/python3.6/urllib/request.py in do_open(self, http_class, req, **http_conn_args)
   1317                 h.request(req.get_method(), req.selector, req.data, headers,
-> 1318                           encode_chunked=req.has_header('Transfer-encoding'))
   1319             except OSError as err: # timeout error

/usr/lib/python3.6/http/client.py in request(self, method, url, body, headers, encode_chunked)
   1238         """Send a complete request to the server."""
-> 1239         self._send_request(method, url, body, headers, encode_chunked)
   1240 

/usr/lib/python3.6/http/client.py in _send_request(self, method, url, body, headers, encode_chunked)
   1284             body = _encode(body, 'body')
-> 1285         self.endheaders(body, encode_chunked=encode_chunked)
   1286 

/usr/lib/python3.6/http/client.py in endheaders(self, message_body, encode_chunked)
   1233             raise CannotSendHeader()
-> 1234         self._send_output(message_body, encode_chunked=encode_chunked)
   1235 

/usr/lib/python3.6/http/client.py in _send_output(self, message_body, encode_chunked)
   1025         del self._buffer[:]
-> 1026         self.send(msg)
   1027 

/usr/lib/python3.6/http/client.py in send(self, data)
    963             if self.auto_open:
--> 964                 self.connect()
    965             else:

/usr/lib/python3.6/http/client.py in connect(self)
    935         self.sock = self._create_connection(
--> 936             (self.host,self.port), self.timeout, self.source_address)
    937         self.sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)

/usr/lib/python3.6/socket.py in create_connection(address, timeout, source_address)
    723     if err is not None:
--> 724         raise err
    725     else:

/usr/lib/python3.6/socket.py in create_connection(address, timeout, source_address)
    712                 sock.bind(source_address)
--> 713             sock.connect(sa)
    714             # Break explicitly a reference cycle

OSError: [Errno 113] No route to host

During handling of the above exception, another exception occurred:

URLError                                  Traceback (most recent call last)
<ipython-input-56-0fde317fca05> in <module>()
----> 1 linksdistributions = pd.read_csv('http://141.26.208.201/countlinks_09122016.csv',sep="\t",usecols=[0,1,2],dtype={'dataset':object, 'typelink':object, 'countlinks':float})
      2 linksdistributions.head()

/srv/paws/lib/python3.6/site-packages/pandas/io/parsers.py in parser_f(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, escapechar, comment, encoding, dialect, tupleize_cols, error_bad_lines, warn_bad_lines, skipfooter, doublequote, delim_whitespace, low_memory, memory_map, float_precision)
    676                     skip_blank_lines=skip_blank_lines)
    677 
--> 678         return _read(filepath_or_buffer, kwds)
    679 
    680     parser_f.__name__ = name

/srv/paws/lib/python3.6/site-packages/pandas/io/parsers.py in _read(filepath_or_buffer, kwds)
    422     compression = _infer_compression(filepath_or_buffer, compression)
    423     filepath_or_buffer, _, compression, should_close = get_filepath_or_buffer(
--> 424         filepath_or_buffer, encoding, compression)
    425     kwds['compression'] = compression
    426 

/srv/paws/lib/python3.6/site-packages/pandas/io/common.py in get_filepath_or_buffer(filepath_or_buffer, encoding, compression, mode)
    193 
    194     if _is_url(filepath_or_buffer):
--> 195         req = _urlopen(filepath_or_buffer)
    196         content_encoding = req.headers.get('Content-Encoding', None)
    197         if content_encoding == 'gzip':

/usr/lib/python3.6/urllib/request.py in urlopen(url, data, timeout, cafile, capath, cadefault, context)
    221     else:
    222         opener = _opener
--> 223     return opener.open(url, data, timeout)
    224 
    225 def install_opener(opener):

/usr/lib/python3.6/urllib/request.py in open(self, fullurl, data, timeout)
    524             req = meth(req)
    525 
--> 526         response = self._open(req, data)
    527 
    528         # post-process response

/usr/lib/python3.6/urllib/request.py in _open(self, req, data)
    542         protocol = req.type
    543         result = self._call_chain(self.handle_open, protocol, protocol +
--> 544                                   '_open', req)
    545         if result:
    546             return result

/usr/lib/python3.6/urllib/request.py in _call_chain(self, chain, kind, meth_name, *args)
    502         for handler in handlers:
    503             func = getattr(handler, meth_name)
--> 504             result = func(*args)
    505             if result is not None:
    506                 return result

/usr/lib/python3.6/urllib/request.py in http_open(self, req)
   1344 
   1345     def http_open(self, req):
-> 1346         return self.do_open(http<