%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
import datetime as dt  
import matplotlib
import numpy as np
import urllib
from scipy.stats import pearsonr
import seaborn as sns
# Load the summary-statistics CSV for the m11c "all types of links" measure
# and preview its first rows.
describe_csv = 'data/m11c_alltypesoflinks_describe.csv'
test = pd.read_csv(describe_csv)
test.head()
dataset Unnamed: 1 diffentropy
0 aemet count 133.0
1 aemet mean 0.0
2 aemet std 0.0
3 aemet min 0.0
4 aemet 25% 0.0
#one measure: load the file of a single measure for every dataset

datasets = ['d1','d2']
m='m11b'

# A whole DataFrame cannot be assigned as one column of another DataFrame;
# doing so raised "ValueError: Cannot set a frame with no defined index and a
# value that cannot be converted to a Series". Keep one frame per dataset in a
# dict instead and stack them afterwards.
# NOTE(review): the original dtype mapping listed 'entity' twice; the second
# key was probably meant to name a different column -- confirm against the files.
frames = {
    d: pd.read_csv('data/'+m+'File_'+d+'.tsv',sep="\t",dtype={'entity':str, 'diffentropy':float})
    for d in datasets
}

# Long format: the dataset name becomes the outer index level, so files with
# different numbers of rows can be combined; frames[<dataset>] still gives
# access to the table of one dataset.
measurements = pd.concat(frames)

print(measurements)
#sns.boxplot(y=measurements['diff'])
    
    
    
    
    
    
    
    
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
/srv/paws/lib/python3.6/site-packages/pandas/core/frame.py in _ensure_valid_index(self, value)
   3172             try:
-> 3173                 value = Series(value)
   3174             except:

/srv/paws/lib/python3.6/site-packages/pandas/core/series.py in __init__(self, data, index, dtype, name, copy, fastpath)
    276 
--> 277                 data = SingleBlockManager(data, index, fastpath=True)
    278 

/srv/paws/lib/python3.6/site-packages/pandas/core/internals.py in __init__(self, block, axis, do_integrity_check, fastpath)
   4676         if not isinstance(block, Block):
-> 4677             block = make_block(block, placement=slice(0, len(axis)), ndim=1)
   4678 

/srv/paws/lib/python3.6/site-packages/pandas/core/internals.py in make_block(values, placement, klass, ndim, dtype, fastpath)
   3204 
-> 3205     return klass(values, ndim=ndim, placement=placement)
   3206 

/srv/paws/lib/python3.6/site-packages/pandas/core/internals.py in __init__(self, values, placement, ndim)
   2302         super(ObjectBlock, self).__init__(values, ndim=ndim,
-> 2303                                           placement=placement)
   2304 

/srv/paws/lib/python3.6/site-packages/pandas/core/internals.py in __init__(self, values, placement, ndim)
    124                 'Wrong number of items passed {val}, placement implies '
--> 125                 '{mgr}'.format(val=len(self.values), mgr=len(self.mgr_locs)))
    126 

ValueError: Wrong number of items passed 3, placement implies 9

During handling of the above exception, another exception occurred:

ValueError                                Traceback (most recent call last)
<ipython-input-6-6c28d6d3ee0f> in <module>()
      6 
      7 for d in datasets:
----> 8     measurements[d] = pd.read_csv('data/'+m+'File_'+d+'.tsv',sep="\t",dtype={'entity':str, 'entity':str, 'diffentropy':float})
      9 
     10 

/srv/paws/lib/python3.6/site-packages/pandas/core/frame.py in __setitem__(self, key, value)
   3117         else:
   3118             # set column
-> 3119             self._set_item(key, value)
   3120 
   3121     def _setitem_slice(self, key, value):

/srv/paws/lib/python3.6/site-packages/pandas/core/frame.py in _set_item(self, key, value)
   3191         """
   3192 
-> 3193         self._ensure_valid_index(value)
   3194         value = self._sanitize_column(key, value)
   3195         NDFrame._set_item(self, key, value)

/srv/paws/lib/python3.6/site-packages/pandas/core/frame.py in _ensure_valid_index(self, value)
   3173                 value = Series(value)
   3174             except:
-> 3175                 raise ValueError('Cannot set a frame with no defined index '
   3176                                  'and a value that cannot be converted to a '
   3177                                  'Series')

ValueError: Cannot set a frame with no defined index and a value that cannot be converted to a Series
# m11aFile_aemet.tsv


# m11a: all values
# b is diff entropy (diversity) - at least in the files; still to be cross-checked
# c is diff ratio

# need to check where the c files are on disk!!

# I think b and c were meant to test the difference between diversity and no diversity
dataset = 'aemet'
#measures_list = ['m11b','m12b','m21b','m22b','m31b']
measures_list = ['m21b']

# Column name -> dtype mapping handed to the parser for every measure file.
column_types = {'uri':str, 'uritype':str, 'diff':float}

#data/'+m+'File_'+dataset+'_'+typelink+'
for m in measures_list:
    print(m)
    tsv_path = 'data/'+m+'File_'+dataset+'.tsv'
    # The files are read as tab-separated. NOTE(review): the KeyError: 'diff'
    # traceback recorded for this cell shows a read_csv call without sep="\t",
    # so it appears to come from an earlier run -- re-run to confirm that the
    # 'diff' column is found now.
    measurements = pd.read_csv(tsv_path, sep="\t", dtype=column_types)

    print(measurements['diff'])
    #sns.boxplot(y=measurements['diff'])
    
m21b
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
/srv/paws/lib/python3.6/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
   3063             try:
-> 3064                 return self._engine.get_loc(key)
   3065             except KeyError:

pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()

pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()

pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()

pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()

KeyError: 'diff'

During handling of the above exception, another exception occurred:

KeyError                                  Traceback (most recent call last)
<ipython-input-23-10745e8e1171> in <module>()
     18     measurements = pd.read_csv('data/'+m+'File_'+dataset+'.tsv',dtype={'uri':str, 'uritype':str, 'diff':float})
     19 
---> 20     print(measurements['diff'])
     21     #sns.boxplot(y=measurements['diff'])
     22 

/srv/paws/lib/python3.6/site-packages/pandas/core/frame.py in __getitem__(self, key)
   2686             return self._getitem_multilevel(key)
   2687         else:
-> 2688             return self._getitem_column(key)
   2689 
   2690     def _getitem_column(self, key):

/srv/paws/lib/python3.6/site-packages/pandas/core/frame.py in _getitem_column(self, key)
   2693         # get column
   2694         if self.columns.is_unique:
-> 2695             return self._get_item_cache(key)
   2696 
   2697         # duplicate columns & possible reduce dimensionality

/srv/paws/lib/python3.6/site-packages/pandas/core/generic.py in _get_item_cache(self, item)
   2484         res = cache.get(item)
   2485         if res is None:
-> 2486             values = self._data.get(item)
   2487             res = self._box_item_values(item, values)
   2488             cache[item] = res

/srv/paws/lib/python3.6/site-packages/pandas/core/internals.py in get(self, item, fastpath)
   4113 
   4114             if not isna(item):
-> 4115                 loc = self.items.get_loc(item)
   4116             else:
   4117                 indexer = np.arange(len(self.items))[isna(self.items)]

/srv/paws/lib/python3.6/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
   3064                 return self._engine.get_loc(key)
   3065             except KeyError:
-> 3066                 return self._engine.get_loc(self._maybe_cast_indexer(key))
   3067 
   3068         indexer = self.get_indexer([key], method=method, tolerance=tolerance)

pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()

pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()

pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()

pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()

KeyError: 'diff'