import pandas as pd
import matplotlib.pyplot as plt
import datetime
import numpy as np
import pymysql
from sqlalchemy import create_engine
%matplotlib inline
import requests
import re
# Load the LOD cloud metadata. With orient="index" every top-level JSON key
# (the data set id) becomes one row of the frame.
data = pd.read_json(
    "lod-data.json",
    orient="index",
    lines=False,
    encoding='utf-8',
)
data.head(100)
Image _id contact_point description doi domain example full_download identifier image keywords license links namespace other_download owner sparql title triples website
eagle-i-jsu NaN eagle-i-jsu {'email': 'info@eagle-i.org', 'name': 'The eag... {'en': 'Groundbreaking biomedical research req... life_sciences [{'status': 'OK', 'media_type': 'application/x... [] eagle-i-jsu [deref-vocab, format-dc, format-ero, format-fo... http://www.opendefinition.org/licenses/cc-by [{'target': 'bioportal-ero', 'value': '269'}, ... http://jsu.eagle-i.net/i/ [{'status': 'OK', 'description': 'All eagle-i ... {'email': 'irunoff@gmail.com', 'name': 'Igor R... [{'status': 'FAIL (400)', 'access_url': 'http:... eagle-i @ Jackson State University 10200 http://jsu.eagle-i.net
idreffr NaN idreffr {'email': 'apisudoc@abes.fr', 'name': 'ABES'} {'en': 'Source : [IdRef](http://www.idref.fr) ... publications [{'status': 'OK', 'media_type': 'application/r... [] idreffr [authority, bibliographic, format-rdf, france,... NaN [{'target': 'FMeSH', 'value': '25000'}, {'targ... http://www.idref.fr/ [{'status': 'OK', 'media_type': 'text/html;cha... [] IdRef: Sudoc authority data 20000000 http://punktokomo.abes.fr/2011/07/05/idref-des...
bio2rdf-genage NaN bio2rdf-genage {'email': 'bio2rdf@googlegroups.com', 'name': ... {'en': 'GenAge is a database of human and mode... life_sciences [{'status': 'OK', 'media_type': 'application/r... [{'status': 'FAIL (404)', 'media_type': 'appli... bio2rdf-genage [bio2rdf, deref-vocab, format-dc, format-rdf, ... http://www.opendefinition.org/licenses/cc-by [{'target': 'bio2rdf-genage', 'value': '1856'}... http://bio2rdf.org/genage: [{'status': 'FAIL (404)', 'media_type': '', 'a... [{'status': 'OK', 'access_url': 'http://genage... Bio2RDF::Genage 73048 http://genage.bio2rdf.org/
european-election-results NaN european-election-results {'email': 'tim.hodson@kasabi.com', 'name': 'Ti... {'en': 'EU Election Results from 2009.'} [{'status': 'FAIL (HTTPConnectionPool(host='el... [] european-election-results [deref-vocab, eu, eu-institution, europe, euro... http://www.opendefinition.org/licenses/cc-zero [{'target': 'eurostat-linked-data', 'value': '... http://elections.publicdata.eu/data/ [{'status': 'OK', 'description': '', 'title': ... [{'status': 'FAIL (HTTPConnectionPool(host='ap... European Election Results 3165 http://elections.publicdata.eu
bioportal-tao NaN bioportal-tao {'email': None, 'name': 'Wasila Dahdul'} {'en': 'Multispecies fish anatomy ontology. Or... life_sciences [] [] bioportal-tao [biomedicine, bioportal, format-obo, format-rd... NaN [{'target': 'bioportal-aao', 'value': '391'}, ... [{'status': 'OK', 'media_type': 'text/html; ch... [] Teleost Anatomy Ontology 35496 http://bioportal.bioontology.org/ontologies/1110
patents-data-gov-uk NaN patents-data-gov-uk {'email': 'team@data.gov.uk', 'name': 'Data.go... {'en': '[Namespace for patent applications](ht... government [{'status': 'FAIL (HTTPConnectionPool(host='pa... [{'status': 'OK', 'description': 'Download (N-... patents-data-gov-uk [deref-vocab, format-dc, format-rdf, governmen... NaN [{'target': 'research-data-gov-uk', 'value': '... http://patents.data.gov.uk/id/ [{'status': 'FAIL (HTTPConnectionPool(host='se... [{'status': 'FAIL (HTTPConnectionPool(host='se... patents.data.gov.uk 11627 http://patents.data.gov.uk/
iati-as-linked-data NaN iati-as-linked-data {'email': '', 'name': 'Kasper Brandt'} {'en': 'International Aid Transparancy Initiat... government [{'status': 'FAIL (HTTPConnectionPool(host='ec... [] iati-as-linked-data [Aid, Development, Transparency, accountibilit... http://www.opendefinition.org/licenses/cc-by [{'target': 'cia-world-factbook', 'value': '24... [{'status': 'OK', 'media_type': 'text/html; ch... [{'status': 'FAIL (400)', 'access_url': 'http:... IATI as Linked Data 36629045 None
bioportal-air NaN bioportal-air {'email': '', 'name': 'May Cheh'} {'en': 'AI/RHEUM is used for the diagnosis of ... life_sciences [] [] bioportal-air [biomedicine, bioportal, format-rdf, format-rr... NaN [{'target': 'bioportal-bdo', 'value': '49'}, {... [{'status': 'OK', 'media_type': 'text/html; ch... [] AI/RHEUM 5528 http://bioportal.bioontology.org/ontologies/1430
bio2rdf-clinicaltrials NaN bio2rdf-clinicaltrials {'email': 'bio2rdf@googlegroups.com', 'name': ... {'en': 'ClinicalTrials.gov is a registry and r... life_sciences [{'status': 'OK', 'media_type': 'application/r... [{'status': 'FAIL (404)', 'media_type': 'appli... bio2rdf-clinicaltrials [bio2rdf, deref-vocab, format-dc, format-rdf, ... http://www.opendefinition.org/licenses/cc-by [{'target': 'bio2rdf-clinicaltrials', 'value':... http://bio2rdf.org/clinicaltrials: [{'status': 'FAIL (404)', 'media_type': '', 'a... [{'status': 'OK', 'access_url': 'http://clinic... Bio2RDF::Clinicaltrials 8323598 http://clinicaltrials.bio2rdf.org/
fu-berlin-sider NaN fu-berlin-sider {'email': 'anja@anjeve.de', 'name': 'Anja Jent... {'en': 'The Linked Data version of SIDER which... life_sciences [{'status': 'FAIL (503)', 'media_type': 'examp... [{'status': 'OK', 'description': 'N-Triples, b... fu-berlin-sider [deref-vocab, format-rdf, lifesciences, lod, l... NaN [{'target': 'dbpedia', 'value': '2126'}, {'tar... http://www4.wiwiss.fu-berlin.de/sider/resource/ [] [{'status': 'FAIL (503)', 'access_url': 'http:... SIDER: Side Effect Resource 192515 http://www4.wiwiss.fu-berlin.de/sider/
oceandrilling-borehole NaN oceandrilling-borehole {'email': None, 'name': None} {'en': None} geography [{'status': 'OK', 'media_type': 'text/html; ch... [] oceandrilling-borehole [LinkedDataCrawl2014, crawledLinkedDataCloud20... NaN [{'target': 'w3c', 'value': '5'}] [] [] oceandrilling-borehole 0 None
sudocfr NaN sudocfr {'email': 'apisudoc@abes.fr', 'name': 'ABES'} {'en': 'Source : [Sudoc](http://www.sudoc.abes... publications [{'status': 'OK', 'media_type': 'application/r... [] sudocfr [bibliographic, format-rdf, format-skos, franc... NaN [{'target': 'dewey_decimal_classification', 'v... http://www.sudoc.fr/ [{'status': 'OK', 'media_type': 'text/html;cha... [{'status': 'OK', 'access_url': 'http://sparql... Sudoc bibliographic data 350000000 http://punktokomo.abes.fr/2011/07/04/le-sudoc-...
bio2rdf-sgd NaN bio2rdf-sgd {'email': 'bio2rdf@googlegroups.com', 'name': ... {'en': 'The Saccharomyces Genome Database (SGD... life_sciences [{'status': 'OK', 'media_type': 'application/r... [{'status': 'FAIL (404)', 'media_type': 'appli... bio2rdf-sgd [bio2rdf, deref-vocab, format-dc, format-rdf, ... http://www.opendefinition.org/licenses/cc-by [{'target': 'bio2rdf-apo', 'value': '241'}, {'... http://bio2rdf.org/sgd: [{'status': 'FAIL (404)', 'media_type': '', 'a... [{'status': 'OK', 'access_url': 'http://sgd.bi... Bio2RDF::Sgd 12494945 http://sgd.bio2rdf.org/
museosespania-gnoss NaN museosespania-gnoss {'email': 'gnoss@gnoss.com', 'name': 'GNOSS Te... {'en': '<p> <a href="http://museos.gnoss.com/... user_generated [{'status': 'OK', 'media_type': 'text/html; ch... [] museosespania-gnoss [bellas artes, ciencias naturales, country-spa... http://creativecommons.org/licenses/by-nc/2.0/ [{'target': 'dbpedia', 'value': '140275'}, {'t... http://museos.gnoss.com/comunidad/mismuseos/ [{'status': 'OK', 'description': 'Gnoss OWL On... [] Museos de España, colección de museos públicos... 79065 http://museos.gnoss.com/comunidad/mismuseos/
bioportal-mpath NaN bioportal-mpath {'email': None, 'name': 'Mouse_pathology Admin... {'en': 'A structured controlled vocabulary of ... life_sciences [] [] bioportal-mpath [biomedicine, bioportal, format-obo, format-rd... NaN [{'target': 'bioportal-acgt', 'value': '23'}, ... [{'status': 'OK', 'media_type': 'text/html; ch... [] Mouse pathology 9413 http://bioportal.bioontology.org/ontologies/1031
cz-ctia-bans NaN cz-ctia-bans {'email': '', 'name': ''} {'en': 'Basic information about bans issued by... government [{'status': 'OK', 'media_type': 'text/turtle; ... [] cz-ctia-bans [format-gr, format-skos, government, license-m... NaN [{'target': 'cz-ctia-inspections', 'value': '1... http://linked.opendata.cz/resource/domain/coi.... [{'status': 'OK', 'media_type': 'text/turtle; ... [{'status': 'OK', 'access_url': 'http://linked... Bans of the Czech Trade Inspection Authority 458039 http://linked.opendata.cz/resource/dataset/coi...
universal-dependencies-treebank-chinese NaN universal-dependencies-treebank-chinese {'email': '', 'name': 'Shen, Mo; McDonald, Ryan'} {'en': 'Universal Dependencies is a project th... [] [] universal-dependencies-treebank-chinese [corpus, linguistics, lod] http://creativecommons.org/licenses/by-nc/2.0/ [{'target': 'olia', 'value': '72'}] [{'status': 'OK', 'media_type': 'application/z... [] Universal Dependencies Treebank Chinese 123283 https://github.com/UniversalDependencies/UD_Ch...
linked-taiwan-geo-names NaN linked-taiwan-geo-names {'email': 'dongpo.deng@gmail.com', 'name': 'Do... {'en': 'Taiwan geographic name (台灣地名) is obtai... [] [] linked-taiwan-geo-names [Taiwan, format-geonames, geographic name, geo... http://www.opendefinition.org/licenses/odc-odbl [{'target': 'geonames-semantic-web:', 'value':... tgn [{'status': 'FAIL (503)', 'media_type': 'HTML'... [{'status': 'FAIL (503)', 'access_url': 'http:... Linked Taiwan Geo-Names 1365654 http://geoname.linkedopendata.tw
bioportal-vandf NaN bioportal-vandf {'email': None, 'name': 'Michael Lincoln'} {'en': 'Veterans Health Administration Nationa... life_sciences [] [] bioportal-vandf [biomedicine, bioportal, format-rdf, format-rr... NaN [{'target': 'bioportal-acgt', 'value': '24'}, ... [{'status': 'OK', 'media_type': 'text/html; ch... [] VANDF 667272 http://bioportal.bioontology.org/ontologies/1527
statusnet-fragdev-com NaN statusnet-fragdev-com {'email': None, 'name': None} {'en': 'StatusNet instance hosted at http://fr... social_networking [{'status': 'FAIL (404)', 'media_type': 'examp... [] statusnet-fragdev-com [LinkedDataCrawl2014, crawledLinkedDataCloud20... NaN [{'target': 'geonames-semantic-web', 'value': ... [] [] statusnet-fragdev-com 0 None
opendatacommunities-transparency-impact-indicators-neighbourhood-plans NaN opendatacommunities-transparency-impact-indica... {'email': None, 'name': None} {'en': None} government [{'status': 'OK', 'media_type': 'text/html; ch... [] opendatacommunities-transparency-impact-indica... [LinkedDataCrawl2014, crawledLinkedDataCloud20... NaN [{'target': 'reference-data-gov-uk', 'value': ... [] [] opendatacommunities-transparency-impact-indica... 0 None
bioportal-envo NaN bioportal-envo {'email': None, 'name': 'Envo Administrators'} {'en': 'Ontology of environmental features and... life_sciences [] [] bioportal-envo [biomedicine, bioportal, format-obo, format-rd... NaN [{'target': 'bioportal-adw', 'value': '13'}, {... [{'status': 'OK', 'media_type': 'text/html; ch... [] Environment Ontology 26584 http://bioportal.bioontology.org/ontologies/1069
betweenourworlds betweenourworlds {'email': 'pieterheyvaert@gmail.com', 'name': ... {'en': 'A dataset that provides metadata infor... media [] [{'status': 'OK', 'description': 'The HDT file... betweenourworlds https://betweenourworlds.org/img/logo.png [anime, episode, season, character, trailer, s... http://www.opendefinition.org/licenses/cc-zero [{'_id': 'b07b5aa4-3f59-da5b-36e2-a6e8c820ea6a... https://betweenourworlds.org/ [] {'name': 'Pieter Heyvaert', 'email': 'pheyvaer... [] Between Our Worlds 3862353 https://betweenourworlds.org
rechtspraak NaN rechtspraak {'email': 'hoekstra@few.vu.nl', 'name': 'Rinke... {'en': 'RDF version of the official case law d... government [{'status': 'FAIL (HTTPConnectionPool(host='li... [] rechtspraak [broken_link, case-law, deref-vocab, format-db... http://www.opendefinition.org/licenses/cc-by [{'target': 'dbpedia', 'value': '575'}, {'targ... http://linkeddata.few.vu.nl/rechtspraak/ [{'status': 'OK', 'description': 'VOID Descrip... [{'status': 'FAIL (HTTPConnectionPool(host='ec... Rechtspraak.nl 1931860 http://www.best-project.nl
bioportal-lhn NaN bioportal-lhn {'email': None, 'name': 'Peter Midford'} {'en': 'A demonstration of ontology constructi... life_sciences [] [] bioportal-lhn [biomedicine, bioportal, format-obo, format-rd... NaN [{'target': 'bioportal-canco', 'value': '12'},... [{'status': 'OK', 'media_type': 'text/html; ch... [] Loggerhead nesting 2623 http://bioportal.bioontology.org/ontologies/1024
opendatacommunities-imd-crime-rank-2010 NaN opendatacommunities-imd-crime-rank-2010 {'email': None, 'name': None} {'en': None} government [{'status': 'FAIL (404)', 'media_type': 'examp... [] opendatacommunities-imd-crime-rank-2010 [LinkedDataCrawl2014, crawledLinkedDataCloud20... NaN [{'target': 'opendatacommunities-societal-well... [] [] opendatacommunities-imd-crime-rank-2010 0 None
eagle-i-xula NaN eagle-i-xula {'email': 'info@eagle-i.org', 'name': 'The eag... {'en': 'Groundbreaking biomedical research req... life_sciences [{'status': 'OK', 'media_type': 'application/x... [] eagle-i-xula [deref-vocab, format-bibo, format-dc, format-e... NaN [{'target': 'bioportal-ero', 'value': '458'}, ... http://xula.eagle-i.net/i/ [{'status': 'OK', 'description': 'All eagle-i ... [{'status': 'FAIL (400)', 'access_url': 'http:... eagle-i @ Xavier University of Louisiana 10000 http://xula.eagle-i.net
statusnet-somsants-net NaN statusnet-somsants-net {'email': None, 'name': None} {'en': 'StatusNet instance hosted at http://so... social_networking [{'status': 'OK', 'media_type': 'application/r... [] statusnet-somsants-net [LinkedDataCrawl2014, crawledLinkedDataCloud20... NaN [{'target': 'personal-homepages', 'value': '2'... [] [] statusnet-somsants-net 0 None
universal-dependencies-treebank-czech NaN universal-dependencies-treebank-czech {'email': 'zeman@ufal.mff.cuni.cz', 'name': 'Z... {'en': 'Universal Dependencies is a project th... [] [] universal-dependencies-treebank-czech [corpus, linguistics, lod] http://creativecommons.org/licenses/by-nc/2.0/ [{'target': 'olia', 'value': '72'}] [{'status': 'OK', 'media_type': 'application/z... [] Universal Dependencies Treebank Czech 1503732 https://github.com/UniversalDependencies/UD_Czech
linkedct NaN linkedct {'email': 'oktie@cs.toronto.edu', 'name': 'Okt... {'en': 'Data exposed: Linked Clinical Trials ... life_sciences [{'status': 'OK', 'media_type': 'application/r... [{'status': 'OK', 'description': 'Download', '... linkedct [ckanupload.esw.200910, deref-vocab, format-rd... http://creativecommons.org/licenses/by-nc/2.0/ [{'target': 'bio2rdf-pubmed', 'value': '76483'... http://linkedct.org/resource/ [{'status': 'OK', 'description': 'Data Browser... [{'status': 'OK', 'access_url': 'http://data.l... LinkedCT 24653008 http://linkedct.org/
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
temple-ov-thee-lemur-datasets NaN temple-ov-thee-lemur-datasets {'email': 'cjg@ecs.soton.ac.uk', 'name': 'Chri... {'en': 'This resource provides 3 groups of RDF... cross_domain [{'status': 'OK', 'media_type': 'application/r... [] temple-ov-thee-lemur-datasets [chess, crossdomain, deref-vocab, format-dc, f... http://www.opendefinition.org/licenses/cc-zero [{'target': 'dbpedia', 'value': '500'}, {'targ... http://data.totl.net/ [{'status': 'OK', 'description': 'Download: Th... [] Temple ov thee Lemur 10000 http://data.totl.net/
lremap-ri NaN lremap-ri {'email': 'riccardo.delgratta@ilc.cnr.it', 'na... {'en': 'First not-normalized set of language r... [] [] lremap-ri [LOD, Linguistic Dataset LOD RDF, linguistic] http://www.opendefinition.org/licenses/cc-by-sa [] [{'status': 'FAIL (503)', 'media_type': 'text/... [] First not-normalized set of language resources... 7982 None
bioportal-lipro NaN bioportal-lipro {'email': None, 'name': 'Christipher Baker'} {'en': 'Lipid research is increasingly integra... life_sciences [] [] bioportal-lipro [biomedicine, bioportal, format-owl, format-rd... NaN [{'target': 'bioportal-chebi', 'value': '58'},... [{'status': 'OK', 'media_type': 'text/html; ch... [] Lipid Ontology 15406 http://bioportal.bioontology.org/ontologies/1183
congresspeople NaN congresspeople {'email': 'cheny18@rpi.edu', 'name': 'Yanning ... {'en': 'The 2011 US congress people data, with... government [] [] congresspeople [format-con, format-conv, format-dbo, format-d... NaN [{'target': 'dbpedia', 'value': '67'}, {'targe... http://logd.tw.rpi.edu/source/contactingthecon... [{'status': 'FAIL (403)', 'media_type': 'BIN',... [] 2011 US Congress People 0 http://www.contactingthecongress.org/
cpc-2008 NaN cpc-2008 {'email': 'josem.alvarez@weso.es', 'name': 'Jo... {'en': 'The Central Product Classification (CP... [{'status': 'FAIL (HTTPConnectionPool(host='15... [] cpc-2008 [10ders, cpc, cpc-2008, deref-vocab, e-procure... http://www.opendefinition.org/licenses/odc-odbl [{'target': 'http://productontology.org', 'val... http://purl.org/weso/pscs/cpa/2008/resource [{'status': 'OK', 'description': 'Data Dump Ce... [{'status': 'FAIL (HTTPConnectionPool(host='15... CPC 2008 100819 http://www.josemalvarez.es/web/2011/11/16/prod...
vivo-scripps-research-institute NaN vivo-scripps-research-institute {'email': '', 'name': 'VIVO'} {'en': 'VIVO is a research-focused discovery t... publications [] [] vivo-scripps-research-institute [deref-vocab, education, format-rdf, format-rd... http://www.opendefinition.org/licenses/cc-by [{'target': 'dbpedia ', 'value': '58'}] [{'status': 'OK', 'description': 'VIVO is a re... [] VIVO Scripps Research Institute 45000 http://vivo.scripps.edu/
pdevlemon NaN pdevlemon {'email': 'elmaarouf.ismail@yahoo.fr', 'name':... {'en': 'The Pattern Dictionary of English Verb... [] [{'status': 'OK', 'description': 'This is the ... pdevlemon [framenet, lemon, lexicography, lexicon, lexin... http://www.opendefinition.org/licenses/cc-by-sa [] [] [] pdevlemon 233372 None
rkb-explorer-newcastle NaN rkb-explorer-newcastle {'email': 'hg@ecs.soton.ac.uk', 'name': 'Hugh ... {'en': ''} publications [{'status': 'FAIL (502)', 'media_type': 'examp... [] rkb-explorer-newcastle [deref-vocab, format-akt, format-dc, format-rd... http://www.opendefinition.org/licenses/cc-by [{'target': 'rkb-explorer-acm', 'value': '1453... http://newcastle.rkbexplorer.com/id/ [{'status': 'FAIL (502)', 'media_type': 'meta/... [{'status': 'OK', 'access_url': 'http://newcas... University of Newcastle upon Tyne (RKBExplorer) 87505 http://newcastle.rkbexplorer.com
proyectoapadrina NaN proyectoapadrina {'email': 'gnoss@gnoss.com', 'name': 'GNOSS Te... {'en': '<p> <a href="http://red.gnoss.com/comu... user_generated [{'status': 'OK', 'media_type': 'application/r... [] proyectoapadrina [apadrina, buzz marketing publicity, country-s... http://creativecommons.org/licenses/by-nc/2.0/ [{'target': 'dbpedia', 'value': '7379'}, {'tar... http://gnoss.com/comunidad/MPV-APADRINA [{'status': 'OK', 'description': 'Gnoss OWL On... [] Proyecto Apadrina, Marketing Punto de Venta 71440 http://gnoss.com/comunidad/MPV-APADRINA
ecb-statistics-linked-data NaN ecb-statistics-linked-data {'email': 'keith.alexander@talis.com', 'name':... {'en': 'This dataset is a Linked Data conversi... [{'status': 'FAIL (HTTPConnectionPool(host='ec... [] ecb-statistics-linked-data [GDP, bank, eu, eu-institution, europe, europe... http://www.opendefinition.org/licenses/cc-zero [{'target': 'data-gov-uk-time-intervals', 'val... http://ecb.publicdata.eu/ [] [{'status': 'FAIL (HTTPConnectionPool(host='ec... European Central Bank Statistics (PublicData.eu) 10000000 http://ecb.publicdata.eu
universal-dependencies-treebank-slovenian-sst NaN universal-dependencies-treebank-slovenian-sst {'email': '', 'name': 'Dobrovoljc, Kaja; Nivre... {'en': 'Universal Dependencies is a project th... [] [] universal-dependencies-treebank-slovenian-sst [corpus, linguistics, lod] http://creativecommons.org/licenses/by-nc/2.0/ [{'target': 'olia', 'value': '72'}] [{'status': 'OK', 'media_type': 'application/z... [] Universal Dependencies Treebank Slovenian-SST 29488 https://github.com/UniversalDependencies/UD_Sl...
aemet NaN aemet {'email': 'dgarijov@gmail.com', 'name': 'Ontol... {'en': 'AemetLinked Data (.es) is an open init... cross_domain [{'status': 'FAIL (500)', 'media_type': 'examp... [{'status': 'OK', 'description': 'Download (.z... aemet [crossdomain, deref-vocab, format-geo, format-... NaN [{'target': 'DBpedia', 'value': '82'}, {'targe... http://aemet.linkeddata.es/resource/ [{'status': 'OK', 'description': 'HTML with th... [{'status': 'OK', 'access_url': 'http://aemet.... AEMET metereological dataset 62932032 http://aemet.linkeddata.es/
europeana-sparql NaN europeana-sparql {'email': 'info@ontotext.com', 'name': 'Ontote... {'en': 'Europeana EDM data loaded to OWLIM, wi... cross_domain [{'status': 'OK', 'media_type': 'text/html; ch... [] europeana-sparql [culturalheritage, culture, edm, europeana, eu... http://www.opendefinition.org/licenses/cc-zero [{'target': 'geonames', 'value': '1650634'}] http://data.europeana.eu/item/ [{'status': 'OK', 'description': '', 'title': ... [{'status': 'OK', 'access_url': 'http://europe... Europeana SPARQL 3798446742 None
ww1lod NaN ww1lod {'email': '', 'name': ''} {'en': 'This dataset contains strictly quality... publications [] [] ww1lod [culturalheritage, deref-vocab, format-cidoc-c... http://www.opendefinition.org/licenses/cc-by-sa [{'target': 'dbpedia', 'value': '152'}, {'targ... http://ldf.fi/ww1lod/ [{'status': 'OK', 'media_type': 'text/turtle; ... [{'status': 'OK', 'access_url': 'http://ldf.fi... World War 1 as Linked Open Data 40160 None
universal-dependencies-treebank-japanese-ktc NaN universal-dependencies-treebank-japanese-ktc {'email': '', 'name': 'Asahara, Masayuki; Kana... {'en': 'Universal Dependencies is a project th... [] [] universal-dependencies-treebank-japanese-ktc [corpus, linguistics, lod] http://www.opendefinition.org/licenses/cc-by-sa [{'target': 'olia', 'value': '72'}] [{'status': 'OK', 'media_type': 'application/z... [] Universal Dependencies Treebank Japanese-KTC 267631 https://github.com/UniversalDependencies/UD_Ja...
bio2rdf-interpro NaN bio2rdf-interpro {'email': 'bio2rdf@googlegroups.com', 'name': ... {'en': 'InterPro is an integrated database of ... life_sciences [{'status': 'OK', 'media_type': 'application/r... [{'status': 'FAIL (404)', 'media_type': 'appli... bio2rdf-interpro [bio2rdf, deref-vocab, format-dc, format-rdf, ... http://www.opendefinition.org/licenses/cc-by [{'target': 'bio2rdf-blocks', 'value': '5787'}... http://bio2rdf.org/interpro: [{'status': 'FAIL (404)', 'media_type': '', 'a... [{'status': 'OK', 'access_url': 'http://interp... Bio2RDF::Interpro 2323345 http://interpro.bio2rdf.org/
bioportal-repo NaN bioportal-repo {'email': None, 'name': 'Wageningen UR Livesto... {'en': 'Ontology for livestock reproductive tr... life_sciences [] [] bioportal-repo [biomedicine, bioportal, format-owl, format-rd... NaN [{'target': 'bioportal-mesh-owl', 'value': '21... [{'status': 'OK', 'media_type': 'text/html; ch... [] Reproductive trait and phenotype ontology 793 http://bioportal.bioontology.org/ontologies/1552
foodpedia NaN foodpedia {'email': 'kolchinmax@gmail.com', 'name': 'Max... {'en': 'At this moment FOODpedia contains info... [] [] foodpedia [food, format-rdf, ingredients, lod] http://www.opendefinition.org/licenses/cc-by [{'target': 'agrovoc', 'value': '426'}, {'targ... http://foodpedia.tk/resource/ [{'status': 'FAIL (500)', 'media_type': 'text/... [{'status': 'FAIL (502)', 'access_url': 'http:... FOODpedia - Linked Data Dataset about Food Pro... 888223 http://foodpedia.tk
semantic-universe NaN semantic-universe {'email': '', 'name': 'Semantic Universe'} {'en': 'Semantic Universe has begun producing ... [{'status': 'FAIL (HTTPConnectionPool(host='da... [] semantic-universe [deref-vocab, lod, lodcloud.nolinks, lodcloud.... NaN [] [] [{'status': 'FAIL (HTTPConnectionPool(host='da... Semantic Universe Data 20000 http://data.semanticuniverse.com
sztaki-lod NaN sztaki-lod {'email': 'webmaster@dsd.sztaki.hu', 'name': '... {'en': 'This dataset aims at publishing the co... publications [{'status': 'OK', 'media_type': 'application/r... [] sztaki-lod [catalog, country-hungary, format-dbpedia, for... http://www.opendefinition.org/licenses/cc-by-sa [{'target': 'dbpedia', 'value': '13034'}, {'ta... http://lod.sztaki.hu/data/ [{'status': 'OK', 'description': '', 'title': ... [{'status': 'OK', 'access_url': 'http://lod.sz... National Digital Data Archive of Hungary (part... 11000000 http://lod.sztaki.hu/
grrp NaN grrp {'email': 'info@linkedopendata.it', 'name': 'L... {'en': 'List of accommodations in Piedmont, It... geography [{'status': 'FAIL (404)', 'media_type': 'examp... [] grrp [accommodation, country-italy, deref-vocab, fo... http://www.opendefinition.org/licenses/cc-by-sa [{'target': 'geonames-semantic-web', 'value': ... [{'status': 'OK', 'description': 'RDF dump', '... [{'status': 'FAIL (404)', 'access_url': 'http:... Accommodations in Piedmont (LinkedOpenData.it) 153935 http://www.linkedopendata.it/datasets/grrp
rkb-explorer-photos NaN rkb-explorer-photos {'email': 'hg@ecs.soton.ac.uk', 'name': 'Hugh ... {'en': None} publications [] [] rkb-explorer-photos [broken_link, format-rdf, lod, provenance-meta... NaN [{'target': 'dbpedia', 'value': '100'}, {'targ... http://photos.rkbexplorer.com/id/ [{'status': 'FAIL (502)', 'media_type': 'meta/... [{'status': 'FAIL (404)', 'access_url': 'http:... photos 0 http://photos.rkbexplorer.com
bioportal-icd9cm NaN bioportal-icd9cm {'email': None, 'name': 'Patricia Brooks'} {'en': 'The ICD is the international standard ... life_sciences [] [] bioportal-icd9cm [biomedicine, bioportal, format-rdf, format-rr... NaN [{'target': 'bioportal-acgt', 'value': '26'}, ... [{'status': 'OK', 'media_type': 'text/html; ch... [] International Classification of Diseases 322211 http://bioportal.bioontology.org/ontologies/1101
ecco-tcp-linked-data NaN ecco-tcp-linked-data {'email': 'keithalexander@keithalexander.co.uk... {'en': 'Linked Data conversion of the metadata... [{'status': 'OK', 'media_type': 'text/html', '... [] ecco-tcp-linked-data [18thc, books, deref-vocab, eighteenth-century... http://www.opendefinition.org/licenses/odc-pddl [{'target': 'dbpedia', 'value': '50'}] http://data.kasabi.com/dataset/ecco-tcp-eighte... [{'status': 'OK', 'description': 'VoID Descrip... [{'status': 'OK', 'access_url': 'http://api.ka... ECCO-TCP Eighteenth Century Texts Linked Data 35215 http://kasabi.com/dataset/ecco-tcp-eighteenth-...
semanticquran NaN semanticquran {'email': 'mohamed.sherif@upb.de', 'name': 'Mo... {'en': 'The Semantic Quran dataset is a multil... 10.3233/SW-140137 linguistics [] [{'status': 'OK', 'media_type': 'application/x... semanticquran [semantic, quran, Multilingual dataset, Natura... https://creativecommons.org/licenses/by-nc-sa/... [{'target': 'DBpedia', 'value': '7718'}, {'tar... [] mohamed.sherif@upb.de [{'status': 'FAIL (503)', 'access_url': 'http:... Semantic Quran 15741399 http://aksw.org/Projects/SemanticQuran
dbpedia-el NaN dbpedia-el {'email': '', 'name': 'Greek DBpedia Team - ht... {'en': 'DBpedia is a "community effort to extr... cross_domain [{'status': 'OK', 'media_type': 'application/r... [{'status': 'OK', 'description': 'Downloads', ... dbpedia-el [access-api, access-bulk, access-www, crossdom... http://www.opendefinition.org/licenses/cc-by-sa [{'target': '2000-us-census-rdf', 'value': '14... http://el.dbpedia.org/resource/ [{'status': 'FAIL (404)', 'media_type': '', 'a... [{'status': 'OK', 'access_url': 'http://el.dbp... DBpedia in Greek 4399898 http://wiki.el.dbpedia.org
apertium-rdf-es-an NaN apertium-rdf-es-an {'email': '', 'name': 'Jimmy O'Regan ; Univers... {'en': 'RDF version of the Apertium bilingual ... linguistics [] [] apertium-rdf-es-an [Apertium, Aragonese, Spanish, bilingual, dict... NaN [{'target': 'babelnet', 'value': '86'}, {'targ... [{'status': 'FAIL (403)', 'media_type': 'RDF',... [] Apertium RDF ES-AN 71997 None
bioportal-soy NaN bioportal-soy {'email': None, 'name': 'Rex Nelson'} {'en': 'Growth, trait and development ontology... life_sciences [] [] bioportal-soy [biomedicine, bioportal, format-obo, format-rd... NaN [{'target': 'bioportal-go_x1', 'value': '11'},... [{'status': 'OK', 'media_type': 'text/html; ch... [] SoyOntology 33895 http://bioportal.bioontology.org/ontologies/3028
dbpedia-es NaN dbpedia-es {'email': 'mariano.rico@fi.upm.es', 'name': 'M... {'en': 'These data correspond to the ontology ... cross_domain [{'status': 'FAIL (406)', 'media_type': 'examp... [] dbpedia-es [crossdomain, linguistic, lod, lodcloud-diagra... http://www.opendefinition.org/licenses/cc-by-sa [{'target': 'dbpedia', 'value': '837609'}, {'t... http://es.dbpedia.org/resource/ [{'status': 'OK', 'description': 'DBpedia in S... [{'status': 'OK', 'access_url': 'http://es.dbp... DBpedia in Spanish 169101647 http://es.dbpedia.org
dutch-ships-and-sailors NaN dutch-ships-and-sailors {'email': 'v.de.boer@vu.nl', 'name': 'Victor d... {'en': 'Dutch Ships and Sailors brings togethe... publications [{'status': 'OK', 'media_type': 'text/plain; c... [] dutch-ships-and-sailors [digital history, dutch, e-history, economic-h... http://www.opendefinition.org/licenses/cc-by [{'target': 'geonames', 'value': '2510'}] [{'status': 'OK', 'media_type': 'text/html; ch... [{'status': 'FAIL (400)', 'access_url': 'http:... Dutch Ships and Sailors 30801840 None

100 rows × 20 columns

# (rows, columns) of the loaded metadata table.
data.shape
(1369, 20)
# Is the contact_point entry itself missing (NaN) for any data set?
# It is not - but the name/email values inside it can still be None or ""
# (see below).
data["contact_point"].isna().any()
False
def getContact(row):
    """Return the contact_point mapping of ``row`` as a one-column DataFrame.

    The keys of ``row.contact_point`` become the index of the result and the
    corresponding values fill its single column.
    """
    contact = row.contact_point
    return pd.DataFrame.from_dict(contact, orient="index")
   
def getContact2(row):
    """Extract the contact name and e-mail of one data set row.

    Args:
        row: object exposing a ``contact_point`` attribute, normally a dict
            that may contain the keys ``"name"`` and ``"email"``.

    Returns:
        A ``pd.Series`` indexed by ``"name"`` and ``"email"`` (in that
        order). A key that is absent from the contact dict yields ``None``.
    """
    contact = row.contact_point
    # Be tolerant of a missing contact_point (e.g. NaN): treat anything that
    # is not a dict as "no contact information" instead of raising.
    if not isinstance(contact, dict):
        contact = {}

    # Build the Series in one step with an explicit object dtype; an empty
    # pd.Series() without dtype is deprecated and its default dtype differs
    # between pandas versions.
    return pd.Series(
        {"name": contact.get("name"), "email": contact.get("email")},
        dtype=object,
    )
   
    
# One row per data set, holding the contact's name and e-mail.
contactDF = data.apply(getContact2, axis="columns")
contactDF.head()
name email
eagle-i-jsu The eagle-i Network info@eagle-i.org
idreffr ABES apisudoc@abes.fr
bio2rdf-genage Bio2RDF bio2rdf@googlegroups.com
european-election-results Tim Hodson tim.hodson@kasabi.com
bioportal-tao Wasila Dahdul None
# Put the columns of interest next to the extracted contact details and turn
# the data set id (the old index) into a regular 'index' column.
subset = data.loc[:, ['domain', 'triples', 'identifier']]
intData = pd.concat([subset, contactDF], axis="columns").reset_index()
intData.head()
index domain triples identifier name email
0 eagle-i-jsu life_sciences 10200 eagle-i-jsu The eagle-i Network info@eagle-i.org
1 idreffr publications 20000000 idreffr ABES apisudoc@abes.fr
2 bio2rdf-genage life_sciences 73048 bio2rdf-genage Bio2RDF bio2rdf@googlegroups.com
3 european-election-results 3165 european-election-results Tim Hodson tim.hodson@kasabi.com
4 bioportal-tao life_sciences 35496 bioportal-tao Wasila Dahdul None
# Sanity check: does the e-mail recorded for "eagle-i-jsu" contain an "@"?
# Test the e-mail values themselves rather than the printed repr of the
# Series; na=False treats missing e-mails as "no match".
eagle_email = intData.loc[intData['identifier'] == "eagle-i-jsu", 'email']
if eagle_email.str.contains("@", regex=False, na=False).any():
    print("yes!")
yes!
# Rows whose e-mail is the empty string. Note that this comparison does not
# match e-mails that are None.
has_blank_email = intData['email'].eq("")
noContactData = intData[has_blank_email]
noContactData.head()
index domain triples identifier name email
6 iati-as-linked-data government 36629045 iati-as-linked-data Kasper Brandt
7 bioportal-air life_sciences 5528 bioportal-air May Cheh
15 cz-ctia-bans government 458039 cz-ctia-bans
16 universal-dependencies-treebank-chinese 123283 universal-dependencies-treebank-chinese Shen, Mo; McDonald, Ryan
30 vivo2doi publications 7073060 vivo2doi Nick Benik, Timothy Lebo, Griffin M Weber
noContactData.shape # 251 data sets have an empty-string e-mail (None e-mails are not counted here)
(251, 6)
# Data sets without a declared domain: select the rows whose domain is the
# empty string.
# TODO: confirm that no domain is stored as None/NaN instead of "".
blank_domain = intData['domain'].eq("")
noDomain = intData[blank_domain]
noDomain.shape  # many data sets have no domain declared
(336, 6)
# Keep only data sets that declare a domain and whose e-mail contains an "@".
# na=False makes missing (None) e-mails count as "no match" instead of
# propagating NaN into the boolean mask; regex=False matches "@" literally.
has_domain = intData.domain.str.len() > 0
has_email = intData.email.str.contains("@", regex=False, na=False)
usefulData = intData[has_domain & has_email]
usefulData.shape #enough data sets!
(537, 6)
# Split the usable data sets into one frame per domain.
domain_col = usefulData['domain']
life_sciences = usefulData[domain_col == 'life_sciences']
publications = usefulData[domain_col == 'publications']
government = usefulData[domain_col == 'government']
geography = usefulData[domain_col == 'geography']
media = usefulData[domain_col == 'media']
linguistics = usefulData[domain_col == 'linguistics']
social_networking = usefulData[domain_col == 'social_networking']
user_generated = usefulData[domain_col == 'user_generated']
cross_domain = usefulData[domain_col == 'cross_domain']

# Report how many data sets fall into each domain.
for label, frame in [
    ("Life Sciences", life_sciences),
    ("Publications", publications),
    ("Government", government),
    ("Geography", geography),
    ("Media", media),
    ("Linguistics", linguistics),
    ("Social Networking", social_networking),
    ("User Generated", user_generated),
    ("Cross-Domain", cross_domain),
]:
    print(f"{label} has: {len(frame)} data sets.")
Life Sciences has: 83 data sets.
Publications has: 116 data sets.
Government has: 114 data sets.
Geography has: 26 data sets.
Media has: 23 data sets.
Linguistics has: 78 data sets.
Social Networking has: 5 data sets.
User Generated has: 40 data sets.
Cross-Domain has: 52 data sets.
# Preview the filtered data sets.
usefulData.head()
domain triples identifier name email
0 life_sciences 10200 eagle-i-jsu The eagle-i Network info@eagle-i.org
1 publications 20000000 idreffr ABES apisudoc@abes.fr
2 life_sciences 73048 bio2rdf-genage Bio2RDF bio2rdf@googlegroups.com
5 government 11627 patents-data-gov-uk Data.gov.uk Team team@data.gov.uk
8 life_sciences 8323598 bio2rdf-clinicaltrials Bio2RDF bio2rdf@googlegroups.com
# Drop the helper 'index' column left behind by reset_index(). errors='ignore'
# keeps this cell safe to re-run once the column is already gone (a second run
# otherwise raises KeyError: "labels ['index'] not contained in axis").
usefulData = usefulData.drop(columns=['index'], errors='ignore')
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
<ipython-input-219-4d70dd55ffdb> in <module>()
----> 1 usefulData = usefulData.drop(['index'],axis=1)

/srv/paws/lib/python3.6/site-packages/pandas/core/frame.py in drop(self, labels, axis, index, columns, level, inplace, errors)
   3695                                            index=index, columns=columns,
   3696                                            level=level, inplace=inplace,
-> 3697                                            errors=errors)
   3698 
   3699     @rewrite_axis_style_signature('mapper', [('copy', True),

/srv/paws/lib/python3.6/site-packages/pandas/core/generic.py in drop(self, labels, axis, index, columns, level, inplace, errors)
   3106         for axis, labels in axes.items():
   3107             if labels is not None:
-> 3108                 obj = obj._drop_axis(labels, axis, level=level, errors=errors)
   3109 
   3110         if inplace:

/srv/paws/lib/python3.6/site-packages/pandas/core/generic.py in _drop_axis(self, labels, axis, level, errors)
   3138                 new_axis = axis.drop(labels, level=level, errors=errors)
   3139             else:
-> 3140                 new_axis = axis.drop(labels, errors=errors)
   3141             dropped = self.reindex(**{axis_name: new_axis})
   3142             try:

/srv/paws/lib/python3.6/site-packages/pandas/core/indexes/base.py in drop(self, labels, errors)
   4386             if errors != 'ignore':
   4387                 raise KeyError(
-> 4388                     'labels %s not contained in axis' % labels[mask])
   4389             indexer = indexer[~mask]
   4390         return self.delete(indexer)

KeyError: "labels ['index'] not contained in axis"
 
 
# Export the contact list, then check whether "bioportal-tao" made it in.
usefulData.to_csv("contactData_LOD_sarasua.csv", index=False)
is_tao = usefulData['identifier'] == "bioportal-tao"
print(usefulData[is_tao])
Empty DataFrame
Columns: [domain, triples, identifier, name, email]
Index: []
# create a file with: publisher
 
 
# Add column of experience (count data sets published)