Code

Description

Goals:

  • bla
  • blabla
  • bla
  • blabla
!pip install toolforge
Collecting toolforge
  Using cached https://files.pythonhosted.org/packages/c0/74/b18d4301b56e7decb1c361af8d5b49fa0c6f2da17e102f19d4539f3737b4/toolforge-4.3.0-py3-none-any.whl
Requirement already satisfied: requests in /srv/paws/lib/python3.6/site-packages (from toolforge)
Requirement already satisfied: pymysql in /srv/paws/lib/python3.6/site-packages (from toolforge)
Requirement already satisfied: idna<2.9,>=2.5 in /srv/paws/lib/python3.6/site-packages (from requests->toolforge)
Requirement already satisfied: chardet<3.1.0,>=3.0.2 in /srv/paws/lib/python3.6/site-packages (from requests->toolforge)
Requirement already satisfied: certifi>=2017.4.17 in /srv/paws/lib/python3.6/site-packages (from requests->toolforge)
Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /srv/paws/lib/python3.6/site-packages (from requests->toolforge)
Installing collected packages: toolforge
Successfully installed toolforge-4.3.0
import pandas as pd
import pymysql
from sqlalchemy import create_engine
import requests
import toolforge
#import oursql
## Retrieve the data from the edit history database (ToolForge query)
 
 
 
## Store the data locally ---this will be execute in HER
# How many tools can I track that are relevant
# General tools
data_hay_tools = pd.read_json(path_or_buf="https://tools.wmflabs.org/hay/directory/api.php", lines=False,encoding='utf-8', orient="records")
data_hay_tools.head()
# header: added	author	deleted	description	fulltext	id	jsonurl	keywords	name	redirects	repository	title	unavailable	url
# index of columns that are lists: 1, 7
added author deleted description fulltext id jsonurl keywords name redirects repository title unavailable url
0 2016-01-02T23:36:47+00:00 [Pasleim] 0 turn string claims to monolingual claims wikidata-addlingue wikidata addlingue turn str... 239 http://tools.wmflabs.org/pltools/toolinfo.json [wikidata, oauth, monolingual, string] wikidata-addlingue 15 https://github.com/Pascalco/addlingue Wikidata Addlingue 0 http://tools.wmflabs.org/pltools/addlingue
1 2017-05-13T12:02:21+00:00 [Superyetkin] 0 View the 500 most popular articles on trwiki top_visited_pages top visited pages (tr) view ... 375 http://tools.wmflabs.org/superyetkin/toolinfo.... [tools, popular, top] top_visited_pages 34 None Top Visited Pages (tr) 0 http://tools.wmflabs.org/superyetkin/cok_ziyar...
2 2018-07-21T13:01:33+00:00 [Matěj Suchánek] 0 Various Wikidata Games abcgames wikidata games abc various wikidata g... 498 https://tools.wmflabs.org/abcgames/toolinfo.json [wikidata, game, gamification, api, php] abcgames 32 https://github.com/matejsuchanek/abcgames Wikidata Games abc 0 https://tools.wmflabs.org/wikidata-game/distri...
3 2019-02-08T21:00:45+00:00 [Good morning] 0 An anti-vandalism tool. toolforge.igl igloo for the chinese wikipedia ... 534 https://toolsadmin.wikimedia.org/tools/toolinf... [abuse, chinese, community-tech, editing, patr... toolforge.igl 5 Igloo for the Chinese Wikipedia 0 https://zh.wikipedia.org/wiki/WP:IGLOO
4 2016-06-26T22:00:17+00:00 [MusikAnimal] 0 Analysis of total pageviews across multiple pr... siteviews siteviews analysis analysis of total... 332 https://tools.wmflabs.org/pageviews/toolinfo.json [siteviews, pageviews, page views, traffic, vi... siteviews 68 https://github.com/MusikAnimal/pageviews Siteviews Analysis 0 https://tools.wmflabs.org/siteviews
print(len(data_hay_tools['name'].unique()))
598
data_hay_tools['name'].unique()
array(['wikidata-addlingue', 'top_visited_pages', 'abcgames',
       'toolforge.igl', 'siteviews', 'mm_wd_edit_stats', 'expose-data',
       'hay-streetwiki', 'my-first-flask-oauth-tool',
       'toolforge.extjsonuploader', 'toolforge.stockholm-mania', 'relgen',
       'wikidipendenza', 'prop-explorer', 'lp_FMIS', 'hay-wdskim',
       'mm_pagepile2kml', 'toolforge.ttt', 'vadalead', 'mm_wdfist',
       'toolforge.rhinosf1-afdclose', 'xtools-pagescreated',
       'toolforge.sge-status', 'wlmuk', 'ninthcircuit',
       'ptwikis-short-articles', 'mm_baglama', 'mm_user_edits', 'ramp',
       'vada', 'toolforge.lexeme-forms', 'mm_petscan',
       'filtered_contributions', 'mm_project_link_stats', 'mm_new_items',
       'toolforge.weapon-of-mass-description', 'yichengtry',
       'mormegil-catsuggest', 'mm_wikidata_stats', 'mm_reasonator',
       'toolforge.maria', 'erex-yomi', 'bub2-', 'mm_book2scroll',
       'hay-nadownload', 'amdb', 'toolforge.archivesearch',
       'hay-nasearch', 'meta-crossactivity', 'listpages',
       'openstack-browser', 'mm_isin', 'toolforge.swviewer',
       'ptwikis-wle-2014', 'toolforge.facebook-messenger-chatbot',
       'mapillary-commons', 'ptools-uploadcounter', 'coverage',
       'meta-catanalysis', 'worklist-demo', 'outreachy-useredits',
       'intersect-contribs', 'mm_quick_intersection',
       'toolforge.rangeblockfinder', 'mw-tool-octodata',
       'toolforge.osm-gadget-leaflet', 'MediaWiki-Benchmarker',
       'toolforge.wdvaliditycheck', 'userviews', 'churches',
       'hay-multisearch', 'mm_get_item_names', 'mm_wdq2graph',
       'WC3(WC-triple):Wikipedia Category Consistency Checker',
       'toolforge.ooui-debug', 'mm_linked_items', 'wscontest',
       'WPCleaner', 'toolforge.spamforbreakfast', 'mm_isbn2wiki',
       'toolforge.dibyadutta', 'toolforge.apt-browser',
       'mormegil-randomlist', 'mm_relator', 'wmf-sitematrix',
       'toolforge.jitrixis-test', 'irc_logs_browser',
       'contentcontributor', 'mm_autodesc', 'toolforge.fscbot',
       'commons-warper-viewer', 'Jubilarac',
       'outreachy-recent-edits-tool-', 'sal', 'ws-google-ocr',
       'hay-exturl', 'wikiloves', 'PageTools', 'toolforge.simplewd',
       'livestats', 'toolforge.userrank', 'toolforge.rfc-analyzer',
       'BsAut', 'NOA rand-upload tool',
       'toolforge.wikibase-databridge-storybook', 'Gerrit Reviewer Bot',
       'citation_hunter', 'mm_wikishootme', 'lp_WLM', 'massviews',
       'citationhunt', 'hartman', 'excel2wiki', 'ws-search',
       'textcatdemo', 'mm_toolscript', 'seth-searchsbl', 'yadfa',
       'stashbot', 'mm_fist', 'xtools-editcounter', 'random-featured',
       'rm-stats', 'MainAuthors', 'alphatest', 'analytics-quarry-web',
       'WDProp', 'mm_makeref', 'afdstats', 'meta-gusersearch',
       'bibleversefinder', 'meta-accounteligibility',
       'toolforge.volby2wiki', 'hay-reccat', 'oauth-hello-world',
       'mm_get_article_intro', 'outreachy-wikicv', 'disambig-monitor',
       'section-links', 'cats-php', 'bracketbotfind', 'wordpile-',
       'mm_nitol', 'mm_tempo_spatial_display', 'pronuncify', 'mm_geohack',
       'ipinfo', 'hay-langviews', 'mm_autolist', 'mw-tool-stylize',
       'wikilaeum', 'mormegil-catcompleter',
       'toolforge.charttableconverter', 'xtools-xagent',
       'toolforge.phab-ban', 'wm-metrics', 'mm_tabernacle', 'grlc',
       'versions', 'ordia', 'mm_get_distinct_authors',
       'WikiProjectRecentChanges', 'toolforge.blubberd', 'hay-wdview',
       'ptwikis-timeline', 'mm_herding_sheep', 'mm_deep_insight',
       'mw-tool-usage', 'mm_wikisource_dnb', 'ptools-filesincat',
       'lrtools_pages', 'mm_related_places', 'mm_wikidata_locations',
       'CleanupWorklistBot', 'hay-picapermalink', 'commonsarchive',
       'dump-torrents', 'wikiviewstats', 'toolforge.dockerregistry',
       'ptools-intertwined', 'toolforge.sge-jobs', 'mm_edit_counter',
       'lp_BBox', 'meetbot', 'TemplateParam', 'plaintexteditcounter',
       'hay-directory', 'toolforge.lexeme-senses', 'mm_usual_suspects',
       'wikistats', 'ascal', 'dataviz', 'pub',
       'mm_property_string_matcher', 'toolforge.VideoCutTool',
       'mm_resolver', 'mm_file_reuse', 'toolforge.event-live-session',
       'ascalendar', 'iabot', 'mm_random_out_of_sight', 'ptwikis-lists',
       'lp_credit_my_cc', 'commons-mass-description', 'toolforge.portal',
       'toolforge.olympics', 'programs-and-events-dashboard',
       'heritage-daily-uploads', 'intuition', 'mwpackages-',
       'toolforge.wikintu', 'mm_pagepile', 'toolforge.wikiportretdev',
       'mm_tooldir', 'wikidata-osm-link', 'taxon-info',
       'toolforge.germancon-mobile', 'hay-sum', 'cite-o-meter',
       'mm_catscan2', 'toolforge.massmailer', 'mm_rcvis',
       'wikidata-primary-sources', 'upload-stats-bot', 'paws',
       'interaction-timeline', 'toolforge.clpo13-flask',
       'rhinosf1-afdclose-', 'mm_file_siblings', 'mw-tool-snapshots',
       'CategoryWatchlistBot', 'wikipedia-externallinks-fast-extraction',
       'wikinity', 'mm_crosscats', 'mm_multidesc',
       'toolforge.vip-space-media', 'cil2', 'wikiprovenance-',
       'svgtranslate', 'toolforge.lincolnbot', 'mm_creator_from_wikidata',
       'toolforge.quickpreset-migrate', 'toolforge.toolviews',
       'articles-by-lat-lon-without-images', 'hay-gtaa', 'wmcounter',
       'integraality', 'mm_most_wanted', 'mw-tool-convert',
       'nonautomated-edit', 'mm_wikidata_useful', 'cvrminer',
       'OAuth Hello World', 'mm_quick_statements',
       'wikidata-descriptioner', 'zppixbot', 'mm_icommons', 'wlm-italy',
       'mm_commonshelper', 'meta-stewardry', 'sqid',
       'toolforge.global-search', 'lp_BBR', 'kyrksok-se',
       'mm_artwork_images', 'mm_no_information', 'ytcleaner',
       'locator-tool', 'dna', 'wikidata-taxonomy',
       'harvesting-data-refinery', 'policy-edits', 'wdprop-',
       'mapillary_widget', 'toolforge.rmstats', 'whois-gateway',
       'mm_important_blank_items', 'dewkin',
       'toolforge.messenger-chatbot', 'mm_random_page_in_cat',
       'translate', 'amineeded', 'hay-chantek', 'scholia', 'ptable',
       'mm_baglama2', 'wikipediarequests', 'ptwikis-visual-editor',
       'xtools-echo', 'mm_catnap', 'ptools-uploadersincat',
       'ptools-useractions', 'xtools-articleinfo', 'code-lookup',
       'wikidata-recent-deaths', 'wikicontrib-', 'mm_unused_files',
       'ptwikis-abusefilter-graphs', 'mm_misstake', 'mm_awarder',
       'recently-active', 'mm_flickrfree', 'xfd-stats',
       'toolforge.wdprop', 'stimmberechtigung', 'mediaviews', 'tsreports',
       'labs-tools-lists', 'languagetool-wikicheck',
       'mm_wd_species_commons', 'WikiProvenance', 'parliamentdiagram',
       'mm_wikidata_game', 'mm_duplicity', 'mm_persondata', 'persondata',
       'icalendar', 'mm_geograph2commons', 'tools-info-optimizer',
       'toolforge.cdnjs-beta', 'toolforge.outreachy-userrank',
       'toolforge.editor-retention-dashboard', 'mm_dnb_link_finder',
       'hay-wdtranslate', 'mm_dnb_ratios', 'video2commons',
       'xtools-pagehistory', 'mm_sparql_rc',
       'maps-userscript-boilerplate', 'dawiki-tools', 'topveiws',
       'ws-cat-browser', 'blankpages', 'wlm-maps', 'mm_missing_topics',
       'gridengine-status', 'toolforge.maplayers-demo', 'ircredirect',
       'toolforge.mapycz', 'jouncebot', 'primerpedia',
       'mm_label_no_instance', 'warped-to-iframe', 'zhnotofu',
       'WC3(WC-triple):Wikipedia Category Comprehensiveness Checker',
       'iw-contributors', 'mm_joanjoc', 'cluebot', 'mw-tool-expose-data',
       'wikidata-timeline', 'toolforge.bldrwnsch',
       'mm_pages_in_categories', 'mm_treeviews', 'depicts',
       'heritage-api', 'mw-tool-validator', 'cdnjs', 'commons-delinquent',
       'mm_random_items_no_instance', 'toolforge.fvcbot', 'ruprecht',
       'gmt', 'mm_bong', 'mm_wiki_todo', 'hay-vizquery', 'glamify',
       'iw-bot-statistics', 'langviews', 'meta', 'mm_mixnmatch',
       'mm_tab2wiki', 'hay-propbrowse', 'templatetransclusioncheck',
       'directory-directory-ng', 'WikiLinkBot', 'assamesetool',
       'toolforge.fpcstats', 'toolforge.kmlexport-cswiki',
       'mm_reverse_category_tree', 'toolforge.bing-maps',
       'toolforge.enhourly', 'toolforge.otrs-helper',
       'mm_file_siblings_interface', 'toolforge.borracho',
       'toolforge.tts-comparison', 'ajapaik2commons', 'remarkup2wikitext',
       'newusers', 'translatemplate', 'bbr-commons', 'ia-upload',
       'toolforge.iepcbm-bot', 'wikipagestats',
       'human-readable-kulturarvsdata', 'ReplicationLag',
       'toolforge.interact-oa', 'bbctvcite', 'toolforge.ru_monuments',
       'Jembot', 'mm_glamorous', 'lonelylinks', 'ldap', 'mm_wd_reconcile',
       'mm_wikidata_todo', 'toolforge.svgworkaroundbot',
       'seth-url-converter', 'twl17', 'wikiradio', 'toolforge.whodunnit',
       'toolforge.commtech-commons', 'checkwiki', 'admin',
       'toolforge.csp-report', 'meta-synchbot', 'toolforge.ascal',
       'mediawiki-feeds', 'mm_templator', 'enwikistalk', 'wikimap',
       'wikiComment', 'mm_translate_items', 'admin-beta',
       'itwikinews-rss', 'labs-tools-maintgraph', 'mm_item_names',
       'pileviews', 'wikidata-recent-changes', 'toolforge.texbot',
       'wembedder', 'toolforge.ipwatcher', 'mm_multibeacon',
       'mm_metamine', 'pending-subs', 'wikiatlas2014', 'toolforge.srish',
       'pywikibot', 'superyetkin', 'WikidataClassBrowser',
       'toolforge.mathqa', 'mm_prep_bio', 'wikidata-harvesttemplates',
       'mm_update_article_counter', 'mm_tooltranslate',
       'toolforge.basyounybot', 'mm_out_of_sight',
       'toolforge.parliament-diagram-generator',
       'mm_find_duplicate_items', 'grid-jobs', 'ptwikis-ip-patrol',
       'blp-edits', 'checker', 'mm_widar', 'iw-uselessprotections',
       'mm_wd_commons', 'mm_glamorous2', 'mw-tool-blankpages', 'jury',
       'asurabot', 'ptwikis-common-interwikis', 'contributor_locations',
       'wikiwhatsappbot', 'mm_trans_parent', 'Gerrit Patch Uploader',
       'mm_catfood', 'cluebot3', 'ptools-findfiles', 'pageviews',
       'mm_commons_api', 'hay-sgdtransform', 'stewardbots', 'grafa',
       'ipcheck', 'videoconvert', 'mm_concept_cloud', 'bash', 'lst-guard',
       'mm_no_statements', 'meta-magicredirect', 'mm_subclass_dates',
       'mm_map2wp', 'mm_autolist2', 'toolforge.omarghridabot',
       'mw-tool-oojs-ui-distro', 'toolforge.hub', 'douglasbot',
       'toolforge.addscholartopics', 'WikidataNavelGazer', 'copyvios',
       'lp_KMB', 'mm_item_creator', 'mormegil-catwithoutcoords',
       'toolforge.wpcleaner', 'mm_recent_deaths', 'meta-userpages',
       'toolforge.awmd-stats', 'mm_wd_missing_images', 'mm_listeria',
       'wdq2sparql', 'ptwikis-mobile', 'hay-gwtcook', 'wakt',
       'pronuncify.net', 'ksamsok-rest', 'ppp-sparql', 'universalviewer',
       'CSV2Wikitable', 'mm_wikisoba', 'mm_url2commons',
       'mm_authority_control', 'poiimport', 'croptool',
       'svgtranslate-test-', 'wd-rank', 'mm_wiki2playlist',
       'InceptionBot', 'crosswatch', 'shortnames', 'mm_redirector',
       'mapillary_simpleUpload', 'mormegil_randomlist',
       'meta-globalgroups', 'wiktioutils_planches',
       'mm_related_properties', 'crossactivity', 'replag',
       'mormegil-natbirths', 'RedirFinder', 'mm_file_dupes',
       'mm_terminator', 'mm_data_url', 'mapillary_WLM',
       'iw-category-redirects', 'mm_wdq', 'mm_missing_props', 'cewbot',
       'toolforge.spacemedia', 'toolforge.reviewer', 'raun',
       'contribsize', 'mobo', 'mm_deep_out_of_sight',
       'mm_commons_image_feed', 'wsexport', 'wikidata-externalid-url',
       'toolforge.gerrit-avatar-uploader', 'commons-maintenance-bot',
       'mw-tool-orphantalk', 'redirectviews', 'NOA upload tool',
       'wikibugs', 'Authority control validator', 'wikilist',
       'interactoa', 'meta-stalktoy', 'mw2sparql', 'mirador',
       'wikilaeum-tool', 'mm_add_information', 'wlm-stats',
       'ptwikis-matrix', 'vandalstat', 'mm_property_trees', 'giftbot',
       'toolforge.xtools-autoedits', 'toolforge.wm-commons-emoji-bot',
       'category_maintenance', 'toolforge.flickrdash',
       'toolforge.google-drive-photos-to-commons', 'toolforge.freebase',
       'mm_wdvenn', 'pbbot', 'toolforge.wikicite-dashboard', 'xtools-ec',
       'xtools-adminstats', 'wikitrends', 'wlm-wikidata',
       'category-edits', 'wikitools', 'mm_wp_no_image',
       'mm_missing_wp_animal_audio', 'ci-last-run', 'mm_flickr2commons',
       'toolforge.phabulous', 'toolforge.corhist', 'toolforge.scrotbot',
       'mm_beacon', 'toolforge.tool-db-usage', 'yadkard', 'sparqlblocks',
       'toolforge.timerelengteam', 'nonautomated-namespace-edits',
       'extreg-wos', 'mm_html2wiki', 'mm_wdq_image_feed',
       'schema-app-wikidata-lookup', 'facebook-messenger-chatbot',
       'stylize', 'mm_cas'], dtype=object)
data_hay_tools['unavailable'].value_counts() # not true I found several not being available
0    598
Name: unavailable, dtype: int64

list_of_tools = data_hay_tools[['id','name','description','fulltext']] tools = list_of_tools.sort_values(by="id") tools.to_csv("tools.csv", index=False)

# query to toolforge parametrized with tool name
#to deploy at toolforge - else at HER or here I will need to connect to ssh and ask from command line?
#f = open('/replica.my.cnf', "r")
#lines = f.readlines()
#f.close()
username="u20891"
password= "tGxn5kQpbFbRTZzk"
#jdbc:mariadb:
hostport= "wikidatawiki.analytics.db.svc.eqiad.wmflabs:3306"
db_name="wikidatawiki_p"
engine= "mysql+pymysql://"+username+":"+password+"@"+hostport+"/"+db_name
#mysql+pymysql://
engine = create_engine(engine)
df= pd.read_sql_query('''select * from revision inner join comment on rev_comment_id=comment_id where comment_text LIKE %s limit 2''', engine,params=("%#petscan%",))

df
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-17-f4d9b37ab7bb> in <module>
----> 1 df= pd.read_sql_query('''select * from revision inner join comment on rev_comment_id=comment_id where comment_text LIKE %s limit 2''', engine,params=("%#petscan%",))
      2 
      3 df

/srv/paws/lib/python3.6/site-packages/pandas/io/sql.py in read_sql_query(sql, con, index_col, coerce_float, params, parse_dates, chunksize)
    312     return pandas_sql.read_query(
    313         sql, index_col=index_col, params=params, coerce_float=coerce_float,
--> 314         parse_dates=parse_dates, chunksize=chunksize)
    315 
    316 

/srv/paws/lib/python3.6/site-packages/pandas/io/sql.py in read_query(self, sql, index_col, coerce_float, parse_dates, params, chunksize)
   1097         args = _convert_params(sql, params)
   1098 
-> 1099         result = self.execute(*args)
   1100         columns = result.keys()
   1101 

/srv/paws/lib/python3.6/site-packages/pandas/io/sql.py in execute(self, *args, **kwargs)
    988     def execute(self, *args, **kwargs):
    989         """Simple passthrough to SQLAlchemy connectable"""
--> 990         return self.connectable.execute(*args, **kwargs)
    991 
    992     def read_table(self, table_name, index_col=None, coerce_float=True,

/srv/paws/lib/python3.6/site-packages/sqlalchemy/engine/base.py in execute(self, statement, *multiparams, **params)
   2164 
   2165         connection = self._contextual_connect(close_with_result=True)
-> 2166         return connection.execute(statement, *multiparams, **params)
   2167 
   2168     def scalar(self, statement, *multiparams, **params):

/srv/paws/lib/python3.6/site-packages/sqlalchemy/engine/base.py in execute(self, object_, *multiparams, **params)
    980         """
    981         if isinstance(object_, util.string_types[0]):
--> 982             return self._execute_text(object_, multiparams, params)
    983         try:
    984             meth = object_._execute_on_connection

/srv/paws/lib/python3.6/site-packages/sqlalchemy/engine/base.py in _execute_text(self, statement, multiparams, params)
   1153             parameters,
   1154             statement,
-> 1155             parameters,
   1156         )
   1157         if self._has_events or self.engine._has_events:

/srv/paws/lib/python3.6/site-packages/sqlalchemy/engine/base.py in _execute_context(self, dialect, constructor, statement, parameters, *args)
   1246         except BaseException as e:
   1247             self._handle_dbapi_exception(
-> 1248                 e, statement, parameters, cursor, context
   1249             )
   1250 

/srv/paws/lib/python3.6/site-packages/sqlalchemy/engine/base.py in _handle_dbapi_exception(self, e, statement, parameters, cursor, context)
   1466                 util.raise_from_cause(sqlalchemy_exception, exc_info)
   1467             else:
-> 1468                 util.reraise(*exc_info)
   1469 
   1470         finally:

/srv/paws/lib/python3.6/site-packages/sqlalchemy/util/compat.py in reraise(tp, value, tb, cause)
    127         if value.__traceback__ is not tb:
    128             raise value.with_traceback(tb)
--> 129         raise value
    130 
    131     def u(s):

/srv/paws/lib/python3.6/site-packages/sqlalchemy/engine/base.py in _execute_context(self, dialect, constructor, statement, parameters, *args)
   1242                 if not evt_handled:
   1243                     self.dialect.do_execute(
-> 1244                         cursor, statement, parameters, context
   1245                     )
   1246         except BaseException as e:

/srv/paws/lib/python3.6/site-packages/sqlalchemy/engine/default.py in do_execute(self, cursor, statement, parameters, context)
    550 
    551     def do_execute(self, cursor, statement, parameters, context=None):
--> 552         cursor.execute(statement, parameters)
    553 
    554     def do_execute_no_params(self, cursor, statement, context=None):

/srv/paws/lib/python3.6/site-packages/pymysql/cursors.py in execute(self, query, args)
    168         query = self.mogrify(query, args)
    169 
--> 170         result = self._query(query)
    171         self._executed = query
    172         return result

/srv/paws/lib/python3.6/site-packages/pymysql/cursors.py in _query(self, q)
    326         self._last_executed = q
    327         self._clear_result()
--> 328         conn.query(q)
    329         self._do_get_result()
    330         return self.rowcount

/srv/paws/lib/python3.6/site-packages/pymysql/connections.py in query(self, sql, unbuffered)
    515                 sql = sql.encode(self.encoding, 'surrogateescape')
    516         self._execute_command(COMMAND.COM_QUERY, sql)
--> 517         self._affected_rows = self._read_query_result(unbuffered=unbuffered)
    518         return self._affected_rows
    519 

/srv/paws/lib/python3.6/site-packages/pymysql/connections.py in _read_query_result(self, unbuffered)
    730         else:
    731             result = MySQLResult(self)
--> 732             result.read()
    733         self._result = result
    734         if result.server_status is not None:

/srv/paws/lib/python3.6/site-packages/pymysql/connections.py in read(self)
   1073     def read(self):
   1074         try:
-> 1075             first_packet = self.connection._read_packet()
   1076 
   1077             if first_packet.is_ok_packet():

/srv/paws/lib/python3.6/site-packages/pymysql/connections.py in _read_packet(self, packet_type)
    655         buff = b''
    656         while True:
--> 657             packet_header = self._read_bytes(4)
    658             #if DEBUG: dump_packet(packet_header)
    659 

/srv/paws/lib/python3.6/site-packages/pymysql/connections.py in _read_bytes(self, num_bytes)
    689         while True:
    690             try:
--> 691                 data = self._rfile.read(num_bytes)
    692                 break
    693             except (IOError, OSError) as e:

/usr/lib/python3.6/socket.py in readinto(self, b)
    584         while True:
    585             try:
--> 586                 return self._sock.recv_into(b)
    587             except timeout:
    588                 self._timeout_occurred = True

KeyboardInterrupt: 

df= pd.read_sql_query('''select count() from comment where comment_id = 143227971''', engine) df['count()'][0]

pd.options.display.max_colwidth = 100

df[df["rev_id"]==316012497]['comment_text']

df= pd.read_sql_query('''select count(*) from revision inner join comment on rev_comment_id=comment_id where comment_text LIKE %s''', engine,params=("%#petscan%",)) df

the select limit 2 gives the result but the count loses the connection

launch query with exception handling if connection bad retry

I can also get from "change-tag-def" Oauth for tool but in that case i dont know which on3 it is for the count so better dont do this

#easier for now: for each tool in the list, execute a count select and print it in a file - over night?
#dictionary with the counts of each tool - key
toolsCount = dict()
def count_tool_appearances(row):
    nameTool = row['name']
    #i cannot query every time - it will take so long!
    # i can do a count for each and let it run
    
    username=""
    password= ""
    #jdbc:mariadb:
    hostport= "wikidatawiki.analytics.db.svc.eqiad.wmflabs:3306"
    db_name="wikidatawiki_p"
    
    count = 0
    
    try:
        engine= "mysql+pymysql://"+username+":"+password+"@"+hostport+"/"+db_name
        engine = create_engine(engine)
        
        df= pd.read_sql_query('''select count(*) from revision inner join comment on rev_comment_id=comment_id where comment_text LIKE %s where ''', engine,params=("%#"+nameTool+"%",))
        count = df['count(*)'][0]

    except Exception as e:
        if e.message() == 2013:
            #connection error
            count = -1
            print(e + " \\")
    

    toolsCount[toolname] = count
data_hay_tools.apply(count_tool_appearances,axis=1)
---------------------------------------------------------------------------
ProgrammingError                          Traceback (most recent call last)
/srv/paws/lib/python3.6/site-packages/sqlalchemy/engine/base.py in _execute_context(self, dialect, constructor, statement, parameters, *args)
   1243                     self.dialect.do_execute(
-> 1244                         cursor, statement, parameters, context
   1245                     )

/srv/paws/lib/python3.6/site-packages/sqlalchemy/engine/default.py in do_execute(self, cursor, statement, parameters, context)
    551     def do_execute(self, cursor, statement, parameters, context=None):
--> 552         cursor.execute(statement, parameters)
    553 

/srv/paws/lib/python3.6/site-packages/pymysql/cursors.py in execute(self, query, args)
    169 
--> 170         result = self._query(query)
    171         self._executed = query

/srv/paws/lib/python3.6/site-packages/pymysql/cursors.py in _query(self, q)
    327         self._clear_result()
--> 328         conn.query(q)
    329         self._do_get_result()

/srv/paws/lib/python3.6/site-packages/pymysql/connections.py in query(self, sql, unbuffered)
    516         self._execute_command(COMMAND.COM_QUERY, sql)
--> 517         self._affected_rows = self._read_query_result(unbuffered=unbuffered)
    518         return self._affected_rows

/srv/paws/lib/python3.6/site-packages/pymysql/connections.py in _read_query_result(self, unbuffered)
    731             result = MySQLResult(self)
--> 732             result.read()
    733         self._result = result

/srv/paws/lib/python3.6/site-packages/pymysql/connections.py in read(self)
   1074         try:
-> 1075             first_packet = self.connection._read_packet()
   1076 

/srv/paws/lib/python3.6/site-packages/pymysql/connections.py in _read_packet(self, packet_type)
    683         packet = packet_type(buff, self.encoding)
--> 684         packet.check_error()
    685         return packet

/srv/paws/lib/python3.6/site-packages/pymysql/protocol.py in check_error(self)
    219             if DEBUG: print("errno =", errno)
--> 220             err.raise_mysql_exception(self._data)
    221 

/srv/paws/lib/python3.6/site-packages/pymysql/err.py in raise_mysql_exception(data)
    108     errorclass = error_map.get(errno, InternalError)
--> 109     raise errorclass(errno, errval)

ProgrammingError: (1064, "You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use near 'where' at line 1")

The above exception was the direct cause of the following exception:

ProgrammingError                          Traceback (most recent call last)
<ipython-input-13-c25fde7c041d> in count_tool_appearances(row)
     19 
---> 20         df= pd.read_sql_query('''select count(*) from revision inner join comment on rev_comment_id=comment_id where comment_text LIKE %s where ''', engine,params=("%#"+nameTool+"%",))
     21         count = df['count(*)'][0]

/srv/paws/lib/python3.6/site-packages/pandas/io/sql.py in read_sql_query(sql, con, index_col, coerce_float, params, parse_dates, chunksize)
    313         sql, index_col=index_col, params=params, coerce_float=coerce_float,
--> 314         parse_dates=parse_dates, chunksize=chunksize)
    315 

/srv/paws/lib/python3.6/site-packages/pandas/io/sql.py in read_query(self, sql, index_col, coerce_float, parse_dates, params, chunksize)
   1098 
-> 1099         result = self.execute(*args)
   1100         columns = result.keys()

/srv/paws/lib/python3.6/site-packages/pandas/io/sql.py in execute(self, *args, **kwargs)
    989         """Simple passthrough to SQLAlchemy connectable"""
--> 990         return self.connectable.execute(*args, **kwargs)
    991 

/srv/paws/lib/python3.6/site-packages/sqlalchemy/engine/base.py in execute(self, statement, *multiparams, **params)
   2165         connection = self._contextual_connect(close_with_result=True)
-> 2166         return connection.execute(statement, *multiparams, **params)
   2167 

/srv/paws/lib/python3.6/site-packages/sqlalchemy/engine/base.py in execute(self, object_, *multiparams, **params)
    981         if isinstance(object_, util.string_types[0]):
--> 982             return self._execute_text(object_, multiparams, params)
    983         try:

/srv/paws/lib/python3.6/site-packages/sqlalchemy/engine/base.py in _execute_text(self, statement, multiparams, params)
   1154             statement,
-> 1155             parameters,
   1156         )

/srv/paws/lib/python3.6/site-packages/sqlalchemy/engine/base.py in _execute_context(self, dialect, constructor, statement, parameters, *args)
   1247             self._handle_dbapi_exception(
-> 1248                 e, statement, parameters, cursor, context
   1249             )

/srv/paws/lib/python3.6/site-packages/sqlalchemy/engine/base.py in _handle_dbapi_exception(self, e, statement, parameters, cursor, context)
   1465             elif should_wrap:
-> 1466                 util.raise_from_cause(sqlalchemy_exception, exc_info)
   1467             else:

/srv/paws/lib/python3.6/site-packages/sqlalchemy/util/compat.py in raise_from_cause(exception, exc_info)
    382     cause = exc_value if exc_value is not exception else None
--> 383     reraise(type(exception), exception, tb=exc_tb, cause=cause)
    384 

/srv/paws/lib/python3.6/site-packages/sqlalchemy/util/compat.py in reraise(tp, value, tb, cause)
    127         if value.__traceback__ is not tb:
--> 128             raise value.with_traceback(tb)
    129         raise value

/srv/paws/lib/python3.6/site-packages/sqlalchemy/engine/base.py in _execute_context(self, dialect, constructor, statement, parameters, *args)
   1243                     self.dialect.do_execute(
-> 1244                         cursor, statement, parameters, context
   1245                     )

/srv/paws/lib/python3.6/site-packages/sqlalchemy/engine/default.py in do_execute(self, cursor, statement, parameters, context)
    551     def do_execute(self, cursor, statement, parameters, context=None):
--> 552         cursor.execute(statement, parameters)
    553 

/srv/paws/lib/python3.6/site-packages/pymysql/cursors.py in execute(self, query, args)
    169 
--> 170         result = self._query(query)
    171         self._executed = query

/srv/paws/lib/python3.6/site-packages/pymysql/cursors.py in _query(self, q)
    327         self._clear_result()
--> 328         conn.query(q)
    329         self._do_get_result()

/srv/paws/lib/python3.6/site-packages/pymysql/connections.py in query(self, sql, unbuffered)
    516         self._execute_command(COMMAND.COM_QUERY, sql)
--> 517         self._affected_rows = self._read_query_result(unbuffered=unbuffered)
    518         return self._affected_rows

/srv/paws/lib/python3.6/site-packages/pymysql/connections.py in _read_query_result(self, unbuffered)
    731             result = MySQLResult(self)
--> 732             result.read()
    733         self._result = result

/srv/paws/lib/python3.6/site-packages/pymysql/connections.py in read(self)
   1074         try:
-> 1075             first_packet = self.connection._read_packet()
   1076 

/srv/paws/lib/python3.6/site-packages/pymysql/connections.py in _read_packet(self, packet_type)
    683         packet = packet_type(buff, self.encoding)
--> 684         packet.check_error()
    685         return packet

/srv/paws/lib/python3.6/site-packages/pymysql/protocol.py in check_error(self)
    219             if DEBUG: print("errno =", errno)
--> 220             err.raise_mysql_exception(self._data)
    221 

/srv/paws/lib/python3.6/site-packages/pymysql/err.py in raise_mysql_exception(data)
    108     errorclass = error_map.get(errno, InternalError)
--> 109     raise errorclass(errno, errval)

ProgrammingError: (pymysql.err.ProgrammingError) (1064, "You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use near 'where' at line 1")
[SQL: select count(*) from revision inner join comment on rev_comment_id=comment_id where comment_text LIKE %s where ]
[parameters: ('%#wikidata-addlingue%',)]
(Background on this error at: http://sqlalche.me/e/f405)

During handling of the above exception, another exception occurred:

AttributeError                            Traceback (most recent call last)
<ipython-input-14-17dbc178973b> in <module>
----> 1 data_hay_tools.apply(count_tool_appearances,axis=1)

/srv/paws/lib/python3.6/site-packages/pandas/core/frame.py in apply(self, func, axis, broadcast, raw, reduce, result_type, args, **kwds)
   6485                          args=args,
   6486                          kwds=kwds)
-> 6487         return op.get_result()
   6488 
   6489     def applymap(self, func):

/srv/paws/lib/python3.6/site-packages/pandas/core/apply.py in get_result(self)
    149             return self.apply_raw()
    150 
--> 151         return self.apply_standard()
    152 
    153     def apply_empty_result(self):

/srv/paws/lib/python3.6/site-packages/pandas/core/apply.py in apply_standard(self)
    255 
    256         # compute the result using the series generator
--> 257         self.apply_series_generator()
    258 
    259         # wrap results

/srv/paws/lib/python3.6/site-packages/pandas/core/apply.py in apply_series_generator(self)
    284             try:
    285                 for i, v in enumerate(series_gen):
--> 286                     results[i] = self.f(v)
    287                     keys.append(v.name)
    288             except Exception as e:

<ipython-input-13-c25fde7c041d> in count_tool_appearances(row)
     22 
     23     except Exception as e:
---> 24         if e.message() == 2013:
     25             #connection error
     26             count = -1

AttributeError: ("'ProgrammingError' object has no attribute 'message'", 'occurred at index 0')
data = pd.DataFrame.from_dict(toolsCount)
data.to_csv("edits_per_tool.csv")
#check for each tool name? if they appear in the comments - but careful some are like sentences in name
#close result!
 
 
 
#mix here with the quality tools that we have
#OR just filter 
 
 
 
## Anayse the data