#how many pages don't have images on commons?
def non_commons_images(recs_list, list_name):
    """Print the titles of records whose lead image is NOT on Commons.

    Args:
        recs_list: list of record dicts; each is expected to carry
            rec['lead image']['on commons'] (bool) and rec['title'].
        list_name: label printed (tab-indented) above the titles.
    """
    print("\t" + list_name)
    for rec in recs_list:
        # PEP 8: never compare to False with `==` — use truthiness.
        # (Also treats a None/empty flag as "not on commons".)
        if not rec['lead image']['on commons']:
            print("\t\t" + rec['title'])
# Walk the per-article recommendation info collected above and print each
# article title followed by its per-source entries.
# NOTE(review): the body of the inner loop is not visible in this chunk —
# the statements under `for k2,v2 in v.items():` appear to have been
# truncated, so as written this is a SyntaxError. Confirm against the
# original notebook before running.
for k,v in top3_recs_info.items():
    print("article: " + k)
    for k2,v2 in v.items():
#need some more counts of which images are default
#need some more counts of which images are on commons
#print out links of non-commons images
#regularize their file name endings (or else importwizard will break)
#save the top3_recs_info for later
# Persist the current top3_recs_info snapshot to a timestamped JSON file
# so this run's recommendations can be reloaded in a later session.
timestr = time.strftime("%Y%m%d-%H%M%S")
print("last saved version: " + timestr)
out_path = 'data/sample_set_2_top3_recs_info_{}.json'.format(timestr)
with open(out_path, 'w') as fout:
    json.dump(top3_recs_info, fout, sort_keys=True)

Still to do:

  • generate article ids (201-299)
  • only use 25 articles
  • generate my conditions set
  • create a lookup table with two versions (1 and 2) of each article in the sample, and randomly assign the WV and CS recommendations to either the 'a' or 'b' section within each version (the simplest arrangement that isn't transparently obvious to the user)
  • templating should be a separate notebook