import csv as csv

# Codes in the grade column (index 1) that mark an undergraduate respondent.
UNDERGRAD_CODES = frozenset({"ug-fr", "ug-so", "ug-jr", "ug-se"})

# Spellings of the major (index 2) that are collapsed to the label "HCDE".
HCDE_SPELLINGS = frozenset({
    "Human centered design and engineering",
    "human centered design and engineering",
    "Human Centered Design & Engineering",
    "Human Centered Design and Engineering",
    "hcde",
})

# Frequency answers and the score (kept as text) that replaces each one:
# very_freq = 5, freq = 4, occa = 3, rare = 2, never = 1.
FREQUENCY_SCORES = {
    "very_freq": "5",
    "freq": "4",
    "occa": "3",
    "rare": "2",
    "never": "1",
}

# First column index that is checked for a frequency answer.
FIRST_FREQUENCY_COLUMN = 6


def clean_record(record):
    """Return a cleaned copy of one tab-split survey row.

    Args:
        record: list of column strings with at least three columns.

    Returns:
        A new list where column 1 is "undergrad" or "grad", column 2 is
        "HCDE" when it matched a known spelling, and every column from
        index 6 onward that held a frequency answer holds its score.
        All other columns are copied unchanged.

    Raises:
        IndexError: if ``record`` has fewer than three columns.
    """
    cleaned = list(record)

    # NOTE(review): every non-undergrad value (including unknown or empty
    # codes) becomes "grad", exactly as before. An earlier comment mentioned
    # an "N/A" category -- confirm whether unknown codes should map to it.
    cleaned[1] = "undergrad" if cleaned[1] in UNDERGRAD_CODES else "grad"

    if cleaned[2] in HCDE_SPELLINGS:
        cleaned[2] = "HCDE"

    # Bounded by the row length, so short or long rows can neither raise
    # IndexError nor leave trailing frequency answers unconverted.
    for index in range(FIRST_FREQUENCY_COLUMN, len(cleaned)):
        cleaned[index] = FREQUENCY_SCORES.get(cleaned[index], cleaned[index])

    return cleaned


# Guarded so that importing this module (for example from a test) does not
# touch the filesystem; running it as a script or notebook cell is unchanged.
if __name__ == "__main__":
    result = []

    with open("data_input.txt", "r") as fin:
        # The first line is the header and is copied through verbatim.
        header = fin.readline()
        for line in fin:
            row_text = line.rstrip("\n")
            if not row_text:
                # A blank line carries no record; skip it instead of crashing.
                continue
            record = row_text.split("\t")
            # Exclude respondents who did not consent.
            if record[0] == "consent-no":
                print("we do nothing")
                continue
            result.append(clean_record(record))
    print("DATA IS CLEAN!")

    # The output is opened only after the input was read successfully, so a
    # failed read no longer leaves a truncated output file behind.
    with open("drgdata_cleaned.tsv", "w") as fout:
        fout.write(header)
        for line_of_record in result:
            fout.write("\t".join(line_of_record) + "\n")
we do nothing
we do nothing
we do nothing
we do nothing
we do nothing
we do nothing
we do nothing
we do nothing
DATA IS CLEAN!
#Which is the most common device grads and undergrads use?

# Device codes that are tallied from the devices column (index 5).
DEVICE_CODES = ("lc", "dc", "tab", "sp", "itv-vgc")


def count_devices(lines):
    """Tally device usage per group from cleaned TSV lines.

    Args:
        lines: iterable of tab-separated text lines; the first non-blank
            line is treated as the header row.

    Returns:
        A tuple ``(header, grad_device, u_count, g_count)``:
        ``header`` is ``[header column 1, header column 5]`` (empty when
        there are no lines), ``grad_device`` maps "undergrad" and "grad" to
        a per-device count dict, and ``u_count`` / ``g_count`` are the
        number of rows counted in each group.

    Raises:
        IndexError: if a non-blank line has fewer than six columns.
    """
    header = []
    grad_device = {
        "undergrad": {code: 0 for code in DEVICE_CODES},
        "grad": {code: 0 for code in DEVICE_CODES},
    }
    u_count = 0
    g_count = 0
    header_seen = False

    for line in lines:
        row_text = line.rstrip("\n")
        if not row_text:
            # Blank lines carry no columns; skip them instead of crashing.
            continue
        list_line = row_text.split("\t")

        if not header_seen:
            header.append(list_line[1])
            header.append(list_line[5])
            header_seen = True
            continue

        # Any row whose grade column is not "undergrad" is counted as grad.
        if list_line[1] == "undergrad":
            group = "undergrad"
            u_count += 1
        else:
            group = "grad"
            g_count += 1

        # Step 1. Drop the surrounding quotes and spaces, split on comma.
        split_list = list_line[5].strip('"').replace(' ', '').split(',')
        # Step 2. Increment the tally of every recognised device code;
        # anything else (including an empty answer) is ignored.
        tally = grad_device[group]
        for each in split_list:
            if each in tally:
                tally[each] += 1

    return header, grad_device, u_count, g_count


# Guarded so that importing this module (for example from a test) does not
# touch the filesystem; running it as a script or notebook cell is unchanged.
if __name__ == "__main__":
    # The context manager closes the file, which was previously left open.
    with open("drgdata_cleaned.tsv", "r") as fin:
        header, grad_device, u_count, g_count = count_devices(fin)
                    
#now find the max device
# max_type / max_count end up holding the device with the highest combined
# (grad + undergrad) tally. They stay "" / 0 when every tally is zero, and
# on a tie the device that comes first in the dict wins.
max_type = ""
max_count = 0

for key in grad_device["undergrad"]:
    # Deliberately not named `sum`: binding that name at module level would
    # shadow the builtin sum() for all code that runs afterwards.
    combined = grad_device["grad"][key] + grad_device["undergrad"][key]
    if combined > max_count:
        max_count = combined
        max_type = key

print("the most device for grad and undergrads is: " + max_type + " Total " + str(max_count))
        
# #find max device for grads %
# for key in grad_device["undergrad"]:
#   sum = grad_device["grad"][key] + grad_device["undergrad"][key]
#   if sum > max_count:
#       max_count = sum
#       max_type = key

#find max device for undergrad
# max_ugrad_type / max_ugrad_count end up holding the device with the highest
# undergrad tally. They stay "" / 0 when every tally is zero, and on a tie
# the device that comes first in the dict wins.
max_ugrad_type = ""
max_ugrad_count = 0

for key in grad_device["undergrad"]:
    total = grad_device["undergrad"][key]
    if total > max_ugrad_count:
        max_ugrad_count = total
        max_ugrad_type = key

# The label names undergrads only, matching the tally that is reported here.
print("the most device for undergrads is: " + max_ugrad_type + " Total " + str(max_ugrad_count))