# Untitled

import csv
import re


def read_file(input, output):
    """Copy a CSV file, keeping only the first row for each ``LogbookIdent``.

    The first row of ``input`` is treated as a header: it is searched for a
    column named ``LogbookIdent`` and is *not* copied to ``output``. When no
    such column exists, the first column (index 0) is used as the key.

    For every following row the key cell is reduced to its ASCII letters and
    digits (everything else, including spaces, is removed). A row is written
    to ``output`` -- with the cleaned key in place -- only if that cleaned key
    has not been seen earlier in the file. Rows too short to contain the key
    column (e.g. blank lines) are skipped.

    Args:
        input: Path of the CSV file to read. (The name shadows the builtin
            but is kept so existing keyword callers keep working.)
        output: Path of the CSV file to create or overwrite.

    Returns:
        None. The result is the file written at ``output``.

    Raises:
        OSError: If either file cannot be opened.
    """
    non_alphanumeric = re.compile(r"[^a-zA-Z0-9]")
    seen = set()

    # newline='' is what the csv module expects on both ends; without it the
    # writer emits "\r\r\n" line endings on Windows. The context manager
    # guarantees both files are closed (and the output flushed) on any exit.
    with open(input, 'r', newline='') as source, \
            open(output, 'w', newline='') as target:
        csv_reader = csv.reader(source, delimiter=',')
        csv_writer = csv.writer(target)

        header = next(csv_reader, None)
        if header is None:
            # Empty input: leave an empty output file behind.
            return

        index = 0  # fall back to the first column if the header lacks the key
        for position, column_name in enumerate(header):
            if column_name == "LogbookIdent":
                index = position
                print("index", index)
                break

        for line in csv_reader:
            if index >= len(line):
                # Blank or truncated row: there is no identifier to key on.
                continue
            key = non_alphanumeric.sub("", line[index])
            if key in seen:
                continue
            seen.add(key)
            line[index] = key
            csv_writer.writerow(line)


# Run the de-duplication over each raw export, writing the result next to it
# with an "_out" suffix. The files are processed in the order listed.
for source_path, cleaned_path in (
    ("/Users/aviralsrivastava/ShipLogbookID.csv",
     "/Users/aviralsrivastava/ShipLogbookID_out.csv"),
    ("/Users/aviralsrivastava/CLIWOC15.csv",
     "/Users/aviralsrivastava/CLIWOC15_out.csv"),
):
    read_file(source_path, cleaned_path)


'''
Rows are de-duplicated by tracking the cleaned LogbookIdent keys that have
already been seen. A dictionary would also have worked, but I wanted to get
comfortable with sets, as they are also used in a lot of DB implementations in
Python 3. The cleaned data is then loaded into SQL and used for querying.

The table output is here: https://pastebin.com/qxFjmbq1
'''