Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- nohaelprince@uwaterloo.ca, 01-05-2014
- nohaelprince@uwaterloo.ca, 01-05-2014
- nohaelprince@uwaterloo.ca, 01-05-2014
- nohaelprince@gmail.com, 01-05-2014
- INSERT INTO domains(domain_name, cnt, date_of_entry) VALUES (%s, %s, %s);
- #!/usr/bin/python
- import fileinput
- import csv
- import os
- import sys
- import MySQLdb
- from collections import defaultdict
# Module-level placeholders for the parsed CSV data. Both names are rebound
# by the main program at the bottom of the script before they are read.
lst, d_lst = defaultdict(list), defaultdict(list)
- # ======================== Defined Functions ======================
def get_file_path(filename):
    """Return *filename* joined onto the current working directory.

    Args:
        filename: File name or relative path. An absolute path comes back
            unchanged, because os.path.join discards earlier components
            when a later one is absolute.

    Returns:
        The joined path as a string. The path is not normalised and the
        file is not checked for existence.
    """
    return os.path.join(os.getcwd(), filename)
- # ===========================================================
def read_CSV(filepath):
    """Read an e-mail/date CSV file and collect the e-mail domains in it.

    Each usable row has an e-mail address in its first column and a date
    string in its second. Both values are stripped of surrounding
    whitespace and the address is lower-cased. The domain is everything
    from the first "@" onwards, so the "@" itself is kept
    (for example "@gmail.com").

    Rows that are blank, have fewer than two columns, or whose first
    column contains no "@" are skipped instead of raising or producing a
    bogus one-character domain.

    Args:
        filepath: Path of the CSV file to read.

    Returns:
        A tuple ``(domains_by_date, domain_list)`` where

        * ``domains_by_date`` is a ``defaultdict(list)`` mapping each date
          string to the domains seen with that date, in file order and
          with duplicates kept;
        * ``domain_list`` is a sorted list of the distinct domains.
    """
    # The csv module needs a binary file on Python 2 but a text file opened
    # with newline='' on Python 3, so pick the mode for the running version.
    if sys.version_info[0] >= 3:
        csvfile = open(filepath, 'r', newline='')
    else:
        csvfile = open(filepath, 'rb')

    domains_by_date = defaultdict(list)
    unique_domains = set()
    with csvfile:
        for row in csv.reader(csvfile):
            if len(row) < 2:
                # Blank line or a row without a date column.
                continue
            email = row[0].strip().lower()
            at_pos = email.find("@")
            if at_pos < 0:
                # Not an e-mail address (e.g. a header row).
                continue
            domain = email[at_pos:]
            domains_by_date[row[1].strip()].append(domain)
            unique_domains.add(domain)

    return domains_by_date, sorted(unique_domains)
- # ===========================================================
def update_DB(lst):
    """Insert per-date domain counts into the ``domains`` table.

    For every date key in *lst*, the domains listed under it are counted
    and one row ``(domain_name, cnt, date_of_entry)`` is inserted per
    distinct domain. All rows are written in a single transaction: the
    transaction is committed on success and rolled back if the insert
    fails.

    Args:
        lst: Mapping of date string -> list of domain strings (duplicates
            allowed; each duplicate adds one to that domain's count).

    Returns:
        None. When *lst* yields no rows, no database connection is opened.

    Raises:
        Whatever MySQLdb raises on connection or insert failure; the
        exception is re-raised after the rollback.
    """
    insert_sql = ("INSERT INTO domains(domain_name, cnt, date_of_entry) "
                  "VALUES (%s, %s, %s)")

    # Build every row up front so the database work is one batched call.
    rows = []
    for date_of_entry, domains in lst.items():
        counts = defaultdict(int)
        for domain in domains:
            counts[domain] += 1
        for domain_name, cnt in counts.items():
            rows.append((domain_name, cnt, date_of_entry))

    if not rows:
        return

    # NOTE(review): credentials are hard-coded in the script; consider
    # loading them from configuration or the environment.
    db = MySQLdb.connect(host="localhost",    # your host, usually localhost
                         user="root",         # your username
                         passwd="abcdef1234", # your password
                         db="test")           # name of the data base
    try:
        cur = db.cursor()
        try:
            # NOTE(review): dates are passed through exactly as read from
            # the CSV; confirm the date_of_entry column accepts that format.
            cur.executemany(insert_sql, rows)
            db.commit()
        except Exception:
            db.rollback()
            raise
        finally:
            cur.close()
    finally:
        db.close()
- # ==========================================================
- # ======================= main program =======================================
# Script entry point: locate emails.csv in the working directory, parse it
# into per-date domains plus the distinct-domain list, then store the
# per-date data in the domains table.
path = get_file_path('emails.csv')
lst, d_lst = read_CSV(path)
update_DB(lst)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement