Advertisement
Guest User

Untitled

a guest
Dec 4th, 2016
76
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 15.02 KB | None | 0 0
  1. # Bradley Singer 997990414, Dimitar Vasilev 999307063
  2.  
  3. import os
  4. import sys # command line arguement (specify excel file to read at command line)
  5. import psycopg2 # Operate on sql database
  6. import csv # needed to read csv files
  7. import time
  8.  
  9. CUR_FILE = ''
  10. MID = 0
  11. user = os.environ['USER']
  12. conn = psycopg2.connect("dbname=fakeu user=%s" % user)
  13. cursor = conn.cursor()
  14.  
  15. dbDict = {'Course': ('CID', 'TERM', 'SUBJ', 'CRSE', 'SEC', 'UNITSMIN', 'UNITSMAX'),
  16. 'Student': ('SID','SURNAME', 'PREFNAME', 'EMAIL'),
  17. 'Enroll': ('CID', 'TERM', 'SEAT', 'SID', 'LEVEL', 'UNITS', 'CLASS', 'MAJOR', 'GRADE', 'GP', 'STATUS'),
  18. 'Has': ('MID', 'CID', 'TERM', 'INSTRUCTORS', 'TYPE', 'DAYS', 'STARTTIME', 'ENDTIME', 'BUILD', 'ROOM') }
  19.  
  20. fileDict = {'Course': ("CID", "TERM", "SUBJ", "CRSE", "SEC", "UNITS"),
  21. 'Meeting': ("INSTRUCTOR(S)", "TYPE", "DAYS", "TIME", "BUILD", "ROOM"),
  22. 'Student': ("SEAT","SID","SURNAME", "PREFNAME", "LEVEL", "UNITS", "CLASS", "MAJOR", "GRADE", "STATUS", "EMAIL") }
  23.  
  24. key_loc = {'Course': [0, 1],
  25. 'Student': [0],
  26. 'Enroll':[0, 1, 3],
  27. 'Has': [0, 1, 2]}
  28.  
  29. letter_to_gp = { 'A+': 4.0,
  30. 'A': 4.0,
  31. 'A-': 3.7,
  32. 'B+': 3.3,
  33. 'B': 3.0,
  34. 'B-': 2.7,
  35. 'C+': 2.3,
  36. 'C': 2.0,
  37. 'C-': 1.7,
  38. 'D+': 1.3,
  39. 'D': 1.0,
  40. 'D-': 0.7,
  41. 'F+': 0.3,
  42. 'F': 0.0,
  43. 'F-': 0.0
  44. }
  45.  
  46. course_table_tuples = []
  47. meeting_table_tuples = []
  48. student_table_tuples = []
  49.  
  50. alldata = dict()
  51. summerCourses = dict()
  52.  
  53. coursekeys = dict()
  54. studentkeys = dict()
  55. enrollkeys = dict()
  56. haskeys = dict()
  57.  
  58. keys = {'Course': coursekeys, 'Student':studentkeys, 'Enroll':enrollkeys, 'Has':haskeys}
  59.  
  60. ##### create database tables #####
  61. def initialize():
  62. cursor.execute('''CREATE TABLE IF NOT EXISTS Course (
  63. CID VARCHAR(50),
  64. TERM VARCHAR(50),
  65. SUBJ VARCHAR(50),
  66. CRSE INTEGER,
  67. SEC SMALLINT,
  68. UNITSMIN INTEGER,
  69. UNITSMAX INTEGER,
  70. PRIMARY KEY (CID, TERM)
  71. );
  72.  
  73. CREATE TABLE IF NOT EXISTS Student (
  74. SID INTEGER PRIMARY KEY,
  75. SURNAME VARCHAR(50),
  76. PREFNAME VARCHAR(50),
  77. EMAIL VARCHAR(50)
  78. );
  79.  
  80. CREATE TABLE IF NOT EXISTS Enroll (
  81. CID VARCHAR(50),
  82. TERM VARCHAR(50),
  83. SEAT SMALLINT,
  84. SID INTEGER,
  85. LEVEL VARCHAR(50),
  86. UNITS VARCHAR(50),
  87. CLASS VARCHAR(50),
  88. MAJOR VARCHAR(50),
  89. GRADE VARCHAR(10),
  90. GP NUMERIC(2,1),
  91. STATUS VARCHAR(50),
  92. PRIMARY KEY (CID, TERM, SID)
  93. );
  94.  
  95. CREATE TABLE IF NOT EXISTS Has (
  96. MID INTEGER,
  97. CID VARCHAR(50),
  98. TERM VARCHAR(50),
  99. INSTRUCTORS VARCHAR(30),
  100. TYPE VARCHAR(50),
  101. DAYS VARCHAR(50),
  102. STARTTIME VARCHAR(50),
  103. ENDTIME VARCHAR(50),
  104. BUILD VARCHAR(50),
  105. ROOM VARCHAR(5),
  106. PRIMARY KEY (MID, CID, TERM)
  107. );
  108.  
  109. ''')
  110. conn.commit()
  111.  
  112.  
  113. ##### add tuples to database #####
  114. def addValue(table, tup):
  115.  
  116. attributes = dbDict[table]
  117. query = '''INSERT INTO %s%s VALUES %s''' % (table, str(attributes).replace("'", ''), str(tup))
  118.  
  119. # strips the string of attribute single quotes but leaves values single quotes
  120. query = query.replace('"', '')
  121.  
  122. print(query)
  123. cursor.execute(query)
  124. conn.commit()
  125.  
  126.  
  127. ##### check if a tuple has not null keys #####
  128. def checkKeys(table, tup):
  129. check = False
  130. for i in key_loc[table]:
  131. if tup[i] != '':
  132. check = True
  133. return check
  134.  
  135. ##### check that a tuple doesn't have a duplicate key and its key attributes are not null #####
  136. def checkKeys(table, tup):
  137. check = 0
  138. global CUR_FILE
  139. tupkeys = ()
  140. quarter = CUR_FILE.split('_')[1]
  141.  
  142. for i in key_loc[table]:
  143. tupkeys += (tup[i],) # get the key attr from our tuple
  144. if tup[i] == '': # if one of the key attr is null, return False
  145. return check
  146. if tupkeys in keys[table]: # make sure we haven't added duplicate keys
  147. if table == 'Course':
  148. return 1
  149. if '3' in quarter and table == 'Course':
  150. newkey = ()
  151. for i in tupkeys:
  152. newkey += ((i + 'a'),)
  153. keys[table][newkey] = True
  154. check = 2
  155. else:
  156. check = 0
  157. else:
  158. keys[table][tupkeys] = True
  159. check = 1
  160.  
  161. return check
  162.  
  163.  
  164. ##### returns True if tuple not in dictionary #####
  165. def checkUnique(tup):
  166. if tup in alldata:
  167. return False
  168. else:
  169. alldata[tup] = 'true'
  170. return True
  171.  
  172.  
  173. ##### create tuples out of global arrays and prep for insertion into database #####
  174. def create_tuples():
  175.  
  176. staredpair = ()
  177. conflictingCourse = False
  178. # if the tables are not empty
  179. if meeting_table_tuples and student_table_tuples and course_table_tuples:
  180.  
  181. course = course_table_tuples[0] # Course table should only have 1 value
  182. check = checkKeys('Course', course)
  183.  
  184. # check if course tuple is unique in database and its keys are not null and its not a conflicting course
  185. if checkUnique(str(course)) and check == 1:
  186. addValue('Course', course)
  187.  
  188. elif check == 2: # if it is a conflicting course, create new key pair
  189. print('conflicting course')
  190. conflictingCourse = True
  191. diffcourse = ()
  192. staredpair = ((course[0] + 'a'),(course[1] + 'a')) # cid, term with 'a' appended to each
  193. diffcourse += staredpair
  194. for x in range(2, len(course)):
  195. diffcourse += (course[x],) # subj, crse, sec, units
  196. addValue('Course', diffcourse)
  197.  
  198. for meeting in meeting_table_tuples:
  199.  
  200. has_tup = ()
  201. has_tup += (meeting[0],) # MID
  202. if conflictingCourse:
  203. has_tup += staredpair # cid*, term* from course if conflicting course
  204. else:
  205. has_tup += (course[0], course[1])
  206. for x in range(1,len(meeting)): # everything else from meeting that we missed
  207. has_tup += (meeting[x],) # instructor, type, days, starttime, endtime, build, room
  208. # check if has tuple is unique in database
  209. if checkUnique(str(has_tup)) and checkKeys('Has', has_tup):
  210. addValue('Has', has_tup)
  211.  
  212.  
  213. for student in student_table_tuples:
  214.  
  215. # Last tuple in student array will be empty because that is the line at which we call create_tuple()
  216. if student != ('',):
  217. student_tup = ()
  218. student_tup += (student[1], student[2],student[3],student[10]) # sid, surname, prefname, email
  219.  
  220. # check if student tuple is unique in database
  221. if checkUnique(str(student_tup)) and checkKeys('Student', student_tup):
  222. addValue('Student', student_tup)
  223.  
  224. enroll_tup = ()
  225. if conflictingCourse:
  226. enroll_tup += staredpair # cid*, term* from course if conflicting course
  227. else:
  228. enroll_tup += (course[0], course[1])
  229. enroll_tup += (student[0],student[1]) # seat, sid from student
  230. for x in range(4,len(student) - 1): # everything else from student except email
  231. enroll_tup += (student[x],) # level, units, class, major, grade, gp, status
  232.  
  233. # check if enroll tuple is unique in database
  234. if checkUnique(str(enroll_tup)) and checkKeys('Enroll', enroll_tup):
  235. addValue('Enroll', enroll_tup)
  236.  
  237.  
  238. del course_table_tuples[:]
  239. del meeting_table_tuples[:]
  240. del student_table_tuples[:]
  241.  
  242. #### convert time attribute to minutes #####
  243. def convert_time(time):
  244. digit = time.split(' ')[0]
  245. day = time.split(' ')[1]
  246. hour = int(digit.split(':')[0])
  247. minn = int(digit.split(':')[1])
  248. if day == 'PM':
  249. if hour == 12:
  250. minutes = 60*(12) + minn
  251. else:
  252. minutes = 60*(hour + 12) + minn
  253. elif day == 'AM':
  254. if hour == 12:
  255. minutes = minn
  256. else:
  257. minutes = 60*(hour) + minn
  258.  
  259. return minutes
  260.  
  261. ##### format each attribute of each row #####
  262. def parseAttr(attr):
  263. attr = attr.replace('(', '')
  264. attr = attr.replace(')', '')
  265. attr = attr.strip('"')
  266. attr = attr.strip("'")
  267. attr = attr.replace("'", '')
  268. return attr
  269.  
  270.  
  271. ##### read in the file #####
  272. def readCSV(ifilepath):
  273.  
  274. # open the file
  275. csvfile = open(ifilepath)
  276. reader = csv.reader(csvfile)
  277.  
  278. # initial conditions
  279. studentTableReached = False
  280. meetingTableReached = False
  281. courseTableReached = False
  282. row_count = -1
  283. empty_line_count = 0
  284. reader = csv.reader(csvfile)
  285. global MID
  286.  
  287. # read file line by line
  288. for row in reader:
  289. row_count += 1
  290.  
  291. # First line in file is empty, skip
  292. if row_count == 0:
  293. continue
  294.  
  295. insert_row = []
  296. newrow = tuple(';'.join(row).strip("'").split(';')) # convert each line into tuple
  297.  
  298. # For every 3 empty lines you come across, corresponding tuples should be created
  299. if newrow == ('',):
  300. courseTableReached = False
  301. meetingTableReached = False
  302. studentTableReached = False
  303. empty_line_count += 1
  304. if empty_line_count == 3:
  305. create_tuples()
  306. empty_line_count = 0
  307.  
  308.  
  309.  
  310. # If course table reached, need to create unitsmin and unitsmax attributes for database
  311. if courseTableReached == True:
  312.  
  313. attr_count = 0
  314.  
  315. for attr in newrow:
  316.  
  317. attr = parseAttr(attr)
  318.  
  319. # units attribute is specified as range, create min and max
  320. if '.000' in attr:
  321. attr = attr.replace('.000', '')
  322. units_min = attr.split('-')[0].replace('"', '')
  323. units_max = attr.split('-')[1].replace('"', '')
  324. insert_row.append(units_min)
  325. insert_row.append(units_max)
  326.  
  327. # units attribute is single integer, copy twice
  328. elif attr_count == 5:
  329. insert_row.append(attr)
  330. insert_row.append(attr)
  331.  
  332. # attribute is not unit
  333. else:
  334. insert_row.append(attr)
  335.  
  336. attr_count += 1
  337.  
  338.  
  339. insert_tup = tuple(insert_row)
  340. course_table_tuples.append(insert_tup)
  341.  
  342.  
  343. # meeting table has been reached
  344. if meetingTableReached == True:
  345. MID = MID + 1
  346. insert_row.append(" \'" + str(MID) + "\'")
  347.  
  348. attr_count = 0
  349. for attr in newrow:
  350. attr = parseAttr(attr)
  351.  
  352. #split time into 2 attributes
  353. if '-' in attr:
  354. time_interval = attr.split(' - ')
  355. for i in range(len(time_interval)):
  356. time = convert_time(time_interval[i])
  357. insert_row.append(time)
  358.  
  359. elif attr_count == 3:
  360. insert_row.append(attr)
  361. insert_row.append(attr)
  362. elif attr_count == 8: # Letter Grade
  363. insert_row.append(attr)
  364. if letter_to_gp[attr]:
  365. insert_row.append(letter_to_gp[attr])
  366. else:
  367. insert_row.append(5.0) # not in gp scale
  368. else:
  369.  
  370. insert_row.append(attr)
  371.  
  372. attr_count += 1
  373.  
  374. insert_tup = tuple(insert_row)
  375. meeting_table_tuples.append(insert_tup)
  376.  
  377.  
  378.  
  379. # student table has been reached
  380. if studentTableReached == True:
  381. insert_arr = []
  382. attr_count = 0
  383. for attr in newrow:
  384. attr = parseAttr(attr)
  385. if attr_count == 5 and attr == '':
  386. attr = '-1'
  387.  
  388. insert_row.append(attr)
  389. insert_tup = tuple(insert_row)
  390. attr_count += 1
  391. student_table_tuples.append(insert_tup)
  392.  
  393. # set flags for eaech row
  394. if newrow == fileDict['Course']:
  395. courseTableReached = True
  396.  
  397. elif newrow == fileDict['Meeting']:
  398. meetingTableReached = True
  399.  
  400. elif newrow == fileDict['Student']:
  401. studentTableReached = True
  402.  
  403.  
  404. create_tuples()
  405.  
  406. ##### for debugging purposes #####
  407. def destroy():
  408.  
  409. cursor.execute('''DROP TABLE IF EXISTS Course; DROP TABLE IF EXISTS Meeting; DROP TABLE IF EXISTS Student; DROP TABLE IF EXISTS Enroll; DROP TABLE IF EXISTS Has''')
  410. conn.commit()
  411.  
  412.  
  413. def main():
  414. global CUR_FILE
  415. destroy()
  416. initialize()
  417.  
  418. print("inserting into database")
  419. tstart = time.time()
  420.  
  421. if len(sys.argv) == 2:
  422. start_time = time.time()
  423. ifilepath = str(sys.argv[1])
  424. sys.stdout.write(ifilepath + ": ")
  425. readCSV('Grades/' + ifilepath)
  426. end_time = time.time()
  427. total_time = end_time - start_time
  428. print(total_time)
  429.  
  430. else:
  431. for file in os.listdir('./Grades'):
  432. if file.endswith('.csv'):
  433. CUR_FILE = file
  434. start_time = time.time()
  435. sys.stdout.write(file + ": ")
  436. readCSV('Grades/'+ file)
  437. end_time = time.time()
  438. total_time = end_time - start_time
  439. print(total_time)
  440.  
  441.  
  442. tend = time.time()
  443. total = tend - tstart
  444. print('')
  445. print('total time: ' + str(total))
  446. conn.close()
  447.  
  448.  
  449. if __name__ == '__main__': main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement