Guest User

Untitled

a guest
Oct 20th, 2017
78
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.49 KB | None | 0 0
  1. import re
  2. import MySQLdb
  3.  
  4. #TODO: handle the case where annot.csv has windows /r line endings
  5. annot_CSV_filename = "Annot.csv"
  6.  
  7. #Connect to local sqldb called kanotdb
  8. dbconnection = MySQLdb.connect(host = "localhost", user = "kanot", passwd = "kanot", db = "kanotdb")
  9. dbcursor = dbconnection.cursor()
  10.  
  11. #TODO: some code here which kills everything
  12.  
  13. #create a table for hodling the BFannot data
  14. querry = """
  15. CREATE TABLE BFanot (
  16. rs varchar(15) NOT NULL,
  17. chr varchar(5) NOT NULL,
  18. maf float NOT NULL,
  19. nssnp int DEFAULT '0',
  20. prom int DEFAULT '0',
  21. eqtl int DEFAULT '0',
  22. BFlow float NOT NULL,
  23. BFmed float NOT NULL,
  24. BFhigh float NOT NULL,
  25. PRIMARY KEY (rs)
  26. ) TYPE=innodb;
  27. """
  28. dbcursor.execute(querry)
  29.  
  30. #WHY DOES THIS NOT WORK? IT WORKS IN MYSQL CLI CLIENT
  31. querry = "insert into BFanot (rs, chr, maf, nssnp, prom, eqtl, BFlow, BFmed, BFhigh) values ('tt', 'ff', '1.2', '1', '1', '0', '1.3', '1.4', '1.5')"
  32. dbcursor.execute(querry)
  33.  
  34. #define a regex suitable for extracting the bfannot data items
  35. rightfile = r"nsSNP"
  36. BFAnot_regex = r"(?P<rs>\S+),(?P<chr>\S+),(?P<maf>\S+),(?P<nssnp>\S+),(?P<prom>\S+),(?P<eqtl>\S+),(?P<BFlow>\S+),(?P<BFmed>\S+),(?P<BFhigh>\S+)"
  37. BFAnot_decoder = re.compile(BFAnot_regex)
  38.  
  39. #open file with anot data in
  40. anot_data = open("annot.csv", "r")
  41.  
  42. #retrieve the first line, which should be column headings
  43. topline = anot_data.readline()
  44. #TODO: test that it is correct
  45.  
  46.  
  47. #TODO: comment out till the above works
  48. dummy = """
  49. #For each line of the csv file
  50. for lines in anot_data:
  51. #Use the decoder regex to retrieve data items
  52. line_data = BFAnot_decoder.match(lines)
  53.  
  54. #Write the data items into the database
  55. querry = "INSERT INTO BFanot (rs, chr, maf, nssnp, prom, eqtl, BFlow, BFmed, BFhigh) VALUES ('"
  56. querry = querry + line_data.group('rs') + "', '"
  57. querry = querry + line_data.group('chr') + "', '"
  58. querry = querry + str(float(line_data.group('maf'))) + "', '" #convert str -> float -> str to guarantee decimal notation
  59. querry = querry + line_data.group('nssnp') + "', '"
  60. querry = querry + line_data.group('prom') + "', '"
  61. querry = querry + line_data.group('eqtl') + "', '"
  62. querry = querry + str(float(line_data.group('BFlow'))) + "', '"
  63. querry = querry + str(float(line_data.group('BFhigh'))) + "', '"
  64. querry = querry + str(float(line_data.group('BFmed'))) + "')"
  65. dbcursor.execute(querry)
  66. """
  67.  
  68. #close the file
  69. anot_data.close()
  70.  
  71. #close connection to the database
  72. dbcursor.close()
  73. dbconnection.close()
Add Comment
Please, Sign In to add comment