Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import re
- import MySQLdb
- #TODO: handle the case where annot.csv has windows /r line endings
- annot_CSV_filename = "Annot.csv"
- #Connect to local sqldb called kanotdb
- dbconnection = MySQLdb.connect(host = "localhost", user = "kanot", passwd = "kanot", db = "kanotdb")
- dbcursor = dbconnection.cursor()
- #TODO: some code here which kills everything
- #create a table for hodling the BFannot data
- querry = """
- CREATE TABLE BFanot (
- rs varchar(15) NOT NULL,
- chr varchar(5) NOT NULL,
- maf float NOT NULL,
- nssnp int DEFAULT '0',
- prom int DEFAULT '0',
- eqtl int DEFAULT '0',
- BFlow float NOT NULL,
- BFmed float NOT NULL,
- BFhigh float NOT NULL,
- PRIMARY KEY (rs)
- ) TYPE=innodb;
- """
- dbcursor.execute(querry)
- #WHY DOES THIS NOT WORK? IT WORKS IN MYSQL CLI CLIENT
- querry = "insert into BFanot (rs, chr, maf, nssnp, prom, eqtl, BFlow, BFmed, BFhigh) values ('tt', 'ff', '1.2', '1', '1', '0', '1.3', '1.4', '1.5')"
- dbcursor.execute(querry)
- #define a regex suitable for extracting the bfannot data items
- rightfile = r"nsSNP"
- BFAnot_regex = r"(?P<rs>\S+),(?P<chr>\S+),(?P<maf>\S+),(?P<nssnp>\S+),(?P<prom>\S+),(?P<eqtl>\S+),(?P<BFlow>\S+),(?P<BFmed>\S+),(?P<BFhigh>\S+)"
- BFAnot_decoder = re.compile(BFAnot_regex)
- #open file with anot data in
- anot_data = open("annot.csv", "r")
- #retrieve the first line, which should be column headings
- topline = anot_data.readline()
- #TODO: test that it is correct
- #TODO: comment out till the above works
- dummy = """
- #For each line of the csv file
- for lines in anot_data:
- #Use the decoder regex to retrieve data items
- line_data = BFAnot_decoder.match(lines)
- #Write the data items into the database
- querry = "INSERT INTO BFanot (rs, chr, maf, nssnp, prom, eqtl, BFlow, BFmed, BFhigh) VALUES ('"
- querry = querry + line_data.group('rs') + "', '"
- querry = querry + line_data.group('chr') + "', '"
- querry = querry + str(float(line_data.group('maf'))) + "', '" #convert str -> float -> str to guarantee decimal notation
- querry = querry + line_data.group('nssnp') + "', '"
- querry = querry + line_data.group('prom') + "', '"
- querry = querry + line_data.group('eqtl') + "', '"
- querry = querry + str(float(line_data.group('BFlow'))) + "', '"
- querry = querry + str(float(line_data.group('BFhigh'))) + "', '"
- querry = querry + str(float(line_data.group('BFmed'))) + "')"
- dbcursor.execute(querry)
- """
- #close the file
- anot_data.close()
- #close connection to the database
- dbcursor.close()
- dbconnection.close()
Add Comment
Please, Sign In to add comment