Advertisement
Guest User

Untitled

a guest
Jan 21st, 2020
131
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 10.71 KB | None | 0 0
  1. #!/usr/bin/python3
  2. import psycopg2
  3. from Object_classes import Gen
  4. from Object_classes import Eiwit
  5. from Object_classes import RNA
  6. from Object_classes import EC_nummer
  7. from Object_classes import Pathway
  8. from Object_classes import Cancer_pathway
  9.  
  10. import Insert_functions
  11.  
  12.  
  13. # Een functie om met een database te connecten, het returned de connection en de cursor.
  14. def connect(host, dbname, user, password):
  15. conn_string = "host= '%s' dbname= '%s' user= '%s' password='%s'" % (host, dbname, user, password)
  16. print("Connecting to database...")
  17. conn = psycopg2.connect(conn_string)
  18. cursor = conn.cursor()
  19. print("Connected!\n")
  20. return conn, cursor
  21.  
  22. # Leegt de database als hij gecalled wordt, en verbonden is.
  23. def empty_database(ed_cursor):
  24. # drop de tabellen als ze al bestaan
  25. ed_cursor.execute("""DROP TABLE IF EXISTS gen_A6 CASCADE;""")
  26. ed_cursor.execute("""DROP TABLE IF EXISTS RNA_A6 CASCADE;""")
  27. ed_cursor.execute("""DROP TABLE IF EXISTS eiwit_A6 CASCADE;""")
  28. ed_cursor.execute("""DROP TABLE IF EXISTS eiwit_EC_A6 CASCADE;""")
  29. ed_cursor.execute("""DROP TABLE IF EXISTS pathway_A6 CASCADE;""")
  30. ed_cursor.execute("""DROP TABLE IF EXISTS EC_pathway_A6 CASCADE;""")
  31. ed_cursor.execute("""DROP TABLE IF EXISTS EC_nummer_A6 CASCADE;""")
  32. ed_cursor.execute("""DROP TABLE IF EXISTS Pathway_Cancer_A6 CASCADE;""")
  33. ed_cursor.execute("""DROP TABLE IF EXISTS pathway_SHR_A6 CASCADE;""")
  34. print("Dropped previous tables with the same name if they existed...")
  35.  
  36. # Roept de verwijderfunctie aan, maakt de tabellen en roept vervolgens de foreign keys en constaints functie aan.
  37. def tabellen_maken(conn, cursor):
  38. empty_database(cursor)
  39. sql = """
  40. CREATE TABLE gen_A6(
  41. Entry_id INT PRIMARY KEY,
  42. Gen_id INT,
  43. Gen_naam VARCHAR(100),
  44. Gen_splicing_count_begin INT,
  45. Gen_splicing_count_end INT
  46. );
  47.  
  48.  
  49.  
  50. CREATE TABLE Pathway_Cancer_A6(
  51. Cancer_id INT PRIMARY KEY ,
  52. KO_code VARCHAR(7),
  53. Naam_cancer_protein TEXT
  54. );
  55.  
  56. CREATE TABLE Pathway_A6(
  57. Pathway TEXT PRIMARY KEY
  58. );
  59.  
  60.  
  61.  
  62. CREATE TABLE EC_nummer_A6(
  63. EC_nummer VARCHAR(25) PRIMARY KEY,
  64. Reactie TEXT,
  65.  
  66. Functie TEXT
  67.  
  68. );
  69.  
  70. --Koppel pathway en EC nummer
  71. CREATE TABLE EC_pathway_A6(
  72. Pathway TEXT REFERENCES Pathway_A6 (Pathway),
  73. EC_nummer VARCHAR(25) REFERENCES EC_nummer_A6 (EC_nummer),
  74. PRIMARY KEY (Pathway, EC_nummer)
  75. );
  76.  
  77. CREATE TABLE eiwit_A6(
  78. Eiwit_naam VARCHAR(200),
  79. Eiwit_sequence TEXT,
  80. Eiwit_id TEXT PRIMARY KEY,
  81. KO_code VARCHAR(7)
  82. );
  83. CREATE TABLE pathway_SHR_A6(
  84. Eiwit_id TEXT REFERENCES eiwit_A6(Eiwit_id),
  85. Pathway TEXT REFERENCES Pathway_A6 (Pathway),
  86. SHR_code VARCHAR(10),
  87. PRIMARY KEY (Pathway, SHR_code)
  88. );
  89.  
  90.  
  91.  
  92. --Koppel gen en eiwit
  93. CREATE TABLE rna_a6(
  94. Gen_id INT,
  95. Entry_id INT,
  96. Gen_splicing_count_begin INT ,
  97. Gen_splicing_count_end INT ,
  98. RNA_code VARCHAR(15),
  99. Eiwit_id TEXT REFERENCES eiwit_A6 (Eiwit_id),
  100. RNA_sequence TEXT,
  101. PRIMARY KEY (Entry_id, Gen_id, Gen_splicing_count_begin, Gen_splicing_count_end)
  102. );
  103.  
  104.  
  105. """
  106. cursor.execute(sql)
  107. conn.commit()
  108. print("New tables created!")
  109.  
  110.  
  111.  
  112. # gecodeerd om eiwit_gen_naam.txt uit te lezen en lists te returnen voor alle elementen
  113. # (make sure dat er geen spaties of tabs tussen de komma's zitten!)
  114. def bestanduitlezer(bestandnaam):
  115. inhoud = open(str(bestandnaam), "r")
  116. big_gen_list = []
  117. big_eiwit_list = []
  118. big_rna_list = []
  119. for regel in inhoud:
  120. regel.lstrip()
  121. inhoud_lijst = regel.split("@")
  122. inhoud_lijst.remove("\n")
  123. gen_list = []
  124. eiwit_list = []
  125. rna_list = []
  126. eiwit_list.append(inhoud_lijst[1])
  127. eiwit_list.append(inhoud_lijst[2])
  128. eiwit_list.append(inhoud_lijst[0])
  129.  
  130. if eiwit_list not in big_eiwit_list:
  131. big_eiwit_list.append(eiwit_list)
  132.  
  133. gen_list.append(inhoud_lijst[7])
  134. gen_list.append(inhoud_lijst[6])
  135.  
  136. gen_list.append(inhoud_lijst[3].split("..")[0])
  137. gen_list.append(inhoud_lijst[3].split("..")[1])
  138.  
  139. big_gen_list.append(gen_list)
  140.  
  141.  
  142.  
  143. rna_list.append(inhoud_lijst[4])
  144. rna_list.append(inhoud_lijst[5])
  145.  
  146. big_rna_list.append(rna_list)
  147.  
  148.  
  149. return big_gen_list, big_eiwit_list, big_rna_list
  150.  
  151.  
  152. def cancerbestanduitlezer(bestandnaam):
  153. big_cancer_list = []
  154. inhoud = open(str(bestandnaam), "r")
  155. for regel in inhoud:
  156. cancer_list = []
  157. regel.lstrip()
  158. inhoud_lijst = regel.split("@")
  159. inhoud_lijst.remove("\n")
  160. cancer_list.append(inhoud_lijst[0])
  161. cancer_list.append(inhoud_lijst[1])
  162. big_cancer_list.append(cancer_list)
  163. return big_cancer_list
  164.  
  165.  
  166. def pathwaybestanduitlezer(il_file):
  167. infile = open(il_file, 'r')
  168. bestand = infile.readlines()
  169. infile.close()
  170. gegevens = bestand
  171. big_pathway_list = []
  172. Eiwit_KO_Code_list = []
  173. big_ec_list=[]
  174. ec_dict={}
  175. for i in range(len(gegevens)):
  176. pathway_list = []
  177. data = gegevens[i].split('@')
  178.  
  179. shr, pathway, KO_code = data[0], data[1].split('|'), data[3]
  180.  
  181. if data[2] != " " and pathway != "":
  182. ec_nummer=list(data[2].strip("ec:").split(" "))
  183.  
  184.  
  185. for x in range(len(ec_nummer)):
  186.  
  187. if ec_nummer[x] not in ec_dict.keys():
  188. ec_dict[ec_nummer[x]] = pathway
  189.  
  190. # print(ec_dict.values(), "hi")
  191.  
  192.  
  193.  
  194. else:
  195. for key, value in ec_dict.items():
  196. if key == ec_nummer[x]:
  197. for y in pathway:
  198. if y not in value:
  199. ec_dict[ec_nummer[x]].append(pathway)
  200.  
  201. # print(ec_dict)
  202.  
  203. for key, value in ec_dict.items():
  204. lijst=[]
  205. lijst.append(key)
  206. lijst.append(value)
  207. print(lijst[1][0])
  208. big_ec_list.append(lijst)
  209. # print(key, value)
  210. #
  211.  
  212.  
  213.  
  214.  
  215.  
  216. pathway_list.append(shr)
  217. pathway_list.append(pathway)
  218. big_pathway_list.append(pathway_list)
  219. Eiwit_KO_Code_list.append(KO_code)
  220. return big_pathway_list, Eiwit_KO_Code_list, big_ec_list
  221.  
  222.  
  223. def ECbestanduitlezer(EC_file):
  224. infile = open(EC_file, 'r')
  225. gegevens = infile.readlines()
  226. infile.close()
  227. big_EC_list = []
  228. ec_nummer_lijst =[]
  229. ec_dict={}
  230.  
  231. for i in range(len(gegevens)):
  232. EC_list = []
  233. data = gegevens[i].split('@')
  234. EC, reactie, uitleg = data[0], data[1].split('|')[0], data[2]
  235.  
  236.  
  237.  
  238. if EC not in ec_nummer_lijst:
  239. ec_nummer_lijst.append(EC)
  240. EC_list.append(EC)
  241. EC_list.append(reactie)
  242. EC_list.append(uitleg)
  243. big_EC_list.append(EC_list)
  244.  
  245.  
  246. return big_EC_list
  247.  
  248.  
  249. # Input zijn 2 lijsten de functie maakt er objects van die die vervolgens in 2 andere lijsten stopt en dan returned
  250. def packager(p_eiwit_lijst, p_gen_lijst, p_rna_lijst, p_pathway_lijst, p_EC_lijst, p_cancer_lijst,
  251. p_Eiwit_KO_Code_list):
  252. genobjectlist, eiwitobjectlist, rna_objectlist, EC_objectlist, pathway_objectlist, cancer_objectlist,only_pathways = [], [], [], [], [], [],[]
  253. for genlijst in p_gen_lijst:
  254. gen_temp_object = Gen()
  255. gen_temp_object.Setgen_id(genlijst[0])
  256. gen_temp_object.Setgen_naam(genlijst[1])
  257. gen_temp_object.Setgen_splicing_count(genlijst[2:4])
  258. genobjectlist.append(gen_temp_object)
  259. for eiwitlijst in p_eiwit_lijst:
  260. eiwit_temp_object = Eiwit()
  261. eiwit_temp_object.Seteiwit_naam(eiwitlijst[0])
  262. eiwit_temp_object.Seteiwit_sequence(eiwitlijst[1])
  263. eiwit_temp_object.Seteiwit_id(eiwitlijst[2])
  264. for code in p_Eiwit_KO_Code_list:
  265. eiwit_temp_object.SetKO_code(code)
  266. eiwitobjectlist.append(eiwit_temp_object)
  267. for rnalijst in p_rna_lijst:
  268. rna_temp_object = RNA()
  269. rna_temp_object.SetRNA_sequence(rnalijst[1])
  270. rna_temp_object.SetRNA_code(rnalijst[0])
  271. rna_objectlist.append(rna_temp_object)
  272. for eclijst in p_EC_lijst:
  273. EC_temp_object = EC_nummer()
  274. EC_temp_object.SetEC_nummer(eclijst[0])
  275. EC_temp_object.Setuitleg(eclijst[1])
  276. EC_temp_object.Setreactie(eclijst[2])
  277. EC_objectlist.append(EC_temp_object)
  278. for pathwaylijst in p_pathway_lijst:
  279. pathway_temp_object = Pathway()
  280. pathway_temp_object.Setpathway(pathwaylijst[1])
  281. pathway_temp_object.SetSHR(pathwaylijst[0])
  282. pathway_objectlist.append(pathway_temp_object)
  283. for cancerlijst in p_cancer_lijst:
  284. cancer_temp_object = Cancer_pathway()
  285. cancer_temp_object.SetKO_code(cancerlijst[0])
  286. cancer_temp_object.Setnaam_prot(cancerlijst[1])
  287. cancer_objectlist.append(cancer_temp_object)
  288.  
  289. for x in p_pathway_lijst:
  290. for y in x[1]:
  291. if y !="" and y not in only_pathways:
  292. only_pathways.append(y)
  293.  
  294. return genobjectlist, eiwitobjectlist, rna_objectlist, EC_objectlist, pathway_objectlist, cancer_objectlist, only_pathways
  295.  
  296.  
  297. def main():
  298. conn, cursor = connect('145.97.16.241', 'bpapgea6_db', 'bpapgea6',
  299. 'BpapgeA6') # <---- verander deze string voor een andere database!
  300. tabellen_maken(conn, cursor)
  301.  
  302. pathway_list, Eiwit_KO_Code_list, ec_list = pathwaybestanduitlezer("kegg_pathway_EC.txt")
  303. EC_list = ECbestanduitlezer("kegg_reactie_functie.txt")
  304. gen_list, eiwit_list, rna_list = bestanduitlezer("tabel_info_nieuw.txt")
  305. cancer_list = cancerbestanduitlezer("cancer_pathway.txt")
  306.  
  307. genobjectlist, eiwitobjectlist, rna_objectlist, EC_objectlist, pathway_objectlist, cancer_objectlist, only_pathway_list = \
  308. packager(eiwit_list, gen_list, rna_list, pathway_list, EC_list, cancer_list, Eiwit_KO_Code_list)
  309.  
  310.  
  311.  
  312. Insert_functions.geninserts(conn, cursor, genobjectlist)
  313. Insert_functions.pathwayinserts(conn, cursor, only_pathway_list)
  314. Insert_functions.eiwitinserts(conn, cursor, eiwitobjectlist)
  315.  
  316. Insert_functions.cancerpathwayinserts(conn, cursor, cancer_objectlist)
  317. Insert_functions.pathway_SHR_inserts(conn, cursor, pathway_objectlist, eiwitobjectlist)
  318. Insert_functions.ec_nummerinserts(conn, cursor, EC_objectlist)
  319. Insert_functions.rnainserts(conn, cursor, genobjectlist, rna_objectlist, eiwitobjectlist)
  320. Insert_functions.koppeltabelinserts(conn, cursor, ec_list)
  321.  
  322.  
  323. print("Done!")
  324.  
  325. conn.close()
  326.  
  327.  
  328. if __name__ == "__main__":
  329. main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement