Guest User

Untitled

a guest
Mar 18th, 2018
100
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 18.79 KB | None | 0 0
  1. {
  2. "cells": [
  3. {
  4. "cell_type": "code",
  5. "execution_count": 2,
  6. "metadata": {
  7. "collapsed": true
  8. },
  9. "outputs": [],
  10. "source": [
  11. "import os\n",
  12. "import pandas as pd\n",
  13. "# requires dammit env\n",
  14. "# source activate dammit\n",
  15. "from dammit.fileio.gff3 import GFF3Parser"
  16. ]
  17. },
  18. {
  19. "cell_type": "code",
  20. "execution_count": 3,
  21. "metadata": {},
  22. "outputs": [
  23. {
  24. "name": "stdout",
  25. "output_type": "stream",
  26. "text": [
  27. "L_goodei_transfer_2\n",
  28. "L_goodei_BW_3\n",
  29. "L_goodei_BW_1\n",
  30. "L_goodei_transfer_3\n",
  31. "L_goodei_transfer_1\n",
  32. "L_goodei_BW_2\n",
  33. "L_goodei_FW_2\n",
  34. "L_goodei_FW_1\n",
  35. "L_goodei_FW_3\n",
  36. "F_notti_FW_2\n",
  37. "F_notti_FW_1\n",
  38. "L_parva_FW_3\n",
  39. "L_parva_FW_1\n",
  40. "L_parva_transfer_3\n",
  41. "L_parva_transfer_1\n",
  42. "L_parva_FW_2\n",
  43. "L_parva_transfer_2\n",
  44. "L_parva_BW_2\n",
  45. "L_parva_BW_1\n",
  46. "L_parva_BW_3\n",
  47. "F_similis_transfer_1\n",
  48. "F_similis_BW_1\n",
  49. "F_similis_transfer_3\n",
  50. "F_similis_BW_3\n",
  51. "F_similis_BW_2\n",
  52. "F_similis_transfer_2\n",
  53. "F_similis_FW_2\n",
  54. "F_similis_FW_3\n",
  55. "F_similis_FW_1\n",
  56. "F_olivaceous_FW_2\n",
  57. "F_olivaceous_transfer_2\n",
  58. "F_olivaceous_FW_1\n",
  59. "F_olivaceous_transfer_1\n",
  60. "F_olivaceous_FW_3\n",
  61. "F_olivaceous_BW_3\n",
  62. "F_olivaceous_BW_1\n",
  63. "F_olivaceous_BW_2\n",
  64. "F_notatus_FW_2\n",
  65. "F_notatus_FW_3\n",
  66. "F_notatus_FW_1\n",
  67. "F_notatus_transfer_1\n",
  68. "F_notatus_transfer_3\n",
  69. "F_notatus_BW_1\n",
  70. "F_notatus_BW_3\n",
  71. "F_notatus_transfer_2\n",
  72. "F_notatus_BW_2\n",
  73. "F_heteroclitusMDPP_FW_3\n",
  74. "F_heteroclitusMDPP_FW_1\n",
  75. "F_heteroclitusMDPP_FW_2\n",
  76. "F_heteroclitusMDPP_BW_2\n",
  77. "F_heteroclitusMDPP_transfer_2\n",
  78. "F_heteroclitusMDPP_transfer_3\n",
  79. "F_heteroclitusMDPP_BW_1\n",
  80. "F_heteroclitusMDPP_transfer_1\n",
  81. "F_heteroclitusMDPP_BW_3\n",
  82. "F_heteroclitusMDPL_transfer_3\n",
  83. "F_heteroclitusMDPL_FW_2\n",
  84. "F_heteroclitusMDPL_transfer_1\n",
  85. "F_heteroclitusMDPL_FW_3\n",
  86. "F_heteroclitusMDPL_transfer_2\n",
  87. "F_heteroclitusMDPL_FW_1\n",
  88. "F_heteroclitusMDPL_BW_1\n",
  89. "F_heteroclitusMDPL_BW_3\n",
  90. "F_heteroclitusMDPL_BW_2\n",
  91. "F_parvapinis_FW_1\n",
  92. "F_parvapinis_transfer_2\n",
  93. "F_parvapinis_FW_3\n",
  94. "F_parvapinis_transfer_1\n",
  95. "F_parvapinis_FW_2\n",
  96. "F_parvapinis_BW_2\n",
  97. "F_parvapinis_BW_3\n",
  98. "F_parvapinis_BW_1\n",
  99. "F_diaphanus_BW_1\n",
  100. "F_diaphanus_BW_2\n",
  101. "F_diaphanus_transfer_2\n",
  102. "F_diaphanus_FW_2\n",
  103. "F_diaphanus_transfer_1\n",
  104. "F_diaphanus_FW_3\n",
  105. "F_catanatus_BW_2\n",
  106. "F_catanatus_transfer_1\n",
  107. "F_catanatus_BW_3\n",
  108. "F_catanatus_transfer_2\n",
  109. "F_catanatus_BW_1\n",
  110. "F_catanatus_FW_1\n",
  111. "F_catanatus_FW_2\n",
  112. "F_zebrinus_FW_2\n",
  113. "F_zebrinus_FW_1\n",
  114. "F_zebrinus_BW_1\n",
  115. "F_zebrinus_BW_2\n",
  116. "F_sciadicus_transfer_1\n",
  117. "F_sciadicus_BW_1\n",
  118. "F_sciadicus_FW_1\n",
  119. "F_sciadicus_FW_2\n",
  120. "F_grandis_transfer_3\n",
  121. "F_grandis_FW_2\n",
  122. "F_grandis_transfer_1\n",
  123. "F_grandis_FW_3\n",
  124. "F_grandis_transfer_2\n",
  125. "F_grandis_FW_1\n",
  126. "F_grandis_BW_1\n",
  127. "F_grandis_BW_3\n",
  128. "F_grandis_BW_2\n",
  129. "F_rathbuni_BW_2\n",
  130. "F_rathbuni_BW_3\n",
  131. "F_rathbuni_BW_1\n",
  132. "F_rathbuni_FW_1\n",
  133. "F_rathbuni_transfer_2\n",
  134. "F_rathbuni_FW_3\n",
  135. "F_rathbuni_transfer_1\n",
  136. "F_rathbuni_FW_2\n",
  137. "F_rathbuni_transfer_3\n",
  138. "F_chrysotus_BW_2\n",
  139. "F_chrysotus_BW_1\n",
  140. "F_chrysotus_BW_3\n",
  141. "F_chrysotus_FW_3\n",
  142. "F_chrysotus_transfer_1\n",
  143. "F_chrysotus_FW_1\n",
  144. "F_chrysotus_FW_2\n",
  145. "F_chrysotus_transfer_2\n",
  146. "A_xenica_FW_2\n",
  147. "A_xenica_transfer_2\n",
  148. "A_xenica_FW_1\n",
  149. "A_xenica_transfer_3\n",
  150. "A_xenica_transfer_1\n",
  151. "A_xenica_FW_3\n",
  152. "A_xenica_BW_3\n",
  153. "A_xenica_BW_1\n",
  154. "A_xenica_BW_2\n"
  155. ]
  156. }
  157. ],
  158. "source": [
  159. "species_dirs = \"salmon_denovo_by_species\"\n",
  160. "out_dir = \"/Users/johnsolk/Documents/UCDavis/Whitehead/intermediate_denovo_annotation_files/\"\n",
  161. "dirs = os.listdir(species_dirs)\n",
  162. "for species in dirs:\n",
  163. " if species != \".DS_Store\":\n",
  164. " species_gene_file = species + \"_gene_transcript_table.txt\"\n",
  165. " files = os.listdir(species_dirs + \"/\" + species)\n",
  166. " for quant_dir in files:\n",
  167. " if quant_dir != \".DS_Store\":\n",
  168. " replicate = quant_dir.split(\".\")[0]\n",
  169. " print(replicate)\n",
  170. " quant_files = os.listdir(species_dirs + \"/\" + species + \"/\" + quant_dir)\n",
  171. " for file in quant_files:\n",
  172. " if file.endswith(\".sf\"):\n",
  173. " with open(species_dirs + \"/\" + species + \"/\" + quant_dir + \"/\" + file) as qf:\n",
  174. " header = next(qf).split(\"\\t\")\n",
  175. " expression_quant_data = qf.readlines()\n",
  176. " with open(out_dir + species_gene_file,\"w\") as gt:\n",
  177. " for line in expression_quant_data:\n",
  178. " transcript = line.split(\"\\t\")[0]\n",
  179. " gene = transcript[:-3]\n",
  180. " gt.write(transcript + \"\\t\")\n",
  181. " gt.write(gene + \"\\n\")"
  182. ]
  183. },
  184. {
  185. "cell_type": "code",
  186. "execution_count": 4,
  187. "metadata": {},
  188. "outputs": [
  189. {
  190. "name": "stdout",
  191. "output_type": "stream",
  192. "text": [
  193. "['F_notatus.trinity_out.Trinity.fasta.dammit.gff3', '.DS_Store', 'F_rathbuni.trinity_out.Trinity.fasta.dammit.gff3', 'F_diaphanus.trinity_out.Trinity.fasta.dammit.gff3', 'F_chrysotus.trinity_out.Trinity.fasta.dammit.gff3', 'L_goodei.trinity_out.Trinity.fasta.dammit.gff3', 'F_heteroclitusMDPL.trinity_out.Trinity.fasta.dammit.gff3', 'F_heteroclitusMDPP.trinity_out.Trinity.fasta.dammit.gff3', 'F_similis.trinity_out.Trinity.fasta.dammit.gff3', 'L_parva.trinity_out.Trinity.fasta.dammit.gff3', 'A_xenica.trinity_out.Trinity.fasta.dammit.gff3', 'F_catanatus.trinity_out.Trinity.fasta.dammit.gff3', 'F_zebrinus.trinity_out.Trinity.fasta.dammit.gff3', 'F_parvapinis.trinity_out.Trinity.fasta.dammit.gff3', 'F_grandis.trinity_out.Trinity.fasta.dammit.gff3', 'F_sciadicus.trinity_out.Trinity.fasta.dammit.gff3', 'F_olivaceous.trinity_out.Trinity.fasta.dammit.gff3', 'F_notti.trinity_out.Trinity.fasta.dammit.gff3']\n",
  194. "F_notatus.trinity_out.Trinity.fasta.dammit.gff3\n"
  195. ]
  196. },
  197. {
  198. "name": "stderr",
  199. "output_type": "stream",
  200. "text": [
  201. "/Users/johnsolk/anaconda3/lib/python3.6/site-packages/dammit/fileio/gff3.py:73: ParserWarning: Both a converter and dtype were specified for column attributes - only the converter will be used\n",
  202. " dtype=dict(self.columns)):\n",
  203. "/Users/johnsolk/anaconda3/lib/python3.6/site-packages/dammit/fileio/gff3.py:73: ParserWarning: Both a converter and dtype were specified for column attributes - only the converter will be used\n",
  204. " dtype=dict(self.columns)):\n"
  205. ]
  206. },
  207. {
  208. "name": "stdout",
  209. "output_type": "stream",
  210. "text": [
  211. "F_rathbuni.trinity_out.Trinity.fasta.dammit.gff3\n",
  212. "F_diaphanus.trinity_out.Trinity.fasta.dammit.gff3\n",
  213. "F_chrysotus.trinity_out.Trinity.fasta.dammit.gff3\n",
  214. "L_goodei.trinity_out.Trinity.fasta.dammit.gff3\n",
  215. "F_heteroclitusMDPL.trinity_out.Trinity.fasta.dammit.gff3\n",
  216. "F_heteroclitusMDPP.trinity_out.Trinity.fasta.dammit.gff3\n",
  217. "F_similis.trinity_out.Trinity.fasta.dammit.gff3\n",
  218. "L_parva.trinity_out.Trinity.fasta.dammit.gff3\n",
  219. "A_xenica.trinity_out.Trinity.fasta.dammit.gff3\n",
  220. "F_catanatus.trinity_out.Trinity.fasta.dammit.gff3\n",
  221. "F_zebrinus.trinity_out.Trinity.fasta.dammit.gff3\n",
  222. "F_parvapinis.trinity_out.Trinity.fasta.dammit.gff3\n",
  223. "F_grandis.trinity_out.Trinity.fasta.dammit.gff3\n",
  224. "F_sciadicus.trinity_out.Trinity.fasta.dammit.gff3\n",
  225. "F_olivaceous.trinity_out.Trinity.fasta.dammit.gff3\n",
  226. "F_notti.trinity_out.Trinity.fasta.dammit.gff3\n"
  227. ]
  228. }
  229. ],
  230. "source": [
  231. "annotations_dir = \"/Users/johnsolk/Documents/UCDavis/Whitehead/gff_annotations/\"\n",
  232. "annotations = os.listdir(annotations_dir)\n",
  233. "print(annotations)\n",
  234. "for annotation in annotations:\n",
  235. " if annotation != \".DS_Store\":\n",
  236. " species = annotation.split(\".\")[0]\n",
  237. " print(annotation)\n",
  238. " name = annotations_dir + annotation\n",
  239. " annotations = GFF3Parser(filename=name).read()\n",
  240. " all_names = annotations.sort_values(by=['seqid'],ascending=True)[['seqid','Name']]\n",
  241. " annotations = annotations.dropna(subset=['Name'])\n",
  242. " pickonename = annotations.sort_values(by=['seqid', 'score'], ascending=True).query('score < 1e-05').drop_duplicates(subset='seqid')[['seqid', 'Name']]\n",
  243. " pickonename = pickonename.dropna(axis=0,how=\"all\")\n",
  244. " fund = annotations[annotations['Name'].str.startswith(\"gi\")]\n",
  245. " names = fund.sort_values(by=['seqid'], ascending=True)[['seqid', 'Name']]\n",
  246. " names_out = \"/Users/johnsolk/Documents/UCDavis/Whitehead/intermediate_denovo_annotation_files/\" + species + \"_genenames.csv\"\n",
  247. " fund_names = \"/Users/johnsolk/Documents/UCDavis/Whitehead/intermediate_denovo_annotation_files/\" + species + \"_Fhet_genenames.csv\"\n",
  248. " pickone = \"/Users/johnsolk/Documents/UCDavis/Whitehead/intermediate_denovo_annotation_files/\" + species + \"_onegenenamepertranscript.csv\"\n",
  249. " #all_names.to_csv(names_out)\n",
  250. " names.to_csv(fund_names)\n",
  251. " pickonename.to_csv(pickone)\n",
  252. " "
  253. ]
  254. },
  255. {
  256. "cell_type": "code",
  257. "execution_count": 27,
  258. "metadata": {},
  259. "outputs": [
  260. {
  261. "name": "stdout",
  262. "output_type": "stream",
  263. "text": [
  264. "/Users/johnsolk/Documents/UCDavis/Whitehead/intermediate_denovo_annotation_files/F_diaphanus_namemap2.csv\n",
  265. "(384218, 4)\n",
  266. "/Users/johnsolk/Documents/UCDavis/Whitehead/intermediate_denovo_annotation_files/F_diaphanus_Fhet_genenames.csv\n",
  267. "(134129, 3)\n",
  268. "(384218, 6)\n",
  269. "(455093, 7)\n",
  270. "/Users/johnsolk/Documents/UCDavis/Whitehead/intermediate_denovo_annotation_files/F_chrysotus_namemap2.csv\n",
  271. "(396400, 4)\n",
  272. "/Users/johnsolk/Documents/UCDavis/Whitehead/intermediate_denovo_annotation_files/F_chrysotus_Fhet_genenames.csv\n",
  273. "(168861, 3)\n",
  274. "(396400, 6)\n",
  275. "(485204, 7)\n",
  276. "/Users/johnsolk/Documents/UCDavis/Whitehead/intermediate_denovo_annotation_files/L_goodei_namemap2.csv\n",
  277. "(385476, 4)\n",
  278. "/Users/johnsolk/Documents/UCDavis/Whitehead/intermediate_denovo_annotation_files/L_goodei_Fhet_genenames.csv\n",
  279. "(168019, 3)\n",
  280. "(385476, 6)\n",
  281. "(478428, 7)\n",
  282. "/Users/johnsolk/Documents/UCDavis/Whitehead/intermediate_denovo_annotation_files/F_heteroclitusMDPL_namemap2.csv\n",
  283. "(592419, 4)\n",
  284. "/Users/johnsolk/Documents/UCDavis/Whitehead/intermediate_denovo_annotation_files/F_heteroclitusMDPL_Fhet_genenames.csv\n",
  285. "(149975, 3)\n",
  286. "(592419, 6)\n",
  287. "(663574, 7)\n",
  288. "/Users/johnsolk/Documents/UCDavis/Whitehead/intermediate_denovo_annotation_files/F_grandis_namemap2.csv\n",
  289. "(809060, 4)\n",
  290. "/Users/johnsolk/Documents/UCDavis/Whitehead/intermediate_denovo_annotation_files/F_grandis_Fhet_genenames.csv\n",
  291. "(182607, 3)\n",
  292. "(809060, 6)\n",
  293. "(896995, 7)\n",
  294. "/Users/johnsolk/Documents/UCDavis/Whitehead/intermediate_denovo_annotation_files/F_olivaceous_namemap2.csv\n",
  295. "(350265, 4)\n",
  296. "/Users/johnsolk/Documents/UCDavis/Whitehead/intermediate_denovo_annotation_files/F_olivaceous_Fhet_genenames.csv\n",
  297. "(157612, 3)\n",
  298. "(350265, 6)\n",
  299. "(438933, 7)\n",
  300. "/Users/johnsolk/Documents/UCDavis/Whitehead/intermediate_denovo_annotation_files/F_zebrinus_namemap2.csv\n",
  301. "(266978, 4)\n",
  302. "/Users/johnsolk/Documents/UCDavis/Whitehead/intermediate_denovo_annotation_files/F_zebrinus_Fhet_genenames.csv\n",
  303. "(119383, 3)\n",
  304. "(266978, 6)\n",
  305. "(329326, 7)\n",
  306. "/Users/johnsolk/Documents/UCDavis/Whitehead/intermediate_denovo_annotation_files/F_catanatus_namemap2.csv\n",
  307. "(405866, 4)\n",
  308. "/Users/johnsolk/Documents/UCDavis/Whitehead/intermediate_denovo_annotation_files/F_catanatus_Fhet_genenames.csv\n",
  309. "(157130, 3)\n",
  310. "(405866, 6)\n",
  311. "(484458, 7)\n",
  312. "/Users/johnsolk/Documents/UCDavis/Whitehead/intermediate_denovo_annotation_files/F_sciadicus_namemap2.csv\n",
  313. "(241279, 4)\n",
  314. "/Users/johnsolk/Documents/UCDavis/Whitehead/intermediate_denovo_annotation_files/F_sciadicus_Fhet_genenames.csv\n",
  315. "(118097, 3)\n",
  316. "(241279, 6)\n",
  317. "(304539, 7)\n",
  318. "/Users/johnsolk/Documents/UCDavis/Whitehead/intermediate_denovo_annotation_files/F_heteroclitusMDPP_namemap2.csv\n",
  319. "(668487, 4)\n",
  320. "/Users/johnsolk/Documents/UCDavis/Whitehead/intermediate_denovo_annotation_files/F_heteroclitusMDPP_Fhet_genenames.csv\n",
  321. "(146014, 3)\n",
  322. "(668487, 6)\n",
  323. "(736813, 7)\n",
  324. "/Users/johnsolk/Documents/UCDavis/Whitehead/intermediate_denovo_annotation_files/F_similis_namemap2.csv\n",
  325. "(520319, 4)\n",
  326. "/Users/johnsolk/Documents/UCDavis/Whitehead/intermediate_denovo_annotation_files/F_similis_Fhet_genenames.csv\n",
  327. "(161532, 3)\n",
  328. "(520319, 6)\n",
  329. "(607296, 7)\n",
  330. "/Users/johnsolk/Documents/UCDavis/Whitehead/intermediate_denovo_annotation_files/A_xenica_namemap2.csv\n",
  331. "(362783, 4)\n",
  332. "/Users/johnsolk/Documents/UCDavis/Whitehead/intermediate_denovo_annotation_files/A_xenica_Fhet_genenames.csv\n",
  333. "(161802, 3)\n",
  334. "(362783, 6)\n",
  335. "(451127, 7)\n",
  336. "/Users/johnsolk/Documents/UCDavis/Whitehead/intermediate_denovo_annotation_files/F_parvapinis_namemap2.csv\n",
  337. "(352346, 4)\n",
  338. "/Users/johnsolk/Documents/UCDavis/Whitehead/intermediate_denovo_annotation_files/F_parvapinis_Fhet_genenames.csv\n",
  339. "(146111, 3)\n",
  340. "(352346, 6)\n",
  341. "(435605, 7)\n",
  342. "/Users/johnsolk/Documents/UCDavis/Whitehead/intermediate_denovo_annotation_files/F_notatus_namemap2.csv\n",
  343. "(416299, 4)\n",
  344. "/Users/johnsolk/Documents/UCDavis/Whitehead/intermediate_denovo_annotation_files/F_notatus_Fhet_genenames.csv\n",
  345. "(178608, 3)\n",
  346. "(416299, 6)\n",
  347. "(512589, 7)\n",
  348. "/Users/johnsolk/Documents/UCDavis/Whitehead/intermediate_denovo_annotation_files/L_parva_namemap2.csv\n",
  349. "(409543, 4)\n",
  350. "/Users/johnsolk/Documents/UCDavis/Whitehead/intermediate_denovo_annotation_files/L_parva_Fhet_genenames.csv\n",
  351. "(159764, 3)\n",
  352. "(409543, 6)\n",
  353. "(495881, 7)\n",
  354. "/Users/johnsolk/Documents/UCDavis/Whitehead/intermediate_denovo_annotation_files/F_notti_namemap2.csv\n",
  355. "(159771, 4)\n",
  356. "/Users/johnsolk/Documents/UCDavis/Whitehead/intermediate_denovo_annotation_files/F_notti_Fhet_genenames.csv\n",
  357. "(91102, 3)\n",
  358. "(159771, 6)\n",
  359. "(209036, 7)\n",
  360. "/Users/johnsolk/Documents/UCDavis/Whitehead/intermediate_denovo_annotation_files/F_rathbuni_namemap2.csv\n",
  361. "(501222, 4)\n",
  362. "/Users/johnsolk/Documents/UCDavis/Whitehead/intermediate_denovo_annotation_files/F_rathbuni_Fhet_genenames.csv\n",
  363. "(176180, 3)\n",
  364. "(501222, 6)\n",
  365. "(595482, 7)\n"
  366. ]
  367. }
  368. ],
  369. "source": [
  370. "namemap_dir = \"/Users/johnsolk/Documents/UCDavis/Whitehead/intermediate_denovo_annotation_files/\"\n",
  371. "maps = os.listdir(namemap_dir)\n",
  372. "for namemap in maps:\n",
  373. " if namemap.endswith(\"dammit.namemap.csv\"):\n",
  374. " species = namemap.split(\".\")[0]\n",
  375. " namemap2 = namemap_dir + species + \"_namemap2.csv\"\n",
  376. " with open(namemap_dir + namemap) as f:\n",
  377. " header = next(f)\n",
  378. " new_header = header.strip() + \",\"+\"seqid\"+\",\"+\"Name\"\n",
  379. " ids = f.readlines()\n",
  380. " with open(namemap2,\"w\") as n:\n",
  381. " n.write(new_header+\"\\n\")\n",
  382. " for line in ids:\n",
  383. " trinity_full = line.split(\",\")[0]\n",
  384. " renamed = line.split(\",\")[-1].strip()\n",
  385. " trinity_contig = trinity_full.split(\" \")[0].strip('\"')\n",
  386. " gene = trinity_contig[:-3]\n",
  387. " n.write(trinity_contig+\",\")\n",
  388. " n.write(renamed+\",\")\n",
  389. " n.write(gene+\",\")\n",
  390. " n.write(gene+\"\\n\")\n",
  391. " annotations = namemap_dir + species + \"_onegenenamepertranscript.csv\"\n",
  392. " Fhet = namemap_dir + species + \"_Fhet_genenames.csv\"\n",
  393. " ann = pd.read_csv(annotations)\n",
  394. " dammit_Trinity = pd.read_csv(namemap2)\n",
  395. " print(namemap2)\n",
  396. " print(dammit_Trinity.shape)\n",
  397. " species_Fhet = pd.read_csv(Fhet)\n",
  398. " print(Fhet)\n",
  399. " print(species_Fhet.shape)\n",
  400. " species_Fhet = species_Fhet.drop(['Unnamed: 0'], axis=1)\n",
  401. " ann = ann.drop(['Unnamed: 0'], axis=1)\n",
  402. " combined = pd.merge(dammit_Trinity, ann, how='outer', left_on=\"renamed\", right_on=\"seqid\")\n",
  403. " print(combined.shape)\n",
  404. " combined = combined.drop(['renamed'],axis=1)\n",
  405. " combined = pd.merge(combined,species_Fhet,how = \"outer\",left_on=\"seqid_y\",right_on=\"seqid\")\n",
  406. " print(combined.shape)\n",
  407. " combined['Name_y'] = combined['Name_y'].fillna(combined['seqid_x'])\n",
  408. " combined = combined.drop(['Name_x'],axis=1)\n",
  409. " new = combined.to_csv(\"/Users/johnsolk/Documents/UCDavis/Whitehead/annotation_gene_names/\" + species + \"_gene_names.csv\")\n",
  410. " #print(\"/Users/johnsolk/Documents/UCDavis/Whitehead/annotation_gene_names/\" + species + \"_gene_names.csv\")"
  411. ]
  412. },
  413. {
  414. "cell_type": "code",
  415. "execution_count": null,
  416. "metadata": {
  417. "collapsed": true
  418. },
  419. "outputs": [],
  420. "source": []
  421. }
  422. ],
  423. "metadata": {
  424. "kernelspec": {
  425. "display_name": "Python 3",
  426. "language": "python",
  427. "name": "python3"
  428. },
  429. "language_info": {
  430. "codemirror_mode": {
  431. "name": "ipython",
  432. "version": 3
  433. },
  434. "file_extension": ".py",
  435. "mimetype": "text/x-python",
  436. "name": "python",
  437. "nbconvert_exporter": "python",
  438. "pygments_lexer": "ipython3",
  439. "version": "3.6.3"
  440. }
  441. },
  442. "nbformat": 4,
  443. "nbformat_minor": 2
  444. }
Add Comment
Please, Sign In to add comment