Guest User

Untitled

a guest
Aug 19th, 2018
71
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.16 KB | None | 0 0
  1. import sys
  2.  
  3. def check_argv(argc):
  4. expected_argc = 2
  5. if (argc != expected_argc):
  6. sys.stderr.write('Usage: # orfEnumeration.py input.fa')
  7. quit()
  8.  
  9. def find_ORF_site(seqs):
  10. start_codon = ["atg","gtg"]
  11. stop_codon = ["tag","taa","tga"]
  12. start_site = [[],[],[]]
  13. stop_site = [[],[],[]]
  14. orf_site = [[],[],[]]
  15.  
  16. for i, seq in enumerate(seqs):
  17. start_site[i] = find_site(seq,start_codon,i)
  18. # stopコドンの3文字目の位置を格納するために、リスト全ての値に+をしている
  19. stop_site[i] = list(map(lambda x: x+2, find_site(seq,stop_codon,i)))
  20. orf_site[i] = make_orf_site(start_site[i],stop_site[i])
  21.  
  22. return orf_site
  23.  
  24. def make_orf_site(start_sites, stop_sites):
  25. orf_site = []
  26. for start in start_sites:
  27. for stop in stop_sites:
  28. if start < stop:
  29. orf_site.append({'start': start, 'stop': stop})
  30. break
  31. return orf_site
  32.  
  33. def find_site(seq, serach_codon, fix):
  34. site_list = []
  35. for i in range(int(len(seq)/3)):
  36. i *= 3
  37. codon = seq[i:i+3]
  38. if codon in serach_codon:
  39. hit_site = i+1+fix
  40. site_list.append(hit_site)
  41. return site_list
  42.  
  43. def output_ORF(seqs_orf_sites, title,seq):
  44. total = 0
  45.  
  46. print("#\tStart\tStop\tPtn")
  47. for seq_orf_sites in seqs_orf_sites:
  48. total += len(seq_orf_sites)
  49. for orf_site in seq_orf_sites:
  50. start = orf_site["start"]
  51. stop = orf_site["stop"]
  52. print("\t{0}\t{1}\t{2}".format(start,stop,seq[start-1:stop]))
  53.  
  54. print("total:"+str(total))
  55.  
  56. def read_fasta(file_name):
  57. file = open(file_name,'r')
  58. title = file.readline().replace("\n","")
  59. seq = file.read().replace("\n","")
  60. file.close()
  61. return seq, title
  62.  
  63. def reading_frame(seq):
  64. seq_reading_frame = [seq[i:] for i in range(3)]
  65. return seq_reading_frame
  66.  
  67.  
  68. def main(argvs, argc):
  69. check_argv(argc)
  70. seq, title = read_fasta(argvs[1])
  71. seq_reading_frames = reading_frame(seq)
  72. orf_sites = find_ORF_site(seq_reading_frames)
  73. output_ORF(orf_sites,title,seq)
  74.  
  75.  
  76. if __name__ == '__main__':
  77. argvs = sys.argv
  78. argc = len(argvs)
  79. main(argvs,argc)
Add Comment
Please, Sign In to add comment