Advertisement
Guest User

Untitled

a guest
May 30th, 2015
237
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.41 KB | None | 0 0
  1. require 'bio'
  2.  
  # Runs the ORF analysis on every entry of a FASTA file.
  #
  # @param input_file [String] path of the FASTA file to read
  # @return [Array] one analyse_orfs result per entry, in file order
  def analyse_input_file(input_file)
    overall_results = []
    # Block form: the underlying file handle is closed as soon as the block
    # exits, even when analysing an entry raises.
    Bio::FlatFile.open(Bio::FastaFormat, input_file) do |fasta_file|
      fasta_file.each_entry do |entry|
        overall_results << analyse_orfs(entry.entry_id, entry.naseq)
      end
    end
    overall_results
  end
  11.  
  # Summarises the ORF with the highest coverage found in one sequence.
  #
  # @param query_id [Object] key under which the summary is stored
  # @param seq [Object] sequence handed unchanged to get_orfs
  # @param min_length [Integer] minimum ORF length forwarded to get_orfs.
  #   Defaults to 1 (no filtering), which cannot change which ORF has the
  #   highest coverage.
  # @return [Hash] { query_id => { coverage:, longest_orf_frame:,
  #   translated_length: } }; when no ORF is found, coverage is 0 and the
  #   other two values are nil
  def analyse_orfs(query_id, seq, min_length = 1)
    orfs = get_orfs(seq, min_length)
    # max_by avoids sorting the whole collection just to take one element.
    best = orfs.values.max_by { |orf| orf[:coverage] }

    # No ORF at all: report an empty summary instead of failing on nil.
    unless best
      return { query_id => { coverage: 0,
                             longest_orf_frame: nil,
                             translated_length: nil } }
    end

    { query_id => { coverage: best[:coverage],
                    longest_orf_frame: best[:frame],
                    translated_length: best[:translated_length] } }
  end
  24.  
  # Collects open reading frames from the six translations of a sequence.
  #
  # An ORF here is any run of at least min_length word characters in a
  # translation (assumes stop codons are rendered as a non-word character
  # such as "*" -- TODO confirm against the sequence class in use).
  #
  # @param seq [#translate] object whose translate(frame) returns the
  #   translated string for frames 1 to 6
  # @param min_length [#to_i] minimum ORF length in translated characters;
  #   values below 1 are treated as 1
  # @return [Hash{Integer=>Hash}] ORFs keyed by a running counter starting at
  #   1. Each value holds :frame, :orf_start and :orf_end (1-based, inclusive
  #   positions within the translation), :coverage (percentage of the
  #   translation covered, rounded up) and :translated_length.
  def get_orfs(seq, min_length = 1)
    # A minimum of 0 would let the pattern match empty strings and, on an
    # empty translation, divide zero by zero.
    min_length = [min_length.to_i, 1].max
    # Built once; the pattern does not depend on the frame.
    orf_pattern = /\w{#{min_length},}/
    result = {}

    (1..6).each do |frame|
      translation = seq.translate(frame)
      translated_length = translation.length

      translation.scan(orf_pattern) do
        # offset(0) is [start, end) in 0-based indexing, so the exclusive end
        # already equals the 1-based inclusive end position.
        first, past_end = Regexp.last_match.offset(0)
        result[result.size + 1] = {
          frame: frame,
          orf_start: first + 1,
          orf_end: past_end,
          coverage: (((past_end - first) / translated_length.to_f) * 100).ceil,
          translated_length: translated_length
        }
      end
    end

    result
  end
  46.  
  # Script entry point: analyse the FASTA file named by the first
  # command-line argument and print the results.
  input_file = ARGV.first
  # Fail with a usage message instead of passing nil to the file open.
  abort "Usage: #{File.basename($PROGRAM_NAME)} <input.fasta>" if input_file.nil? || input_file.empty?

  puts analyse_input_file(input_file)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement