Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- require 'bio'
- def analyse_input_file(input_file)
- overall_results = []
- biofastafile = Bio::FlatFile.open(Bio::FastaFormat, input_file)
- biofastafile.each_entry do |entry|
- overall_results << analyse_orfs(entry.entry_id, entry.naseq)
- end
- overall_results
- end
- def analyse_orfs(query_id, seq)
- results = {}
- orfs = get_orfs(seq)
- longest_orf = orfs.sort_by { |_key, hash| hash[:coverage] }.last
- longest_orf_frame = longest_orf[1][:frame]
- coverage = longest_orf[1][:coverage]
- translated_length = longest_orf[1][:translated_length]
- results[query_id] = {coverage: coverage,
- longest_orf_frame: longest_orf_frame,
- translated_length: translated_length}
- overall_results
- end
- def get_orfs(seq, min_length)
- result = {}
- key = 0
- min_length = min_length.to_i
- (1..6).each do |f|
- s = seq.translate(f)
- s.scan(/(\w{#{min_length},})/) do |_orf|
- orf_start = $~.offset(0)[0] + 1
- orf_end = $~.offset(0)[1] + 1
- coverage = (((orf_end - orf_start) / s.length.to_f) * 100).ceil
- translated_length = s.length
- key += 1
- result[key] = { frame: f,
- orf_start: orf_start,
- orf_end: orf_end,
- coverage: coverage,
- translated_length: translated_length }
- end
- end
- result
- end
- input_file = ARGV[0]
- puts analyse_input_file(input_file)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement