Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/usr/bin/env ruby
#
# Produce a GenBank file (R124.gb) for the assembled genome.
#
# Inputs (paths relative to the working directory):
#   annotation/proteins.faa   - protein FASTA; the first whitespace-separated
#                               token of each definition line is used as the
#                               gene id
#   assembly/build.fna        - nucleotide FASTA; only the FIRST entry is used
#   annotation/gene_list.csv  - CSV with a header row; the columns read are
#                               'gene_oid', 'Start Coord', 'End Coord',
#                               'Strand' and 'Description'
#
# Output:
#   R124.gb - one record carrying a 'gene' and a 'CDS' feature per CSV row.

require 'bio'
require 'fastercsv'

# Map gene id => protein sequence. The block form of FlatFile.auto is used so
# the underlying file is closed as soon as the entries have been read.
proteins = {}
Bio::FlatFile.auto('annotation/proteins.faa') do |entries|
  entries.each do |entry|
    proteins[entry.definition.split.first] = entry.seq
  end
end

# Read the first genome entry; the reference is captured inside the block so
# the file handle does not stay open for the rest of the script.
genome = nil
Bio::FlatFile.auto('assembly/build.fna') do |entries|
  genome = entries.first
end
# Fail with a readable message instead of a NoMethodError on nil.
raise "No sequence found in assembly/build.fna" unless genome

record = Bio::Sequence.new(genome.seq)
record.definition = "Pseudomonas fluorescens R124, complete genome"
record.species = "Pseudomonas fluorescens R124"
record.features = []

# FasterCSV.foreach opens, iterates and closes the file; FasterCSV.open
# without a block would leave the handle open.
FasterCSV.foreach('annotation/gene_list.csv', :headers => true) do |row|
  # Feature location; minus-strand genes are wrapped in complement(...).
  location = "#{row['Start Coord']}..#{row['End Coord']}"
  location = "complement(#{location})" if row['Strand'] == '-'

  gene_id = row['gene_oid']

  # The gene feature carries only the /gene qualifier.
  gene_qualifiers = [Bio::Feature::Qualifier.new('gene', gene_id)]
  record.features << Bio::Feature.new('gene', location, gene_qualifiers)

  # The CDS feature repeats /gene and adds /function and /translation when
  # the corresponding data exists.
  cds_qualifiers = [Bio::Feature::Qualifier.new('gene', gene_id)]
  if row['Description']
    cds_qualifiers << Bio::Feature::Qualifier.new('function', row['Description'])
  end

  # Only emit /translation when a protein was found for this id; previously a
  # qualifier with a nil value was added for ids missing from proteins.faa.
  # NOTE(review): such rows are presumably non-coding genes; they still get a
  # CDS feature here, as before - confirm whether they should be skipped.
  translation = proteins[gene_id]
  if translation
    cds_qualifiers << Bio::Feature::Qualifier.new('translation', translation)
  end

  record.features << Bio::Feature.new('CDS', location, cds_qualifiers)
end

File.open('R124.gb', 'w') do |out|
  out.print record.output(:genbank)
end
Add Comment
Please, Sign In to add comment