Guest User

Untitled

a guest
Oct 17th, 2018
93
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.97 KB | None | 0 0
  1. #!/usr/bin/env python
  2.  
  3. """Return subsequences mapped to contig:start-stop.
  4.  
  5. Given an aligned bam and coordinates, return the subsequences of alignments
  6. that completely overlap [start, stop].
  7. """
  8.  
  9. from __future__ import print_function
  10. import os
  11. import os.path
  12. import sys
  13. import argparse
  14.  
  15. from Bio import SeqIO
  16. from Bio.Seq import Seq
  17. from Bio.SeqRecord import SeqRecord
  18. from Bio.Alphabet import IUPAC
  19. import pysam
  20.  
  21.  
  22. direction = {True: 'r', False: 'f'}
  23.  
  24.  
  25. def get_seq_record(record, start, stop, description):
  26. """Return a SeqRecord for query between start and stop.
  27.  
  28. Given a sam record, find query sequences that cover the [start, stop]
  29. interval completely and create a SeqRecord object.
  30. """
  31. # get the query positions of the bases mapped to start, stop
  32. # TODO: handle cases where no base is mapped (None)
  33. positions = record.get_aligned_pairs(matches_only=True)
  34. first_position = [item[0] for item in positions if item[1]==start]
  35. last_position = [item[0] for item in positions if item[1]==stop]
  36. # fetch sequence and qual
  37. if first_position and last_position:
  38. name = record.query_name
  39. if not record.is_reverse:
  40. seq = Seq(record.get_forward_sequence()[first_position[0]:last_position[0]],
  41. IUPAC.IUPACUnambiguousDNA())
  42. qual = list(record.get_forward_qualities())[first_position[0]:last_position[0]]
  43. else:
  44. length = record.query_length
  45. seq = Seq(record.get_forward_sequence()[length - last_position[0]:length - first_position[0]],
  46. IUPAC.IUPACUnambiguousDNA())
  47. qual = list(record.get_forward_qualities())[length - last_position[0]:length - first_position[0]]
  48. rec = SeqRecord(seq, id=name, description=('|').join([description, direction[record.is_reverse]]))
  49. rec.letter_annotations['phred_quality'] = qual
  50. return rec
  51.  
  52.  
  53. def main(arguments):
  54. parser = argparse.ArgumentParser(
  55. description=__doc__,
  56. formatter_class=argparse.RawDescriptionHelpFormatter)
  57. parser.add_argument('inbam', help="input bam", type=argparse.FileType('r'))
  58. parser.add_argument('outfastq', help="output fastq", type=argparse.FileType('w'))
  59. parser.add_argument('contig', help="reference contig", type=str)
  60. parser.add_argument('start', help="start position on reference contig", type=int)
  61. parser.add_argument('stop', help="stop position on reference contig", type=int)
  62.  
  63. args = parser.parse_args(arguments)
  64.  
  65. description = args.contig + ':' + str(args.start) + '-' + str(args.stop)
  66. my_records = []
  67. with pysam.AlignmentFile(args.inbam, 'rb') as samfile:
  68. for record in samfile.fetch(contig=args.contig, start=args.start, stop=args.stop):
  69. my_records.append(get_seq_record(record, args.start, args.stop, description))
  70. my_records = [record for record in my_records if record is not None]
  71. if my_records:
  72. SeqIO.write(my_records, args.outfastq, 'fastq')
  73.  
  74.  
  75. if __name__ == '__main__':
  76. sys.exit(main(sys.argv[1:]))
Add Comment
Please, Sign In to add comment