Advertisement
Guest User

Untitled

a guest
Apr 30th, 2017
61
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.40 KB | None | 0 0
  1. from collections import defaultdict
  2. from tqdm import tqdm
  3. from Bio import SeqIO
  4.  
  5. alignment_filename = 'out/HTC10499.sam'
  6. reference_filename = 'data4/P.stipitis/Pichia_stipitis_Sanger_reference.fa'
  7. # alignment_filename = 'out/head_HTC10499.sam'
  8. # reference_filename = 'data/Pichia_stipitis_Sanger_reference.fa'
  9.  
  10.  
  11. def main():
  12. matrix = defaultdict(lambda: defaultdict(int))
  13.  
  14. with open(reference_filename) as f:
  15. reference = next(SeqIO.parse(reference_filename, 'fasta')).seq._data.upper()
  16.  
  17. with open(alignment_filename) as f:
  18. for line in tqdm(f):
  19. if line.startswith('@'):
  20. continue
  21. raw = line.split('\t')
  22. pos, cigar, read = int(raw[3]), raw[5], raw[9].upper()
  23. if 'M' in cigar and 'I' not in cigar and 'D' not in cigar:
  24. k = int(cigar[:-1])
  25. ref_read = reference[pos - 1: pos + k - 1]
  26. for c, refc in zip(read, ref_read):
  27. if c != refc:
  28. matrix[refc][c] += 1
  29.  
  30. LETTERS = sorted(matrix.keys())
  31. print('[*] Substitution matrix:')
  32. k = 9
  33. s = '{}{}'.format(' ' * (k + 4), ' |{}'.format(' ' * (k - 1)).join(LETTERS))
  34. print(s)
  35. print(' ' + '-' * len(s[3:]))
  36. for refc in LETTERS:
  37. print(' {} | {}'.format(refc, ' |'.join(map(lambda x: '{1: >{0}}'.format(k, x), (matrix[refc][c] for c in LETTERS)))))
  38.  
  39. if __name__ == '__main__':
  40. main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement