Advertisement
Guest User

Untitled

a guest
Sep 18th, 2019
77
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.00 KB | None | 0 0
  1. #!/usr/bin/env python
  2.  
  3. import csv
  4. import re
  5. import argparse
  6. import os
  7. import sys
  8.  
  9.  
  10. def __main__():
  11.  
  12. parser = argparse.ArgumentParser(description='Transform FACETS output file to a format usable for input to PCGR')
  13. parser.add_argument('input_facets_tsv', help='FACETS tsv file with copy number segments ')
  14. parser.add_argument('output_pcgr_tsv', help='Tab-separated file of CNA segments formatted according to PCGR input requirements')
  15. parser.add_argument('sample_id', help='sample identifier')
  16. args = parser.parse_args()
  17.  
  18. facets_to_pcgr(args.input_facets_tsv, args.output_pcgr_tsv, args.sample_id)
  19.  
  20. def facets_to_pcgr(input_tsv, output_tsv, sample_id):
  21.  
  22. o = open(output_tsv,'w')
  23. o.write('Chromosome\tStart\tEnd\tSegment_Mean\tCellular_Fraction\tTotal_CN\tMinor_CN\tSampleID\n')
  24. with open(input_tsv, 'r') as tsvfile:
  25. reader = csv.DictReader(tsvfile, delimiter='\t')
  26. for rec in reader:
  27. chromosome = rec['#chromosome']
  28. if chromosome == '23':
  29. chromosome = 'X'
  30. if chromosome == '24':
  31. chromosome = 'Y'
  32. segment_start = str(rec['segment_start'])
  33. segment_end = str(rec['segment_end'])
  34. if not segment_start.isdigit() or not segment_end.isdigit():
  35. print('Unable to parse segment on chromosome - ' + str(chromosome) + ': segment_start (' + str(segment_start) + ') or segment_end (' + str(segment_end) + ') not proper integers')
  36. else:
  37. log2_ratio = rec['CN_logR_median_in_cluster']
  38. cf = rec['cluster_specific_cellular_fraction_MM_EM_optimized']
  39. total_cn = rec['cluster_specific_total_copy_number_MM_EM_optimized']
  40. minor_cn = rec['cluster_specific_minor_copy_number_MM_EM_optimized']
  41.  
  42. o.write(str(chromosome) + '\t' + str(segment_start) + '\t' + str(segment_end) + '\t' + str(log2_ratio) + '\t' + str(cf) + '\t' + str(total_cn) + '\t' + str(minor_cn) + '\t' + str(sample_id) + '\n')
  43.  
  44. o.close()
  45.  
  46. if __name__=="__main__": __main__()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement