Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env python
- import csv
- import re
- import argparse
- import os
- import sys
def __main__():
    """Parse the command line and run the FACETS-to-PCGR conversion.

    Three positional arguments are expected, in order: the FACETS input
    TSV, the output TSV to create, and the sample identifier. They are
    forwarded unchanged to ``facets_to_pcgr``.
    """
    cli = argparse.ArgumentParser(
        description='Transform FACETS output file to a format usable for input to PCGR'
    )
    # Positional arguments are declared in the order they must appear.
    positional_arguments = (
        ('input_facets_tsv', 'FACETS tsv file with copy number segments '),
        ('output_pcgr_tsv', 'Tab-separated file of CNA segments formatted according to PCGR input requirements'),
        ('sample_id', 'sample identifier'),
    )
    for argument_name, help_text in positional_arguments:
        cli.add_argument(argument_name, help=help_text)

    options = cli.parse_args()
    facets_to_pcgr(
        options.input_facets_tsv,
        options.output_pcgr_tsv,
        options.sample_id,
    )
def facets_to_pcgr(input_tsv, output_tsv, sample_id):
    """Rewrite a FACETS segment table as a PCGR copy-number segment file.

    ``input_tsv`` is read as a tab-separated file with a header row. For
    every record whose ``segment_start`` and ``segment_end`` consist only
    of digits, one tab-separated line is written to ``output_tsv`` with
    the columns Chromosome, Start, End, Segment_Mean, Cellular_Fraction,
    Total_CN, Minor_CN and SampleID. Records with non-numeric coordinates
    are reported on standard output and skipped.

    Args:
        input_tsv: Path of the tab-separated input file.
        output_tsv: Path of the file to create (overwritten if present).
        sample_id: Value written in the SampleID column of every line.

    Raises:
        KeyError: If a column read from a record is missing in the input
            header.
        IOError/OSError: If either file cannot be opened.
    """
    # Chromosome codes '23' and '24' are rewritten to 'X' and 'Y'.
    chromosome_aliases = {'23': 'X', '24': 'Y'}
    # Input columns copied verbatim, in output order, after the coordinates.
    value_columns = (
        'CN_logR_median_in_cluster',
        'cluster_specific_cellular_fraction_MM_EM_optimized',
        'cluster_specific_total_copy_number_MM_EM_optimized',
        'cluster_specific_minor_copy_number_MM_EM_optimized',
    )

    # Both handles are context-managed so the output file is flushed and
    # closed even when reading the input raises part-way through.
    with open(output_tsv, 'w') as out:
        out.write('Chromosome\tStart\tEnd\tSegment_Mean\tCellular_Fraction\tTotal_CN\tMinor_CN\tSampleID\n')
        with open(input_tsv, 'r') as tsvfile:
            for rec in csv.DictReader(tsvfile, delimiter='\t'):
                chromosome = rec['#chromosome']
                chromosome = chromosome_aliases.get(chromosome, chromosome)
                segment_start = str(rec['segment_start'])
                segment_end = str(rec['segment_end'])

                if not (segment_start.isdigit() and segment_end.isdigit()):
                    print('Unable to parse segment on chromosome - ' + str(chromosome) + ': segment_start (' + str(segment_start) + ') or segment_end (' + str(segment_end) + ') not proper integers')
                    continue

                # Value columns are looked up only for valid segments, so a
                # skipped record never triggers a KeyError on them.
                fields = [chromosome, segment_start, segment_end]
                fields.extend(rec[column] for column in value_columns)
                fields.append(sample_id)
                out.write('\t'.join(str(field) for field in fields) + '\n')
# Run the command-line entry point only when executed as a script.
if __name__ == "__main__":
    __main__()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement