Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import argparse
- import sys
- import csv
- #import lib.sort
- import os
def default_sort(rows, top_score_metric):
    """Return a new list of epitope rows in the default report order.

    The ordering is built from successive stable sorts, so the effective
    precedence (most significant first) is:

    1. ``'Gene Name'`` ascending, then ``'Mutation'`` ascending, then the
       MT score ascending -- only when ``top_score_metric`` is ``'median'``
       (uses ``'Median MT Score'``) or ``'lowest'`` (uses ``'Best MT Score'``).
    2. ``'Corresponding Fold Change'`` descending; values that cannot be read
       as a number (for example ``'NA'``) count as infinity and come first.
    3. ``'Sub-peptide Position'`` ascending, compared as an integer.

    Args:
        rows: Iterable of dict-like rows whose values are strings, keyed by
            the column names listed above.
        top_score_metric: ``'median'`` or ``'lowest'``. Any other value skips
            step 1 and leaves the rows ordered by steps 2 and 3 only.

    Returns:
        A new sorted list; ``rows`` itself is not modified.

    Raises:
        KeyError: If a row lacks one of the columns being sorted on.
        ValueError: If a position or MT score is not numeric.
    """
    import math

    def fold_change(row):
        # str.isdigit() rejects decimals such as "1.5", so parse with float()
        # and fall back to infinity for anything that is not a real number.
        try:
            value = float(row['Corresponding Fold Change'])
        except (TypeError, ValueError):
            return float('inf')
        # NaN has no defined ordering; treat it like a missing value.
        return float('inf') if math.isnan(value) else value

    sorted_rows = sorted(rows, key=lambda row: int(row['Sub-peptide Position']))
    # reverse=True keeps the sort stable, so equal fold changes stay in
    # ascending position order.
    sorted_rows = sorted(sorted_rows, key=fold_change, reverse=True)

    score_columns = {'median': 'Median MT Score', 'lowest': 'Best MT Score'}
    score_column = score_columns.get(top_score_metric)
    if score_column is not None:
        sorted_rows = sorted(
            sorted_rows,
            key=lambda row: (
                row['Gene Name'],
                row['Mutation'],
                float(row[score_column]),
            ),
        )
    return sorted_rows
def main(input_files, output_file, top_score_metric):
    """Merge tab-separated input files into a single sorted TSV file.

    The output header is the union of all input headers, in order of first
    appearance across ``input_files``. A row that lacks a column present in
    another file gets ``'NA'`` for that column. Rows are ordered by
    ``default_sort`` before being written.

    Args:
        input_files: Iterable of paths to tab-separated files with a header
            row.
        output_file: Path of the TSV file to write; it is overwritten.
        top_score_metric: Passed through to ``default_sort``.
    """
    fieldnames = []
    rows = []
    for input_file in input_files:
        # newline='' lets the csv module handle line endings itself.
        with open(input_file, 'r', newline='') as input_file_handle:
            reader = csv.DictReader(input_file_handle, delimiter='\t')
            # reader.fieldnames is None for an empty file; build our own list
            # rather than aliasing (and mutating) the reader's.
            for fieldname in reader.fieldnames or []:
                if fieldname not in fieldnames:
                    fieldnames.append(fieldname)
            rows.extend(reader)

    # Pad only once every header has been seen, so rows from early files also
    # receive the columns introduced by later files.
    for row in rows:
        for fieldname in fieldnames:
            row.setdefault(fieldname, 'NA')

    sorted_rows = default_sort(rows, top_score_metric)

    # Open the output only after reading and sorting succeeded, so a failure
    # above does not leave a truncated file behind; the context manager
    # closes the handle even if writing raises.
    with open(output_file, 'w', newline='') as output_handle:
        tsv_writer = csv.DictWriter(
            output_handle, fieldnames, delimiter='\t', lineterminator='\n'
        )
        tsv_writer.writeheader()
        tsv_writer.writerows(sorted_rows)
if __name__ == "__main__":
    # NOTE: paths are hard-coded for one local data set.
    base_dir = "/home/ben/Documents/wustl/pvacdata/H_NT-45-1208990-indels"
    tmp_dir = os.path.join(base_dir, "MHC_Class_I", "tmp")
    # os.listdir() returns entries in arbitrary order; sort them so the merged
    # column order and the order of tied rows are reproducible between runs.
    # The substring test (rather than endswith) is kept from the original.
    files = [
        os.path.join(tmp_dir, name)
        for name in sorted(os.listdir(tmp_dir))
        if ".parsed.tsv" in name
    ]
    # The output name previously ended in a stray space, which created a file
    # literally named "...all_epitopes.tsv ".
    main(files, os.path.join(base_dir, "H_NT-45-1208990.all_epitopes.tsv"), "median")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement