SHARE
TWEET

Untitled

a guest Jul 19th, 2019 53 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
import argparse
import csv
import math
import os
import sys

#import lib.sort
  7.  
  8. def default_sort(rows, top_score_metric):
  9.     sorted_rows = sorted(rows, key=lambda row: (int(row['Sub-peptide Position'])))
  10.     sorted_rows = sorted(sorted_rows, key=lambda row: (float(row['Corresponding Fold Change']) if row['Corresponding Fold Change'].isdigit() else float('inf')), reverse=True)
  11.     if top_score_metric == 'median':
  12.         sorted_rows = sorted(
  13.             sorted_rows,
  14.             key=lambda row: (
  15.                 row['Gene Name'],
  16.                 row['Mutation'],
  17.                 float(row['Median MT Score']),
  18.             )
  19.         )
  20.     elif top_score_metric == 'lowest':
  21.         sorted_rows = sorted(
  22.             sorted_rows,
  23.             key=lambda row: (
  24.                 row['Gene Name'],
  25.                 row['Mutation'],
  26.                 float(row['Best MT Score']),
  27.             )
  28.         )
  29.     return sorted_rows
  30.  
  31.  
  32. def main(input_files, output_file, top_score_metric):
  33.     output_file = open(output_file, 'w')
  34.  
  35.     fieldnames = []
  36.     for input_file in input_files:
  37.         with open(input_file, 'r') as input_file_handle:
  38.             reader = csv.DictReader(input_file_handle, delimiter='\t')
  39.             if len(fieldnames) == 0:
  40.                 fieldnames = reader.fieldnames
  41.             else:
  42.                 for fieldname in reader.fieldnames:
  43.                     if fieldname not in fieldnames:
  44.                         fieldnames.append(fieldname)
  45.    
  46.     rows = []
  47.     for input_file in input_files:
  48.         with open(input_file, 'r') as input_file_handle:
  49.             reader = csv.DictReader(input_file_handle, delimiter='\t')
  50.             for row in reader:
  51.                 for fieldname in fieldnames:
  52.                     if fieldname not in row:
  53.                         row[fieldname] = 'NA'
  54.                 rows.append(row)
  55.    
  56.     sorted_rows = default_sort(rows, top_score_metric)
  57.     tsv_writer = csv.DictWriter(output_file, list(fieldnames), delimiter = '\t', lineterminator = '\n')
  58.     tsv_writer.writeheader()
  59.     tsv_writer.writerows(sorted_rows)
  60.  
  61.     output_file.close()
  62.  
  63.  
  64. if __name__ == "__main__":
  65.     files = []
  66.     for file in os.listdir("/home/ben/Documents/wustl/pvacdata/H_NT-45-1208990-indels/MHC_Class_I/tmp"):
  67.         if ".parsed.tsv" in file:
  68.             files.append(os.path.join("/home/ben/Documents/wustl/pvacdata/H_NT-45-1208990-indels/MHC_Class_I/tmp", file))
  69.     main(files, "/home/ben/Documents/wustl/pvacdata/H_NT-45-1208990-indels/H_NT-45-1208990.all_epitopes.tsv ", "median")
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top