Advertisement
Guest User

Untitled

a guest
Jul 19th, 2019
69
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.45 KB | None | 0 0
  1. import argparse
  2. import sys
  3. import csv
  4. #import lib.sort
  5.  
  6. import os
  7.  
  8. def default_sort(rows, top_score_metric):
  9. sorted_rows = sorted(rows, key=lambda row: (int(row['Sub-peptide Position'])))
  10. sorted_rows = sorted(sorted_rows, key=lambda row: (float(row['Corresponding Fold Change']) if row['Corresponding Fold Change'].isdigit() else float('inf')), reverse=True)
  11. if top_score_metric == 'median':
  12. sorted_rows = sorted(
  13. sorted_rows,
  14. key=lambda row: (
  15. row['Gene Name'],
  16. row['Mutation'],
  17. float(row['Median MT Score']),
  18. )
  19. )
  20. elif top_score_metric == 'lowest':
  21. sorted_rows = sorted(
  22. sorted_rows,
  23. key=lambda row: (
  24. row['Gene Name'],
  25. row['Mutation'],
  26. float(row['Best MT Score']),
  27. )
  28. )
  29. return sorted_rows
  30.  
  31.  
  32. def main(input_files, output_file, top_score_metric):
  33. output_file = open(output_file, 'w')
  34.  
  35. fieldnames = []
  36. for input_file in input_files:
  37. with open(input_file, 'r') as input_file_handle:
  38. reader = csv.DictReader(input_file_handle, delimiter='\t')
  39. if len(fieldnames) == 0:
  40. fieldnames = reader.fieldnames
  41. else:
  42. for fieldname in reader.fieldnames:
  43. if fieldname not in fieldnames:
  44. fieldnames.append(fieldname)
  45.  
  46. rows = []
  47. for input_file in input_files:
  48. with open(input_file, 'r') as input_file_handle:
  49. reader = csv.DictReader(input_file_handle, delimiter='\t')
  50. for row in reader:
  51. for fieldname in fieldnames:
  52. if fieldname not in row:
  53. row[fieldname] = 'NA'
  54. rows.append(row)
  55.  
  56. sorted_rows = default_sort(rows, top_score_metric)
  57. tsv_writer = csv.DictWriter(output_file, list(fieldnames), delimiter = '\t', lineterminator = '\n')
  58. tsv_writer.writeheader()
  59. tsv_writer.writerows(sorted_rows)
  60.  
  61. output_file.close()
  62.  
  63.  
  64. if __name__ == "__main__":
  65. files = []
  66. for file in os.listdir("/home/ben/Documents/wustl/pvacdata/H_NT-45-1208990-indels/MHC_Class_I/tmp"):
  67. if ".parsed.tsv" in file:
  68. files.append(os.path.join("/home/ben/Documents/wustl/pvacdata/H_NT-45-1208990-indels/MHC_Class_I/tmp", file))
  69. main(files, "/home/ben/Documents/wustl/pvacdata/H_NT-45-1208990-indels/H_NT-45-1208990.all_epitopes.tsv ", "median")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement