Advertisement
Guest User

Untitled

a guest
Apr 29th, 2019
122
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.78 KB | None | 0 0
  1. import os
  2. import json
  3. import csv
  4. import argparse
  5. from subprocess import call
  6. from pymongo import MongoClient
  7.  
  8. client = MongoClient(host='localhost', port=27017, username=USERNAME, password=PASSWORD)
  9. db = client["10x"]
  10. collection = db["consensus_sequences"]
  11.  
  12. AA_CODON_TABLE = { 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L',
  13. 'TCT': 'S', 'TCC': 'S', 'TCA': 'S', 'TCG': 'S',
  14. 'TAT': 'Y', 'TAC': 'Y', 'TGT': 'C', 'TGC': 'C',
  15. 'TGG': 'W', 'CTT': 'L', 'CTC': 'L', 'CTA': 'L',
  16. 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 'CCA': 'P',
  17. 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q',
  18. 'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R',
  19. 'CGG': 'R', 'ATT': 'I', 'ATC': 'I', 'ATA': 'I',
  20. 'ATG': 'M', 'ACT': 'T', 'ACC': 'T', 'ACA': 'T',
  21. 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 'AAA': 'K',
  22. 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'R',
  23. 'AGG': 'R', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V',
  24. 'GTG': 'V', 'GCT': 'A', 'GCC': 'A', 'GCA': 'A',
  25. 'GCG': 'A', 'GAT': 'D', 'GAC': 'D', 'GAA': 'E',
  26. 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 'GGA': 'G',
  27. 'GGG': 'G', 'TAA': '_', 'TAG': '_', 'TGA': '_'
  28. }
  29. lightChains = ["VL", "VK"]
  30. heavyChains = ["VH"]
  31.  
  32. def run_pyir(filename, outfile):
  33. call(['python2.7', 'PyIR/bin/pyir', '-d', 'PyIR/pyir_data', filename, '-o', outfile])
  34. call(['gzip', '-d', outfile + '.json.gz'])
  35. return outfile.split('.')[0] + '.json'
  36.  
  37.  
  38. def create_mongo_object(csv_info, json_info, consensus_id):
  39. return {
  40. 'total_occurrences': 1,
  41. 'consensus_id': consensus_id,
  42. 'metadata': csv_info,
  43. 'analysis': json_info
  44. }
  45.  
  46.  
  47. def read_csv(csvfile, sample_id, run_id):
  48. db_dict = {}
  49. csv_info = {}
  50.  
  51. with open(csvfile, 'r') as fin:
  52. reader = csv.DictReader(fin)
  53. for row in reader:
  54. if row['productive'] != 'True':
  55. continue
  56.  
  57. if row['clonotype_id'] not in db_dict and row['clonotype_id'] != "None":
  58. db_dict[row['clonotype_id']] = {
  59. 'run_id': run_id,
  60. 'sample_id': sample_id,
  61. 'clonotype_id': row['clonotype_id'],
  62. 'lights': [],
  63. 'heavies': []
  64. }
  65.  
  66. csv_info[row['consensus_id']] = row
  67.  
  68. return csv_info, db_dict
  69.  
  70.  
  71. def read_pyir_json(jsonfile, csv_info, db_dict):
  72. with open(jsonfile, 'r') as fin:
  73. for line in fin:
  74. j = json.loads(line.strip(), 'utf-8')
  75. if j[u"Sequence ID"] not in csv_info or j[u"Sequence ID"] == "None" or u"NT-Trimmed" not in j:
  76. continue
  77.  
  78. csv_item = csv_info[j[u"Sequence ID"]]
  79. if csv_item['clonotype_id'] == "None":
  80. continue
  81.  
  82. insert = True
  83. if j[u"Chain type"] in lightChains:
  84. for item in db_dict[csv_item['clonotype_id']]['lights']:
  85. if item['analysis']['NT-Trimmed'] == j['NT-Trimmed']:
  86. item['total_occurrences'] += 1
  87. insert = False
  88.  
  89. if insert and csv_item['consensus_id'] != "None":
  90. j['consensus_id'] = csv_item['consensus_id']
  91. db_dict[csv_item['clonotype_id']]['lights'].append(create_mongo_object(csv_item, j, csv_item['consensus_id']))
  92. elif j[u"Chain type"] in heavyChains:
  93. for item in db_dict[csv_item['clonotype_id']]['heavies']:
  94. if item['analysis']['NT-Trimmed'] == j['NT-Trimmed']:
  95. item['total_occurrences'] += 1
  96. insert = False
  97.  
  98. if insert and csv_item['consensus_id'] != "None":
  99. j['consensus_id'] = csv_item['consensus_id']
  100. db_dict[csv_item['clonotype_id']]['heavies'].append(create_mongo_object(csv_item, j, csv_item['consensus_id']))
  101.  
  102.  
  103. def add_dict_to_database(d, run_id, sample_id):
  104. #for key in d:
  105. collection.insert_many(list(d.values()))
  106.  
  107.  
  108. if __name__ == "__main__":
  109. parser = argparse.ArgumentParser(description='Reads 10X consensus FASTA and CSV files and imports them into the vacnode11 database')
  110. parser.add_argument('csv', help='10X Annotated Consensus CSV file')
  111. parser.add_argument('fasta', help="10X Consensus FASTA file")
  112. parser.add_argument('run_id', help="Run ID")
  113. parser.add_argument('sample_id', help="Sample ID")
  114. parser.add_argument('--json_file', help='PyIR JSON file to use with data (NOTE: Activating this option skips PyIR analysis step)')
  115. args = parser.parse_args()
  116.  
  117. csv_info, db_dict = read_csv(args.csv, args.sample_id, args.run_id)
  118.  
  119. if args.json_file:
  120. json_file = args.json_file
  121. else:
  122. pyir_output = args.run_id + '_' + args.sample_id
  123. json_file = run_pyir(args.fasta, pyir_output)
  124.  
  125. read_pyir_json(json_file, csv_info, db_dict)
  126.  
  127. add_dict_to_database(db_dict, args.run_id, args.sample_id)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement