Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import os
- import json
- import csv
- import argparse
- from subprocess import call
- from pymongo import MongoClient
# MongoDB connection shared by the whole script.
#
# The credentials were previously referenced as bare USERNAME / PASSWORD names
# that were never defined, so importing the module raised NameError. They are
# now read from the MONGO_USERNAME / MONGO_PASSWORD environment variables; a
# variable that is unset yields None, which is passed through to MongoClient
# unchanged (NOTE(review): confirm the deployment accepts unauthenticated
# connections if these are left unset).
USERNAME = os.environ.get('MONGO_USERNAME')
PASSWORD = os.environ.get('MONGO_PASSWORD')

client = MongoClient(host='localhost', port=27017, username=USERNAME, password=PASSWORD)
db = client["10x"]
# Collection that add_dict_to_database() writes the per-clonotype documents to.
collection = db["consensus_sequences"]
# Codon -> one-letter amino-acid lookup covering all 64 DNA codons.
# Stop codons (TAA, TAG, TGA) are mapped to '_'.
# Each row pairs four codons with the four residues they encode, in order.
AA_CODON_TABLE = {
    codon: residue
    for codons, residues in (
        ('TTT TTC TTA TTG', 'FFLL'),
        ('TCT TCC TCA TCG', 'SSSS'),
        ('TAT TAC TGT TGC', 'YYCC'),
        ('TGG CTT CTC CTA', 'WLLL'),
        ('CTG CCT CCC CCA', 'LPPP'),
        ('CCG CAT CAC CAA', 'PHHQ'),
        ('CAG CGT CGC CGA', 'QRRR'),
        ('CGG ATT ATC ATA', 'RIII'),
        ('ATG ACT ACC ACA', 'MTTT'),
        ('ACG AAT AAC AAA', 'TNNK'),
        ('AAG AGT AGC AGA', 'KSSR'),
        ('AGG GTT GTC GTA', 'RVVV'),
        ('GTG GCT GCC GCA', 'VAAA'),
        ('GCG GAT GAC GAA', 'ADDE'),
        ('GAG GGT GGC GGA', 'EGGG'),
        ('GGG TAA TAG TGA', 'G___'),
    )
    for codon, residue in zip(codons.split(), residues)
}
# "Chain type" values that read_pyir_json() files under a clonotype's
# 'lights' list and 'heavies' list respectively.
lightChains, heavyChains = ["VL", "VK"], ["VH"]
def run_pyir(filename, outfile):
    """Run PyIR on ``filename`` and return the path of the decompressed JSON.

    Two external commands are executed in order:

    1. ``python2.7 PyIR/bin/pyir -d PyIR/pyir_data <filename> -o <outfile>``
    2. ``gzip -d <outfile>.json.gz``

    Args:
        filename: Path of the input file handed to PyIR.
        outfile: Output prefix handed to PyIR via ``-o``.

    Returns:
        ``outfile + '.json'``, i.e. the file left behind by ``gzip -d``.

    Raises:
        RuntimeError: If either command exits with a non-zero status.
    """
    commands = (
        ['python2.7', 'PyIR/bin/pyir', '-d', 'PyIR/pyir_data', filename, '-o', outfile],
        ['gzip', '-d', outfile + '.json.gz'],
    )
    for command in commands:
        status = call(command)
        if status != 0:
            # Fail here rather than later with a confusing "file not found"
            # (or, worse, by silently reading a stale JSON from an earlier run).
            raise RuntimeError(
                'Command failed with exit status %d: %s' % (status, ' '.join(command))
            )
    # gzip strips only the ".gz" suffix, so the result is always
    # "<outfile>.json". Splitting on the first '.' returned the wrong path
    # whenever the run or sample ID contained a dot.
    return outfile + '.json'
def create_mongo_object(csv_info, json_info, consensus_id):
    """Build the per-sequence document stored under a clonotype.

    Args:
        csv_info: CSV row for the sequence; stored as-is under 'metadata'.
        json_info: Parsed PyIR record; stored as-is under 'analysis'.
        consensus_id: Consensus identifier of the sequence.

    Returns:
        A new dict with 'total_occurrences' initialised to 1.
    """
    document = dict(total_occurrences=1, consensus_id=consensus_id)
    document['metadata'] = csv_info
    document['analysis'] = json_info
    return document
def read_csv(csvfile, sample_id, run_id):
    """Load the productive rows of an annotated consensus CSV.

    Rows whose 'productive' column is not exactly 'True' are ignored.

    Args:
        csvfile: Path of the CSV file; it must provide the 'productive',
            'clonotype_id' and 'consensus_id' columns.
        sample_id: Sample identifier copied into every clonotype document.
        run_id: Run identifier copied into every clonotype document.

    Returns:
        A ``(csv_info, db_dict)`` tuple. ``csv_info`` maps each productive
        row's consensus_id to the row itself. ``db_dict`` maps each
        clonotype_id other than "None" to a skeleton document with empty
        'lights' and 'heavies' lists.
    """
    rows_by_consensus = {}
    clonotype_docs = {}
    with open(csvfile, 'r') as handle:
        for record in csv.DictReader(handle):
            if record['productive'] != 'True':
                continue
            clonotype = record['clonotype_id']
            # Create the skeleton only once per real clonotype.
            if clonotype != "None" and clonotype not in clonotype_docs:
                clonotype_docs[clonotype] = {
                    'run_id': run_id,
                    'sample_id': sample_id,
                    'clonotype_id': clonotype,
                    'lights': [],
                    'heavies': [],
                }
            # Every productive row is kept, including "None" clonotypes.
            rows_by_consensus[record['consensus_id']] = record
    return rows_by_consensus, clonotype_docs
def read_pyir_json(jsonfile, csv_info, db_dict):
    """Merge PyIR results into the clonotype documents of ``db_dict``.

    ``jsonfile`` is read line by line, one JSON object per line. A record is
    skipped when its "Sequence ID" is "None" or absent from ``csv_info``, when
    it has no "NT-Trimmed" key, when the matching CSV row has clonotype_id
    "None", or when its "Chain type" is neither a light nor a heavy chain.

    For the remaining records, the clonotype's 'lights' or 'heavies' list is
    searched for an entry with the same "NT-Trimmed" value. Each match has its
    'total_occurrences' incremented; if there is no match, a new entry built
    by create_mongo_object() is appended.

    Args:
        jsonfile: Path of the PyIR output, one JSON object per line.
        csv_info: Mapping of consensus_id to CSV row, as built by read_csv().
        db_dict: Mapping of clonotype_id to clonotype document; mutated in
            place.

    Returns:
        None.
    """
    with open(jsonfile, 'r') as fin:
        for line in fin:
            line = line.strip()
            if not line:
                # Tolerate blank or trailing empty lines instead of crashing
                # the JSON parser.
                continue
            # The encoding used to be passed positionally, which raises
            # TypeError on Python 3.6+ (and the parameter is gone in 3.9).
            record = json.loads(line)

            sequence_id = record[u"Sequence ID"]
            if (sequence_id not in csv_info
                    or sequence_id == "None"
                    or u"NT-Trimmed" not in record):
                continue

            csv_item = csv_info[sequence_id]
            clonotype_id = csv_item['clonotype_id']
            if clonotype_id == "None":
                continue

            # Pick the list this chain belongs to; other chain types are
            # ignored.
            chain_type = record[u"Chain type"]
            if chain_type in lightChains:
                bucket = db_dict[clonotype_id]['lights']
            elif chain_type in heavyChains:
                bucket = db_dict[clonotype_id]['heavies']
            else:
                continue

            already_seen = False
            for entry in bucket:
                if entry['analysis']['NT-Trimmed'] == record['NT-Trimmed']:
                    entry['total_occurrences'] += 1
                    already_seen = True

            if not already_seen and csv_item['consensus_id'] != "None":
                record['consensus_id'] = csv_item['consensus_id']
                bucket.append(
                    create_mongo_object(csv_item, record, csv_item['consensus_id'])
                )
def add_dict_to_database(d, run_id, sample_id):
    """Insert every document held in ``d`` into the Mongo collection.

    Args:
        d: Mapping whose values are the documents to insert.
        run_id: Unused; kept so existing callers keep working.
        sample_id: Unused; kept so existing callers keep working.

    Returns:
        None.
    """
    documents = list(d.values())
    if not documents:
        # insert_many() rejects an empty list, so a run with no productive
        # clonotypes is a no-op rather than a crash.
        return
    collection.insert_many(documents)
if __name__ == "__main__":
    # Command-line entry point: read the CSV, obtain PyIR JSON (either given
    # or freshly computed from the FASTA), merge both and store the result.
    cli = argparse.ArgumentParser(description='Reads 10X consensus FASTA and CSV files and imports them into the vacnode11 database')
    # Positional arguments, registered in the order they appear on the
    # command line.
    for arg_name, arg_help in (
        ('csv', '10X Annotated Consensus CSV file'),
        ('fasta', "10X Consensus FASTA file"),
        ('run_id', "Run ID"),
        ('sample_id', "Sample ID"),
    ):
        cli.add_argument(arg_name, help=arg_help)
    cli.add_argument('--json_file', help='PyIR JSON file to use with data (NOTE: Activating this option skips PyIR analysis step)')
    options = cli.parse_args()

    csv_rows, clonotype_docs = read_csv(options.csv, options.sample_id, options.run_id)

    # A JSON file supplied by the user takes precedence; PyIR is only run
    # when none was given.
    pyir_json = options.json_file or run_pyir(
        options.fasta, options.run_id + '_' + options.sample_id
    )

    read_pyir_json(pyir_json, csv_rows, clonotype_docs)
    add_dict_to_database(clonotype_docs, options.run_id, options.sample_id)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement