Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import argparse
- import os
- from py2neo import Graph
- import mysql.connector
- import parser
# Neo4j credentials for the data-portal graph. The NEO4J_USER / NEO4J_PASSWORD
# environment variables take precedence; the literals are the previously
# hard-coded values, kept as fallbacks so existing setups keep working.
user = os.environ.get("NEO4J_USER", "neo4j")
pwd = os.environ.get("NEO4J_PASSWORD", "neo5j")

# Shared py2neo handle used by load_neo4j_data(); created at import time.
DATAPORTAL_DB = Graph(
    "bolt://dataportal.europdx.eu:7687",
    auth=(user, pwd),
    secure=True,
)
def load_neo4j_data():
    """Fetch copy-number and mutation records from the data-portal graph.

    Returns:
        A ``(cna_list, mut_list)`` tuple.

        ``cna_list`` rows are ``[model_id, value, entrez_gene_id, kind]``.
        ``kind`` is ``"gistic"`` when ``value`` is the integer (-2..2) derived
        from ``copyNumberStatus``, or ``"log2CNA"`` when ``value`` is the
        ``cnaLog2RCNA`` property.

        ``mut_list`` rows are ``[model_id, hugo_symbol, entrez_gene_id,
        chromosome, start_position, consequence, amino_acid_change]``.
    """
    # Columns: 0 = sourcePdxId, 1 = cnaLog2RCNA, 2 = copyNumberStatus,
    # 3 = ncbiGeneId (scalars, in the order of the RETURN clause).
    pdxmodels_cna = DATAPORTAL_DB.run(
        "MATCH(mc:ModelCreation)--(s:Sample)--(mo:MolecularCharacterization)--(ma:MarkerAssociation)--(m:Marker) "
        "WHERE (mo.type=\"copy number alteration\") RETURN mc.sourcePdxId, ma.cnaLog2RCNA, ma.copyNumberStatus, "
        "m.ncbiGeneId")
    # Columns: 0 = ModelCreation node, 1 = MarkerAssociation node, 2 = Marker node.
    pdxmodels_mut = DATAPORTAL_DB.run(
        "MATCH(mc:ModelCreation)--(s:Sample)--(mo:MolecularCharacterization)--(ma:MarkerAssociation)--(m:Marker) "
        "WHERE (mo.type=\"mutation\") RETURN mc, ma, m")

    # Lower-cased copy-number status -> GISTIC-style integer.
    gistic_by_status = {
        "amplification": 2,
        "gain": 1,
        "normal": 0,
        "heterozygous loss": -1,
        "homozygous deletion": -2,
    }

    # ========== CNA =============
    print("CNA")
    cna_list = []  # [modelID, valueOf, entrezID, kind]
    for cna in pdxmodels_cna:
        model_id, log2_value, status, entrez_id = cna[0], cna[1], cna[2], cna[3]

        gistic_val = gistic_by_status.get(status.lower()) if status else None
        # Compare against None rather than truthiness: 0 ("normal") is a
        # valid GISTIC value and must not be dropped.
        if gistic_val is not None:
            cna_list.append([model_id, gistic_val, entrez_id, "gistic"])

        # Likewise keep a log2 ratio of exactly 0; only skip missing/blank values.
        if log2_value is not None and log2_value != "":
            cna_list.append([model_id, log2_value, entrez_id, "log2CNA"])
    # ========== CNA =============

    # ========== Mutation =============
    print("MUT")
    mut_list = [
        [
            mut[0]["sourcePdxId"],  # Model ID
            mut[2]["hgncSymbol"],  # hugo
            mut[2]["ncbiGeneId"],  # entrez
            mut[1]["chromosome"],
            mut[1]['seq_start_position'],
            mut[1]["consequence"],
            mut[1]["amino_acid_change"],
        ]
        for mut in pdxmodels_mut
    ]
    # ========== Mutation =============

    return cna_list, mut_list
def import_SQL_cna(cna_data, mydb, cursor):
    """Insert CNA rows into the ``CNA`` table, creating StableIDs as needed.

    Args:
        cna_data: Iterable of ``[model_id, value, entrez_gene_id, kind]`` rows;
            ``kind`` is matched against ``StableID.stable_id``.
        mydb: Database connection; committed once after all rows are written.
        cursor: Cursor used for every statement.
    """
    stable_id_command = (
        "SELECT StableID.id From StableID "
        "JOIN PDXmodel ON PDXmodel.id=StableID.pdxmodel_ID "
        "WHERE PDXmodel.pdxmodel_id=%s AND StableID.stable_id=%s"
    )
    insert_cna_command = "INSERT INTO CNA (stable_id_ID, entrez_gene_id, value_of) VALUES (%s, %s, %s)"

    print("import CNA")
    for cna in cna_data:
        model_id, value, entrez_id, kind = cna[0], cna[1], cna[2], cna[3]

        cursor.execute(stable_id_command, (model_id, kind))
        rows = cursor.fetchall()
        # An empty result means no StableID exists yet; indexing it directly
        # would raise IndexError before the creation branch could run.
        stable_id_id = rows[0][0] if rows else None

        if not stable_id_id:
            print("not for " + model_id)
            stable_id_id = parser.create_stable(model_id, kind, cursor)

        cursor.execute(insert_cna_command, (stable_id_id, entrez_id, value))
    print("import CNA done")
    mydb.commit()
def import_SQL_mut(mut_data, mydb, cursor, ncbi_build=None):
    """Insert mutation rows into the ``Mutation`` table.

    Args:
        mut_data: Iterable of ``[model_id, hugo_symbol, entrez_gene_id,
            chromosome, start_position, variant_classification, HGVSp_Short]``
            rows.
        mydb: Database connection; committed once after all rows are written.
        cursor: Cursor used for every statement.
        ncbi_build: Value stored in the ``ncbi_build`` column of every row.
            Defaults to ``None`` so callers that omit it still work.
    """
    pdxmodel_in_db_command = "SELECT * FROM PDXmodel WHERE PDXmodel.pdxmodel_id=%s"
    insert_mutation_command = (
        "INSERT INTO Mutation (pdxmodel_ID, hugo_symbol, entrez_gene_id, chromosome, "
        "start_position, variant_classification, HGVSp_Short, ncbi_build) "
        "VALUES (%s, %s, %s, %s, %s, %s, %s, %s)"
    )

    print("import mut")
    for mut in mut_data:
        model_id = mut[0]
        cursor.execute(pdxmodel_in_db_command, (model_id,))
        result = cursor.fetchall()
        if not result:
            # Without a PDXmodel row there is no id to attach the mutation to;
            # skip it instead of failing the whole import with IndexError.
            print("no PDXmodel for " + str(model_id))
            continue
        pdxmodel_database_id = result[0][0]

        # A blank gene id is stored as NULL rather than an empty string.
        ncbi_gene_id = None if mut[2] == "" else mut[2]

        cursor.execute(
            insert_mutation_command,
            (pdxmodel_database_id, mut[1], ncbi_gene_id, mut[3], mut[4], mut[5], mut[6], ncbi_build),
        )
    print("import mut done")
    mydb.commit()
def main():
    """Copy CNA and mutation data from Neo4j into the MySQL database.

    The target host and database name come from ``-host`` / ``-database``.
    The ``HOST`` and ``DATABASE`` environment variables, when set, override
    the command-line values. ``-ncbi_build`` is optional and is passed
    through to ``import_SQL_mut``.
    """
    # Named arg_parser so it does not shadow the imported ``parser`` module.
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("-host", help="database host")
    arg_parser.add_argument("-database", help="database name")
    arg_parser.add_argument(
        "-ncbi_build",
        default=None,
        help="value stored in the ncbi_build column of imported mutations",
    )
    args = arg_parser.parse_args()

    USER = 'dhuser'
    PASSWD = 'dhpwd'
    # Environment values win over command-line arguments.
    HOST = os.environ.get('HOST', args.host)
    DATABASE = os.environ.get('DATABASE', args.database)

    mydb = mysql.connector.connect(
        host=HOST,
        user=USER,
        passwd=PASSWD,
        db=DATABASE
    )
    try:
        cursor = mydb.cursor(buffered=True)
        try:
            mydb.autocommit = True

            cnas, muts = load_neo4j_data()
            import_SQL_cna(cnas, mydb, cursor)
            # import_SQL_mut takes the build as its fourth argument; it was
            # previously omitted here.
            import_SQL_mut(muts, mydb, cursor, args.ncbi_build)
            print()
        finally:
            cursor.close()
    finally:
        # Release the connection even if the import fails part-way.
        mydb.close()


if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement