Advertisement
Guest User

Untitled

a guest
Sep 17th, 2019
127
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 4.75 KB | None | 0 0
  1. import argparse
  2. import os
  3.  
  4. from py2neo import Graph
  5. import mysql.connector
  6. import parser
  7.  
  # Neo4j credentials for the data portal. The environment variables let a
  # deployment override them without editing the source; the fallbacks are the
  # values this script has always used.
  user = os.environ.get("NEO4J_USER", "neo4j")
  pwd = os.environ.get("NEO4J_PASSWORD", "neo5j")
  # Shared graph handle used by load_neo4j_data(); created at import time.
  DATAPORTAL_DB = Graph("bolt://dataportal.europdx.eu:7687", auth=(user, pwd), secure=True)
  11.  
  # Numeric score stored for each (lower-cased) copy number status.
  _GISTIC_BY_STATUS = {
      "amplification": 2,
      "gain": 1,
      "normal": 0,
      "heterozygous loss": -1,
      "homozygous deletion": -2,
  }


  def _cna_rows(record):
      """Return the CNA import rows derived from one CNA query record.

      The record columns follow the RETURN clause of the CNA query:
      0 = mc.sourcePdxId, 1 = ma.cnaLog2RCNA, 2 = ma.copyNumberStatus,
      3 = m.ncbiGeneId.
      """
      model_id = record[0]
      log2_value = record[1]
      status = record[2]
      entrez_id = record[3]

      rows = []
      gistic_val = _GISTIC_BY_STATUS.get(status.lower()) if status else None
      # Compare against None: 0 ("normal") is a valid score and must be kept.
      if gistic_val is not None:
          rows.append([model_id, gistic_val, entrez_id, "gistic"])
      # A log2 value of 0 is kept as well; only missing/empty values are skipped.
      if log2_value is not None and log2_value != "":
          rows.append([model_id, log2_value, entrez_id, "log2CNA"])
      return rows


  def load_neo4j_data():
      """Fetch copy number and mutation data from the data portal graph.

      Returns:
          A tuple ``(cna_list, mut_list)``.

          ``cna_list`` holds ``[model id, value, entrez id, kind]`` rows, where
          ``kind`` is ``"gistic"`` (value is the score mapped from the copy
          number status) or ``"log2CNA"`` (value is ``ma.cnaLog2RCNA``).

          ``mut_list`` holds ``[model id, hugo symbol, entrez id, chromosome,
          start position, consequence, amino acid change]`` rows.
      """
      pdxmodels_cna = DATAPORTAL_DB.run(
          "MATCH(mc:ModelCreation)--(s:Sample)--(mo:MolecularCharacterization)--(ma:MarkerAssociation)--(m:Marker) "
          'WHERE (mo.type="copy number alteration") RETURN mc.sourcePdxId, ma.cnaLog2RCNA, ma.copyNumberStatus, '
          "m.ncbiGeneId")
      pdxmodels_mut = DATAPORTAL_DB.run(
          "MATCH(mc:ModelCreation)--(s:Sample)--(mo:MolecularCharacterization)--(ma:MarkerAssociation)--(m:Marker) "
          'WHERE (mo.type="mutation") RETURN mc, ma, m')

      # ========== CNA =============
      print("CNA")
      cna_list = []  # [modelID, valueOf, entrezID, kind]
      for cna in pdxmodels_cna:
          cna_list.extend(_cna_rows(cna))

      # ========== Mutation =============
      # mut[0] == ModelCreation, mut[1] == MarkerAssociation, mut[2] == Marker
      print("MUT")
      mut_list = [
          [
              mut[0]["sourcePdxId"],  # Model ID
              mut[2]["hgncSymbol"],   # hugo
              mut[2]["ncbiGeneId"],   # entrez
              mut[1]["chromosome"],
              mut[1]["seq_start_position"],
              mut[1]["consequence"],
              mut[1]["amino_acid_change"],
          ]
          for mut in pdxmodels_mut
      ]

      return cna_list, mut_list
  66.  
  67.  
  def import_SQL_cna(cna_data, mydb, cursor):
      """Insert CNA rows into the ``CNA`` table and commit.

      Args:
          cna_data: iterable of ``[model id, value, entrez id, stable id]``
              rows, as produced by ``load_neo4j_data``.
          mydb: database connection; ``commit()`` is called once at the end.
          cursor: cursor used for every query.

      For each row the matching ``StableID.id`` is looked up. When none exists
      it is created through ``parser.create_stable`` (defined outside this
      file; presumably it returns the new StableID id — confirm).
      """
      stable_id_command = "SELECT StableID.id From StableID JOIN PDXmodel ON PDXmodel.id=StableID.pdxmodel_ID WHERE PDXmodel.pdxmodel_id=%s AND StableID.stable_id=%s"
      insert_cna_command = "INSERT INTO CNA (stable_id_ID, entrez_gene_id, value_of) VALUES (%s, %s, %s)"
      print("import CNA")
      for cna in cna_data:
          model_id, value, entrez_id, stable_name = cna[0], cna[1], cna[2], cna[3]

          cursor.execute(stable_id_command, (model_id, stable_name))
          # fetchall() drains the result set; an empty list means no StableID
          # row exists yet, which must lead to creation rather than IndexError.
          found = cursor.fetchall()
          stable_id_id = found[0][0] if found else None

          if not stable_id_id:
              print(f"not for {model_id}")
              stable_id_id = parser.create_stable(model_id, stable_name, cursor)

          cursor.execute(insert_cna_command, (stable_id_id, entrez_id, value))
      print("import CNA done")
      mydb.commit()
  83.  
  84.  
  def import_SQL_mut(mut_data, mydb, cursor, ncbi_build=None):
      """Insert mutation rows into the ``Mutation`` table and commit.

      Args:
          mut_data: iterable of ``[model id, hugo symbol, entrez id,
              chromosome, start position, consequence, amino acid change]``
              rows, as produced by ``load_neo4j_data``.
          mydb: database connection; ``commit()`` is called once at the end.
          cursor: cursor used for every query.
          ncbi_build: value stored in ``Mutation.ncbi_build`` for every row.
              Defaults to ``None`` so callers that do not know the build can
              still import.

      Rows whose model is not present in ``PDXmodel`` are reported on stdout
      and skipped, because there is no model id to attach them to.
      """
      # Only the primary key is needed, so select it explicitly instead of
      # relying on it being the first column of ``SELECT *``.
      pdxmodel_in_db_command = "SELECT PDXmodel.id FROM PDXmodel WHERE PDXmodel.pdxmodel_id=%s"
      insert_mutation_command = "INSERT INTO Mutation (pdxmodel_ID, hugo_symbol, entrez_gene_id, chromosome, start_position, variant_classification, HGVSp_Short, ncbi_build) VALUES (%s, %s, %s, %s, %s, %s, %s, %s)"
      print("import mut")
      for mut in mut_data:
          cursor.execute(pdxmodel_in_db_command, (mut[0],))
          result = cursor.fetchall()
          if not result:
              print(f"model not in database, mutation skipped: {mut[0]}")
              continue
          pdxmodel_database_id = result[0][0]

          # An empty gene id is stored as NULL rather than as an empty string.
          ncbi_gene_id = None if mut[2] == "" else mut[2]

          cursor.execute(
              insert_mutation_command,
              (pdxmodel_database_id, mut[1], ncbi_gene_id, mut[3], mut[4], mut[5], mut[6], ncbi_build),
          )
      print("import mut done")
      mydb.commit()
  99.  
  def main():
      """Copy CNA and mutation data from the Neo4j data portal into MySQL.

      Command line options:
          -host        MySQL host
          -database    MySQL database name
          -ncbi_build  genome build stored with every imported mutation

      The ``HOST`` and ``DATABASE`` environment variables, when set, take
      precedence over the corresponding command line options.
      """
      # Named arg_parser so it does not shadow the imported ``parser`` module.
      arg_parser = argparse.ArgumentParser()
      arg_parser.add_argument("-host", help="database host")
      arg_parser.add_argument("-database", help="database name")
      arg_parser.add_argument(
          "-ncbi_build",
          default=None,
          help="NCBI genome build recorded for the imported mutations",
      )
      args = arg_parser.parse_args()

      host = os.environ.get("HOST", args.host)
      database = os.environ.get("DATABASE", args.database)
      # NOTE(review): credentials are hard-coded; consider moving them to
      # configuration.
      db_user = "dhuser"
      db_passwd = "dhpwd"

      mydb = mysql.connector.connect(
          host=host,
          user=db_user,
          passwd=db_passwd,
          db=database,
      )
      try:
          cursor = mydb.cursor(buffered=True)
          mydb.autocommit = True
          try:
              cnas, muts = load_neo4j_data()
              import_SQL_cna(cnas, mydb, cursor)
              # import_SQL_mut needs the build as its fourth argument.
              import_SQL_mut(muts, mydb, cursor, args.ncbi_build)
          finally:
              cursor.close()
      finally:
          # Release the connection even when an import step fails.
          mydb.close()
      print()


  if __name__ == "__main__":
      main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement