Advertisement
LuciaPrieto

Untitled

Jun 16th, 2018
387
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
MySQL 22.77 KB | None | 0 0
  1. delimiter @@
  2.  
  3. DROP PROCEDURE IF EXISTS Build_Bio_Similarity @@
  4.  
  5. CREATE PROCEDURE Build_Bio_Similarity()
  6. BEGIN
  7.  
  8. -- ----------------------------------------------------------------
  9. -- -- DisGenCoef                                                 --
  10. -- ----------------------------------------------------------------
  11.  
  12. -- Suma de genes por enfermedad
  13. DROP TABLE IF EXISTS tmp_disgentot;
  14. CREATE TABLE tmp_disgentot AS
  15. SELECT disease_cui, COUNT(*) AS gentot
  16.   FROM disease_gene
  17.  GROUP BY disease_cui;
  18. CREATE INDEX ix_tmp_disgentot ON tmp_disgentot ( disease_cui );
  19.  
  20. -- Combinaciones de enfermedades con genes coincidentes sin agrupar
  21. DROP TABLE IF EXISTS tmp_disgencomb;
  22. CREATE TABLE tmp_disgencomb AS
  23. SELECT dg1.disease_cui AS id_dis1, dg2.disease_cui AS id_dis2
  24.   FROM disease_gene dg1 INNER JOIN disease_gene dg2
  25.     ON ( dg1.gene_id = dg2.gene_id AND dg2.disease_cui > dg1.disease_cui );
  26. CREATE INDEX ix_tmp_disgencomb ON tmp_disgencomb ( id_dis1, id_dis2 );
  27.  
  28. -- Coeficientes de los vectores de enfermedades relacionadas por genes
  29. DROP TABLE IF EXISTS tmp_disgencoef;
  30. CREATE TABLE tmp_disgencoef
  31.        ( id_dis1 CHAR(8)
  32.         ,id_dis2 CHAR(8)
  33.         ,gen_cos FLOAT
  34.         ,gen_jac FLOAT
  35.         ,gen_dic FLOAT )
  36.     AS
  37. SELECT dgvc.id_dis1
  38.       ,dgvc.id_dis2
  39.       ,dgvc.gentot / (SQRT(dgv1.gentot) * SQRT(dgv2.gentot)) AS gen_cos
  40.       ,dgvc.gentot / (dgv1.gentot + dgv2.gentot - dgvc.gentot) AS gen_jac
  41.       ,dgvc.gentot * 2 / (dgv1.gentot + dgv2.gentot) AS gen_dic
  42.   FROM (SELECT id_dis1, id_dis2, count(*) AS gentot
  43.           FROM tmp_disgencomb
  44.          GROUP BY id_dis1, id_dis2 ) AS dgvc
  45.        ,tmp_disgentot AS dgv1
  46.        ,tmp_disgentot AS dgv2
  47.  WHERE dgvc.id_dis1 = dgv1.disease_cui
  48.    AND dgvc.id_dis2 = dgv2.disease_cui;
  49. CREATE INDEX ix_tmp_disgencoef ON tmp_disgencoef ( id_dis1, id_dis2 );
  50.  
  51. -- ----------------------------------------------------------------
  52. -- -- DisProtCoef                                                --
  53. -- ----------------------------------------------------------------
  54.  
  55. -- Relacion proteinas con enfermedades a traves de genes
  56. DROP TABLE IF EXISTS tmp_disprot;
  57. CREATE TABLE tmp_disprot AS
  58. SELECT DISTINCT disease_cui, prot_id
  59.   FROM disease_gene gd INNER JOIN encodes en
  60.     ON ( gd.gene_id = en.gene_id );
  61. CREATE INDEX ix_tmp_disprot1 ON tmp_disprot ( disease_cui, prot_id );
  62. CREATE INDEX ix_tmp_disprot2 ON tmp_disprot ( prot_id, disease_cui );
  63.  
  64. -- Suma de proteinas por enfermedad
  65. DROP TABLE IF EXISTS tmp_disprottot;
  66. CREATE TABLE tmp_disprottot AS
  67. SELECT disease_cui, COUNT(*) AS prottot
  68.   FROM tmp_disprot
  69.  GROUP BY disease_cui;
  70. CREATE INDEX ix_tmp_disprottot ON tmp_disprottot ( disease_cui );
  71.  
  72. -- Combinaciones de enfermedades con proteinas coincidentes sin agrupar
  73. DROP TABLE IF EXISTS tmp_disprotcomb;
  74. CREATE TABLE tmp_disprotcomb AS
  75. SELECT dp1.disease_cui AS id_dis1, dp2.disease_cui AS id_dis2
  76.   FROM tmp_disprot dp1 INNER JOIN tmp_disprot dp2
  77.     ON ( dp1.prot_id = dp2.prot_id AND dp2.disease_cui > dp1.disease_cui );
  78. CREATE INDEX ix_tmp_disprotcomb ON tmp_disprotcomb ( id_dis1, id_dis2 );
  79.  
  80. -- Coeficientes de los vectores de enfermedades relacionadas por proteinas
  81. DROP TABLE IF EXISTS tmp_disprotcoef;
  82. CREATE TABLE tmp_disprotcoef
  83.        ( id_dis1 CHAR(8)
  84.         ,id_dis2 CHAR(8)
  85.         ,prot_cos FLOAT
  86.         ,prot_jac FLOAT
  87.         ,prot_dic FLOAT )
  88.     AS
  89. SELECT dpvc.id_dis1
  90.       ,dpvc.id_dis2
  91.       ,dpvc.prottot / (SQRT(dpv1.prottot) * SQRT(dpv2.prottot)) AS prot_cos
  92.       ,dpvc.prottot / (dpv1.prottot + dpv2.prottot - dpvc.prottot) AS prot_jac
  93.       ,dpvc.prottot * 2 / (dpv1.prottot + dpv2.prottot) AS prot_dic
  94.   FROM (SELECT id_dis1, id_dis2, count(*) AS prottot
  95.           FROM tmp_disprotcomb
  96.          GROUP BY id_dis1, id_dis2 ) AS dpvc
  97.        ,tmp_disprottot AS dpv1
  98.        ,tmp_disprottot AS dpv2
  99.  WHERE dpvc.id_dis1 = dpv1.disease_cui
  100.    AND dpvc.id_dis2 = dpv2.disease_cui;
  101. CREATE INDEX ix_tmp_disprotcoef ON tmp_disprotcoef ( id_dis1, id_dis2 );
  102.  
  103. -- ----------------------------------------------------------------
  104. -- -- DisPathCoef                                                --
  105. -- ----------------------------------------------------------------
  106.  
  107. -- Relacion rutas con enfermedades a traves de genes
  108. DROP TABLE IF EXISTS tmp_dispath;
  109. CREATE TABLE tmp_dispath AS
  110. SELECT DISTINCT disease_cui, path_id
  111.   FROM disease_gene gd INNER JOIN gene_pathway gp
  112.     ON ( gd.gene_id = gp.gen_id );
  113. CREATE INDEX ix_tmp_dispath1 ON tmp_dispath ( disease_cui, path_id );
  114. CREATE INDEX ix_tmp_dispath2 ON tmp_dispath ( path_id, disease_cui );
  115.  
  116. -- Suma de rutas por enfermedad
  117. DROP TABLE IF EXISTS tmp_dispathtot;
  118. CREATE TABLE tmp_dispathtot AS
  119. SELECT disease_cui, COUNT(*) AS pathtot
  120.   FROM tmp_dispath
  121.  GROUP BY disease_cui;
  122. CREATE INDEX ix_tmp_dispathtot ON tmp_dispathtot ( disease_cui );
  123.  
  124. -- Combinaciones de enfermedades con rutas coincidentes sin agrupar
  125. DROP TABLE IF EXISTS tmp_dispathcomb;
  126. CREATE TABLE tmp_dispathcomb AS
  127. SELECT dp1.disease_cui AS id_dis1, dp2.disease_cui AS id_dis2
  128.   FROM tmp_dispath dp1 INNER JOIN tmp_dispath dp2
  129.     ON ( dp1.path_id = dp2.path_id AND dp2.disease_cui > dp1.disease_cui );
  130. CREATE INDEX ix_tmp_dispathcomb ON tmp_dispathcomb ( id_dis1, id_dis2 );
  131.  
  132. -- Coeficientes de los vectores de enfermedades relacionadas por rutas
  133. DROP TABLE IF EXISTS tmp_dispathcoef;
  134. CREATE TABLE tmp_dispathcoef
  135.        ( id_dis1 CHAR(8)
  136.         ,id_dis2 CHAR(8)
  137.         ,path_cos FLOAT
  138.         ,path_jac FLOAT
  139.         ,path_dic FLOAT )
  140.     AS
  141. SELECT dpvc.id_dis1
  142.       ,dpvc.id_dis2
  143.       ,dpvc.pathtot / (SQRT(dpv1.pathtot) * SQRT(dpv2.pathtot)) AS path_cos
  144.       ,dpvc.pathtot / (dpv1.pathtot + dpv2.pathtot - dpvc.pathtot) AS path_jac
  145.       ,dpvc.pathtot * 2 / (dpv1.pathtot + dpv2.pathtot) AS path_dic
  146.   FROM (SELECT id_dis1, id_dis2, count(*) AS pathtot
  147.           FROM tmp_dispathcomb
  148.          GROUP BY id_dis1, id_dis2 ) AS dpvc
  149.        ,tmp_dispathtot AS dpv1
  150.        ,tmp_dispathtot AS dpv2
  151.  WHERE dpvc.id_dis1 = dpv1.disease_cui
  152.    AND dpvc.id_dis2 = dpv2.disease_cui;
  153. CREATE INDEX ix_tmp_dispathcoef ON tmp_dispathcoef ( id_dis1, id_dis2 );
  154.  
  155. -- ----------------------------------------------------------------
  156. -- -- DisPPiCoef                                                 --
  157. -- ----------------------------------------------------------------
  158.  
  159. -- Relacion interaccion proteinas con enfermedades a traves de genes
  160. DROP TABLE IF EXISTS tmp_disppi;
  161. CREATE TABLE tmp_disppi AS
  162. SELECT DISTINCT dp1.disease_cui, ppi.id_p1, ppi.id_p2
  163.   FROM p_interacts_with_p ppi
  164.        INNER JOIN edsssdb.tmp_disprot dp1
  165.          ON ( dp1.prot_id = ppi.id_p1 )
  166.        INNER JOIN edsssdb.tmp_disprot dp2
  167.          ON ( dp2.prot_id = ppi.id_p2 AND dp1.disease_cui = dp2.disease_cui);
  168. CREATE INDEX ix_tmp_disppi1 ON tmp_disppi ( id_p1, id_p2, disease_cui );
  169. CREATE INDEX ix_tmp_disppi2 ON tmp_disppi ( disease_cui, id_p1, id_p2 );
  170.  
  171. -- Suma de interaccion proteinas por enfermedad
  172. DROP TABLE IF EXISTS tmp_disppitot;
  173. CREATE TABLE tmp_disppitot AS
  174. SELECT disease_cui, COUNT(*) AS ppitot
  175.   FROM tmp_disppi
  176.  GROUP BY disease_cui;
  177. CREATE INDEX ix_tmp_disppitot ON tmp_disppitot ( disease_cui );
  178.  
  179. -- Combinaciones de enfermedades con interaccion proteinas coincidentes sin agrupar
  180. DROP TABLE IF EXISTS tmp_disppicomb;
  181. CREATE TABLE tmp_disppicomb AS
  182. SELECT dp1.disease_cui AS id_dis1, dp2.disease_cui AS id_dis2
  183.   FROM tmp_disppi dp1 INNER JOIN tmp_disppi dp2
  184.     ON ( dp1.id_p1 = dp2.id_p1 AND dp1.id_p2 = dp2.id_p2 AND dp2.disease_cui > dp1.disease_cui );
  185. CREATE INDEX ix_tmp_disppicomb ON tmp_disppicomb ( id_dis1, id_dis2 );
  186.  
  187. -- Coeficientes de los vectores de enfermedades relacionadas por interaccion proteinas
  188. DROP TABLE IF EXISTS tmp_disppicoef;
  189. CREATE TABLE tmp_disppicoef
  190.        ( id_dis1 CHAR(8)
  191.         ,id_dis2 CHAR(8)
  192.         ,ppi_cos FLOAT
  193.         ,ppi_jac FLOAT
  194.         ,ppi_dic FLOAT )
  195.     AS
  196. SELECT dpvc.id_dis1
  197.       ,dpvc.id_dis2
  198.       ,dpvc.ppitot / (SQRT(dpv1.ppitot) * SQRT(dpv2.ppitot)) AS ppi_cos
  199.       ,dpvc.ppitot / (dpv1.ppitot + dpv2.ppitot - dpvc.ppitot) AS ppi_jac
  200.       ,dpvc.ppitot * 2 / (dpv1.ppitot + dpv2.ppitot) AS ppi_dic
  201.   FROM (SELECT id_dis1, id_dis2, count(*) AS ppitot
  202.           FROM tmp_disppicomb
  203.          GROUP BY id_dis1, id_dis2 ) AS dpvc
  204.        ,tmp_disppitot AS dpv1
  205.        ,tmp_disppitot AS dpv2
  206.  WHERE dpvc.id_dis1 = dpv1.disease_cui
  207.    AND dpvc.id_dis2 = dpv2.disease_cui;
  208. CREATE INDEX ix_tmp_disppicoef ON tmp_disppicoef ( id_dis1, id_dis2 );
  209.  
  210. -- ----------------------------------------------------------------
  211. -- -- BioSimilarity                                              --
  212. -- ----------------------------------------------------------------
  213.  
  214. DROP TABLE IF EXISTS bio_similarity;
  215. CREATE TABLE bio_similarity (
  216.            id_dis1 char(8) NOT NULL
  217.           ,id_dis2 char(8) NOT NULL
  218.           ,S_gen_cos float DEFAULT 0
  219.           ,S_gen_jaccard float DEFAULT 0
  220.           ,S_gen_dice float DEFAULT 0
  221.           ,S_prot_cos float DEFAULT 0
  222.           ,S_prot_jaccard float DEFAULT 0
  223.           ,S_prot_dice float DEFAULT 0
  224.           ,S_path_cos float DEFAULT 0
  225.           ,S_path_jaccard float DEFAULT 0
  226.           ,S_path_dice float DEFAULT 0
  227.           ,S_ppi_cos float DEFAULT 0
  228.           ,S_ppi_jaccard float DEFAULT 0
  229.           ,S_ppi_dice float DEFAULT 0  )
  230.     AS
  231. SELECT  comb.id_dis1 AS id_dis1
  232.        ,comb.id_dis2 AS id_dis2
  233.        ,COALESCE(gen.gen_cos,0) AS S_gen_cos
  234.        ,COALESCE(gen.gen_jac,0) AS S_gen_jaccard
  235.        ,COALESCE(gen.gen_dic,0) AS S_gen_dice
  236.        ,COALESCE(prot.prot_cos,0) AS S_prot_cos
  237.        ,COALESCE(prot.prot_jac,0) AS S_prot_jaccard
  238.        ,COALESCE(prot.prot_dic,0) AS S_prot_dice
  239.        ,COALESCE(path.path_cos,0) AS S_path_cos
  240.        ,COALESCE(path.path_jac,0) AS S_path_jaccard
  241.        ,COALESCE(path.path_dic,0) AS S_path_dice
  242.        ,COALESCE(ppi.ppi_cos,0) AS S_ppi_cos
  243.        ,COALESCE(ppi.ppi_jac,0) AS S_ppi_jaccard
  244.        ,COALESCE(ppi.ppi_dic,0) AS S_ppi_dice
  245.   FROM ( SELECT a.cui AS id_dis1, b.cui AS id_dis2
  246.            FROM (SELECT DISTINCT cui AS cui FROM edsssdb.has_biological_info) a,
  247.                 (SELECT DISTINCT cui AS cui FROM edsssdb.has_biological_info) b
  248.           WHERE b.cui > a.cui ) comb
  249.        LEFT JOIN tmp_disgencoef AS gen
  250.          ON (comb.id_dis1 = gen.id_dis1 AND comb.id_dis2 = gen.id_dis2)
  251.        LEFT JOIN tmp_disprotcoef AS prot
  252.          ON (comb.id_dis1 = prot.id_dis1 AND comb.id_dis2 = prot.id_dis2)
  253.        LEFT JOIN tmp_dispathcoef AS path
  254.          ON (comb.id_dis1 = path.id_dis1 AND comb.id_dis2 = path.id_dis2)
  255.        LEFT JOIN tmp_disppicoef AS ppi
  256.          ON (comb.id_dis1 = ppi.id_dis1 AND comb.id_dis2 = ppi.id_dis2);
  257. ALTER TABLE bio_similarity
  258. ADD PRIMARY KEY (id_dis1, id_dis2);
  259.  
  260. -- ----------------------------------------------------------------
  261. -- -- Housekeeping                                               --
  262. -- ----------------------------------------------------------------
  263.  
  264. DROP TABLE IF EXISTS tmp_disgentot;
  265. DROP TABLE IF EXISTS tmp_disgencomb;
  266. DROP TABLE IF EXISTS tmp_disgencoef;
  267. DROP TABLE IF EXISTS tmp_disprot;
  268. DROP TABLE IF EXISTS tmp_disprottot;
  269. DROP TABLE IF EXISTS tmp_disprotcomb;
  270. DROP TABLE IF EXISTS tmp_disprotcoef;
  271. DROP TABLE IF EXISTS tmp_dispath;
  272. DROP TABLE IF EXISTS tmp_dispathtot;
  273. DROP TABLE IF EXISTS tmp_dispathcomb;
  274. DROP TABLE IF EXISTS tmp_dispathcoef;
  275. DROP TABLE IF EXISTS tmp_disppi;
  276. DROP TABLE IF EXISTS tmp_disppitot;
  277. DROP TABLE IF EXISTS tmp_disppicomb;
  278. DROP TABLE IF EXISTS tmp_disppicoef;
  279.  
  280.  
  281. END;
  282.  
  283. @@
  284.  
  285. delimiter ;
  286. delimiter @@
  287.  
  288. DROP PROCEDURE IF EXISTS Build_Bio_Similarity() @@
  289.  
  290. CREATE PROCEDURE Build_Bio_Similarity()
  291. BEGIN
  292.  
  293. -- ----------------------------------------------------------------
  294. -- -- DisGenCoef                                                 --
  295. -- ----------------------------------------------------------------
  296.  
  297. -- Suma de genes por enfermedad
  298. DROP TABLE IF EXISTS tmp_disgentot;
  299. CREATE TABLE tmp_disgentot AS
  300. SELECT disease_cui, COUNT(*) AS gentot
  301.   FROM disease_gene
  302.  GROUP BY disease_cui;
  303. CREATE INDEX ix_tmp_disgentot ON tmp_disgentot ( disease_cui );
  304.  
  305. -- Combinaciones de enfermedades con genes coincidentes sin agrupar
  306. DROP TABLE IF EXISTS tmp_disgencomb;
  307. CREATE TABLE tmp_disgencomb AS
  308. SELECT dg1.disease_cui AS id_dis1, dg2.disease_cui AS id_dis2
  309.   FROM disease_gene dg1 INNER JOIN disease_gene dg2
  310.     ON ( dg1.gene_id = dg2.gene_id AND dg2.disease_cui > dg1.disease_cui );
  311. CREATE INDEX ix_tmp_disgencomb ON tmp_disgencomb ( id_dis1, id_dis2 );
  312.  
  313. -- Coeficientes de los vectores de enfermedades relacionadas por genes
  314. DROP TABLE IF EXISTS tmp_disgencoef;
  315. CREATE TABLE tmp_disgencoef
  316.        ( id_dis1 CHAR(8)
  317.         ,id_dis2 CHAR(8)
  318.         ,gen_cos FLOAT
  319.         ,gen_jac FLOAT
  320.         ,gen_dic FLOAT )
  321.     AS
  322. SELECT dgvc.id_dis1
  323.       ,dgvc.id_dis2
  324.       ,dgvc.gentot / (SQRT(dgv1.gentot) * SQRT(dgv2.gentot)) AS gen_cos
  325.       ,dgvc.gentot / (dgv1.gentot + dgv2.gentot - dgvc.gentot) AS gen_jac
  326.       ,dgvc.gentot * 2 / (dgv1.gentot + dgv2.gentot) AS gen_dic
  327.   FROM (SELECT id_dis1, id_dis2, count(*) AS gentot
  328.           FROM tmp_disgencomb
  329.          GROUP BY id_dis1, id_dis2 ) AS dgvc
  330.        ,tmp_disgentot AS dgv1
  331.        ,tmp_disgentot AS dgv2
  332.  WHERE dgvc.id_dis1 = dgv1.disease_cui
  333.    AND dgvc.id_dis2 = dgv2.disease_cui;
  334. CREATE INDEX ix_tmp_disgencoef ON tmp_disgencoef ( id_dis1, id_dis2 );
  335.  
  336. -- ----------------------------------------------------------------
  337. -- -- DisProtCoef                                                --
  338. -- ----------------------------------------------------------------
  339.  
  340. -- Relacion proteinas con enfermedades a traves de genes
  341. DROP TABLE IF EXISTS tmp_disprot;
  342. CREATE TABLE tmp_disprot AS
  343. SELECT DISTINCT disease_cui, prot_id
  344.   FROM disease_gene gd INNER JOIN encodes en
  345.     ON ( gd.gene_id = en.gene_id );
  346. CREATE INDEX ix_tmp_disprot1 ON tmp_disprot ( disease_cui, prot_id );
  347. CREATE INDEX ix_tmp_disprot2 ON tmp_disprot ( prot_id, disease_cui );
  348.  
  349. -- Suma de proteinas por enfermedad
  350. DROP TABLE IF EXISTS tmp_disprottot;
  351. CREATE TABLE tmp_disprottot AS
  352. SELECT disease_cui, COUNT(*) AS prottot
  353.   FROM tmp_disprot
  354.  GROUP BY disease_cui;
  355. CREATE INDEX ix_tmp_disprottot ON tmp_disprottot ( disease_cui );
  356.  
  357. -- Combinaciones de enfermedades con proteinas coincidentes sin agrupar
  358. DROP TABLE IF EXISTS tmp_disprotcomb;
  359. CREATE TABLE tmp_disprotcomb AS
  360. SELECT dp1.disease_cui AS id_dis1, dp2.disease_cui AS id_dis2
  361.   FROM tmp_disprot dp1 INNER JOIN tmp_disprot dp2
  362.     ON ( dp1.prot_id = dp2.prot_id AND dp2.disease_cui > dp1.disease_cui );
  363. CREATE INDEX ix_tmp_disprotcomb ON tmp_disprotcomb ( id_dis1, id_dis2 );
  364.  
  365. -- Coeficientes de los vectores de enfermedades relacionadas por proteinas
  366. DROP TABLE IF EXISTS tmp_disprotcoef;
  367. CREATE TABLE tmp_disprotcoef
  368.        ( id_dis1 CHAR(8)
  369.         ,id_dis2 CHAR(8)
  370.         ,prot_cos FLOAT
  371.         ,prot_jac FLOAT
  372.         ,prot_dic FLOAT )
  373.     AS
  374. SELECT dpvc.id_dis1
  375.       ,dpvc.id_dis2
  376.       ,dpvc.prottot / (SQRT(dpv1.prottot) * SQRT(dpv2.prottot)) AS prot_cos
  377.       ,dpvc.prottot / (dpv1.prottot + dpv2.prottot - dpvc.prottot) AS prot_jac
  378.       ,dpvc.prottot * 2 / (dpv1.prottot + dpv2.prottot) AS prot_dic
  379.   FROM (SELECT id_dis1, id_dis2, count(*) AS prottot
  380.           FROM tmp_disprotcomb
  381.          GROUP BY id_dis1, id_dis2 ) AS dpvc
  382.        ,tmp_disprottot AS dpv1
  383.        ,tmp_disprottot AS dpv2
  384.  WHERE dpvc.id_dis1 = dpv1.disease_cui
  385.    AND dpvc.id_dis2 = dpv2.disease_cui;
  386. CREATE INDEX ix_tmp_disprotcoef ON tmp_disprotcoef ( id_dis1, id_dis2 );
  387.  
  388. -- ----------------------------------------------------------------
  389. -- -- DisPathCoef                                                --
  390. -- ----------------------------------------------------------------
  391.  
  392. -- Relacion rutas con enfermedades a traves de genes
  393. DROP TABLE IF EXISTS tmp_dispath;
  394. CREATE TABLE tmp_dispath AS
  395. SELECT DISTINCT disease_cui, path_id
  396.   FROM disease_gene gd INNER JOIN gene_pathway gp
  397.     ON ( gd.gene_id = gp.gen_id );
  398. CREATE INDEX ix_tmp_dispath1 ON tmp_dispath ( disease_cui, path_id );
  399. CREATE INDEX ix_tmp_dispath2 ON tmp_dispath ( path_id, disease_cui );
  400.  
  401. -- Suma de rutas por enfermedad
  402. DROP TABLE IF EXISTS tmp_dispathtot;
  403. CREATE TABLE tmp_dispathtot AS
  404. SELECT disease_cui, COUNT(*) AS pathtot
  405.   FROM tmp_dispath
  406.  GROUP BY disease_cui;
  407. CREATE INDEX ix_tmp_dispathtot ON tmp_dispathtot ( disease_cui );
  408.  
  409. -- Combinaciones de enfermedades con rutas coincidentes sin agrupar
  410. DROP TABLE IF EXISTS tmp_dispathcomb;
  411. CREATE TABLE tmp_dispathcomb AS
  412. SELECT dp1.disease_cui AS id_dis1, dp2.disease_cui AS id_dis2
  413.   FROM tmp_dispath dp1 INNER JOIN tmp_dispath dp2
  414.     ON ( dp1.path_id = dp2.path_id AND dp2.disease_cui > dp1.disease_cui );
  415. CREATE INDEX ix_tmp_dispathcomb ON tmp_dispathcomb ( id_dis1, id_dis2 );
  416.  
  417. -- Coeficientes de los vectores de enfermedades relacionadas por rutas
  418. DROP TABLE IF EXISTS tmp_dispathcoef;
  419. CREATE TABLE tmp_dispathcoef
  420.        ( id_dis1 CHAR(8)
  421.         ,id_dis2 CHAR(8)
  422.         ,path_cos FLOAT
  423.         ,path_jac FLOAT
  424.         ,path_dic FLOAT )
  425.     AS
  426. SELECT dpvc.id_dis1
  427.       ,dpvc.id_dis2
  428.       ,dpvc.pathtot / (SQRT(dpv1.pathtot) * SQRT(dpv2.pathtot)) AS path_cos
  429.       ,dpvc.pathtot / (dpv1.pathtot + dpv2.pathtot - dpvc.pathtot) AS path_jac
  430.       ,dpvc.pathtot * 2 / (dpv1.pathtot + dpv2.pathtot) AS path_dic
  431.   FROM (SELECT id_dis1, id_dis2, count(*) AS pathtot
  432.           FROM tmp_dispathcomb
  433.          GROUP BY id_dis1, id_dis2 ) AS dpvc
  434.        ,tmp_dispathtot AS dpv1
  435.        ,tmp_dispathtot AS dpv2
  436.  WHERE dpvc.id_dis1 = dpv1.disease_cui
  437.    AND dpvc.id_dis2 = dpv2.disease_cui;
  438. CREATE INDEX ix_tmp_dispathcoef ON tmp_dispathcoef ( id_dis1, id_dis2 );
  439.  
  440. -- ----------------------------------------------------------------
  441. -- -- DisPPiCoef                                                 --
  442. -- ----------------------------------------------------------------
  443.  
  444. -- Relacion interaccion proteinas con enfermedades a traves de genes
  445. DROP TABLE IF EXISTS tmp_disppi;
  446. CREATE TABLE tmp_disppi AS
  447. SELECT DISTINCT dp1.disease_cui, ppi.id_p1, ppi.id_p2
  448.   FROM p_interacts_with_p ppi
  449.        INNER JOIN edsssdb.tmp_disprot dp1
  450.          ON ( dp1.prot_id = ppi.id_p1 )
  451.        INNER JOIN edsssdb.tmp_disprot dp2
  452.          ON ( dp2.prot_id = ppi.id_p2 AND dp1.disease_cui = dp2.disease_cui);
  453. CREATE INDEX ix_tmp_disppi1 ON tmp_disppi ( id_p1, id_p2, disease_cui );
  454. CREATE INDEX ix_tmp_disppi2 ON tmp_disppi ( disease_cui, id_p1, id_p2 );
  455.  
  456. -- Suma de interaccion proteinas por enfermedad
  457. DROP TABLE IF EXISTS tmp_disppitot;
  458. CREATE TABLE tmp_disppitot AS
  459. SELECT disease_cui, COUNT(*) AS ppitot
  460.   FROM tmp_disppi
  461.  GROUP BY disease_cui;
  462. CREATE INDEX ix_tmp_disppitot ON tmp_disppitot ( disease_cui );
  463.  
  464. -- Combinaciones de enfermedades con interaccion proteinas coincidentes sin agrupar
  465. DROP TABLE IF EXISTS tmp_disppicomb;
  466. CREATE TABLE tmp_disppicomb AS
  467. SELECT dp1.disease_cui AS id_dis1, dp2.disease_cui AS id_dis2
  468.   FROM tmp_disppi dp1 INNER JOIN tmp_disppi dp2
  469.     ON ( dp1.id_p1 = dp2.id_p1 AND dp1.id_p2 = dp2.id_p2 AND dp2.disease_cui > dp1.disease_cui );
  470. CREATE INDEX ix_tmp_disppicomb ON tmp_disppicomb ( id_dis1, id_dis2 );
  471.  
  472. -- Coeficientes de los vectores de enfermedades relacionadas por interaccion proteinas
  473. DROP TABLE IF EXISTS tmp_disppicoef;
  474. CREATE TABLE tmp_disppicoef
  475.        ( id_dis1 CHAR(8)
  476.         ,id_dis2 CHAR(8)
  477.         ,ppi_cos FLOAT
  478.         ,ppi_jac FLOAT
  479.         ,ppi_dic FLOAT )
  480.     AS
  481. SELECT dpvc.id_dis1
  482.       ,dpvc.id_dis2
  483.       ,dpvc.ppitot / (SQRT(dpv1.ppitot) * SQRT(dpv2.ppitot)) AS ppi_cos
  484.       ,dpvc.ppitot / (dpv1.ppitot + dpv2.ppitot - dpvc.ppitot) AS ppi_jac
  485.       ,dpvc.ppitot * 2 / (dpv1.ppitot + dpv2.ppitot) AS ppi_dic
  486.   FROM (SELECT id_dis1, id_dis2, count(*) AS ppitot
  487.           FROM tmp_disppicomb
  488.          GROUP BY id_dis1, id_dis2 ) AS dpvc
  489.        ,tmp_disppitot AS dpv1
  490.        ,tmp_disppitot AS dpv2
  491.  WHERE dpvc.id_dis1 = dpv1.disease_cui
  492.    AND dpvc.id_dis2 = dpv2.disease_cui;
  493. CREATE INDEX ix_tmp_disppicoef ON tmp_disppicoef ( id_dis1, id_dis2 );
  494.  
  495. -- ----------------------------------------------------------------
  496. -- -- BioSimilarity                                              --
  497. -- ----------------------------------------------------------------
  498.  
  499. DROP TABLE IF EXISTS bio_similarity;
  500. CREATE TABLE bio_similarity (
  501.            id_dis1 char(8) NOT NULL
  502.           ,id_dis2 char(8) NOT NULL
  503.           ,S_gen_cos float DEFAULT 0
  504.           ,S_gen_jaccard float DEFAULT 0
  505.           ,S_gen_dice float DEFAULT 0
  506.           ,S_prot_cos float DEFAULT 0
  507.           ,S_prot_jaccard float DEFAULT 0
  508.           ,S_prot_dice float DEFAULT 0
  509.           ,S_path_cos float DEFAULT 0
  510.           ,S_path_jaccard float DEFAULT 0
  511.           ,S_path_dice float DEFAULT 0
  512.           ,S_ppi_cos float DEFAULT 0
  513.           ,S_ppi_jaccard float DEFAULT 0
  514.           ,S_ppi_dice float DEFAULT 0  )
  515.     AS
  516. SELECT  comb.id_dis1 AS id_dis1
  517.        ,comb.id_dis2 AS id_dis2
  518.        ,COALESCE(gen.gen_cos,0) AS S_gen_cos
  519.        ,COALESCE(gen.gen_jac,0) AS S_gen_jaccard
  520.        ,COALESCE(gen.gen_dic,0) AS S_gen_dice
  521.        ,COALESCE(prot.prot_cos,0) AS S_prot_cos
  522.        ,COALESCE(prot.prot_jac,0) AS S_prot_jaccard
  523.        ,COALESCE(prot.prot_dic,0) AS S_prot_dice
  524.        ,COALESCE(path.path_cos,0) AS S_path_cos
  525.        ,COALESCE(path.path_jac,0) AS S_path_jaccard
  526.        ,COALESCE(path.path_dic,0) AS S_path_dice
  527.        ,COALESCE(ppi.ppi_cos,0) AS S_ppi_cos
  528.        ,COALESCE(ppi.ppi_jac,0) AS S_ppi_jaccard
  529.        ,COALESCE(ppi.ppi_dic,0) AS S_ppi_dice
  530.   FROM ( SELECT a.cui AS id_dis1, b.cui AS id_dis2
  531.            FROM (SELECT DISTINCT cui AS cui FROM edsssdb.has_biological_info) a,
  532.                 (SELECT DISTINCT cui AS cui FROM edsssdb.has_biological_info) b
  533.           WHERE b.cui > a.cui ) comb
  534.        LEFT JOIN tmp_disgencoef AS gen
  535.          ON (comb.id_dis1 = gen.id_dis1 AND comb.id_dis2 = gen.id_dis2)
  536.        LEFT JOIN tmp_disprotcoef AS prot
  537.          ON (comb.id_dis1 = prot.id_dis1 AND comb.id_dis2 = prot.id_dis2)
  538.        LEFT JOIN tmp_dispathcoef AS path
  539.          ON (comb.id_dis1 = path.id_dis1 AND comb.id_dis2 = path.id_dis2)
  540.        LEFT JOIN tmp_disppicoef AS ppi
  541.          ON (comb.id_dis1 = ppi.id_dis1 AND comb.id_dis2 = ppi.id_dis2);
  542. ALTER TABLE bio_similarity
  543. ADD PRIMARY KEY (id_dis1, id_dis2);
  544.  
  545. -- ----------------------------------------------------------------
  546. -- -- Housekeeping                                               --
  547. -- ----------------------------------------------------------------
  548.  
  549. DROP TABLE IF EXISTS tmp_disgentot;
  550. DROP TABLE IF EXISTS tmp_disgencomb;
  551. DROP TABLE IF EXISTS tmp_disgencoef;
  552. DROP TABLE IF EXISTS tmp_disprot;
  553. DROP TABLE IF EXISTS tmp_disprottot;
  554. DROP TABLE IF EXISTS tmp_disprotcomb;
  555. DROP TABLE IF EXISTS tmp_disprotcoef;
  556. DROP TABLE IF EXISTS tmp_dispath;
  557. DROP TABLE IF EXISTS tmp_dispathtot;
  558. DROP TABLE IF EXISTS tmp_dispathcomb;
  559. DROP TABLE IF EXISTS tmp_dispathcoef;
  560. DROP TABLE IF EXISTS tmp_disppi;
  561. DROP TABLE IF EXISTS tmp_disppitot;
  562. DROP TABLE IF EXISTS tmp_disppicomb;
  563. DROP TABLE IF EXISTS tmp_disppicoef;
  564.  
  565.  
  566. END;
  567.  
  568. @@
  569.  
  570. delimiter ;
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement