Advertisement
Guest User

Untitled

a guest
Mar 21st, 2019
53
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.40 KB | None | 0 0
  1. id1 id2 weights
  2. 0 a 2a 144.0
  3. 1 a 2b 52.5
  4. 2 a 2c 2.0
  5. 3 a 2d 1.0
  6. 4 a 2e 1.0
  7. 5 b 2a 2.0
  8. 6 b 2e 1.0
  9. 7 b 2f 1.0
  10. 8 b 2b 1.0
  11. 9 b 2c 0.008
  12.  
  13. 2a 2b 2c 2d 2e 2f
  14. 2a 1 0.5 0.7 0.2 0.1 0.3
  15. 2b 0.5 1 0.6 0.4 0.3 0.4
  16. 2c 0.7 0.6 1 0.1 0.4 0.2
  17. 2d 0.2 0.4 0.1 1 0.8 0.7
  18. 2e 0.1 0.3 0.4 0.8 1 0.8
  19. 2f 0.3 0.4 0.2 0.7 0.8 1
  20.  
  21. ids = df.id1.unique()
  22. output = pd.DataFrame(columns = mat.columns,index = ids)
  23. for id in ids:
  24. df_slice = df.loc[df.id1 == id]
  25. to_normalize = df_slice.weights.sum()
  26. temp = mat.loc[df_slice.id2]
  27. for art in df_slice.id2:
  28. temp.loc[art] *= df_slice.ix[df_slice.id2 == art,'weights'].values[0]
  29. temp.loc[art] /= (1.*to_normalize)
  30. output.loc[id] = temp.sum()
  31.  
  32. 2a 2b 2c 2d 2e 2f
  33. a 0.857606 0.630424 0.672319 0.258354 0.163342 0.329676
  34. b 0.580192 0.540096 0.520767 0.459425 0.459904 0.559425
  35.  
  36. import io
  37.  
  38. import pandas as pd
  39.  
  40.  
  41. raw_df = io.StringIO("""
  42. id1 id2 weights
  43. 0 a 2a 144.0
  44. 1 a 2b 52.5
  45. 2 a 2c 2.0
  46. 3 a 2d 1.0
  47. 4 a 2e 1.0
  48. 5 b 2a 2.0
  49. 6 b 2e 1.0
  50. 7 b 2f 1.0
  51. 8 b 2b 1.0
  52. 9 b 2c 0.008
  53. """)
  54. df = pd.read_csv(raw_df, delim_whitespace=True)
  55.  
  56. raw_mat = io.StringIO("""
  57. 2a 2b 2c 2d 2e 2f
  58. 2a 1 0.5 0.7 0.2 0.1 0.3
  59. 2b 0.5 1 0.6 0.4 0.3 0.4
  60. 2c 0.7 0.6 1 0.1 0.4 0.2
  61. 2d 0.2 0.4 0.1 1 0.8 0.7
  62. 2e 0.1 0.3 0.4 0.8 1 0.8
  63. 2f 0.3 0.4 0.2 0.7 0.8 1
  64. """)
  65. mat = pd.read_csv(raw_mat, delim_whitespace=True)
  66.  
  67.  
  68. df['norm'] = df.groupby('id1')['weights'].transform('sum')
  69.  
  70. m = pd.merge(df, mat, left_on='id2', right_index=True)
  71. m[mat.index] = m[mat.index].multiply(m['weights'] / m['norm'], axis=0)
  72.  
  73. output = m.groupby('id1')[mat.index].sum()
  74. output.columns.name = 'id2'
  75. print(output)
  76.  
  77. id2 2a 2b 2c 2d 2e 2f
  78. id1
  79. a 0.857606 0.630424 0.672319 0.258354 0.163342 0.329676
  80. b 0.580192 0.540096 0.520767 0.459425 0.459904 0.559425
  81.  
  82. disease symptom frequence
  83. 0 d1 s1 Very frequent (99-80%)
  84. 1 d1 s2 Very frequent (99-80%)
  85. 2 d2 s1 Frequent (79-30%)
  86. 3 d2 s3 Very frequent (99-80%)
  87. 4 d3 s2 Occasional (29-5%)
  88. 5 d4 s1 Very frequent (99-80%)
  89. 6 d4 s2 Frequent (79-30%)
  90. 7 d4 s3 Occasional (29-5%)
  91. 8 d5 s3 Occasional (29-5%)
  92. 9 d5 s4 Very frequent (99-80%)
  93.  
  94. s1 s2 s3 s4 s5 s6
  95. d1 1 1 0 0 0 0
  96. d2 1 0 1 0 0 0
  97. d3 0 1 1 1 1 1
  98. d4 1 0 1 0 0 0
  99. d5 0 0 1 1 0 0
  100.  
  101. import pandas as pd
  102. import numpy as np
  103. import io
  104.  
  105. data = pd.read_csv("disease_sym_frq_list.csv", sep="[;,]", engine='python')
  106. data
  107.  
  108. dat_mat= io.StringIO("""data
  109.  
  110. """)
  111. mat = pd.read_csv(dat_mat, delim_whitespace=True)
  112.  
  113. data['norm'] = data.groupby('Disease')['Frequence'].transform('sum')
  114.  
  115. m = pd.merge(data, mat, left_on='Symptom', right_index=True)
  116. m[mat.index] = m[mat.index].multiply(m['Frequence'] / m['norm'], axis=0)
  117.  
  118. output = m.groupby('Disease')[mat.index].sum()
  119. output.columns.name = 'Symptom'
  120. print(output)
  121.  
  122. Empty DataFrame
  123. Columns: []
  124. Index: []
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement