Advertisement
Guest User

Untitled

a guest
May 21st, 2018
76
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.04 KB | None | 0 0
from collections import Counter, defaultdict

import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score


  4. f = open('Test.txt', 'r')
  5. s = f.readlines()
  6. a = [0]*len(s)
  7. Sample_Mutations = {}
  8. for i in range (len(s) - 1):
  9.     a[i] = s[i].split('\t')
  10.     if a[i][32] in Sample_Mutations:
  11.         Sample_Mutations[a[i][32]] += 1
  12.     else:
  13.         Sample_Mutations[a[i][32]] = 1
  14.  
  15. p = open('!ДАТАСЕТ.txt', 'r')
  16. t = p.readlines()
  17. b = [0]*len(t)
  18. Sample_Matches = {}
  19. for i in range (len(t) - 1):
  20.     b[i] = t[i].split('\t')
  21.    
  22. for key in Sample_Mutations:
  23.     Sample_Matches[key] = 0
  24.  
  25. for i in range (len(a) - 1):
  26.     for j in range (len(b) - 1):
  27.         if a[i][4] == b[j][0] and (int(b[j][1]) <= int(a[i][5]) <= int(b[j][2])):
  28.             Sample_Matches[a[i][32]] += 1
  29.  
  30.  
  31. w = open('Out.txt', 'w')
  32. w.write('Tumor_Sample_UUID' + '\t' + 'Mutations' + '\t' +'Matches' + '\n')
  33. for key in Sample_Mutations:
  34.     w.write(key + '\t' + str(Sample_Mutations[key]) + '\t' + str(Sample_Matches[key])+ '\n')
  35. w.close()
  36.  
  37.  
  38. if __name__ == '__main__':
  39.     x = []
  40.     y = []
  41.     for key in Sample_Mutations:
  42.         if Sample_Mutations[key] != 405:
  43.             x.append(Sample_Mutations[key])
  44.             y.append(Sample_Matches[key])
  45.  
  46.     plt.title("Correlation")
  47.     plt.xlabel("Matches")
  48.     plt.ylabel("Mutations")
  49.     plt.scatter(y, x, edgecolors='r', s=10)
  50.     plt.grid(True, linestyle='-', color='0.75')
  51.     plt.show()
  52.  
  53. from sklearn.linear_model import LinearRegression
  54. from sklearn.metrics import mean_squared_error, r2_score
  55. M = [[0] * len(x) for i in range(len(y))]
  56. lr2 = LinearRegression()
  57. xs = x
  58. lr2.fit([[x] for x in xs], y)
  59. y2 = lr2.predict([[x] for x in xs])
  60. #plt.figure()
  61. #plt.plot(x, y, color='b', linewidth=1, label='Linear')
  62. plt.scatter(y, x, edgecolors='r', s=10)
  63. plt.plot(y2, x, color='b', linewidth=1, label='Linear')
  64.  
  65. plt.grid(True, linestyle='-', color='0.75')
  66. plt.title("Linear Regression")
  67. plt.ylabel('Mutations')
  68. plt.xlabel('Matches')
  69. plt.show()
  70.  
  71. print('Coefficients: \n', 'a = ', lr2.coef_,
  72.       'b = ', lr2.intercept_, '\n',
  73.       'R^2 = ', lr2.score([[x] for x in xs], y),)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement