Advertisement
Guest User

Untitled

a guest
Jan 18th, 2020
77
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.82 KB | None | 0 0
  1. import pandas as pd
  2. import numpy as np
  3. import random
  4. import operator
  5. import math
  6.  
  7. from sklearn.decomposition import TruncatedSVD
  8. from sklearn.preprocessing import StandardScaler, Normalizer
  9.  
  # ---------------------------------------------------------------------------
  # Data loading and dimensionality reduction
  # ---------------------------------------------------------------------------
  # Training data; the code below selects columns atr1..atr7 and 'class' from it.
  seeds = pd.read_csv("uczace.txt")
  colors = ['b', 'orange', 'g', 'r', 'c', 'm', 'y', 'k', 'Brown', 'ForestGreen']

  x = pd.DataFrame(seeds, columns=['atr1', 'atr2', 'atr3', 'atr4', 'atr5', 'atr6', 'atr7'])
  y = pd.DataFrame(seeds, columns=['class'])

  # Standardise the attributes, project them onto 2 SVD components, then
  # rescale the projected samples with sklearn's Normalizer.
  scaler = StandardScaler()
  X_std = scaler.fit_transform(x)
  lsa = TruncatedSVD(2, algorithm='arpack')
  dtm_lsa = lsa.fit_transform(X_std)
  dtm_lsa = Normalizer(copy=False).fit_transform(dtm_lsa)
  a = pd.DataFrame(dtm_lsa)

  # Persist the reduced data. NOTE: to_csv returns None when given a path, so
  # export_csv carries no data; the name is kept only for compatibility.
  export_csv = a.to_csv("a.txt", sep=",", index=False, header=False)

  # Re-attach the class column so features and labels can be split by position.
  a['class'] = y
  df_full = pd.read_csv("uczace.txt")  # not used by the code visible here
  columns = list(a.columns)
  features = columns[:len(columns)-1]
  class_labels = list(a[columns[-1]])
  df = a[features]
  print(df)

  # ---------------------------------------------------------------------------
  # Fuzzy c-means configuration
  # ---------------------------------------------------------------------------
  # Number of Attributes
  # df already holds only the feature columns (the trailing 'class' column was
  # dropped via `features`), so no further "- 1" is applied here.
  num_attr = len(df.columns)

  # Number of Clusters
  k = 3

  # Maximum number of iterations
  MAX_ITER = 100

  # Number of data points
  n = len(df)

  # Fuzzy parameter (exponent applied to memberships; must not equal 1 because
  # the membership update divides by m - 1)
  m = 3.00
  45.  
  def accuracy(cluster_labels, class_labels):
      """Score a two-cluster assignment against 'Yes'/'No' class labels.

      Cluster ids carry no inherent meaning, so both possible mappings are
      evaluated: index 0 of every returned list treats cluster 1 as 'Yes' and
      cluster 0 as 'No'; index 1 treats cluster 0 as 'Yes' and cluster 1 as
      'No'.

      Samples whose cluster id is neither 0 nor 1, or whose class label is
      neither 'Yes' nor 'No', are not counted at all.

      Args:
          cluster_labels: sequence of predicted cluster ids.
          class_labels: sequence of true labels, aligned with cluster_labels.

      Returns:
          A tuple (accuracy, precision, recall). Each element is a two-item
          list of percentages, one entry per mapping. A metric whose
          denominator is zero is reported as 0.0 instead of raising
          ZeroDivisionError.
      """
      def _percent(numerator, denominator):
          # An undefined ratio (nothing counted) is reported as 0.0.
          if not denominator:
              return 0.0
          return float(numerator) / denominator * 100

      accuracies, precisions, recalls = [], [], []

      # First pass: Yes = 1, No = 0.  Second pass: Yes = 0, No = 1.
      for yes_cluster in (1, 0):
          no_cluster = 1 - yes_cluster
          tp = tn = fp = fn = 0

          # Pair the inputs directly instead of relying on the length of a
          # module-level DataFrame.
          for cluster, label in zip(cluster_labels, class_labels):
              if label == 'Yes':
                  if cluster == yes_cluster:
                      tp += 1
                  elif cluster == no_cluster:
                      fn += 1
              elif label == 'No':
                  if cluster == no_cluster:
                      tn += 1
                  elif cluster == yes_cluster:
                      fp += 1

          accuracies.append(_percent(tp + tn, tp + tn + fn + fp))
          precisions.append(_percent(tp, tp + fp))
          recalls.append(_percent(tp, tp + fn))

      return accuracies, precisions, recalls
  88.  
  89.  
  def initializeMembershipMatrix():
      """Build a random n-by-k membership matrix whose rows each sum to 1.

      Reads the module-level ``n`` (number of rows) and ``k`` (number of
      clusters). Every row is k draws from ``random.random()`` divided by
      their sum.

      Returns:
          A list of n lists, each holding k membership values.
      """
      def _random_row():
          draws = [random.random() for _ in range(k)]
          total = sum(draws)
          return [value / total for value in draws]

      return [_random_row() for _ in range(n)]
  98.  
  99.  
  def calculateClusterCenter(membership_mat):
      """Compute the k cluster centres as membership-weighted means of the data.

      For cluster j, every data point i is weighted by
      ``membership_mat[i][j] ** m`` and the centre is the weighted sum of the
      points divided by the sum of those weights. Reads the module-level
      ``df`` (data points), ``n``, ``k`` and ``m``.

      Args:
          membership_mat: list of rows, one per data point, each holding the
              point's membership in every cluster.

      Returns:
          A list of k centres, each a list with one coordinate per column of
          ``df``.
      """
      # Convert the data once. The rows do not depend on the cluster index, so
      # looking them up through df.iloc inside both loops repeated the same
      # pandas indexing k * n times per call.
      points = df.values.tolist()

      # Transpose so that memberships_by_cluster[j] lists cluster j's
      # membership for every data point.
      memberships_by_cluster = list(zip(*membership_mat))

      cluster_centers = []
      for j in range(k):
          weights = [u ** m for u in memberships_by_cluster[j]]
          denominator = sum(weights)

          # Weighted copy of every point, then a per-coordinate sum taken in
          # data-point order.
          weighted_points = [
              [weights[i] * val for val in points[i]] for i in range(n)
          ]
          numerator = [sum(coords) for coords in zip(*weighted_points)]

          cluster_centers.append([z / denominator for z in numerator])
      return cluster_centers
  116.  
  117.  
  def updateMembershipValue(membership_mat, cluster_centers):
      """Recompute every point's cluster memberships from the current centres.

      For point i and cluster j the new membership is
      ``1 / sum_c (d_ij / d_ic) ** (2 / (m - 1))``, where ``d_ij`` is the
      Euclidean distance from point i to centre j. Reads the module-level
      ``df``, ``n``, ``k`` and ``m``.

      A point that lies exactly on one or more centres has zero distance to
      them. NumPy floating-point division by zero yields inf/nan by default
      rather than raising, which would fill that row with NaN. Such a point
      is instead given equal membership in the coincident centres and zero
      membership elsewhere.

      Args:
          membership_mat: n-by-k list of lists; it is updated in place.
          cluster_centers: list of k centres, as produced by
              calculateClusterCenter.

      Returns:
          The same ``membership_mat`` object, after the update.
      """
      p = float(2/(m-1))

      # Convert the data once instead of one df.iloc lookup per point.
      points = df.values.tolist()

      for i in range(n):
          point = points[i]
          distances = [
              np.linalg.norm([pv - cv for pv, cv in zip(point, cluster_centers[j])])
              for j in range(k)
          ]

          # Guard against 0/0 and x/0 when the point sits on a centre.
          coincident = [j for j in range(k) if distances[j] == 0]
          if coincident:
              share = 1.0 / len(coincident)
              for j in range(k):
                  membership_mat[i][j] = share if j in coincident else 0.0
              continue

          for j in range(k):
              den = sum([math.pow(float(distances[j]/distances[c]), p) for c in range(k)])
              membership_mat[i][j] = float(1/den)
      return membership_mat
  128.  
  129.  
  def getClusters(membership_mat):
      """Assign each data point to the cluster in which its membership is largest.

      The rows of ``membership_mat`` are iterated directly, so the result
      always has one label per row passed in, independent of any module-level
      row count.

      Args:
          membership_mat: list of rows, each holding one membership value per
              cluster. Every row must be non-empty.

      Returns:
          A list with one cluster index per row. When several clusters share
          the maximum membership, the highest index among them is chosen
          (the (value, index) pairs are compared as tuples).
      """
      cluster_labels = []
      for memberships in membership_mat:
          _, best_idx = max((val, idx) for idx, val in enumerate(memberships))
          cluster_labels.append(best_idx)
      return cluster_labels
  136.  
  137.  
  def fuzzyCMeansClustering():
      """Run fuzzy c-means for MAX_ITER iterations and return the result.

      Starts from a random membership matrix, then alternates between
      recomputing the cluster centres and updating the memberships. The
      membership matrix is printed after every iteration.

      Returns:
          A tuple (cluster_labels, cluster_centers). ``cluster_labels`` is the
          hard assignment derived from the final membership matrix.
          ``cluster_centers`` are the centres computed at the start of the
          last iteration, i.e. from the memberships before the final update.
      """
      # Membership Matrix
      membership_mat = initializeMembershipMatrix()
      cluster_centers = None

      # Exactly MAX_ITER passes; the earlier "curr <= MAX_ITER" loop ran one
      # pass more than the configured maximum.
      for _ in range(MAX_ITER):
          cluster_centers = calculateClusterCenter(membership_mat)
          membership_mat = updateMembershipValue(membership_mat, cluster_centers)
          print(membership_mat)

      # With MAX_ITER <= 0 no pass runs; still return centres for the initial
      # memberships rather than failing on an unset name.
      if cluster_centers is None:
          cluster_centers = calculateClusterCenter(membership_mat)

      # Only the final assignment is returned, so derive it once after the loop.
      cluster_labels = getClusters(membership_mat)
      return cluster_labels, cluster_centers
  150.  
  151.  
  # Run the clustering once and show the hard labels, then the cluster centres,
  # each on its own line.
  labels, centers = fuzzyCMeansClustering()
  print(labels, centers, sep="\n")

  # Accuracy/precision/recall reporting is disabled: accuracy() is never called
  # in this script, so there are no metric values to print.
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement