Advertisement
Guest User

govno

a guest
Jan 20th, 2019
68
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 9.59 KB | None | 0 0
  1. %pylab inline
  2.  
  3. class Cluster:
  4.     def __init__(self, data):
  5.         self.data = data
  6.         self.number = 0
  7.         self.next = None
  8.         self.prev = None
  9.         self.next_inside = None
  10.         self.prev_inside = None
  11. #print all clusters        
  12.     def print_all(self):
  13.         cluster = self
  14.         while (cluster != None):
  15.             print("########CLUSTER NUMBER", cluster.number, "########")
  16.             print (cluster.data)
  17.            
  18.             cluster_inside = cluster.next_inside
  19.             while (cluster_inside != None):
  20.                 print (cluster_inside.data)
  21.                 cluster_inside = cluster_inside.next_inside
  22.            
  23.             cluster = cluster.next
  24. #add cluster            
  25.     def add_new(self, data, num):
  26.         cluster = self
  27.         while (cluster.next != None):
  28.             cluster = cluster.next
  29.         cl_next = Cluster(data)
  30.         cl_next.number = num
  31.         cl_next.prev = cluster
  32.         cluster.next = cl_next
  33.    
  34.     def merge_clusters(self, num1, num2):
  35.         cluster = self
  36.         cluster1 = self
  37.         cluster2 = self
  38.        
  39.         while ((cluster1 != None)and(cluster1.number != num1)):
  40.             cluster1 = cluster1.next
  41.            
  42.         while ((cluster2 != None)and(cluster2.number != num2)):
  43.             cluster2 = cluster2.next
  44.        
  45.         cluster4 = cluster2
  46.        
  47.         while(cluster1.next_inside != None):
  48.             cluster1 = cluster1.next_inside
  49.        
  50.         cluster1.next_inside = cluster2
  51.        
  52.         #cluster4.next_inside = cluster2.next_inside
  53.        
  54.         cluster3 = cluster2.prev
  55.         cluster3.next = cluster2.next
  56.        
  57.        
  58.            
  59.        
  60.        
  61. #we need to count distance from elements of clusters to each other : many to many      
  62.     def pair_distance_inside(self, cluster1, cluster2, NUM_ATTRS):
  63.         attributes = NUM_ATTRS
  64.         delta = np.zeros(attributes)
  65.        
  66.         cluster_main = cluster1
  67.         cluster_pairable = cluster2
  68.        
  69.         distance = 10000
  70.        
  71.         while(cluster_pairable != None):
  72.             attr = 0
  73.             distance_inner = 100000
  74.             while(attr < attributes):
  75.                 temp = cluster_main.data[0][attr]
  76.                 temp2 = cluster_pairable.data[0][attr]
  77.                 delta[attr] = temp % temp2
  78.                 attr = attr + 1
  79.            
  80.             attr = 0
  81.             while(attr < attributes):
  82.                     delta[attr] = delta[attr] ** 2
  83.                     delta[attr] = delta[attr] * weight[attr]
  84.                     distance_inner = distance_inner + delta[attr]
  85.                     #distance calculated from the first element in inner list
  86.                     distance_inner = math.sqrt(distance_inner)
  87.            
  88.             if(distance_inner < distance):
  89.                 distance = distance_inner
  90.                
  91.             cluster_pairable = cluster_pairable.next_inside
  92.                
  93.         return distance  
  94.    
  95.   #find nearest clusters
  96.     def find_nearest(distance_matrix, shape, max_num):
  97.         lowest = 10000
  98.         i = shape[0]
  99.  
  100.         j = shape[1]
  101.        
  102.         it_i = 0
  103.         it_j = 0
  104.         num1 = 0
  105.         num2 = 0
  106.  
  107.         while((it_i < i)and(it_i < max_num)):
  108.             while((it_j < j)and(it_j < max_num)):
  109.                 if(it_i != it_j):
  110.                    
  111.                     if(distance_matrix[it_i][it_j] < lowest):
  112.                         lowest = distance_matrix[it_i][it_j]
  113.                         num1 = it_i
  114.                         num2 = it_j
  115.                        
  116.                 it_j = it_j + 1
  117.             it_i = it_i +1
  118.             it_j = 0
  119.        
  120.         data = [num1, num2, lowest]
  121.         return  data    
  122.            
  123.          
  124.             #while(attr < NUM_COLS):
  125.     #weight[attr] = 1 / weight[attr]
  126.     #print(weight[attr])
  127.     #attr = attr + 1
  128.        
  129.        
  130.        
  131.            
  132.        
  133. #creating a distance matrix by nearest neighbor euristics      
  134.     def clusterize(self, max_num, NUM_ATTRS, weight, matrix):
  135.         distance_matrix = matrix
  136.         cluster = self
  137.         attributes = NUM_ATTRS
  138.        
  139.         while  (cluster != None):
  140.             current_num = cluster.number    
  141.             cluster_pair = cluster.next
  142.             next_num = cluster_pair.number
  143.            
  144.             while(cluster_pair != None):
  145.                 delta = np.zeros(attributes)
  146.                 distance = 0
  147.                 attr = 0
  148.                 while(attr < attributes):
  149.                     temp = cluster.data[0][attr]
  150.                     temp2 = cluster_pair.data[0][attr]
  151.                     delta[attr] = temp % temp2
  152.                     attr = attr + 1
  153.                    
  154.                 attr = 0
  155.                 while(attr < attributes):
  156.                     delta[attr] = delta[attr] ** 2
  157.                     delta[attr] = delta[attr] * weight[attr]
  158.                     distance = distance + delta[attr]
  159.                     #distance calculated from the first element in inner list
  160.                 distance = math.sqrt(distance)
  161.                
  162.                 distance_all = pair_distance_inside(cluster, cluster_pair, attributes)
  163.                
  164.                 if(distance_all < distance):
  165.                     distance = distance_all
  166.                
  167.                 #entering inside clusters
  168.                 cluster_inner = cluster.next_inside
  169.                 attr = 0    
  170.                
  171.                 while(cluster_inner != None):
  172.                     distance_inner = 0
  173.                     while(attr < attributes):
  174.                         temp = cluster_inner.data[0][attr]
  175.                         temp2 = cluster_pair.data[0][attr]
  176.                         delta[attr] = temp % temp2
  177.                         attr = attr + 1
  178.                    
  179.                     attr = 0
  180.                     while(attr < attributes):
  181.                         delta[attr] = delta[attr] ** 2
  182.                         delta[attr] = delta[attr] * weight[attr]
  183.                         distance_inner = distance_inner + delta[attr]
  184.                     distance_inner = math.sqrt(distance_inner)
  185.                    
  186.                     #distance from the nearest cluster element is the distance
  187.                     if(distance_inner < distance):
  188.                         distance = distance_inner
  189.                    
  190.                     distance_all_inside = pair_distance_inside(cluster_inner, cluster_pair, attributes)
  191.                     if(distance_all_inside < distance):
  192.                         distance = distance_all_inside
  193.                     #going deeper inside cluster
  194.                     cluster_inner = cluster_inner.next_inside
  195.                    
  196.                
  197.                
  198.                 #distance matrices
  199.                 distance_matrix[current_num][next_num] = distance
  200.                 distance_matrix[next_num][current_num] = distance
  201.                 distance_matrix[current_num][current_num] = 999999
  202.                 distance_matrix[next_num][next_num] = 999999
  203.                
  204.                 cluster_pair = cluster_pair.next
  205.                 next_num = cluster_pair.number
  206.            
  207.             cluster = cluster.next          
  208.        
  209.         to_merge = find_nearest(distance_matrix, distance_matrix.shape, max_num)
  210.         cluster1_num = to_merge[0]
  211.         cluster2_num = to_merge[1]
  212.        
  213.        
  214.         it_j = 0
  215.         cluster2_distances = np.zeros(max_num)
  216.         while(it_j < max_num):
  217.             temp = distance_matrix[it_j][cluster2_num]
  218.             cluster2_distances[j] = temp
  219.        
  220.         it_j = 0
  221.         cluster1_distances = np.zeros(max_num)
  222.         while(it_j < max_num):
  223.             temp = distance_matrix[it_j][cluster1_num]
  224.             cluster1_distances[j] = temp
  225.        
  226.         it_i = 0
  227.         it_j = cluster2_num
  228.        
  229.         #removing cluster2 from distance_matrix
  230.         while(it_i < max_num):
  231.                 distance_matrix[it_i][cluster2_num] = 999999
  232.                 distance_matrix[cluster2_num][it_i] = 999999
  233.        
  234.         #Lance-Williams formula coefficients
  235.         it_i = 0
  236.         Au = 1/2
  237.         Av = 1/2
  238.         Y = -1/2
  239.         RWS = 0
  240.        
  241.         while((it_i < max_num)and(it_i != cluster2_num)and(it_i != cluster1_num)):
  242.                 #Lance-Williams formula
  243.             RWS = Au*(cluster2_distances[it_i]) + Av*(cluster1_distances[it_i]) + Y*(cluster2_distances[it_i] % cluster1_distances[it_i])
  244.             distance_matrix[it_i][cluster1_num] = RWS
  245.            
  246.        
  247.         merge_clusters(cluster1_num, cluster2_num)
  248.         return distance_matrix
  249.  
  250. #main  
  251. sample = np.loadtxt("/Users/a1234/Desktop/Datasets/data.txt", delimiter = ',')
  252. NUM_ROWS = sample.shape[0] #number of rows
  253. NUM_COLS = sample.shape[1] #number of columns
  254. NUM_CLUSTERS = NUM_ROWS #number of clusters
  255.  
  256. array = sample[0:1:4]
  257.  
  258. Clusters = Cluster(array)
  259. Clusters.number = 0
  260. weight = np.zeros(NUM_COLS)
  261.  
  262. count = 1
  263.  
  264. #simple clusters by 1 objects
  265. while (count < NUM_ROWS):
  266.     data = sample[count:(count+1):NUM_COLS]
  267.  
  268. #counting weight for each attribute
  269.     attr = 0
  270.     while(attr < NUM_COLS):
  271.         temp = data[0][attr]
  272.         if(weight[attr] < temp):
  273.             weight[attr] = temp
  274.         attr = attr+1    
  275.  
  276.     Clusters.add_new(data,count+1)
  277.     count = count+1  
  278.  
  279. attr = 0
  280. while(attr < NUM_COLS):
  281.     weight[attr] = 1 / weight[attr]
  282.     print(weight[attr])
  283.     attr = attr + 1
  284.  
  285. #Clusters.print_all()
  286. distance_matrix = np.zeros((NUM_CLUSTERS,NUM_CLUSTERS))
  287. Clusters.clusterize(NUM_CLUSTERS, NUM_COLS, weight, distance_matrix)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement