Advertisement
Guest User

Untitled

a guest
Mar 25th, 2017
67
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 3.19 KB | None | 0 0
  1. # -*- coding: utf-8 -*-
  2. import csv
  3. import math
  4.  
  5. import numpy as np
  6.  
# Points that have already been processed. Appended to by DBSCAN and
# expand_cluster, and searched by value in is_visited. Module-level, so the
# list is shared by every call in the process.
VISITED = []
# Points whose eps-neighborhood held fewer than minPts points at the moment
# DBSCAN visited them. Module-level and shared, like VISITED.
NOISE = []
  9.  
  10.  
  11. def euclid_distance(p,q):
  12.     return math.sqrt(np.square(np.subtract(p,q)).sum())
  13.  
  14. def are_neighbors(p,q,eps):
  15.     return euclid_distance(p,q) < eps
  16.  
  17. def region_query(dataset, point, eps):
  18.     neighbors = []
  19.     for neighbor in dataset:
  20.         if are_neighbors(neighbor,point,eps):
  21.             neighbors.append(neighbor)
  22.     return neighbors
  23.  
  24. def is_visited(point):    
  25.     for visitedPoint in VISITED:
  26.         if np.array_equal(point, visitedPoint):
  27.             return True
  28.     return False
  29.  
  30. def cluster_contains(clusters, point):
  31.     for value in clusters.values():
  32.         for val in value:
  33.             if np.array_equal(val, point):
  34.                 return True
  35.     return False
  36.  
  37. def expand_cluster(dataset, point, clusters, neighborPoints, Cluster_ID, eps, minPts):
  38.     clusters[Cluster_ID].append(point)
  39.     for neighbor in neighborPoints:
  40.         if not is_visited(neighbor):
  41.             VISITED.append(neighbor)
  42.             newNeighbors = region_query(dataset,neighbor,eps)          
  43.             if len(newNeighbors) >= minPts:
  44.                 neighborPoints.extend(newNeighbors) # Toto nie je vhodne v pythone
  45.         if not cluster_contains(clusters, neighbor):
  46.             clusters[Cluster_ID].append(neighbor)
  47.  
  48. def DBSCAN(dataset, eps, minPts):
  49.     Cluster_ID = 0
  50.     clusters = dict()
  51.     for point in dataset:
  52.         if is_visited(point):
  53.             continue
  54.         VISITED.append(point)
  55.         NeighborPoints = region_query(dataset,point,eps)
  56.         if len(NeighborPoints) < minPts:
  57.             NOISE.append(point)
  58.         else:
  59.             clusters[Cluster_ID]=[]
  60.             expand_cluster(dataset, point, clusters, NeighborPoints, Cluster_ID, eps, minPts)
  61.             Cluster_ID = Cluster_ID + 1
  62.    
  63.     sum = 0
  64.     i = 0
  65.     for cluster in clusters.values():
  66.         print('Cluster '+ repr(i) + ' contains: ' + repr(len(cluster)))
  67.         i = i+1
  68.         sum = sum + len(cluster)
  69.     print('There has been ' + repr(len(NOISE)) + ' noise points')
  70.     print('Sum of cluster\'s length is ' + repr(sum))
  71.     print('Together with noise\'s ' + repr(sum+len(NOISE)))
  72.     print('Total length of dataset is: ' + repr(len(dataset)))
  73.     return clusters
  74.  
  75. def main():
  76.     Irismatrix = []
  77.     with  open('iris.csv', newline='') as csvFile:
  78.         reader = csv.reader(csvFile, delimiter=',')
  79.         next(csvFile)
  80.         for row in reader:
  81.             Irismatrix.append([float(i) for i in row[1:5]])
  82.            
  83.     Irislength = len(Irismatrix)
  84.     IrisDataSet = np.array(Irismatrix[0:Irislength])
  85.  
  86.     Yeastmatrix = []
  87.     with  open('yeast.csv', newline='') as csvFile:
  88.         reader = csv.reader(csvFile, delimiter=',')
  89.         next(csvFile)
  90.         for row in reader:
  91.             Yeastmatrix.append([float(i) for i in row[0:7]])
  92.            
  93.     Yeastlength = len(Yeastmatrix)
  94.     YeastDataSet = np.array(Yeastmatrix[0:Yeastlength])
  95.  
  96.     eps = 0.5
  97.     min_points = 5
  98.  
  99.     #print(YeastDataSet)
  100.  
  101.     print('Parameters: eps=' + repr(eps) + ', minPts=' + repr(min_points))
  102.     DBSCAN(IrisDataSet, eps, min_points)    
  103. if __name__ == "__main__":
  104.     main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement