Advertisement
Guest User

Stackoverflow mahalanobis numpy question

a guest
Apr 18th, 2013
339
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 3.42 KB | None | 0 0
  1. #! /usr/bin/python
  2.  
  3. import sys
  4. from numpy import *
  5. # data sets are { {sepal length, sepal width, petal length, petal width} ...}
  6. # one data set per class
  7.  
  8. #returns the average euclidian distance between each vector in the data set
  9. #data = [ [vector], [vector 2]....]
  10. def euclid_dist(data):
  11.    from scipy import spatial as spat
  12.    count = 0
  13.    avg  = 0
  14.    for i in range(len(data)):
  15.       for j in range(i + 1, len(data)):
  16.          if(j == len(data)):
  17.             break
  18.          avg += spat.distance.euclidean(data[i], data[j])
  19.          count += 1
  20.    return avg / count
  21.  
  22. #returns the average mahalanobis distance between each vector in the data set
  23. #data = [ [vector], [vector 2]....]
  24. def mahalanobis(data):
  25.    import numpy as np;
  26.    import scipy.spatial.distance;
  27.    avg   = 0
  28.    count = 0
  29.  
  30.    covar = np.cov(data, rowvar=0);
  31.    invcovar = np.linalg.inv(covar)
  32.  
  33.    for i in range(len(data)):
  34.       for j in range(i + 1, len(data)):
  35.          if(j == len(data)):
  36.             break
  37.          avg += scipy.spatial.distance.mahalanobis(data[i], data[j], invcovar)
  38.          count += 1
  39.    return avg / count
  40.  
  41. def hausdorf(data):
  42.    return data
  43.  
  44. #normalizes data over columns. Expects a list of lists. each sub list needs to be of the same size
  45. #this function can be found at http://stackoverflow.com/questions/8904694/how-to-normalize-a-2-dimensional-numpy-array-in-python-less-verbose
  46. def normalize(data):
  47.    import numpy as np
  48.    row_sums = data.sum(axis=1)
  49.    norm_data = np.zeros((50, 4))
  50.    for i, (row, row_sum) in enumerate(zip(data, row_sums)):
  51.       norm_data[i,:] = row / row_sum
  52.    return norm_data
  53.  
  54. def main():
  55.    if(len(sys.argv) < 2):
  56.        print("No file name to open. USAGE: ./" + argv[0] + " \"file_name\"")
  57.        exit(1);
  58.  
  59.    try:
  60.        data_file = open(sys.argv[1])
  61.    except IOError:
  62.        print("Could not open file! Closing " + argv[0])
  63.  
  64.    with data_file:
  65.       data = [] # each element in data is a data set of the form specified above
  66.       norm = []
  67.       data_set = []
  68.  
  69.       classification = "Iris-setosa"
  70.  
  71.       for line in data_file:
  72.          line_data = []
  73.          comma_pos = 0
  74.  
  75.          while True:
  76.             comma_pos = line.find(',')
  77.             if( comma_pos == -1):
  78.                if(line.strip() != classification):
  79.                   classification = line.strip()
  80.                   data.append(array(data_set))
  81.                   data_set = []
  82.                break;
  83.             line_data.append(float(line[:comma_pos]))
  84.             line = line[comma_pos + 1:]
  85.          data_set.append(line_data)
  86.  
  87.  
  88.       #for data_set in data:
  89.        #  for line in data_set:
  90.         #    print(line)
  91.          #print("start of new set")
  92.  
  93.       norm.append( normalize(data[0]))
  94.       norm.append( normalize(data[1]))
  95.       norm.append( normalize(data[2]))
  96.  
  97.       print(euclid_dist( data[0]))
  98.       print(euclid_dist( data[1]))
  99.       print(euclid_dist( data[2]))
  100.  
  101.       print(euclid_dist( norm[0]))
  102.       print(euclid_dist( norm[1]))
  103.       print(euclid_dist( norm[2]))
  104.  
  105.       print(mahalanobis( data[0]))
  106.       print(mahalanobis( data[1]))
  107.       print(mahalanobis( data[1]))
  108.  
  109.       print("")
  110.  
  111.       print(mahalanobis( norm[0]))
  112.       print(mahalanobis( norm[1]))
  113.       print(mahalanobis( norm[2]))
  114.  
  115.       #hausdorf( data[0])
  116.       #hausdorf( data[1])
  117.       #hausdorf( data[1])
  118.  
  119.       #hausdorf( norm[0])
  120.       #hausdorf( norm[1])
  121.       #hausdorf( norm[2])
  122.  
  123. main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement