Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import math

import numpy as np
from scipy.special import expit
def mean(data):
    """Return the arithmetic mean of every row in ``data``.

    Args:
        data: Iterable of non-empty sequences of numbers, one sequence per
            row.

    Returns:
        A list with one mean per row, in the same order as ``data``.

    Raises:
        ZeroDivisionError: If a row is empty.
    """
    # The result is deliberately not called ``mean``/``sum``/``list`` so that
    # neither this function's own name nor any builtin is shadowed.
    means = [sum(row) / len(row) for row in data]
    print("Mean", means)
    return means
def std(data):
    """Return the population standard deviation of every row in ``data``.

    Args:
        data: Sequence of non-empty sequences of numbers, one sequence per
            row.

    Returns:
        A list with one standard deviation per row, in the same order as
        ``data``.

    Raises:
        ZeroDivisionError: If a row is empty (raised while computing the
            row means).
    """
    # Row means come from the module's own ``mean`` helper.
    row_means = mean(data)
    deviations = []
    for row, row_mean in zip(data, row_means):
        squared_error = sum((val - row_mean) ** 2 for val in row)
        # Divide by the number of values in *this row*; dividing by the
        # number of rows gives a wrong variance whenever the two differ.
        deviations.append(math.sqrt(squared_error / len(row)))
    print("Deviation", deviations)
    return deviations
def distance(v1, v2):
    """Return the Euclidean (straight-line) distance between two vectors.

    The distance is the square root of the summed squared coordinate
    differences, i.e. the Euclidean metric, not the Manhattan metric.

    Args:
        v1: Sequence of numbers.
        v2: Sequence of numbers with the same length as ``v1``.

    Returns:
        The distance as a float, or ``None`` when the two vectors have
        different lengths (an error message is printed in that case).
    """
    if len(v1) != len(v2):
        print("CALC ERROR in commonFunctions.py:" + "\n" +
              "The two vectors does not have the same dimension")
        return None
    return math.sqrt(sum((a - b) ** 2 for a, b in zip(v1, v2)))
def middlePoint(points):
    """Return the centroid (per-coordinate average) of ``points``.

    Args:
        points: Non-empty sequence of points. Every point must have at
            least as many coordinates as the first one; the first point
            fixes the dimensionality of the result.

    Returns:
        A list with one averaged coordinate per dimension.

    Raises:
        IndexError: If ``points`` is empty or a point has fewer
            coordinates than the first point.
    """
    count = len(points)
    return [
        sum(point[axis] for point in points) / count
        for axis in range(len(points[0]))
    ]
def arrangeData(dimensions, data):
    """Regroup column-wise data into one record per sample.

    ``data[j][i]`` is read as the value of dimension ``j`` for sample ``i``;
    the result holds, for each sample ``i``, the list
    ``[data[0][i], ..., data[dimensions - 1][i]]``.

    Args:
        dimensions: Number of leading entries of ``data`` (columns) to
            include in every record.
        data: Sequence of equally long sequences, one per dimension.

    Returns:
        A list with ``len(data[0])`` records of ``dimensions`` values each,
        or an empty list when ``data`` is empty.
    """
    # ``len(...) == 0`` rather than truthiness so array-like inputs work too.
    if len(data) == 0:
        return []
    return [
        [data[column][sample] for column in range(dimensions)]
        for sample in range(len(data[0]))
    ]
def show2DGraph(data, cluster_ids=None):
    """Show a scatter plot of ``data`` coloured by cluster.

    Columns 0 and 1 of ``data`` are used as x and y. When the rows have a
    third column it drives the marker area as ``(value + 2) ** 5``;
    otherwise every marker has area 10.

    Args:
        data: Non-empty 2-D array-like, one row per point with at least
            two columns.
        cluster_ids: Optional sequence giving, for every row of ``data``,
            an integer cluster index in ``0..4``. When omitted, the
            module-level name ``clusters`` is used instead.

    Raises:
        NameError: If ``cluster_ids`` is omitted and no module-level
            ``clusters`` exists.
        IndexError: If a cluster index is outside the colour table or
            there are fewer cluster ids than rows.
    """
    # NOTE(review): ``plt`` is expected to be a module-level name, presumably
    # ``matplotlib.pyplot`` -- confirm it is imported where this module is used.
    if cluster_ids is None:
        # Keep the original implicit dependency as the fallback so existing
        # one-argument calls behave as before.
        cluster_ids = clusters
    dataSet = np.array(data)
    # 'purple' replaces 'p', which is not a colour name matplotlib accepts
    # (presumably purple was intended).
    colors = ['b', 'r', 'g', 'y', 'purple']
    has_size_column = len(dataSet[0]) >= 3
    color_array = []
    area = []
    for i in range(len(dataSet)):
        color_array.append(colors[cluster_ids[i]])
        area.append((dataSet[i][2] + 2) ** 5 if has_size_column else 10)
    # Index the ndarray, not ``data``: a plain list does not support
    # ``[:, 0]`` slicing.
    plt.scatter(dataSet[:, 0], dataSet[:, 1], s=area, c=color_array, alpha=0.5)
    plt.show()
def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + e**-x)`` of a real number.

    The two branches are algebraically identical; each one only ever
    exponentiates a non-positive number, so large-magnitude inputs saturate
    to 0.0 or 1.0 instead of raising ``OverflowError``.

    Args:
        x: Real number.

    Returns:
        A float in the closed interval [0, 1].
    """
    if x >= 0:
        return 1 / (1 + math.exp(-x))
    exp_x = math.exp(x)
    return exp_x / (1 + exp_x)
def getZ(a, b, c, n):
    """Evaluate the plane ``z = a + b*x + c*y`` on an ``n`` by ``n`` integer grid.

    Args:
        a: Constant term.
        b: Coefficient of ``x``.
        c: Coefficient of ``y``.
        n: Grid size; ``x`` and ``y`` each run over ``0 .. n - 1``.

    Returns:
        A list of ``[x, y, z]`` triples ordered by ``x`` first, then ``y``.
        The list is empty when ``n`` is not positive.
    """
    return [
        [x, y, a + (x * b) + (y * c)]
        for x in range(n)
        for y in range(n)
    ]
# Sample plane evaluated on a 10 x 10 grid (100 [x, y, z] triples), built at import time.
testArray = getZ(123.47, -4.49, 16.26, 10)
def _split_into_folds(rows, n_folds):
    """Cut ``rows`` into ``n_folds`` consecutive lists of equal length."""
    fold_size = len(rows) // n_folds
    return [
        [rows[i] for i in range(fold * fold_size, (fold + 1) * fold_size)]
        for fold in range(n_folds)
    ]


def prepare_data(data, prepare_for_cross_validation, prepare_for_KNN,
                 n_folds=10):
    """Split rows into features/labels and, optionally, into folds.

    Each row of ``data`` is treated as feature values followed by one label
    in the last position.

    Args:
        data: Sequence of rows (each row a sequence whose last item is the
            label).
        prepare_for_cross_validation: When true, the output is cut into
            ``n_folds`` consecutive folds of ``len(data) // n_folds`` rows
            each. Rows left over after the last full fold are not included.
        prepare_for_KNN: When true, rows are kept whole instead of being
            separated into features and labels.
        n_folds: Number of folds used for cross validation. The default of
            10 reproduces the 10 folds of 50 rows that a 500-row data set
            has always produced.

    Returns:
        * not KNN, no cross validation: ``(features, labels)``.
        * not KNN, cross validation: ``(features, labels)`` where each is a
          list of ``n_folds`` lists.
        * KNN, cross validation: a list of ``n_folds`` lists of whole rows.
        * KNN, no cross validation: ``([], [])``.

    Raises:
        ValueError: If cross validation is requested with ``n_folds < 1``
            or with fewer rows than folds.
    """
    if prepare_for_cross_validation:
        if n_folds < 1:
            raise ValueError("n_folds must be at least 1")
        if len(data) < n_folds:
            raise ValueError("data has fewer rows than n_folds")

    if prepare_for_KNN:
        if prepare_for_cross_validation:
            return _split_into_folds(data, n_folds)
        # NOTE(review): this combination has always returned two empty
        # lists; it looks like the rows were meant to be returned unchanged
        # -- confirm with callers before altering the return shape.
        return [], []

    features = [row[:-1] for row in data]
    labels = [row[-1] for row in data]
    if prepare_for_cross_validation:
        features = _split_into_folds(features, n_folds)
        labels = _split_into_folds(labels, n_folds)
    return features, labels
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement