Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import matplotlib.pyplot as plt
- import numpy
def pca(datas, dim):
    """Project every row of ``datas`` onto its leading principal axes.

    The covariance matrix of the columns of ``datas`` is computed, its
    eigenvectors are ordered by descending eigenvalue, and each row is
    expressed through its dot products with the first ``dim`` of them.

    Note: rows are projected as given; they are not mean-centred before
    the dot products are taken.

    Args:
        datas: Sequence of rows, each row a sequence of numbers.
        dim: Number of leading eigenvectors to project onto.

    Returns:
        A list with one tuple per input row, holding that row's
        coordinate along each selected eigenvector.
    """
    columns = transposition(datas)
    axes = eigen_vectors(cov(columns))[:dim]

    projected = []
    for row in datas:
        coordinates = tuple(
            sum([weight * value for weight, value in zip(axis, row)])
            for axis in axes
        )
        projected.append(coordinates)
    return projected
def eigen_vectors(matrix):
    """Return the eigenvectors of ``matrix`` ordered by descending eigenvalue.

    Args:
        matrix: Square array-like accepted by ``numpy.linalg.eig``.

    Returns:
        A list of eigenvectors, each a plain Python list, starting with
        the one belonging to the largest eigenvalue.
    """
    values, vectors = numpy.linalg.eig(matrix)
    # Column ``idx`` of ``vectors`` pairs with ``values[idx]``, so ranking the
    # indices by eigenvalue ranks the eigenvectors as well.  ``sorted`` is
    # stable, so equal eigenvalues keep their original relative order.
    ranking = sorted(
        range(len(values)),
        key=lambda idx: values[idx],
        reverse=True,
    )
    return [vectors[:, idx].tolist() for idx in ranking]
def transposition(datas):
    """Transpose a table given as a sequence of rows.

    The column count is derived from the rows themselves via ``zip``, so
    a single-row table and an empty table are both handled.  If the rows
    have different lengths, the result is truncated to the shortest row.

    Args:
        datas: Sequence of rows, each row an iterable of values.

    Returns:
        A list of columns, each column a list holding the values found
        at that position in every row.  An empty input yields ``[]``.
    """
    return [list(column) for column in zip(*datas)]
def cov(td):
    """Build the covariance matrix of the data series in ``td``.

    Args:
        td: Sequence of data series (one per variable), i.e. the
            transposed data table.

    Returns:
        A ``numpy`` array whose entry ``[j][i]`` is
        ``covariance(td[i], td[j])``.
    """
    rows = []
    for series_j in td:
        rows.append([covariance(series_i, series_j) for series_i in td])
    return numpy.array(rows)
def average(data_series):
    """Return the arithmetic mean of ``data_series``.

    Args:
        data_series: Sized collection of numbers.

    Returns:
        The sum of the values divided by how many there are.

    Raises:
        ZeroDivisionError: If ``data_series`` is empty.
    """
    total = sum(data_series)
    count = len(data_series)
    return total / count
def covariance(data_series_a, data_series_b):
    """Return the population covariance of two data series.

    The products of the paired deviations from each series' mean are
    summed and divided by ``len(data_series_a)``.

    Both series are expected to have the same length: pairing uses
    ``zip`` and therefore stops at the shorter series, while the divisor
    is always the length of ``data_series_a``.

    Args:
        data_series_a: First sized collection of numbers.
        data_series_b: Second sized collection of numbers.

    Returns:
        The covariance of the two series.

    Raises:
        ZeroDivisionError: If a series is empty.
    """
    # The means do not depend on the loop variable, so compute them once
    # instead of once per element (which would make the function quadratic).
    mean_a = average(data_series_a)
    mean_b = average(data_series_b)
    deviation_products = [
        (d_a - mean_a) * (d_b - mean_b)
        for d_a, d_b in zip(data_series_a, data_series_b)
    ]
    return sum(deviation_products) / len(data_series_a)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement