Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
"""Compare three per-window "nearest neighbour distance" profiles of a series.

For every length-``m`` sliding window of one column of a time series the
script computes:

1. the matrix profile returned by ``stumpy.stump``;
2. a spectral profile built from the short-time Fourier transform of the
   windows;
3. a correlation profile built from the normalised cross-correlation
   (``skimage.feature.match_template``) between every pair of windows.

Each profile is min-max scaled to [0, 1]; the matrix profile and the
correlation profile are plotted together, and all three are compared in a
seaborn pair plot.  Wall-clock timings of the three computations are printed.
"""
import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import stumpy
import umap
from scipy.signal import stft
from skimage.feature import match_template
from sklearn.decomposition import NMF, PCA
from sklearn.manifold import TSNE, Isomap, LocallyLinearEmbedding, MDS
from sklearn.preprocessing import MinMaxScaler

# --- Configuration ---------------------------------------------------------
# Alternative dataset that was used with this script:
#   data = pd.read_csv('StumpyData'); m = 640; COLUMN = 'steam flow'
m = 48  # window length in samples
COLUMN = 'value'

# --- Load and resample -----------------------------------------------------
data = pd.read_csv('TaxiData')
data.index = pd.to_datetime(data['timestamp'])
# '30min' is the non-deprecated spelling of the old '30T' alias.
# numeric_only=True keeps the string 'timestamp' column out of the mean,
# which would otherwise raise on pandas >= 2.0.
data = data.resample('30min').mean(numeric_only=True)
data = data.dropna(axis=0)
# The builtin float replaces the np.float alias removed in NumPy 1.24.
data[COLUMN] = data[COLUMN].astype(float)

# --- 1. Matrix profile (stumpy) --------------------------------------------
start = time.perf_counter()
mp = stumpy.stump(data[COLUMN], m)
print(time.perf_counter() - start)

# --- 2. Spectral distance matrix -------------------------------------------
start = time.perf_counter()
values = data[COLUMN].values
# noverlap = m - 1 gives a hop of one sample, and boundary=None disables edge
# extension, so there is one STFT column per sliding window.
freq = stft(values, nperseg=m, noverlap=m - 1, boundary=None)[2]
# Scale every window's spectrum (one column) to unit Euclidean norm.
freq = freq / np.linalg.norm(freq, axis=0, keepdims=True)
# Pairwise (windows x windows) matrix: 1 - |imaginary part of the product of
# the transposed spectra with their conjugate|.
freq = 1 - np.abs(np.dot(freq.T, freq.conj()).imag)
print(time.perf_counter() - start)

# --- 3. Normalised cross-correlation distance matrix -----------------------
# The raw ndarray is used explicitly here.  The original relied on a lazy
# generator that only worked because ``data`` had been rebound to an ndarray
# before the generator was consumed.
data = values
n_windows = len(data) - m + 1
start = time.perf_counter()
tot = np.ones((n_windows, n_windows))
for i in range(n_windows):
    subsequence = data[i:i + m]
    # Correlate window i against every window starting at i or later; the
    # result has exactly n_windows - i entries.
    cross = match_template(
        data[i:].reshape(1, -1), subsequence.reshape(1, -1)
    ).flatten()
    # The matrix is symmetric, so fill row i and column i together.
    tot[i, i:] = cross
    tot[i:, i] = cross
# Turn correlations into distances.  Clipping guards against a correlation
# that rounding pushed slightly above 1, which would make sqrt return NaN.
tot = np.sqrt(np.clip(1 - tot, 0, None))
# Put a value larger than any off-diagonal entry (at most sqrt(2)) on the
# diagonal so a window is never selected as its own nearest neighbour.
np.fill_diagonal(tot, 2)
print(time.perf_counter() - start)

# --- Reduce each method to one scaled value per window ---------------------
# Column 0 of stumpy's output is used as the profile; for the two distance
# matrices the per-row minimum is taken.
mp = MinMaxScaler().fit_transform(
    mp[:, 0].astype(float).reshape(-1, 1)
).flatten()
# ``freq`` must be reduced as well: it is still an (n x n) matrix here and
# could not be stacked next to the two length-n profiles below.
freq = MinMaxScaler().fit_transform(freq.min(axis=1).reshape(-1, 1)).flatten()
tot = MinMaxScaler().fit_transform(tot.min(axis=1).reshape(-1, 1)).flatten()

# --- Plots -----------------------------------------------------------------
plt.plot(mp)
plt.plot(tot)
plt.show()

# One column per method: matrix profile, spectral, cross-correlation.
final = pd.DataFrame(np.column_stack((mp, freq, tot)))
# pairplot has no ``alpha`` parameter; transparency is passed to the
# off-diagonal plots through plot_kws.
sns.pairplot(final, plot_kws={'alpha': 0.1})
plt.show()
print()
Advertisement
Add Comment
Please, Sign In to add comment