Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import numpy as np
- from sklearn.manifold import TSNE
- import glob
- import os
- import sys
- from numpy import genfromtxt
- import matplotlib.pyplot as plt
# Running count of feature files loaded so far; incremented by processData().
numFilesRead = 0
# Placeholder array; it is rebound to the stacked feature rows once all files
# have been loaded. NOTE(review): 144 presumably mirrors the per-file feature
# count -- confirm, since this initial value is never read.
data = np.zeros((144, 1))
# One array per loaded file, in load order (as returned by genfromtxt).
dataRows = []
# Path of each loaded file, kept index-aligned with dataRows.
dataFiles = []
def processData(fullPath):
    """Load one comma-separated feature file and record it in module state.

    The parsed array is appended to the module-level ``dataRows`` list, the
    path is appended to ``dataFiles`` at the same index, and the module-level
    counter ``numFilesRead`` is incremented.

    Args:
        fullPath: Path of the file to parse with ``genfromtxt`` using ``,``
            as the delimiter.

    Returns:
        None. The call is a no-op once ``numFilesRead`` exceeds 10000.
    """
    global numFilesRead
    # The cap is tested before loading, so the file that arrives while the
    # counter equals 10000 is still read: at most 10001 files are loaded.
    if numFilesRead > 10000:
        return
    featureData = genfromtxt(fullPath, delimiter=',')
    dataRows.append(featureData)
    # The full path (not just the base name) is used as the row label.
    dataFiles.append(fullPath)
    numFilesRead += 1
# Walk the current directory tree and load every "*.features" file found.
for root, dirs, files in os.walk("."):
    # Stop visiting further directories once the cap is exceeded.
    # processData() tests the same threshold itself, so any files left in the
    # directory being scanned when the cap is hit are skipped there.
    if numFilesRead > 10000:
        break
    for fileName in files:
        if fileName.endswith(".features"):
            processData(os.path.join(root, fileName))

# np.vstack raises an opaque ValueError on an empty list; fail with a clear
# message instead when nothing was loaded.
if not dataRows:
    raise SystemExit('No ".features" files found under the current directory.')

# Stack the per-file arrays into one matrix with one row per sample.
data = np.vstack(dataRows)

# Each file is expected to contribute exactly one row. If a file parsed to
# several rows, the labels below would no longer line up with the embedding,
# so stop before the expensive t-SNE run.
if data.shape[0] != len(dataFiles):
    raise SystemExit(
        "Loaded %d rows from %d files; expected one row per file."
        % (data.shape[0], len(dataFiles))
    )

print(data)

# Project the feature rows down to two dimensions.
X_embedded = TSNE(n_components=2).fit_transform(data)

# Emit one "path,x,y" line per loaded file.
for filePath, point in zip(dataFiles, X_embedded):
    print(",".join((filePath, str(point[0]), str(point[1]))))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement