Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import argparse
from itertools import cycle
from os.path import join, basename, splitext

import matplotlib.pyplot as plt
import numpy as np
from sklearn.decomposition import PCA
# Marker styles handed out to classes in order; reused cyclically when there
# are more classes than markers.
MARKERS = ['o', '.', ',', 'x', '+', 'v', '^', '<', '>', 's', 'd']


def load_labels(path):
    """Load the per-sample class labels from a `.csv` or `.npy` file.

    A `.csv` file is read as plain text with one label per line (lines are
    not split on commas); a `.npy` file is loaded with `np.load`.

    Args:
        path: filename of the label file.

    Returns:
        A numpy array of labels.

    Raises:
        NotImplementedError: if the extension is neither `.csv` nor `.npy`.
    """
    ext = splitext(path)[1]
    if ext == '.csv':
        with open(path) as f:
            # strip only the line terminator so labels keep inner whitespace
            return np.array([line.rstrip('\n') for line in f])
    if ext == '.npy':
        return np.load(path)
    raise NotImplementedError('{} is not supported!'.format(ext))


def assign_markers(labels):
    """Pair every distinct label with a marker style.

    Distinct labels are taken in sorted order (`np.unique`) so that the
    label-to-marker assignment and the legend order are the same on every
    run. Markers are taken from `MARKERS` in order and wrap around when
    there are more classes than markers.

    Args:
        labels: array-like of class labels.

    Returns:
        A list of `(label, marker)` tuples, one per distinct label.
    """
    return list(zip(np.unique(labels), cycle(MARKERS)))


def main():
    """Project the features onto 2 PCA components and save a class scatter plot.

    Raises:
        ValueError: if the feature and label files disagree on sample count.
        NotImplementedError: if the label file type is unsupported.
    """
    # setup parser
    parser = argparse.ArgumentParser()
    parser.add_argument("X", help="filename of the feature file (`.npy`) to visualize")
    parser.add_argument("y", help="filename of the label file (`.csv` or `.npy`) to visualize classes")
    parser.add_argument("out_fn", help="filename for the outputing image (`.pdf`)")
    args = parser.parse_args()

    # load the feature and label files
    X = np.load(args.X)
    y = load_labels(args.y)

    # check shape
    if X.shape[0] != len(y):
        raise ValueError('Feature & label should have same number of samples!')

    # run the PCA
    z = PCA(2).fit_transform(X)

    # visualize per label, one scatter call per class so each gets its own
    # legend entry and marker
    for label, marker in assign_markers(y):
        mask = y == label
        plt.scatter(z[mask, 0], z[mask, 1], label=label, marker=marker)

    # save fig
    plt.legend()
    plt.tight_layout()
    plt.savefig(args.out_fn)


if __name__ == "__main__":
    main()
Add Comment
Please, Sign In to add comment