from __future__ import print_function
import sys
import os
import time
import timeit
import numpy as np
import theano
import theano.tensor as T
import lasagne
from theano.sandbox.rng_mrg import MRG_RandomStreams
from sklearn.cluster import KMeans
from sklearn.metrics.cluster import normalized_mutual_info_score, adjusted_rand_score
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from sklearn.utils import linear_assignment_
from sklearn.metrics import accuracy_score
try:
    import cPickle as pickle
except ImportError:
    import pickle
import h5py
from sklearn.metrics.pairwise import euclidean_distances
from sklearn.metrics import mean_squared_error
try:
    from six.moves import xrange
except ImportError:
    xrange = range  # Python 3 fallback when six is unavailable
import scipy
import scipy.linalg  # needed explicitly for scipy.linalg.inv below
from numpy.matlib import repmat
from scipy.spatial.distance import cdist
from scipy import sparse
def gacPathCondEntropy(IminuszW, cluster_i, cluster_j):
    # Compute conditional complexity from the subpart of the weighted adjacency matrix
    # Inputs:
    #   - IminuszW: the matrix (I - z*P)
    #   - cluster_i: index vector of cluster i
    #   - cluster_j: index vector of cluster j
    # Output:
    #   - L_ij: the sum of conditional complexities of clusters i and j after merging.
    # by Wei Zhang (wzhang009 at gmail.com), June 8, 2011
    num_i = np.size(cluster_i)
    num_j = np.size(cluster_j)

    # detecting cross elements (this check is expensive and unnecessary)
    ijGroupIndex = np.append(cluster_i, cluster_j)

    y_ij = np.zeros((num_i + num_j, 2))  # [y_i, y_j]
    y_ij[:num_i, 0] = 1
    y_ij[num_i:, 1] = 1
    idx = np.ix_(ijGroupIndex, ijGroupIndex)
    L_ij = scipy.linalg.inv(IminuszW[idx]).dot(y_ij)
    L_ij = sum(L_ij[:num_i, 0]) / (num_i * num_i) + sum(L_ij[num_i:, 1]) / (num_j * num_j)
    return L_ij
def gacPathEntropy(subIminuszW):
    # Compute structural complexity from the subpart of the weighted adjacency matrix
    # Input:
    #   - subIminuszW: the subpart of (I - z*P)
    # Output:
    #   - clusterComp: structural complexity of a cluster.
    # by Wei Zhang (wzhang009 at gmail.com), June 8, 2011
    N = subIminuszW.shape[0]
    clusterComp = scipy.linalg.inv(subIminuszW).dot(np.ones((N, 1)))
    clusterComp = sum(clusterComp) / (N * N)
    return clusterComp
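

# Minimal usage sketch (not part of the original script): structural complexity of a toy
# 3-node cluster, assuming a uniform row-stochastic transition matrix P and the same
# z = 0.01 used elsewhere in this script. The names below are illustrative only.
def _demo_gac_path_entropy():
    P = np.full((3, 3), 1.0 / 3)       # toy transition matrix
    IminuszW = np.eye(3) - 0.01 * P    # (I - z*P) for the whole (single-cluster) graph
    print('cluster complexity:', gacPathEntropy(IminuszW))
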
def gacMerging(graphW, initClusters, groupNumber, strDescr, z):
    # Cluster merging for Graph Agglomerative Clustering.
    # Implements an agglomerative clustering algorithm based on maximum graph
    # structural affinity of two groups.
    # Inputs:
    #   - graphW: asymmetric weighted adjacency matrix
    #   - initClusters: a cell array of clustered vertices
    #   - groupNumber: the final number of clusters
    #   - strDescr: structural descriptor, 'zeta' or 'path'
    #   - z: (I - z*P), default: 0.01
    # Outputs:
    #   - clusterLabels: 1 x m list whose i-th entry is the group assignment of
    #     the i-th data vector w_i. Groups are indexed sequentially, starting from 1.
    # by Wei Zhang (wzhang009 at gmail.com), June 8, 2011
    numSample = graphW.shape[0]
    IminuszW = np.eye(numSample) - z * graphW
    myInf = 1e10

    # initialization
    VERBOSE = True
    numClusters = len(initClusters)
    if numClusters <= groupNumber:
        print('GAC: too few initial clusters. Do not need merging!')

    # compute the structural complexity of each initial cluster
    clusterComp = np.zeros((numClusters, 1))
    for i in xrange(numClusters):
        clusterComp[i] = gacPathEntropy(IminuszW[np.ix_(initClusters[i], initClusters[i])])

    # compute initial (negative) affinity table (upper triangular matrix), very slow
    if VERBOSE:
        print('   Computing initial table.')
    affinityTab = np.full(shape=(numClusters, numClusters), fill_value=np.inf)
    for j in xrange(numClusters):
        for i in xrange(j):
            affinityTab[i, j] = -1 * gacPathCondEntropy(IminuszW, initClusters[i], initClusters[j])
    affinityTab = (clusterComp + clusterComp.T) + affinityTab

    if VERBOSE:
        print('   Starting merging process')
    curGroupNum = numClusters
    while True:
        if np.mod(curGroupNum, 20) == 0 and VERBOSE:
            print('   Group count: ', str(curGroupNum))

        # Find the two clusters with the best affinity
        minAff = np.min(affinityTab[:curGroupNum, :curGroupNum], axis=0)
        minIndex1 = np.argmin(affinityTab[:curGroupNum, :curGroupNum], axis=0)
        minIndex2 = np.argmin(minAff)
        minIndex1 = minIndex1[minIndex2]
        if minIndex2 < minIndex1:
            minIndex1, minIndex2 = minIndex2, minIndex1

        # merge the two clusters
        new_cluster = np.unique(np.append(initClusters[minIndex1], initClusters[minIndex2]))

        # move the second cluster to be merged to the end of the cluster array;
        # note that we only need to copy the end cluster's information to
        # the second cluster's position
        if minIndex2 != curGroupNum:
            initClusters[minIndex2] = initClusters[-1]
            clusterComp[minIndex2] = clusterComp[curGroupNum - 1]
            # affinityTab is an upper triangular matrix
            affinityTab[:minIndex2, minIndex2] = affinityTab[:minIndex2, curGroupNum - 1]
            affinityTab[minIndex2, minIndex2 + 1:curGroupNum - 1] = affinityTab[minIndex2 + 1:curGroupNum - 1, curGroupNum - 1]

        # update the first cluster and remove the second cluster
        initClusters[minIndex1] = new_cluster
        initClusters.pop()
        clusterComp[minIndex1] = gacPathEntropy(IminuszW[np.ix_(new_cluster, new_cluster)])
        clusterComp[curGroupNum - 1] = myInf
        affinityTab[:, curGroupNum - 1] = myInf
        affinityTab[curGroupNum - 1, :] = myInf
        curGroupNum = curGroupNum - 1
        if curGroupNum <= groupNumber:
            break

        # update the affinity table for the merged cluster
        for groupIndex1 in xrange(minIndex1):
            affinityTab[groupIndex1, minIndex1] = -1 * gacPathCondEntropy(IminuszW, initClusters[groupIndex1], new_cluster)
        for groupIndex1 in xrange(minIndex1 + 1, curGroupNum):
            affinityTab[minIndex1, groupIndex1] = -1 * gacPathCondEntropy(IminuszW, initClusters[groupIndex1], new_cluster)
        affinityTab[:minIndex1, minIndex1] = clusterComp[:minIndex1].reshape(-1) + clusterComp[minIndex1] + affinityTab[:minIndex1, minIndex1]
        affinityTab[minIndex1, minIndex1 + 1:curGroupNum] = clusterComp[minIndex1 + 1:curGroupNum].T + clusterComp[minIndex1] + affinityTab[minIndex1, minIndex1 + 1:curGroupNum]

    # generate sample labels
    clusterLabels = np.ones((numSample, 1))
    for i in xrange(len(initClusters)):
        clusterLabels[initClusters[i]] = i
    if VERBOSE:
        print('   Final group count: ', str(curGroupNum))
    return clusterLabels
def gacNNMerge(distance_matrix, NNIndex):
    # merge each vertex with its nearest neighbor
    # by Wei Zhang (wzhang009 at gmail.com), June 8, 2011
    #
    # NN indices
    sampleNum = distance_matrix.shape[0]
    clusterLabels = np.zeros((sampleNum, 1))
    counter = 1
    for i in xrange(sampleNum):
        idx = NNIndex[i, :2]
        assignedCluster = clusterLabels[idx]
        assignedCluster = np.unique(assignedCluster[np.where(assignedCluster > 0)])
        if len(assignedCluster) == 0:
            clusterLabels[idx] = counter
            counter = counter + 1
        elif len(assignedCluster) == 1:
            clusterLabels[idx] = assignedCluster
        else:
            clusterLabels[idx] = assignedCluster[0]
            for j in xrange(1, len(assignedCluster)):
                clusterLabels[np.where(clusterLabels == assignedCluster[j])] = assignedCluster[0]

    uniqueLabels = np.unique(clusterLabels)
    clusterNumber = len(uniqueLabels)
    initialClusters = []
    for i in xrange(clusterNumber):
        initialClusters.append(np.where(clusterLabels[:].flatten() == uniqueLabels[i])[0])
    return initialClusters
def gacBuildDigraph(distance_matrix, K, a):
    # Build a directed graph.
    # Inputs:
    #   - distance_matrix: pairwise distances, d_{i -> j}
    #   - K: the number of nearest neighbors for the KNN graph
    #   - a: for covariance estimation
    #     sigma^2 = (\sum_{i=1}^n \sum_{j \in N_i^K} d_{ij}^2) * a
    # Outputs:
    #   - graphW: asymmetric weighted adjacency matrix,
    #     w_{ij} = exp(-d_{ij}^2 / sig2), if j \in N_i^K
    #   - NNIndex: nearest-neighbor index matrix
    # by Wei Zhang (wzhang009 at gmail.com), June 8, 2011
    N = distance_matrix.shape[0]

    # find the K nearest neighbors (plus self) under the given distances
    sortedDist = np.sort(distance_matrix, axis=1)
    NNIndex = np.argsort(distance_matrix, axis=1)
    NNIndex = NNIndex[:, :K + 1]

    # estimate the deviation (kernel bandwidth)
    sig2 = np.mean(np.mean(sortedDist[:, 1:max(K + 1, 4)])) * a
    tmpNNDist = np.min(sortedDist[:, 1:], axis=1)
    while any(np.exp(-tmpNNDist / sig2) < 1e-5):  # check sig2 and magnify it if it is too small
        sig2 = 2 * sig2
    print('  sigma = ', str(np.sqrt(sig2)))

    # build the graph
    ND = sortedDist[:, 1:K + 1]
    NI = NNIndex[:, 1:K + 2]
    XI = repmat(np.arange(0, N).reshape(-1, 1), 1, K)
    sig2 = np.double(sig2)
    ND = np.double(ND)
    graphW = sparse.csc_matrix((np.exp(-ND[:] * (1 / sig2)).flatten(), (XI[:].flatten(), NI[:].flatten())),
                               shape=(N, N)).todense()
    graphW += np.eye(N)
    return graphW, NNIndex
def gacCluster(distance_matrix, groupNumber, strDescr, K, a, z):
    # Graph Agglomerative Clustering toolbox
    # Inputs:
    #   - distance_matrix: pairwise distances, d_{i -> j}
    #   - groupNumber: the final number of clusters
    #   - strDescr: structural descriptor. The choices are
    #     - 'zeta': zeta function based descriptor
    #     - 'path': path integral based descriptor
    #   - K: the number of nearest neighbors for the KNN graph, default: 20
    #   - p: merging (p+1)-links in the l-links algorithm, default: 1 (not exposed in this port)
    #   - a: for covariance estimation, default: 1
    #     sigma^2 = (\sum_{i=1}^n \sum_{j \in N_i^K} d_{ij}^2) * a
    #   - z: (I - z*P), default: 0.01
    # Output:
    #   - clusteredLabels: clustering results
    # by Wei Zhang (wzhang009 at gmail.com), June 8, 2011
    #
    # Please cite the following papers if you find the code helpful:
    #
    #   W. Zhang, D. Zhao, and X. Wang.
    #   Agglomerative clustering via maximum incremental path integral.
    #   Pattern Recognition, 46 (11): 3056-3065, 2013.
    #
    #   W. Zhang, X. Wang, D. Zhao, and X. Tang.
    #   Graph Degree Linkage: Agglomerative Clustering on a Directed Graph.
    #   In Proceedings of the European Conference on Computer Vision (ECCV), 2012.
    print('--------------- Graph Structural Agglomerative Clustering ---------------------')

    # initialization
    print('---------- Building graph and forming initial clusters with l-links ---------')
    graphW, NNIndex = gacBuildDigraph(distance_matrix, K, a)
    # from adjacency matrix to probability transition matrix
    graphW = np.array((1. / np.sum(graphW, axis=1))) * np.array(graphW)  # row sums are 1
    initialClusters = gacNNMerge(distance_matrix, NNIndex)

    print('-------------------------- Zeta merging --------------------------')
    clusteredLabels = gacMerging(graphW, initialClusters, groupNumber, strDescr, z)
    return clusteredLabels
def predict_ac_mpi(feat, nClass, nSamples, nfeatures):
    K = 20
    a = 1
    z = 0.01
    distance_matrix = cdist(feat, feat) ** 2
    # path integral based agglomerative clustering
    label_pre = gacCluster(distance_matrix, nClass, 'path', K, a, z)
    return label_pre[:, 0]
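

# Minimal usage sketch (not part of the original script): run the path-integral agglomerative
# clustering on two well-separated random blobs. All variable names here are illustrative.
def _demo_predict_ac_mpi():
    rng = np.random.RandomState(0)
    demo_feat = np.vstack((rng.randn(50, 4), rng.randn(50, 4) + 5.0))
    demo_labels = predict_ac_mpi(demo_feat, 2, demo_feat.shape[0], demo_feat.shape[1])
    print('cluster sizes:', np.bincount(np.int32(demo_labels)))
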
def bestMap(L1, L2):
    # Permute the labels in L2 to best match L1 (Hungarian assignment) and return accuracy.
    if len(L1) != len(L2):
        print('size(L1) must == size(L2)')
    Label1 = np.unique(L1)
    nClass1 = len(Label1)
    Label2 = np.unique(L2)
    nClass2 = len(Label2)
    nClass = max(nClass1, nClass2)
    G = np.zeros((nClass, nClass))
    for i in range(nClass1):
        for j in range(nClass2):
            G[i][j] = len(np.nonzero((L1 == Label1[i]) * (L2 == Label2[j]))[0])
    c = linear_assignment_.linear_assignment(-G.T)[:, 1]
    newL2 = np.zeros(len(L2))
    for i in range(nClass2):
        for j in np.nonzero(L2 == Label2[i])[0]:
            if len(Label1) > c[i]:
                newL2[j] = Label1[c[i]]
    return accuracy_score(L1, newL2)
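

# Minimal usage sketch (not part of the original script): bestMap matches predicted cluster
# labels to the ground-truth labels (via the deprecated sklearn linear_assignment_ imported
# above) before scoring accuracy, so a pure relabelling still scores 1.0.
def _demo_best_map():
    y_true = np.array([0, 0, 1, 1, 2, 2])
    y_pred = np.array([1, 1, 2, 2, 0, 0])  # same partition, permuted label names
    print('accuracy after matching:', bestMap(y_true, y_pred))  # expected: 1.0
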
def dataset_settings(dataset):
    if (dataset == 'MNIST-full') | (dataset == 'MNIST-test'):
        kernel_sizes = [4, 5]
        strides = [2, 2]
        paddings = [0, 2]
        test_batch_size = 100
    elif dataset == 'USPS':
        kernel_sizes = [4, 5]
        strides = [2, 2]
        paddings = [0, 2]
        test_batch_size = 100
    elif dataset == 'FRGC':
        kernel_sizes = [4, 5]
        strides = [2, 2]
        paddings = [2, 2]
        test_batch_size = 1231
    elif dataset == 'CMU-PIE':
        kernel_sizes = [4, 5]
        strides = [2, 2]
        paddings = [2, 2]
        test_batch_size = 8
    elif dataset == 'YTF':
        kernel_sizes = [5, 4]
        strides = [2, 2]
        paddings = [2, 0]
        test_batch_size = 100

    return kernel_sizes, strides, paddings, test_batch_size
def create_result_dirs(output_path, file_name):
    if not os.path.exists(output_path):
        print('creating log folder')
        os.makedirs(output_path)
        try:
            os.makedirs(os.path.join(output_path, '../params'))
        except OSError:
            pass
        # copy this module and the run script into the output folder for reproducibility
        func_file_name = os.path.basename(__file__)
        if func_file_name.split('.')[1] == 'pyc':
            func_file_name = func_file_name[:-1]
        functions_full_path = os.path.join(output_path, func_file_name)
        cmd = 'cp ' + func_file_name + ' "' + functions_full_path + '"'
        os.popen(cmd)
        run_file_full_path = os.path.join(output_path, file_name)
        cmd = 'cp ' + file_name + ' "' + run_file_full_path + '"'
        os.popen(cmd)
class Logger(object):
    def __init__(self, output_path):
        self.terminal = sys.stdout
        self.log = open(output_path + "log.txt", "w+")

    def write(self, message):
        self.terminal.write(message)
        self.log.write(message)

    def flush(self):
        # this flush method is needed for Python 3 compatibility.
        # it handles the flush command by doing nothing.
        # you might want to specify some extra behavior here.
        pass
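

# Minimal usage sketch (not part of the original script): redirect stdout through the Logger
# so everything printed is also written to <output_path>log.txt. The path below is
# hypothetical and the directory is assumed to already exist.
def _demo_logger(output_path='results/demo_run/'):
    sys.stdout = Logger(output_path)
    print('this line goes to the terminal and to', output_path + 'log.txt')
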
def kmeans(encoder_val_clean, y, nClusters, y_pred_prev=None, weight_initilization='k-means++', seed=42, n_init=40,
           max_iter=300):
    # weight_initilization = { 'kmeans-pca', 'k-means++', 'random', None }
    if weight_initilization == 'kmeans-pca':
        start_time = timeit.default_timer()
        pca = PCA(n_components=nClusters).fit(encoder_val_clean)
        kmeans_model = KMeans(init=pca.components_, n_clusters=nClusters, n_init=1, max_iter=300, random_state=seed)
        y_pred = kmeans_model.fit_predict(encoder_val_clean)

        centroids = kmeans_model.cluster_centers_.T
        centroids = centroids / np.sqrt(np.diag(np.matmul(centroids.T, centroids)))
        end_time = timeit.default_timer()

    elif weight_initilization == 'k-means++':
        start_time = timeit.default_timer()
        kmeans_model = KMeans(init='k-means++', n_clusters=nClusters, n_init=n_init, max_iter=max_iter, n_jobs=15,
                              random_state=seed)
        y_pred = kmeans_model.fit_predict(encoder_val_clean)

        # soft assignments from inverse squared distances to the cluster centers
        D = 1.0 / euclidean_distances(encoder_val_clean, kmeans_model.cluster_centers_, squared=True)
        D **= 2.0 / (2 - 1)
        D /= np.sum(D, axis=1)[:, np.newaxis]

        centroids = kmeans_model.cluster_centers_.T
        centroids = centroids / np.sqrt(np.diag(np.matmul(centroids.T, centroids)))
        end_time = timeit.default_timer()

    print('k-means: \t nmi =', normalized_mutual_info_score(y, y_pred), '\t arc =', adjusted_rand_score(y, y_pred),
          '\t acc = {:.4f} '.format(bestMap(y, y_pred)),
          'K-means objective = {:.1f} '.format(kmeans_model.inertia_), '\t runtime =', end_time - start_time)

    if y_pred_prev is not None:
        print('Different Assignments: ', sum(y_pred == y_pred_prev), '\tbestMap: ', bestMap(y_pred, y_pred_prev),
              '\tdatapoints-bestMap*datapoints: ',
              encoder_val_clean.shape[0] - bestMap(y_pred, y_pred_prev) * encoder_val_clean.shape[0])

    return centroids, kmeans_model.inertia_, y_pred
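

# Minimal usage sketch (not part of the original script): call the k-means wrapper above on
# random 2-D features with arbitrary integer labels, just to illustrate the call signature.
# Note the wrapper passes n_jobs to KMeans, so this assumes an older scikit-learn release
# that still accepts that argument.
def _demo_kmeans():
    rng = np.random.RandomState(0)
    feats = rng.rand(300, 2).astype('float32')
    labels = rng.randint(0, 3, size=300)
    centroids, inertia, y_pred = kmeans(feats, labels, nClusters=3, seed=0, n_init=5)
    print('centroid matrix shape:', centroids.shape)  # (2, 3): feature_dim x nClusters
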
def load_dataset(dataset_path):
    # hf = h5py.File(dataset_path + '/data.h5', 'r')
    # X = np.asarray(hf.get('data'), dtype='float32')
    # X_train = (X - np.float32(127.5)) / np.float32(127.5)
    # y_train = np.asarray(hf.get('labels'), dtype='int32')
    # return X_train, y_train

    from keras.datasets import mnist
    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    x_raw = np.concatenate((x_train, x_test))
    y = np.concatenate((y_train, y_test))
    x = x_raw.reshape(-1, 28, 28, 1).astype('float32')
    x = x / 255.

    # keep a class-imbalanced subset of MNIST: 55% of the zeros, 60% of the ones, ...,
    # up to 100% of the nines
    fractions = [0.55, 0.60, 0.65, 0.70, 0.75, 0.80, 0.85, 0.90, 0.95, 1.0]
    x_parts, y_parts = [], []
    for digit, frac in enumerate(fractions):
        x_digit = x[np.isin(y, digit)]
        n_keep = int(frac * len(x_digit))
        x_parts.append(x_digit[:n_keep])
        y_parts.append(np.full(n_keep, digit, dtype='int32'))
    x = np.concatenate(x_parts)
    y = np.concatenate(y_parts)
    return x, y
def iterate_minibatches(inputs, targets, batchsize, shuffle=False):
    assert len(inputs) == len(targets)
    if shuffle:
        indices = np.arange(len(inputs))
        np.random.shuffle(indices)
    for start_idx in range(0, len(inputs) - batchsize + 1, batchsize):
        if shuffle:
            excerpt = indices[start_idx:start_idx + batchsize]
        else:
            excerpt = slice(start_idx, start_idx + batchsize)
        yield inputs[excerpt], targets[excerpt], excerpt
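

# Minimal usage sketch (not part of the original script): iterate over a toy array in
# minibatches; the generator yields (inputs, targets, indices) for each excerpt and drops
# the last incomplete batch.
def _demo_iterate_minibatches():
    X_toy = np.arange(20).reshape(10, 2)
    y_toy = np.arange(10)
    for inputs, targets, idx in iterate_minibatches(X_toy, y_toy, batchsize=4, shuffle=True):
        print('batch targets:', targets, '  indices:', idx)
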
def build_eml(input_var=None, n_out=None, W_initial=None):
    l_in = input_var

    if W_initial is None:
        l_out = lasagne.layers.DenseLayer(
            l_in, num_units=n_out,
            nonlinearity=lasagne.nonlinearities.softmax,
            W=lasagne.init.Uniform(std=0.5, mean=0.5), b=lasagne.init.Constant(1))
    else:
        l_out = lasagne.layers.DenseLayer(
            l_in, num_units=n_out,
            nonlinearity=lasagne.nonlinearities.softmax,
            W=W_initial, b=lasagne.init.Constant(0))

    return l_out
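

# Minimal usage sketch (not part of the original script): build_eml expects an incoming
# Lasagne layer (despite the parameter name input_var) and stacks a softmax output layer
# on top of it; with W_initial=None the uniform initializer above is used.
def _demo_build_eml():
    in_layer = lasagne.layers.InputLayer(shape=(None, 10))
    out_layer = build_eml(in_layer, n_out=4)
    print('softmax output shape:', out_layer.output_shape)  # (None, 4)
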
def build_depict(input_var=None, n_in=[None, None, None], feature_map_sizes=[50, 50],
                 dropouts=[0.1, 0.1, 0.1], kernel_sizes=[5, 5], strides=[2, 2],
                 paddings=[2, 2], hlayer_loss_param=0.1):
    # ENCODER
    l_e0 = lasagne.layers.DropoutLayer(
        lasagne.layers.InputLayer(shape=(None, n_in[0], n_in[1], n_in[2]), input_var=input_var), p=dropouts[0])
    l_e1 = lasagne.layers.DropoutLayer(
        (lasagne.layers.Conv2DLayer(l_e0, num_filters=feature_map_sizes[0], stride=(strides[0], strides[0]),
                                    filter_size=(kernel_sizes[0], kernel_sizes[0]), pad=paddings[0],
                                    nonlinearity=lasagne.nonlinearities.LeakyRectify(leakiness=0.01),
                                    W=lasagne.init.GlorotUniform())),
        p=dropouts[1])
    l_e2 = lasagne.layers.DropoutLayer(
        (lasagne.layers.Conv2DLayer(l_e1, num_filters=feature_map_sizes[1], stride=(strides[1], strides[1]),
                                    filter_size=(kernel_sizes[1], kernel_sizes[1]), pad=paddings[1],
                                    nonlinearity=lasagne.nonlinearities.LeakyRectify(leakiness=0.01),
                                    W=lasagne.init.GlorotUniform())),
        p=dropouts[2])
    l_e2_flat = lasagne.layers.flatten(l_e2)
    l_e3 = lasagne.layers.DenseLayer(l_e2_flat, num_units=feature_map_sizes[2],
                                     nonlinearity=lasagne.nonlinearities.tanh)

    # DECODER
    l_d2_flat = lasagne.layers.DenseLayer(l_e3, num_units=l_e2_flat.output_shape[1],
                                          nonlinearity=lasagne.nonlinearities.LeakyRectify(leakiness=0.01))
    l_d2 = lasagne.layers.reshape(l_d2_flat,
                                  shape=[-1, l_e2.output_shape[1], l_e2.output_shape[2], l_e2.output_shape[3]])
    l_d1 = lasagne.layers.Deconv2DLayer(l_d2, num_filters=feature_map_sizes[0], stride=(strides[1], strides[1]),
                                        filter_size=(kernel_sizes[1], kernel_sizes[1]), crop=paddings[1],
                                        nonlinearity=lasagne.nonlinearities.LeakyRectify(leakiness=0.01))
    l_d0 = lasagne.layers.Deconv2DLayer(l_d1, num_filters=n_in[0], stride=(strides[0], strides[0]),
                                        filter_size=(kernel_sizes[0], kernel_sizes[0]), crop=paddings[0],
                                        nonlinearity=lasagne.nonlinearities.tanh)

    # Loss
    tar0 = input_var
    tar1 = lasagne.layers.get_output(l_e1, deterministic=True)
    tar2 = lasagne.layers.get_output(l_e2, deterministic=True)
    rec2 = lasagne.layers.get_output(l_d2)
    rec1 = lasagne.layers.get_output(l_d1)
    rec0 = lasagne.layers.get_output(l_d0)
    rec2_clean = lasagne.layers.get_output(l_d2, deterministic=True)
    rec1_clean = lasagne.layers.get_output(l_d1, deterministic=True)
    rec0_clean = lasagne.layers.get_output(l_d0, deterministic=True)
    loss0 = lasagne.objectives.squared_error(rec0, tar0)
    loss1 = lasagne.objectives.squared_error(rec1, tar1) * hlayer_loss_param
    loss2 = lasagne.objectives.squared_error(rec2, tar2) * hlayer_loss_param
    loss0_clean = lasagne.objectives.squared_error(rec0_clean, tar0)
    loss1_clean = lasagne.objectives.squared_error(rec1_clean, tar1) * hlayer_loss_param
    loss2_clean = lasagne.objectives.squared_error(rec2_clean, tar2) * hlayer_loss_param
    loss_recons = loss0.mean() + loss1.mean() + loss2.mean()
    loss_recons_clean = loss0_clean.mean() + loss1_clean.mean() + loss2_clean.mean()
    return l_e3, l_d0, loss_recons, loss_recons_clean
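

# Minimal usage sketch (not part of the original script): build the DEPICT autoencoder for
# 28x28 single-channel images and compile the clean reconstruction loss, assuming the
# MNIST-style kernel sizes / strides / paddings used elsewhere in this script and a
# hypothetical 10-dimensional embedding.
def _demo_build_depict():
    input_var = T.tensor4('inputs')
    encoder, decoder, loss_recons, loss_recons_clean = build_depict(
        input_var, n_in=[1, 28, 28], feature_map_sizes=[50, 50, 10],
        dropouts=[0.1, 0.1, 0.1], kernel_sizes=[4, 5], strides=[2, 2], paddings=[0, 2])
    recon_fn = theano.function([input_var], loss_recons_clean)
    print('clean reconstruction loss on random input:',
          recon_fn(np.random.rand(2, 1, 28, 28).astype('float32')))
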
def train_depict_ae(dataset, X, y, input_var, decoder, encoder, loss_recons, loss_recons_clean, num_clusters, output_path,
                    batch_size=100, test_batch_size=100, num_epochs=1000, learning_rate=1e-4, verbose=1, seed=42,
                    continue_training=False):
    learning_rate_shared = theano.shared(lasagne.utils.floatX(learning_rate))
    params = lasagne.layers.get_all_params(decoder, trainable=True)
    updates = lasagne.updates.adam(loss_recons, params, learning_rate=learning_rate_shared)
    train_fn = theano.function([input_var], loss_recons, updates=updates)
    val_fn = theano.function([input_var], loss_recons_clean)
    X_train, X_val, y_train, y_val = train_test_split(
        X, y, stratify=y, test_size=0.10, random_state=42)
    best_val = np.inf
    last_update = 0

    # Load if pretrained weights are available.
    if os.path.isfile(os.path.join(output_path, '../params/params_' + dataset + '_values_best.pickle')) and continue_training:
        with open(os.path.join(output_path, '../params/params_' + dataset + '_values_best.pickle'),
                  "rb") as input_file:
            best_params = pickle.load(input_file, encoding='latin1')
            lasagne.layers.set_all_param_values(decoder, best_params)
    else:
        # TRAIN MODEL
        if verbose > 1:
            encoder_clean = lasagne.layers.get_output(encoder, deterministic=True)
            encoder_clean_function = theano.function([input_var], encoder_clean)

        for epoch in range(num_epochs + 1):
            train_err = 0
            num_batches = 0

            # Training
            for batch in iterate_minibatches(X_train, y_train, batch_size, shuffle=True):
                inputs, targets, idx = batch
                train_err += train_fn(inputs)
                num_batches += 1

            validation_error = np.float32(val_fn(X_val))
            print("Epoch {} of {}".format(epoch + 1, num_epochs),
                  "\t training loss:{:.6f}".format(train_err / num_batches),
                  "\t validation loss:{:.6f}".format(validation_error))
            # if epoch % 10 == 0:
            last_update += 1
            if validation_error < best_val:
                last_update = 0
                print("new best error: ", validation_error)
                best_val = validation_error
                best_params_values = lasagne.layers.get_all_param_values(decoder)
                with open(os.path.join(output_path, '../params/params_' + dataset + '_values_best.pickle'),
                          "wb") as output_file:
                    pickle.dump(best_params_values, output_file)
            if last_update > 100:
                break

            if (verbose > 1) and (epoch % 50 == 0):
                # Extract MdA features
                minibatch_flag = 1
                for batch in iterate_minibatches(X, y, test_batch_size, shuffle=False):
                    inputs, targets, idx = batch
                    minibatch_x = encoder_clean_function(inputs)
                    if minibatch_flag:
                        encoder_val_clean = minibatch_x
                        minibatch_flag = 0
                    else:
                        encoder_val_clean = np.concatenate((encoder_val_clean, minibatch_x), axis=0)

                kmeans(encoder_val_clean, y, num_clusters, seed=seed)

        last_params_values = lasagne.layers.get_all_param_values(decoder)
        with open(os.path.join(output_path, '../params/params_' + dataset + '_last.pickle'), "wb") as output_file:
            pickle.dump(params, output_file)
        with open(os.path.join(output_path, '../params/params_' + dataset + '_values_last.pickle'),
                  "wb") as output_file:
            pickle.dump(last_params_values, output_file)
        lasagne.layers.set_all_param_values(decoder, best_params_values)
def clustering(dataset, X, y, input_var, encoder, num_clusters, output_path, test_batch_size=100, seed=42,
               continue_training=False):
    encoder_clean = lasagne.layers.get_output(encoder, deterministic=True)
    encoder_clean_function = theano.function([input_var], encoder_clean)

    # Extract MdA features
    minibatch_flag = 1
    for batch in iterate_minibatches(X, y, test_batch_size, shuffle=False):
        inputs, targets, idx = batch
        minibatch_x = encoder_clean_function(inputs)
        if minibatch_flag:
            encoder_val_clean = minibatch_x
            minibatch_flag = 0
        else:
            encoder_val_clean = np.concatenate((encoder_val_clean, minibatch_x), axis=0)

    # Check k-means results
    kmeans(encoder_val_clean, y, num_clusters, seed=seed)
    initial_time = timeit.default_timer()
    if (dataset == 'MNIST-full') | (dataset == 'FRGC') | (dataset == 'YTF') | (dataset == 'CMU-PIE'):
        # K-means on MdA features
        centroids, inertia, y_pred = kmeans(encoder_val_clean, y, num_clusters, seed=seed)
        y_pred = (np.array(y_pred)).reshape(np.array(y_pred).shape[0], )
        y_pred = y_pred - 1
    else:
        # AC-PIC on MdA features
        if os.path.isfile(os.path.join(output_path, '../params/pred' + dataset + '.pickle')) and continue_training:
            with open(os.path.join(output_path, '../params/pred' + dataset + '.pickle'), "rb") as input_file:
                y_pred = pickle.load(input_file, encoding='latin1')
        else:
            try:
                import matlab.engine
                eng = matlab.engine.start_matlab()
                eng.addpath(eng.genpath('matlab'))
                targets_init = eng.predict_ac_mpi(
                    matlab.double(
                        encoder_val_clean.reshape(encoder_val_clean.shape[0] * encoder_val_clean.shape[1]).tolist()),
                    num_clusters, encoder_val_clean.shape[0], encoder_val_clean.shape[1])
                y_pred = (np.array(targets_init)).reshape(np.array(targets_init).shape[0], )
                eng.quit()
                y_pred = y_pred - 1
            except Exception:
                # fall back to the pure-Python AC-PIC implementation above
                y_pred = predict_ac_mpi(encoder_val_clean, num_clusters, encoder_val_clean.shape[0],
                                        encoder_val_clean.shape[1])
            with open(os.path.join(output_path, '../params/pred' + dataset + '.pickle'), "wb") as output_file:
                pickle.dump(y_pred, output_file)

        final_time = timeit.default_timer()
        print('AC-PIC: \t nmi = ', normalized_mutual_info_score(y, y_pred),
              '\t arc = ', adjusted_rand_score(y, y_pred),
              '\t acc = {:.4f} '.format(bestMap(y, y_pred)),
              '\t time taken = {:.4f}'.format(final_time - initial_time))
        centroids_acpic = np.zeros(shape=(num_clusters, encoder_val_clean.shape[1]))
        for i in range(num_clusters):
            centroids_acpic[i] = encoder_val_clean[y_pred == i].mean(axis=0)

        centroids = centroids_acpic.T
        centroids = centroids / np.sqrt(np.diag(np.matmul(centroids.T, centroids)))

    return np.int32(y_pred), np.float32(centroids)
def train_depict(dataset, X, y, input_var, decoder, encoder, loss_recons, num_clusters, y_pred, output_path,
                 batch_size=100, test_batch_size=100, num_epochs=1000, learning_rate=1e-4, prediction_status='soft',
                 rec_mult=1, clus_mult=1, centroids=None, init_flag=1, continue_training=False):
    ######################
    #   ADD RLC TO MdA   #
    ######################
    initial_time = timeit.default_timer()
    rec_lambda = theano.shared(lasagne.utils.floatX(rec_mult))
    clus_lambda = theano.shared(lasagne.utils.floatX(clus_mult))
    pred_normalizition_flag = 1
    num_batches = X.shape[0] // batch_size

    if prediction_status == 'soft':
        target_var = T.matrix('minibatch_out')
        target_init = T.ivector('kmeans_out')
    elif prediction_status == 'hard':
        target_var = T.ivector('minibatch_out')
        target_val = T.vector()

    network2 = build_eml(encoder, n_out=num_clusters, W_initial=centroids)
    network_prediction_noisy = lasagne.layers.get_output(network2, input_var, deterministic=False)
    network_prediction_clean = lasagne.layers.get_output(network2, input_var, deterministic=True)

    loss_clus_init = lasagne.objectives.categorical_crossentropy(network_prediction_noisy, target_init).mean()
    params_init = lasagne.layers.get_all_params([decoder, network2], trainable=True)

    if prediction_status == 'soft':
        loss_clus = lasagne.objectives.categorical_crossentropy(network_prediction_noisy,
                                                                target_var)
    elif prediction_status == 'hard':
        loss_clus = target_val * lasagne.objectives.categorical_crossentropy(network_prediction_noisy, target_var)

    loss_clus = clus_lambda * loss_clus.mean()
    loss_recons = rec_lambda * loss_recons
    loss = loss_recons + loss_clus
    params2 = lasagne.layers.get_all_params([decoder, network2], trainable=True)
    updates = lasagne.updates.adam(
        loss, params2, learning_rate=learning_rate)
    train_fn = theano.function([input_var, target_var],
                               [loss, loss_recons, loss_clus], updates=updates)

    loss_clus_init = clus_lambda * loss_clus_init
    loss_init = loss_clus_init + loss_recons
    updates_init = lasagne.updates.adam(
        loss_init, params_init, learning_rate=learning_rate)
    train_fn_init = theano.function([input_var, target_init],
                                    [loss_init, loss_recons, loss_clus_init], updates=updates_init)
    test_fn = theano.function([input_var], network_prediction_clean)
    final_time = timeit.default_timer()

    print("\n...Start DEPICT initialization")
    if init_flag:
        if os.path.isfile(os.path.join(output_path, '../params/weights' + dataset + '.pickle')) and continue_training:
            with open(os.path.join(output_path, '../params/weights' + dataset + '.pickle'),
                      "rb") as input_file:
                weights = pickle.load(input_file, encoding='latin1')
                lasagne.layers.set_all_param_values([decoder, network2], weights)
        else:
            X_train, X_val, y_train, y_val, y_pred_train, y_pred_val = train_test_split(
                X, y, y_pred, stratify=y, test_size=0.10, random_state=42)
            last_update = 0

            # Initialization
            y_targ_train = np.copy(y_pred_train)
            y_targ_val = np.copy(y_pred_val)
            y_val_prob = test_fn(X_val)
            y_val_pred = np.argmax(y_val_prob, axis=1)
            val_nmi = normalized_mutual_info_score(y_targ_val, y_val_pred)
            best_val = val_nmi
            print('initial val nmi: ', val_nmi)
            best_params_values = lasagne.layers.get_all_param_values([decoder, network2])
            for epoch in range(1000):
                train_err, val_err = 0, 0
                lossre_train, lossre_val = 0, 0
                losspre_train, losspre_val = 0, 0
                num_batches_train = 0
                for batch in iterate_minibatches(X_train, y_train, batch_size, shuffle=True):
                    minibatch_inputs, targets, idx = batch
                    minibatch_error, lossrec, losspred = train_fn_init(minibatch_inputs, np.int32(y_targ_train[idx]))
                    train_err += minibatch_error
                    lossre_train += lossrec
                    losspre_train += losspred
                    num_batches_train += 1

                y_val_prob = test_fn(X_val)
                y_val_pred = np.argmax(y_val_prob, axis=1)

                y_pred = np.zeros(X.shape[0])
                for batch in iterate_minibatches(X, y, test_batch_size, shuffle=False):
                    minibatch_inputs, targets, idx = batch
                    minibatch_prob = test_fn(minibatch_inputs)
                    minibatch_pred = np.argmax(minibatch_prob, axis=1)
                    y_pred[idx] = minibatch_pred

                val_nmi = normalized_mutual_info_score(y_targ_val, y_val_pred)
                print('epoch:', epoch + 1, '\t nmi = {:.4f} '.format(normalized_mutual_info_score(y, y_pred)),
                      '\t arc = {:.4f} '.format(adjusted_rand_score(y, y_pred)),
                      '\t acc = {:.4f} '.format(bestMap(y, y_pred)),
                      '\t loss= {:.10f}'.format(train_err / num_batches_train),
                      '\t loss_reconstruction= {:.10f}'.format(lossre_train / num_batches_train),
                      '\t loss_prediction= {:.10f}'.format(losspre_train / num_batches_train),
                      '\t val nmi = {:.4f} '.format(val_nmi))
                last_update += 1
                if val_nmi > best_val:
                    last_update = 0
                    print("new best val nmi: ", val_nmi)
                    best_val = val_nmi
                    best_params_values = lasagne.layers.get_all_param_values([decoder, network2])
                    # if (losspre_val / num_batches_val) < 0.2:
                    #     break

                if last_update > 5:
                    break

            lasagne.layers.set_all_param_values([decoder, network2], best_params_values)
            with open(os.path.join(output_path, '../params/weights' + dataset + '.pickle'), "wb") as output_file:
                pickle.dump(lasagne.layers.get_all_param_values([decoder, network2]), output_file)

    # Epoch 0
    print("\n...Start DEPICT training")
    y_prob = np.zeros((X.shape[0], num_clusters))
    y_prob_prev = np.zeros((X.shape[0], num_clusters))
    for batch in iterate_minibatches(X, y, test_batch_size, shuffle=False):
        minibatch_inputs, targets, idx = batch
        minibatch_prob = test_fn(minibatch_inputs)
        y_prob[idx] = minibatch_prob

    y_prob_max = np.max(y_prob, axis=1)
    if pred_normalizition_flag:
        cluster_frequency = np.sum(y_prob, axis=0)
        y_prob = y_prob ** 2 / cluster_frequency
        y_prob = np.transpose(y_prob.T / np.sum(y_prob, axis=1))
    y_pred = np.argmax(y_prob, axis=1)

    print('epoch: 0', '\t nmi = {:.4f} '.format(normalized_mutual_info_score(y, y_pred)),
          '\t arc = {:.4f} '.format(adjusted_rand_score(y, y_pred)),
          '\t acc = {:.4f} '.format(bestMap(y, y_pred)))

    if os.path.isfile(os.path.join(output_path, '../params/rlc' + dataset + '.pickle')) and continue_training:
        with open(os.path.join(output_path, '../params/rlc' + dataset + '.pickle'),
                  "rb") as input_file:
            weights = pickle.load(input_file, encoding='latin1')
            lasagne.layers.set_all_param_values([decoder, network2], weights)
    else:
        for epoch in range(num_epochs):
            # In each epoch, we do a full pass over the training data:
            train_err = 0
            lossre = 0
            losspre = 0

            for batch in iterate_minibatches(X, y, batch_size, shuffle=True):
                minibatch_inputs, targets, idx = batch

                # M step
                if prediction_status == 'hard':
                    minibatch_err, lossrec, losspred = train_fn(minibatch_inputs,
                                                                np.ndarray.astype(y_pred[idx], 'int32'),
                                                                np.ndarray.astype(y_prob_max[idx], 'float32'))
                elif prediction_status == 'soft':
                    minibatch_err, lossrec, losspred = train_fn(minibatch_inputs,
                                                                np.ndarray.astype(y_prob[idx], 'float32'))

                # refresh the soft assignments for this minibatch
                minibatch_prob = test_fn(minibatch_inputs)
                y_prob[idx] = minibatch_prob
                train_err += minibatch_err
                lossre += lossrec
                losspre += losspred

            y_prob_max = np.max(y_prob, axis=1)
            if pred_normalizition_flag:
                cluster_frequency = np.sum(y_prob, axis=0)  # avoid unbalanced assignment
                y_prob = y_prob ** 2 / cluster_frequency
                # y_prob = y_prob / np.sqrt(cluster_frequency)
                y_prob = np.transpose(y_prob.T / np.sum(y_prob, axis=1))
            y_pred = np.argmax(y_prob, axis=1)

            # print('mse: ', mean_squared_error(y_prob, y_prob_prev))
            if mean_squared_error(y_prob, y_prob_prev) < 1e-7:
                with open(os.path.join(output_path, '../params/rlc' + dataset + '.pickle'), "wb") as output_file:
                    pickle.dump(lasagne.layers.get_all_param_values([decoder, network2]), output_file)
                break
            y_prob_prev = np.copy(y_prob)

            print('epoch:', epoch + 1, '\t nmi = {:.4f} '.format(normalized_mutual_info_score(y, y_pred)),
                  '\t arc = {:.4f} '.format(adjusted_rand_score(y, y_pred)),
                  '\t acc = {:.4f} '.format(bestMap(y, y_pred)), '\t loss= {:.10f}'.format(train_err / num_batches),
                  '\t loss_recons= {:.10f}'.format(lossre / num_batches),
                  '\t loss_pred= {:.10f}'.format(losspre / num_batches))

        # test
        y_pred = np.zeros(X.shape[0])
        for batch in iterate_minibatches(X, y, test_batch_size, shuffle=False):
            minibatch_inputs, targets, idx = batch
            minibatch_prob = test_fn(minibatch_inputs)
            minibatch_pred = np.argmax(minibatch_prob, axis=1)
            y_pred[idx] = minibatch_pred

        print('final: ', '\t nmi = {:.4f} '.format(normalized_mutual_info_score(y, y_pred)),
              '\t arc = {:.4f} '.format(adjusted_rand_score(y, y_pred)),
              '\t acc = {:.4f} '.format(bestMap(y, y_pred)))