from __future__ import print_function

import sys
import os
import time
import timeit
import numpy as np
import theano
import theano.tensor as T
import lasagne
from theano.sandbox.rng_mrg import MRG_RandomStreams

from sklearn.cluster import KMeans
from sklearn.metrics.cluster import normalized_mutual_info_score, adjusted_rand_score
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from sklearn.utils import linear_assignment_
from sklearn.metrics import accuracy_score

try:
    import cPickle as pickle
except ImportError:
    import pickle
import h5py
from sklearn.metrics.pairwise import euclidean_distances
from sklearn.metrics import mean_squared_error
try:
    from six.moves import xrange
except ImportError:
    xrange = range  # on Python 3, fall back to the built-in range

import scipy
import scipy.linalg  # scipy.linalg.inv is used by the GAC helpers below
from numpy.matlib import repmat
from scipy.spatial.distance import cdist
from scipy import sparse


def gacPathCondEntropy(IminuszW, cluster_i, cluster_j):
    # Compute conditional complexity from the subpart of the weighted adjacency matrix
    # Inputs:
    #   - IminuszW: the matrix (I - z*P)
    #   - cluster_i: index vector of cluster i
    #   - cluster_j: index vector of cluster j
    # Output:
    #   - L_ij: the sum of conditional complexities of cluster i and j after merging.
    # by Wei Zhang (wzhang009 at gmail.com), June 8, 2011

    num_i = np.size(cluster_i)
    num_j = np.size(cluster_j)

    # detecting cross elements (this check is expensive and unnecessary)

    ijGroupIndex = np.append(cluster_i, cluster_j)

    y_ij = np.zeros((num_i + num_j, 2))  # [y_i, y_j]
    y_ij[:num_i, 0] = 1
    y_ij[num_i:, 1] = 1
    idx = np.ix_(ijGroupIndex, ijGroupIndex)
    L_ij = scipy.linalg.inv(IminuszW[idx]).dot(y_ij)
    L_ij = sum(L_ij[:num_i, 0]) / (num_i * num_i) + sum(L_ij[num_i:, 1]) / (num_j * num_j)

    return L_ij


def gacPathEntropy(subIminuszW):
    # Compute structural complexity from the subpart of the weighted adjacency matrix
    # Input:
    #   - subIminuszW: the subpart of (I - z*P)
    # Output:
    #   - clusterComp: structural complexity of a cluster.
    # by Wei Zhang (wzhang009 at gmail.com), June 8, 2011

    N = subIminuszW.shape[0]
    clusterComp = scipy.linalg.inv(subIminuszW).dot(np.ones((N, 1)))
    clusterComp = sum(clusterComp) / (N * N)

    return clusterComp


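# Illustrative sanity check (not part of the original code): for a 2-node cluster whose
# (I - z*P) sub-block is the identity matrix, the structural complexity above is
# sum(ones) / N^2 = 2 / 4 = 0.5, so the commented call should print roughly [0.5].
# print(gacPathEntropy(np.eye(2)))

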
def gacMerging(graphW, initClusters, groupNumber, strDescr, z):
    # Cluster merging for Graph Agglomerative Clustering
    # Implements an agglomerative clustering algorithm based on maximum graph
    # structural affinity of two groups
    # Inputs:
    #   - graphW: asymmetric weighted adjacency matrix
    #   - initClusters: a cell array of clustered vertices
    #   - groupNumber: the final number of clusters
    #   - strDescr: structural descriptor, 'zeta' or 'path'
    #   - z: (I - z*P), default: 0.01
    # Outputs:
    #   - clusterLabels: 1 x m list whose i-th entry is the group assignment of
    #     the i-th data vector w_i. Groups are indexed sequentially, starting from 1.
    # by Wei Zhang (wzhang009 at gmail.com), June 8, 2011

    numSample = graphW.shape[0]
    IminuszW = np.eye(numSample) - z * graphW
    myInf = 1e10

    # initialization
    VERBOSE = True

    numClusters = len(initClusters)
    if numClusters <= groupNumber:
        print('GAC: too few initial clusters. Do not need merging!')

    # compute the structural complexity of each initial cluster
    clusterComp = np.zeros((numClusters, 1))
    for i in xrange(numClusters):
        clusterComp[i] = gacPathEntropy(IminuszW[np.ix_(initClusters[i], initClusters[i])])

    # compute initial (negative) affinity table (upper triangular matrix), very slow
    if VERBOSE:
        print('   Computing initial table.')

    affinityTab = np.full(shape=(numClusters, numClusters), fill_value=np.inf)
    for j in xrange(numClusters):
        for i in xrange(j):
            affinityTab[i, j] = -1 * gacPathCondEntropy(IminuszW, initClusters[i], initClusters[j])

    affinityTab = (clusterComp + clusterComp.T) + affinityTab

    if VERBOSE:
        print('   Starting merging process')

    curGroupNum = numClusters
    while True:
        if (np.mod(curGroupNum, 20) == 0) and VERBOSE:
            print('   Group count: ', str(curGroupNum))

        # Find two clusters with the best affinity
        minAff = np.min(affinityTab[:curGroupNum, :curGroupNum], axis=0)
        minIndex1 = np.argmin(affinityTab[:curGroupNum, :curGroupNum], axis=0)
        minIndex2 = np.argmin(minAff)
        minIndex1 = minIndex1[minIndex2]
        if minIndex2 < minIndex1:
            minIndex1, minIndex2 = minIndex2, minIndex1

        # merge the two clusters

        new_cluster = np.unique(np.append(initClusters[minIndex1], initClusters[minIndex2]))

        # move the second cluster to be merged to the end of the cluster array;
        # note that we only need to copy the end cluster's information to
        # the second cluster's position
        if minIndex2 != curGroupNum:
            initClusters[minIndex2] = initClusters[-1]
            clusterComp[minIndex2] = clusterComp[curGroupNum - 1]
            # affinityTab is an upper triangular matrix
            affinityTab[:minIndex2, minIndex2] = affinityTab[:minIndex2, curGroupNum - 1]
            affinityTab[minIndex2, minIndex2 + 1: curGroupNum - 1] = affinityTab[minIndex2 + 1: curGroupNum - 1,
                                                                                 curGroupNum - 1]

        # update the first cluster and remove the second cluster
        initClusters[minIndex1] = new_cluster
        initClusters.pop()
        clusterComp[minIndex1] = gacPathEntropy(IminuszW[np.ix_(new_cluster, new_cluster)])
        clusterComp[curGroupNum - 1] = myInf
        affinityTab[:, curGroupNum - 1] = myInf
        affinityTab[curGroupNum - 1, :] = myInf
        curGroupNum = curGroupNum - 1
        if curGroupNum <= groupNumber:
            break

        # update the affinity table for the merged cluster
        for groupIndex1 in xrange(minIndex1):
            affinityTab[groupIndex1, minIndex1] = -1 * gacPathCondEntropy(IminuszW, initClusters[groupIndex1],
                                                                          new_cluster)
        for groupIndex1 in xrange(minIndex1 + 1, curGroupNum):
            affinityTab[minIndex1, groupIndex1] = -1 * gacPathCondEntropy(IminuszW, initClusters[groupIndex1],
                                                                          new_cluster)
        affinityTab[:minIndex1, minIndex1] = (clusterComp[:minIndex1].reshape(-1) + clusterComp[minIndex1]
                                              + affinityTab[:minIndex1, minIndex1])
        affinityTab[minIndex1, minIndex1 + 1: curGroupNum] = (clusterComp[minIndex1 + 1: curGroupNum].T
                                                              + clusterComp[minIndex1]
                                                              + affinityTab[minIndex1, minIndex1 + 1: curGroupNum])

    # generate sample labels
    clusterLabels = np.ones((numSample, 1))
    for i in xrange(len(initClusters)):
        clusterLabels[initClusters[i]] = i
    if VERBOSE:
        print('   Final group count: ', str(curGroupNum))

    return clusterLabels


def gacNNMerge(distance_matrix, NNIndex):
    # merge each vertex with its nearest neighbor
    # by Wei Zhang (wzhang009 at gmail.com), June 8, 2011

    # NN indices
    sampleNum = distance_matrix.shape[0]
    clusterLabels = np.zeros((sampleNum, 1))
    counter = 1
    for i in xrange(sampleNum):
        idx = NNIndex[i, :2]
        assignedCluster = clusterLabels[idx]
        assignedCluster = np.unique(assignedCluster[np.where(assignedCluster > 0)])
        if len(assignedCluster) == 0:
            clusterLabels[idx] = counter
            counter = counter + 1
        elif len(assignedCluster) == 1:
            clusterLabels[idx] = assignedCluster
        else:
            clusterLabels[idx] = assignedCluster[0]
            for j in xrange(1, len(assignedCluster)):
                clusterLabels[np.where(clusterLabels == assignedCluster[j])] = assignedCluster[0]

    uniqueLabels = np.unique(clusterLabels)
    clusterNumber = len(uniqueLabels)

    initialClusters = []
    for i in xrange(clusterNumber):
        initialClusters.append(np.where(clusterLabels[:].flatten() == uniqueLabels[i])[0])

    return initialClusters


def gacBuildDigraph(distance_matrix, K, a):
    # Build directed graph
    # Input:
    #   - distance_matrix: pairwise distances, d_{i -> j}
    #   - K: the number of nearest neighbors for the KNN graph
    #   - a: for covariance estimation
    #        sigma^2 = (\sum_{i=1}^n \sum_{j \in N_i^K} d_{ij}^2) * a
    # Output:
    #   - graphW: asymmetric weighted adjacency matrix,
    #             w_{ij} = exp(- d_{ij}^2 / sig2), if j \in N_i^K
    #   - NNIndex: (2K) nearest neighbors, N x (2K+1) matrix
    # by Wei Zhang (wzhang009 at gmail.com), June 8, 2011

    # NN indices
    N = distance_matrix.shape[0]
    # find 2*K NNs in the sense of given distances
    sortedDist = np.sort(distance_matrix, axis=1)
    NNIndex = np.argsort(distance_matrix, axis=1)
    NNIndex = NNIndex[:, :K + 1]

    # estimate deviation
    sig2 = np.mean(np.mean(sortedDist[:, 1:max(K + 1, 4)])) * a
    #########
    tmpNNDist = np.min(sortedDist[:, 1:], axis=1)
    while any(np.exp(- tmpNNDist / sig2) < 1e-5):  # check sig2 and magnify it if it is too small
        sig2 = 2 * sig2

    #########
    print('  sigma = ', str(np.sqrt(sig2)))

    # build graph
    ND = sortedDist[:, 1:K + 1]
    NI = NNIndex[:, 1:K + 2]
    XI = repmat(np.arange(0, N).reshape(-1, 1), 1, K)
    sig2 = np.double(sig2)
    ND = np.double(ND)
    graphW = sparse.csc_matrix((np.exp(-ND[:] * (1 / sig2)).flatten(), (XI[:].flatten(), NI[:].flatten())),
                               shape=(N, N)).todense()
    graphW += np.eye(N)

    return graphW, NNIndex


def gacCluster(distance_matrix, groupNumber, strDescr, K, a, z):
    # Graph Agglomerative Clustering toolbox
    # Input:
    #   - distance_matrix: pairwise distances, d_{i -> j}
    #   - groupNumber: the final number of clusters
    #   - strDescr: structural descriptor. The choice can be
    #                 - 'zeta': zeta function based descriptor
    #                 - 'path': path integral based descriptor
    #   - K: the number of nearest neighbors for the KNN graph, default: 20
    #   - p: merging (p+1)-links in l-links algorithm, default: 1
    #   - a: for covariance estimation, default: 1
    #        sigma^2 = (\sum_{i=1}^n \sum_{j \in N_i^K} d_{ij}^2) * a
    #   - z: (I - z*P), default: 0.01
    # Output:
    #   - clusteredLabels: clustering results
    # by Wei Zhang (wzhang009 at gmail.com), June 8, 2011
    #
    # Please cite the following papers if you find the code helpful:
    #
    #   W. Zhang, D. Zhao, and X. Wang.
    #   Agglomerative clustering via maximum incremental path integral.
    #   Pattern Recognition, 46(11): 3056-3065, 2013.
    #
    #   W. Zhang, X. Wang, D. Zhao, and X. Tang.
    #   Graph Degree Linkage: Agglomerative Clustering on a Directed Graph.
    #   In Proceedings of the European Conference on Computer Vision (ECCV), 2012.

    print('--------------- Graph Structural Agglomerative Clustering ---------------------')

    # initialization

    print('---------- Building graph and forming initial clusters with l-links ---------')
    graphW, NNIndex = gacBuildDigraph(distance_matrix, K, a)
    # from adjacency matrix to probability transition matrix
    graphW = np.array((1. / np.sum(graphW, axis=1))) * np.array(graphW)  # row sum is 1
    initialClusters = gacNNMerge(distance_matrix, NNIndex)

    print('-------------------------- Zeta merging --------------------------')
    clusteredLabels = gacMerging(graphW, initialClusters, groupNumber, strDescr, z)

    return clusteredLabels


def predict_ac_mpi(feat, nClass, nSamples, nfeatures):
    K = 20
    a = 1
    z = 0.01

    distance_matrix = cdist(feat, feat) ** 2
    # path integral
    label_pre = gacCluster(distance_matrix, nClass, 'path', K, a, z)

    return label_pre[:, 0]


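# Illustrative usage (not part of the original script): cluster two well-separated
# Gaussian blobs with the path-integral agglomerative clustering above. The blob data
# and sizes below are made-up stand-ins for real features; uncomment to try it.
# rng = np.random.RandomState(0)
# blobs = np.vstack([rng.randn(40, 2), rng.randn(40, 2) + 8.0])
# blob_labels = predict_ac_mpi(blobs, nClass=2, nSamples=80, nfeatures=2)
# print(np.unique(blob_labels))  # expect two distinct group labels

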
def bestMap(L1, L2):
    if len(L1) != len(L2):
        print('size(L1) must == size(L2)')

    Label1 = np.unique(L1)
    nClass1 = len(Label1)
    Label2 = np.unique(L2)
    nClass2 = len(Label2)

    nClass = max(nClass1, nClass2)
    G = np.zeros((nClass, nClass))
    for i in range(nClass1):
        for j in range(nClass2):
            G[i][j] = len(np.nonzero((L1 == Label1[i]) * (L2 == Label2[j]))[0])

    c = linear_assignment_.linear_assignment(-G.T)[:, 1]
    newL2 = np.zeros(len(L2))
    for i in range(nClass2):
        for j in np.nonzero(L2 == Label2[i])[0]:
            if len(Label1) > c[i]:
                newL2[j] = Label1[c[i]]

    return accuracy_score(L1, newL2)


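# Illustrative check (not part of the original script): when the predicted labels are
# just a permutation of the true ones, the accuracy after the optimal label mapping
# found by bestMap should be 1.0.
# y_true = np.array([0, 0, 1, 1, 2, 2])
# y_perm = np.array([2, 2, 0, 0, 1, 1])
# print(bestMap(y_true, y_perm))  # expected: 1.0

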
def dataset_settings(dataset):
    if (dataset == 'MNIST-full') | (dataset == 'MNIST-test'):
        kernel_sizes = [4, 5]
        strides = [2, 2]
        paddings = [0, 2]
        test_batch_size = 100
    elif dataset == 'USPS':
        kernel_sizes = [4, 5]
        strides = [2, 2]
        paddings = [0, 2]
        test_batch_size = 100
    elif dataset == 'FRGC':
        kernel_sizes = [4, 5]
        strides = [2, 2]
        paddings = [2, 2]
        test_batch_size = 1231
    elif dataset == 'CMU-PIE':
        kernel_sizes = [4, 5]
        strides = [2, 2]
        paddings = [2, 2]
        test_batch_size = 8
    elif dataset == 'YTF':
        kernel_sizes = [5, 4]
        strides = [2, 2]
        paddings = [2, 0]
        test_batch_size = 100

    return kernel_sizes, strides, paddings, test_batch_size


def create_result_dirs(output_path, file_name):
    if not os.path.exists(output_path):
        print('creating log folder')
        os.makedirs(output_path)
        try:
            os.makedirs(os.path.join(output_path, '../params'))
        except OSError:
            pass
        func_file_name = os.path.basename(__file__)
        if func_file_name.split('.')[1] == 'pyc':
            func_file_name = func_file_name[:-1]
        functions_full_path = os.path.join(output_path, func_file_name)
        cmd = 'cp ' + func_file_name + ' "' + functions_full_path + '"'
        os.popen(cmd)
        run_file_full_path = os.path.join(output_path, file_name)
        cmd = 'cp ' + file_name + ' "' + run_file_full_path + '"'
        os.popen(cmd)


class Logger(object):
    def __init__(self, output_path):
        self.terminal = sys.stdout
        self.log = open(output_path + "log.txt", "w+")

    def write(self, message):
        self.terminal.write(message)
        self.log.write(message)

    def flush(self):
        # this flush method is needed for Python 3 compatibility.
        # it handles the flush command by doing nothing.
        # you might want to specify some extra behavior here.
        pass


def kmeans(encoder_val_clean, y, nClusters, y_pred_prev=None, weight_initilization='k-means++', seed=42, n_init=40,
           max_iter=300):
    # weight_initilization = { 'kmeans-pca', 'k-means++', 'random', None }

    if weight_initilization == 'kmeans-pca':

        start_time = timeit.default_timer()
        pca = PCA(n_components=nClusters).fit(encoder_val_clean)
        kmeans_model = KMeans(init=pca.components_, n_clusters=nClusters, n_init=1, max_iter=300, random_state=seed)
        y_pred = kmeans_model.fit_predict(encoder_val_clean)

        centroids = kmeans_model.cluster_centers_.T
        centroids = centroids / np.sqrt(np.diag(np.matmul(centroids.T, centroids)))

        end_time = timeit.default_timer()

    elif weight_initilization == 'k-means++':

        start_time = timeit.default_timer()
        kmeans_model = KMeans(init='k-means++', n_clusters=nClusters, n_init=n_init, max_iter=max_iter, n_jobs=15,
                              random_state=seed)
        y_pred = kmeans_model.fit_predict(encoder_val_clean)

        D = 1.0 / euclidean_distances(encoder_val_clean, kmeans_model.cluster_centers_, squared=True)
        D **= 2.0 / (2 - 1)
        D /= np.sum(D, axis=1)[:, np.newaxis]

        centroids = kmeans_model.cluster_centers_.T
        centroids = centroids / np.sqrt(np.diag(np.matmul(centroids.T, centroids)))

        end_time = timeit.default_timer()

    print('k-means: \t nmi =', normalized_mutual_info_score(y, y_pred), '\t arc =', adjusted_rand_score(y, y_pred),
          '\t acc = {:.4f} '.format(bestMap(y, y_pred)),
          'K-means objective = {:.1f} '.format(kmeans_model.inertia_), '\t runtime =', end_time - start_time)

    if y_pred_prev is not None:
        print('Different Assignments: ', sum(y_pred == y_pred_prev), '\tbestMap: ', bestMap(y_pred, y_pred_prev),
              '\tdatapoints-bestMap*datapoints: ',
              encoder_val_clean.shape[0] - bestMap(y_pred, y_pred_prev) * encoder_val_clean.shape[0])

    return centroids, kmeans_model.inertia_, y_pred


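# Illustrative usage (not part of the original script): cluster a small random feature
# matrix into 3 groups. feats/true_labels below are synthetic stand-ins for the encoder
# features and ground-truth labels used elsewhere in this file; uncomment to try it.
# feats = np.random.RandomState(0).rand(300, 10).astype('float32')
# true_labels = np.repeat(np.arange(3), 100)
# centroids, inertia, assignments = kmeans(feats, true_labels, nClusters=3, n_init=10)

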
def load_dataset(dataset_path):
    # hf = h5py.File(dataset_path + '/data.h5', 'r')
    # X = np.asarray(hf.get('data'), dtype='float32')
    # X_train = (X - np.float32(127.5)) / np.float32(127.5)
    # y_train = np.asarray(hf.get('labels'), dtype='int32')
    # return X_train, y_train
    from keras.datasets import mnist
    (x_train, y_train), (x_test, y_test) = mnist.load_data()

    x_raw = np.concatenate((x_train, x_test))
    y = np.concatenate((y_train, y_test))
    x = x_raw.reshape(-1, 28, 28, 1).astype('float32')
    x = x / 255.

    # split the digits by class so each class can be subsampled at a different rate,
    # which makes the resulting dataset class-imbalanced
    x0 = x[np.isin(y, 0)]
    x1 = x[np.isin(y, 1)]
    x2 = x[np.isin(y, 2)]
    x3 = x[np.isin(y, 3)]
    x4 = x[np.isin(y, 4)]
    x5 = x[np.isin(y, 5)]
    x6 = x[np.isin(y, 6)]
    x7 = x[np.isin(y, 7)]
    x8 = x[np.isin(y, 8)]
    x9 = x[np.isin(y, 9)]

    x = np.concatenate((x0[0:int(0.55 * len(x0))], x1[0:int(0.6 * len(x1))], x2[0:int(0.65 * len(x2))],
                        x3[0:int(0.7 * len(x3))], x4[0:int(0.75 * len(x4))], x5[0:int(0.8 * len(x5))],
                        x6[0:int(0.85 * len(x6))], x7[0:int(0.9 * len(x7))], x8[0:int(0.95 * len(x8))],
                        x9[0:len(x9)]))

    y = np.concatenate(([0] * int(0.55 * len(x0)), [1] * int(0.6 * len(x1)), [2] * int(0.65 * len(x2)),
                        [3] * int(0.7 * len(x3)), [4] * int(0.75 * len(x4)), [5] * int(0.8 * len(x5)),
                        [6] * int(0.85 * len(x6)), [7] * int(0.9 * len(x7)), [8] * int(0.95 * len(x8)),
                        [9] * int(1 * len(x9))))

    return x, y


def iterate_minibatches(inputs, targets, batchsize, shuffle=False):
    assert len(inputs) == len(targets)
    if shuffle:
        indices = np.arange(len(inputs))
        np.random.shuffle(indices)
    for start_idx in range(0, len(inputs) - batchsize + 1, batchsize):
        if shuffle:
            excerpt = indices[start_idx:start_idx + batchsize]
        else:
            excerpt = slice(start_idx, start_idx + batchsize)
        yield inputs[excerpt], targets[excerpt], excerpt


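# Illustrative usage (not part of the original script): iterate over a toy array in
# mini-batches of 2, also receiving the indices/slice used for each excerpt.
# for xb, yb, idx in iterate_minibatches(np.arange(10).reshape(5, 2), np.arange(5), 2):
#     print(xb.shape, yb, idx)

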
def build_eml(input_var=None, n_out=None, W_initial=None):
    l_in = input_var

    if W_initial is None:
        l_out = lasagne.layers.DenseLayer(
            l_in, num_units=n_out,
            nonlinearity=lasagne.nonlinearities.softmax,
            W=lasagne.init.Uniform(std=0.5, mean=0.5), b=lasagne.init.Constant(1))

    else:
        l_out = lasagne.layers.DenseLayer(
            l_in, num_units=n_out,
            nonlinearity=lasagne.nonlinearities.softmax,
            W=W_initial, b=lasagne.init.Constant(0))

    return l_out


def build_depict(input_var=None, n_in=[None, None, None], feature_map_sizes=[50, 50],
                 dropouts=[0.1, 0.1, 0.1], kernel_sizes=[5, 5], strides=[2, 2],
                 paddings=[2, 2], hlayer_loss_param=0.1):
    # ENCODER
    l_e0 = lasagne.layers.DropoutLayer(
        lasagne.layers.InputLayer(shape=(None, n_in[0], n_in[1], n_in[2]), input_var=input_var), p=dropouts[0])

    l_e1 = lasagne.layers.DropoutLayer(
        (lasagne.layers.Conv2DLayer(l_e0, num_filters=feature_map_sizes[0], stride=(strides[0], strides[0]),
                                    filter_size=(kernel_sizes[0], kernel_sizes[0]), pad=paddings[0],
                                    nonlinearity=lasagne.nonlinearities.LeakyRectify(leakiness=0.01),
                                    W=lasagne.init.GlorotUniform())),
        p=dropouts[1])

    l_e2 = lasagne.layers.DropoutLayer(
        (lasagne.layers.Conv2DLayer(l_e1, num_filters=feature_map_sizes[1], stride=(strides[1], strides[1]),
                                    filter_size=(kernel_sizes[1], kernel_sizes[1]), pad=paddings[1],
                                    nonlinearity=lasagne.nonlinearities.LeakyRectify(leakiness=0.01),
                                    W=lasagne.init.GlorotUniform())),
        p=dropouts[2])

    l_e2_flat = lasagne.layers.flatten(l_e2)

    l_e3 = lasagne.layers.DenseLayer(l_e2_flat, num_units=feature_map_sizes[2],
                                     nonlinearity=lasagne.nonlinearities.tanh)

    # DECODER
    l_d2_flat = lasagne.layers.DenseLayer(l_e3, num_units=l_e2_flat.output_shape[1],
                                          nonlinearity=lasagne.nonlinearities.LeakyRectify(leakiness=0.01))

    l_d2 = lasagne.layers.reshape(l_d2_flat,
                                  shape=[-1, l_e2.output_shape[1], l_e2.output_shape[2], l_e2.output_shape[3]])

    l_d1 = lasagne.layers.Deconv2DLayer(l_d2, num_filters=feature_map_sizes[0], stride=(strides[1], strides[1]),
                                        filter_size=(kernel_sizes[1], kernel_sizes[1]), crop=paddings[1],
                                        nonlinearity=lasagne.nonlinearities.LeakyRectify(leakiness=0.01))

    l_d0 = lasagne.layers.Deconv2DLayer(l_d1, num_filters=n_in[0], stride=(strides[0], strides[0]),
                                        filter_size=(kernel_sizes[0], kernel_sizes[0]), crop=paddings[0],
                                        nonlinearity=lasagne.nonlinearities.tanh)

    # Loss
    tar0 = input_var
    tar1 = lasagne.layers.get_output(l_e1, deterministic=True)
    tar2 = lasagne.layers.get_output(l_e2, deterministic=True)
    rec2 = lasagne.layers.get_output(l_d2)
    rec1 = lasagne.layers.get_output(l_d1)
    rec0 = lasagne.layers.get_output(l_d0)
    rec2_clean = lasagne.layers.get_output(l_d2, deterministic=True)
    rec1_clean = lasagne.layers.get_output(l_d1, deterministic=True)
    rec0_clean = lasagne.layers.get_output(l_d0, deterministic=True)

    loss0 = lasagne.objectives.squared_error(rec0, tar0)
    loss1 = lasagne.objectives.squared_error(rec1, tar1) * hlayer_loss_param
    loss2 = lasagne.objectives.squared_error(rec2, tar2) * hlayer_loss_param

    loss0_clean = lasagne.objectives.squared_error(rec0_clean, tar0)
    loss1_clean = lasagne.objectives.squared_error(rec1_clean, tar1) * hlayer_loss_param
    loss2_clean = lasagne.objectives.squared_error(rec2_clean, tar2) * hlayer_loss_param

    loss_recons = loss0.mean() + loss1.mean() + loss2.mean()
    loss_recons_clean = loss0_clean.mean() + loss1_clean.mean() + loss2_clean.mean()

    return l_e3, l_d0, loss_recons, loss_recons_clean


def train_depict_ae(dataset, X, y, input_var, decoder, encoder, loss_recons, loss_recons_clean, num_clusters, output_path,
                    batch_size=100, test_batch_size=100, num_epochs=1000, learning_rate=1e-4, verbose=1, seed=42,
                    continue_training=False):
    learning_rate_shared = theano.shared(lasagne.utils.floatX(learning_rate))
    params = lasagne.layers.get_all_params(decoder, trainable=True)
    updates = lasagne.updates.adam(loss_recons, params, learning_rate=learning_rate_shared)
    train_fn = theano.function([input_var], loss_recons, updates=updates)
    val_fn = theano.function([input_var], loss_recons_clean)
    X_train, X_val, y_train, y_val = train_test_split(
        X, y, stratify=y, test_size=0.10, random_state=42)
    best_val = np.inf
    last_update = 0

    # Load if pretrained weights are available.
    if os.path.isfile(os.path.join(output_path, '../params/params_' + dataset + '_values_best.pickle')) & continue_training:
        with open(os.path.join(output_path, '../params/params_' + dataset + '_values_best.pickle'),
                  "rb") as input_file:
            best_params = pickle.load(input_file, encoding='latin1')
            lasagne.layers.set_all_param_values(decoder, best_params)
    else:
        # TRAIN MODEL
        if verbose > 1:
            encoder_clean = lasagne.layers.get_output(encoder, deterministic=True)
            encoder_clean_function = theano.function([input_var], encoder_clean)

        for epoch in range(num_epochs + 1):
            train_err = 0
            num_batches = 0

            # Training
            for batch in iterate_minibatches(X_train, y_train, batch_size, shuffle=True):
                inputs, targets, idx = batch
                train_err += train_fn(inputs)
                num_batches += 1

            validation_error = np.float32(val_fn(X_val))

            print("Epoch {} of {}".format(epoch + 1, num_epochs),
                  "\t training loss:{:.6f}".format(train_err / num_batches),
                  "\t validation loss:{:.6f}".format(validation_error))
            # if epoch % 10 == 0:
            last_update += 1
            if validation_error < best_val:
                last_update = 0
                print("new best error: ", validation_error)
                best_val = validation_error
                best_params_values = lasagne.layers.get_all_param_values(decoder)
                with open(os.path.join(output_path, '../params/params_' + dataset + '_values_best.pickle'),
                          "wb") as output_file:
                    pickle.dump(best_params_values, output_file)
            if last_update > 100:
                break

            if (verbose > 1) & (epoch % 50 == 0):
                # Extract MdA features
                minibatch_flag = 1
                for batch in iterate_minibatches(X, y, test_batch_size, shuffle=False):
                    inputs, targets, idx = batch
                    minibatch_x = encoder_clean_function(inputs)
                    if minibatch_flag:
                        encoder_val_clean = minibatch_x
                        minibatch_flag = 0
                    else:
                        encoder_val_clean = np.concatenate((encoder_val_clean, minibatch_x), axis=0)

                kmeans(encoder_val_clean, y, num_clusters, seed=seed)

        last_params_values = lasagne.layers.get_all_param_values(decoder)
        with open(os.path.join(output_path, '../params/params_' + dataset + '_last.pickle'), "wb") as output_file:
            pickle.dump(params, output_file)
        with open(os.path.join(output_path, '../params/params_' + dataset + '_values_last.pickle'),
                  "wb") as output_file:
            pickle.dump(last_params_values, output_file)
        lasagne.layers.set_all_param_values(decoder, best_params_values)


def clustering(dataset, X, y, input_var, encoder, num_clusters, output_path, test_batch_size=100, seed=42,
               continue_training=False):
    encoder_clean = lasagne.layers.get_output(encoder, deterministic=True)
    encoder_clean_function = theano.function([input_var], encoder_clean)

    # Extract MdA features
    minibatch_flag = 1
    for batch in iterate_minibatches(X, y, test_batch_size, shuffle=False):
        inputs, targets, idx = batch
        minibatch_x = encoder_clean_function(inputs)
        if minibatch_flag:
            encoder_val_clean = minibatch_x
            minibatch_flag = 0
        else:
            encoder_val_clean = np.concatenate((encoder_val_clean, minibatch_x), axis=0)

    # Check k-means results
    kmeans(encoder_val_clean, y, num_clusters, seed=seed)
    initial_time = timeit.default_timer()
    if (dataset == 'MNIST-full') | (dataset == 'FRGC') | (dataset == 'YTF') | (dataset == 'CMU-PIE'):
        # K-means on MdA Features
        centroids, inertia, y_pred = kmeans(encoder_val_clean, y, num_clusters, seed=seed)
        y_pred = (np.array(y_pred)).reshape(np.array(y_pred).shape[0], )
        y_pred = y_pred - 1
    else:
        # AC-PIC on MdA Features
        if os.path.isfile(os.path.join(output_path, '../params/pred' + dataset + '.pickle')) & continue_training:
            with open(os.path.join(output_path, '../params/pred' + dataset + '.pickle'), "rb") as input_file:
                y_pred = pickle.load(input_file, encoding='latin1')
        else:
            try:
                import matlab.engine
                eng = matlab.engine.start_matlab()
                eng.addpath(eng.genpath('matlab'))
                targets_init = eng.predict_ac_mpi(
                    matlab.double(
                        encoder_val_clean.reshape(encoder_val_clean.shape[0] * encoder_val_clean.shape[1]).tolist()),
                    num_clusters, encoder_val_clean.shape[0], encoder_val_clean.shape[1])
                y_pred = (np.array(targets_init)).reshape(np.array(targets_init).shape[0], )
                eng.quit()
                y_pred = y_pred - 1
            except Exception:
                # fall back to the pure-Python AC-PIC implementation above
                y_pred = predict_ac_mpi(encoder_val_clean, num_clusters, encoder_val_clean.shape[0],
                                        encoder_val_clean.shape[1])
            with open(os.path.join(output_path, '../params/pred' + dataset + '.pickle'), "wb") as output_file:
                pickle.dump(y_pred, output_file)

        final_time = timeit.default_timer()
        print('AC-PIC: \t nmi = ', normalized_mutual_info_score(y, y_pred),
              '\t arc = ', adjusted_rand_score(y, y_pred),
              '\t acc = {:.4f} '.format(bestMap(y, y_pred)),
              '\t time taken = {:.4f}'.format(final_time - initial_time))
        centroids_acpic = np.zeros(shape=(num_clusters, encoder_val_clean.shape[1]))
        for i in range(num_clusters):
            centroids_acpic[i] = encoder_val_clean[y_pred == i].mean(axis=0)

        centroids = centroids_acpic.T
        centroids = centroids / np.sqrt(np.diag(np.matmul(centroids.T, centroids)))

    return np.int32(y_pred), np.float32(centroids)


def train_depict(dataset, X, y, input_var, decoder, encoder, loss_recons, num_clusters, y_pred, output_path,
                 batch_size=100, test_batch_size=100, num_epochs=1000, learning_rate=1e-4, prediction_status='soft',
                 rec_mult=1, clus_mult=1, centroids=None, init_flag=1, continue_training=False):
    ######################
    #  ADD RLC TO MdA    #
    ######################

    initial_time = timeit.default_timer()
    rec_lambda = theano.shared(lasagne.utils.floatX(rec_mult))
    clus_lambda = theano.shared(lasagne.utils.floatX(clus_mult))
    pred_normalizition_flag = 1
    num_batches = X.shape[0] // batch_size

    if prediction_status == 'soft':
        target_var = T.matrix('minibatch_out')
        target_init = T.ivector('kmeans_out')
    elif prediction_status == 'hard':
        target_var = T.ivector('minibatch_out')
        target_val = T.vector()

    network2 = build_eml(encoder, n_out=num_clusters, W_initial=centroids)
    network_prediction_noisy = lasagne.layers.get_output(network2, input_var, deterministic=False)
    network_prediction_clean = lasagne.layers.get_output(network2, input_var, deterministic=True)

    loss_clus_init = lasagne.objectives.categorical_crossentropy(network_prediction_noisy, target_init).mean()
    params_init = lasagne.layers.get_all_params([decoder, network2], trainable=True)

    if prediction_status == 'soft':
        loss_clus = lasagne.objectives.categorical_crossentropy(network_prediction_noisy,
                                                                target_var)
    elif prediction_status == 'hard':
        loss_clus = target_val * lasagne.objectives.categorical_crossentropy(network_prediction_noisy, target_var)

    loss_clus = clus_lambda * loss_clus.mean()
    loss_recons = rec_lambda * loss_recons
    loss = loss_recons + loss_clus
    params2 = lasagne.layers.get_all_params([decoder, network2], trainable=True)
    updates = lasagne.updates.adam(
        loss, params2, learning_rate=learning_rate)
    train_fn = theano.function([input_var, target_var],
                               [loss, loss_recons, loss_clus], updates=updates)

    loss_clus_init = clus_lambda * loss_clus_init
    loss_init = loss_clus_init + loss_recons
    updates_init = lasagne.updates.adam(
        loss_init, params_init, learning_rate=learning_rate)
    train_fn_init = theano.function([input_var, target_init],
                                    [loss_init, loss_recons, loss_clus_init], updates=updates_init)

    test_fn = theano.function([input_var], network_prediction_clean)
    final_time = timeit.default_timer()

    print("\n...Start DEPICT initialization")
    if init_flag:
        if os.path.isfile(os.path.join(output_path, '../params/weights' + dataset + '.pickle')) & continue_training:
            with open(os.path.join(output_path, '../params/weights' + dataset + '.pickle'),
                      "rb") as input_file:
                weights = pickle.load(input_file, encoding='latin1')
                lasagne.layers.set_all_param_values([decoder, network2], weights)
        else:
            X_train, X_val, y_train, y_val, y_pred_train, y_pred_val = train_test_split(
                X, y, y_pred, stratify=y, test_size=0.10, random_state=42)
            last_update = 0
            # Initialization
            y_targ_train = np.copy(y_pred_train)
            y_targ_val = np.copy(y_pred_val)
            y_val_prob = test_fn(X_val)
            y_val_pred = np.argmax(y_val_prob, axis=1)
            val_nmi = normalized_mutual_info_score(y_targ_val, y_val_pred)
            best_val = val_nmi
            print('initial val nmi: ', val_nmi)
            best_params_values = lasagne.layers.get_all_param_values([decoder, network2])
            for epoch in range(1000):
                train_err, val_err = 0, 0
                lossre_train, lossre_val = 0, 0
                losspre_train, losspre_val = 0, 0
                num_batches_train = 0
                for batch in iterate_minibatches(X_train, y_train, batch_size, shuffle=True):
                    minibatch_inputs, targets, idx = batch
                    minibatch_error, lossrec, losspred = train_fn_init(minibatch_inputs, np.int32(y_targ_train[idx]))
                    train_err += minibatch_error
                    lossre_train += lossrec
                    losspre_train += losspred
                    num_batches_train += 1

                y_val_prob = test_fn(X_val)
                y_val_pred = np.argmax(y_val_prob, axis=1)

                y_pred = np.zeros(X.shape[0])
                for batch in iterate_minibatches(X, y, test_batch_size, shuffle=False):
                    minibatch_inputs, targets, idx = batch
                    minibatch_prob = test_fn(minibatch_inputs)
                    minibatch_pred = np.argmax(minibatch_prob, axis=1)
                    y_pred[idx] = minibatch_pred

                val_nmi = normalized_mutual_info_score(y_targ_val, y_val_pred)

                print('epoch:', epoch + 1, '\t nmi = {:.4f} '.format(normalized_mutual_info_score(y, y_pred)),
                      '\t arc = {:.4f} '.format(adjusted_rand_score(y, y_pred)),
                      '\t acc = {:.4f} '.format(bestMap(y, y_pred)),
                      '\t loss= {:.10f}'.format(train_err / num_batches_train),
                      '\t loss_reconstruction= {:.10f}'.format(lossre_train / num_batches_train),
                      '\t loss_prediction= {:.10f}'.format(losspre_train / num_batches_train),
                      '\t val nmi = {:.4f} '.format(val_nmi))
                last_update += 1
                if val_nmi > best_val:
                    last_update = 0
                    print("new best val nmi: ", val_nmi)
                    best_val = val_nmi
                    best_params_values = lasagne.layers.get_all_param_values([decoder, network2])
                    # if (losspre_val / num_batches_val) < 0.2:
                    #     break

                if last_update > 5:
                    break

            lasagne.layers.set_all_param_values([decoder, network2], best_params_values)
            with open(os.path.join(output_path, '../params/weights' + dataset + '.pickle'), "wb") as output_file:
                pickle.dump(lasagne.layers.get_all_param_values([decoder, network2]), output_file)

    # Epoch 0
    print("\n...Start DEPICT training")
    y_prob = np.zeros((X.shape[0], num_clusters))
    y_prob_prev = np.zeros((X.shape[0], num_clusters))
    for batch in iterate_minibatches(X, y, test_batch_size, shuffle=False):
        minibatch_inputs, targets, idx = batch
        minibatch_prob = test_fn(minibatch_inputs)
        y_prob[idx] = minibatch_prob

    y_prob_max = np.max(y_prob, axis=1)
    if pred_normalizition_flag:
        cluster_frequency = np.sum(y_prob, axis=0)
        y_prob = y_prob ** 2 / cluster_frequency
        y_prob = np.transpose(y_prob.T / np.sum(y_prob, axis=1))
    y_pred = np.argmax(y_prob, axis=1)

    print('epoch: 0', '\t nmi = {:.4f} '.format(normalized_mutual_info_score(y, y_pred)),
          '\t arc = {:.4f} '.format(adjusted_rand_score(y, y_pred)),
          '\t acc = {:.4f} '.format(bestMap(y, y_pred)))
    if os.path.isfile(os.path.join(output_path, '../params/rlc' + dataset + '.pickle')) & continue_training:
        with open(os.path.join(output_path, '../params/rlc' + dataset + '.pickle'),
                  "rb") as input_file:
            weights = pickle.load(input_file, encoding='latin1')
            lasagne.layers.set_all_param_values([decoder, network2], weights)
    else:
        for epoch in range(num_epochs):

            # In each epoch, we do a full pass over the training data:
            train_err = 0
            lossre = 0
            losspre = 0

            for batch in iterate_minibatches(X, y, batch_size, shuffle=True):
                minibatch_inputs, targets, idx = batch

                # M_step
                if prediction_status == 'hard':
                    minibatch_err, lossrec, losspred = train_fn(minibatch_inputs,
                                                                np.ndarray.astype(y_pred[idx], 'int32'),
                                                                np.ndarray.astype(y_prob_max[idx],
                                                                                  'float32'))
                elif prediction_status == 'soft':
                    minibatch_err, lossrec, losspred = train_fn(minibatch_inputs,
                                                                np.ndarray.astype(y_prob[idx], 'float32'))

                minibatch_prob = test_fn(minibatch_inputs)
                y_prob[idx] = minibatch_prob
                train_err += minibatch_err
                lossre += lossrec
                losspre += losspred

            y_prob_max = np.max(y_prob, axis=1)
            if pred_normalizition_flag:
                cluster_frequency = np.sum(y_prob, axis=0)  # avoid unbalanced assignment
                y_prob = y_prob ** 2 / cluster_frequency
                # y_prob = y_prob / np.sqrt(cluster_frequency)
                y_prob = np.transpose(y_prob.T / np.sum(y_prob, axis=1))
            y_pred = np.argmax(y_prob, axis=1)

            # print('mse: ', mean_squared_error(y_prob, y_prob_prev))

            if mean_squared_error(y_prob, y_prob_prev) < 1e-7:
                with open(os.path.join(output_path, '../params/rlc' + dataset + '.pickle'), "wb") as output_file:
                    pickle.dump(lasagne.layers.get_all_param_values([decoder, network2]), output_file)
                break
            y_prob_prev = np.copy(y_prob)

            print('epoch:', epoch + 1, '\t nmi = {:.4f} '.format(normalized_mutual_info_score(y, y_pred)),
                  '\t arc = {:.4f} '.format(adjusted_rand_score(y, y_pred)),
                  '\t acc = {:.4f} '.format(bestMap(y, y_pred)), '\t loss= {:.10f}'.format(train_err / num_batches),
                  '\t loss_recons= {:.10f}'.format(lossre / num_batches),
                  '\t loss_pred= {:.10f}'.format(losspre / num_batches))

    # test
    y_pred = np.zeros(X.shape[0])
    for batch in iterate_minibatches(X, y, test_batch_size, shuffle=False):
        minibatch_inputs, targets, idx = batch
        minibatch_prob = test_fn(minibatch_inputs)
        minibatch_pred = np.argmax(minibatch_prob, axis=1)
        y_pred[idx] = minibatch_pred

    print('final: ', '\t nmi = {:.4f} '.format(normalized_mutual_info_score(y, y_pred)),
          '\t arc = {:.4f} '.format(adjusted_rand_score(y, y_pred)),
          '\t acc = {:.4f} '.format(bestMap(y, y_pred)))
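

# ----------------------------------------------------------------------------------------
# Illustrative driver (not part of the original paste): one way to wire the pieces above
# into the usual DEPICT pipeline on MNIST. The output path, epoch count and feature map
# sizes below are assumptions chosen for demonstration, not the authors' settings.
# ----------------------------------------------------------------------------------------
if __name__ == '__main__':
    dataset = 'MNIST-full'
    output_path = os.path.join('results', dataset) + os.sep
    create_result_dirs(output_path, os.path.basename(__file__))
    sys.stdout = Logger(output_path)

    # load data as (N, 28, 28, 1) in [0, 1] and convert to the NCHW layout Lasagne expects
    X, y = load_dataset('datasets/' + dataset)
    X = np.transpose(X, (0, 3, 1, 2)).astype('float32')
    num_clusters = len(np.unique(y))
    kernel_sizes, strides, paddings, test_batch_size = dataset_settings(dataset)

    # build the convolutional autoencoder and its reconstruction losses
    input_var = T.tensor4('inputs')
    encoder, decoder, loss_recons, loss_recons_clean = build_depict(
        input_var, n_in=X.shape[1:], feature_map_sizes=[50, 50, num_clusters],
        kernel_sizes=kernel_sizes, strides=strides, paddings=paddings)

    # 1) pretrain the autoencoder, 2) cluster the embedded features,
    # 3) jointly refine the encoder and the soft cluster assignments
    train_depict_ae(dataset, X, y, input_var, decoder, encoder, loss_recons, loss_recons_clean,
                    num_clusters, output_path, num_epochs=200, test_batch_size=test_batch_size)
    y_pred, centroids = clustering(dataset, X, y, input_var, encoder, num_clusters, output_path,
                                   test_batch_size=test_batch_size)
    train_depict(dataset, X, y, input_var, decoder, encoder, loss_recons, num_clusters, y_pred,
                 output_path, test_batch_size=test_batch_size, centroids=centroids)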