# Untitled

# Label each GMM component of the CONTROL sample with a cell type.
# Each column of M (a cell, per the original comments) is called as the type
# whose marker genes have the highest mean scaled value; each component then
# takes the most frequent call among the cells assigned to it.
#
# NOTE(review): `pop`, `PA`, `genes`, `np`, `sp` and `prediction` are not
# defined in this section. `prediction` is presumably the per-cell component
# assignment (e.g. from the GMM applied to C) -- confirm against the caller.
gmm = pop['samples']['CONTROL']['gmm']
C = pop['samples']['CONTROL']['C']  # not referenced again in this section
M = pop['samples']['CONTROL']['M']
types = PA.default_pbmc_types()

typeslist = list(types.keys())  # fixed order of type names; rows of `arr` follow it
cols = range(gmm.n_components)  # one index per mixture component

markers = np.concatenate([types[x] for x in types])  # every marker gene listed in the dict
markers = [x for x in markers if x in genes]  # keep only genes present in `genes`
genes_idx = [np.where(genes == x)[0][0] for x in markers]  # first matching index per marker

M = M[genes_idx, :]  # restrict M to the marker rows, in `markers` order
M = sp.normalize(M, norm='max', axis=1)  # scale each row by its max

arr = []  # one mean vector per cell type, in `typeslist` order
for t in typeslist:
    # Row positions (within the reduced M) of this type's valid marker genes.
    lidx = [markers.index(g) for g in types[t] if g in markers]
    if lidx:
        sub = M[lidx, :]
        # np.array(...).flatten() also handles matrix-like results of mean().
        submean = np.array(sub.mean(axis=0)).flatten()
    else:
        # No valid marker for this type: averaging zero rows is undefined
        # (NaN or an error depending on the matrix type) and would corrupt
        # the argmax below, so score the type as -inf and never select it.
        submean = np.full(M.shape[1], -np.inf)
    arr.append(submean)
arr = np.vstack(arr)  # shape: (number of types, number of columns of M)
calls = np.argmax(arr, axis=0)  # per column, index into `typeslist` of the best type

# The original never initialised this list: the bare name `finaltypes` stood
# alone on one line and its `= []` had drifted into a comment further down.
finaltypes = []  # top type name for each component, indexed like `cols`
for i in cols:
    idx = np.where(prediction == i)  # positions assigned to component i
    sub = calls[idx]  # type calls for those positions
    unique, counts = np.unique(sub, return_counts=True)
    if unique.size == 0:
        # Nothing was assigned to this component; keep the list aligned with
        # the component indices instead of failing on an empty argmax.
        finaltypes.append(None)
        continue
    # Name of the most frequent type call within component i.
    finaltypes.append(typeslist[unique[np.argmax(counts)]])