Advertisement
Guest User

Untitled

a guest
Oct 20th, 2019
112
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 5.87 KB | None | 0 0
  1. # compare components from the same gmm
  2.  
  3. import numpy as np
  4. from multiprocessing import Pool
  5.  
  6. def introspection(pop, sample=None, comp1=0, comp2=1, nbins=20, cutoff=.5, renderhists=False, usefiltered=True):
  7. gmm = pop['samples'][sample]['gmm']
  8. C = pop['samples'][sample]['C']
  9. prediction = gmm.predict(C)
  10.  
  11. if usefiltered == True:
  12. M = pop['samples'][sample]['M_norm']
  13. genes = np.array(pop['filtered_genes'])
  14. elif usefiltered == False:
  15. M = pop['samples'][sample]['M']
  16. genes = np.array(pop['genes'])
  17.  
  18. idx1 = np.where(prediction==comp1)[0]
  19. idx2 = np.where(prediction==comp2)[0]
  20. sub1 = M[:,idx1].toarray()
  21. sub2 = M[:,idx2].toarray()
  22.  
  23. with Pool(pop['ncores']) as p:
  24. q = np.array(p.starmap(PA.l1norm, [(ig, sub1[ig,:], sub2[ig,:], nbins) for ig in range(sub1.shape[0])]))
  25.  
  26. idx = np.argsort(q)
  27. q = q[idx]
  28. genes = genes[idx]
  29. sub1 = sub1[idx,:]
  30. sub2 = sub2[idx,:]
  31.  
  32. # render l1norm values
  33. samplename = sample.replace('/','') # remove slash char to not mess up the folder path
  34. dname = 'diffexp/%s_%d_%d/' % (samplename, comp1, comp2) # define directory name
  35. PA.mkdir(os.path.join(pop['output'], dname)) # create directory if needed
  36. x = np.arange(len(q))
  37. y = q
  38. plt.scatter(x, y, s=.1, alpha=1)
  39. plt.axhline(y=cutoff, color='red', linewidth=.5, label='Cutoff')
  40. plt.axhline(y=-cutoff, color='red', linewidth=.5)
  41. plt.xticks([])
  42. plt.ylabel('l1-norm')
  43. plt.xlabel('Genes')
  44. plt.legend()
  45. filename = 'l1norm_values'
  46. filename = os.path.join(pop['output'], dname, '%s.png' % filename)
  47. plt.savefig(filename, dpi=200, bbox_inches='tight')
  48. plt.close()
  49.  
  50. downregulated_idx = np.where(np.array(q)<-cutoff)[0] # get indices of genes with low l1-norm values
  51. upregulated_idx = np.where(np.array(q)>cutoff)[0] # get indices of genes with high l1-norm values
  52. downregulated = [genes[i] for i in downregulated_idx] # get gene labels
  53. upregulated = [genes[i] for i in upregulated_idx] # get gene labels
  54. if len(downregulated+upregulated) == 0:
  55. raise Exception('Cutoff value did not retrieve any gene. Please modify cutoff based on %s' % filename)
  56.  
  57. # gsea
  58. currpath = '/anaconda3/lib/python3.7/site-packages/popalign/' # get current path of this file to find the genesets
  59. ''
  60. geneset = 'c5bp' # name of the geneset file
  61. d = PA.load_dict(os.path.join(currpath, "gsea/%s.npy" % geneset)) # load geneset dictionar
  62. ngenesets = 20
  63.  
  64. dr_genesets = PA.enrichment_analysis(pop, d, downregulated, len(pop['genes']), ngenesets) # find genesets pvalues for the list of downregulated genes
  65. ur_genesets = PA.enrichment_analysis(pop, d, upregulated, len(pop['genes']), ngenesets) # find genesets pvalues for the list of upregulated genes
  66.  
  67. lidx = np.concatenate([downregulated_idx,upregulated_idx])
  68. labels = ['downregulated']*len(downregulated_idx)+['upregulated']*len(upregulated_idx)
  69.  
  70. with open(os.path.join(pop['output'], dname, 'downregulated_genes.txt'),'w') as fout:
  71. fout.write('Downregulated genes for sample %s relative to the reference sample\n\n' % sample)
  72. fout.write('\n'.join(downregulated)) # save list of downregulated gene names
  73. fout.write('\n\nMatching genesets:\n\n')
  74. fout.write('\n'.join(dr_genesets))
  75. with open(os.path.join(pop['output'], dname, 'upregulated_genes.txt'),'w') as fout:
  76. fout.write('Upregulated genes for sample: %s relative to the reference sample\n\n' % sample)
  77. fout.write('\n'.join(upregulated)) # save list of upregulated gene names
  78. fout.write('\n\nMatching genesets:\n\n')
  79. fout.write('\n'.join(ur_genesets))
  80.  
  81. if renderhists == True: # if variable is True, then start histogram rendering
  82. dname = 'diffexp/%d_%s/hists/' % (refcomp, samplename) # define directory name
  83. try:
  84. shutil.rmtree(os.path.join(pop['output'], dname))
  85. except:
  86. pass
  87. mkdir(os.path.join(pop['output'], dname)) # create directory if needed
  88. for lbl,i in zip(labels, lidx): # for each gene index in final list
  89. gname = genes[i]
  90.  
  91. arrref = subref[i,:]
  92. arrtest = subtest[i,:]
  93. maxref, maxtest = np.max(arrref), np.max(arrtest)
  94. max_ = max(maxref,maxtest)
  95.  
  96. nbins = 20
  97. bref, beref = np.histogram(arrref, bins=nbins, range=(0,max_))
  98. btest, betest = np.histogram(arrtest, bins=nbins, range=(0,max_))
  99. bref = bref/len(arrref)
  100. btest = btest/len(arrtest)
  101.  
  102. width = beref[-1]/nbins
  103. plt.bar(beref[:-1], bref, label=xref, alpha=.3, width=width)
  104. plt.bar(beref[:-1], btest, label=xtest, alpha=0.3, width=width)
  105. plt.legend()
  106. plt.xlabel('Normalized counts')
  107. plt.ylabel('Percentage of cells in subpopulation')
  108. plt.title('Gene %s\nSubpopulation #%d of %s' % (gname, refcomp, xref))
  109.  
  110. filename = '%s_%s' % (lbl, gname)
  111. plt.savefig(os.path.join(pop['output'], dname, '%s.png' % filename), dpi=200, bbox_inches='tight')
  112. plt.close()
  113.  
  114. lidx = [genes[i] for i in lidx]
  115. #PA.plot_heatmap(pop, refcomp, lidx, clustersamples=False, clustercells=True, savename='%d_%s_only' % (refcomp, sample), figsize=(15,15), cmap='Purples', samplelimits=False, scalegenes=True, only=sample, equalncells=True)
  116. return lidx
  117.  
  118. sample = 'H-DER-2'
  119. comp1 = 2
  120. comp2 = 3
  121. genelist = introspection(pop, sample=sample, comp1=comp1, comp2=comp2, nbins=20, cutoff=1, renderhists=False, usefiltered=False)
  122.  
  123. importlib.reload(PA)
  124. PA.plot_genes_gmm_cells(pop,
  125. sample=sample,
  126. genelist=genelist,
  127. savename='%s_%d_%d' % (sample,comp1,comp2),
  128. metric='correlation',
  129. method='single',
  130. clustergenes=False,
  131. clustercells=False,
  132. cmap='magma',
  133. figsize=(20,20)
  134. )
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement