Guest User

Untitled

a guest
Aug 20th, 2018
94
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 5.74 KB | None | 0 0
  1. {
  2. "cells": [
  3. {
  4. "cell_type": "code",
  5. "execution_count": 1,
  6. "metadata": {
  7. "collapsed": true
  8. },
  9. "outputs": [
  10. {
  11. "name": "stdout",
  12. "output_type": "stream",
  13. "text": [
  14. " pixel0 pixel1 pixel2 pixel3 pixel4 pixel5 pixel6 pixel7 pixel8 \\\n0 0 0 0 0 0 0 0 0 0 \n1 0 0 0 0 0 0 0 0 0 \n2 0 0 0 0 0 0 0 0 0 \n3 0 0 0 0 0 0 0 0 0 \n4 0 0 0 0 0 0 0 0 0 \n\n pixel9 ... pixel774 pixel775 pixel776 pixel777 pixel778 \\\n0 0 ... 0 0 0 0 0 \n1 0 ... 0 0 0 0 0 \n2 0 ... 0 0 0 0 0 \n3 0 ... 0 0 0 0 0 \n4 0 ... 0 0 0 0 0 \n\n pixel779 pixel780 pixel781 pixel782 pixel783 \n0 0 0 0 0 0 \n1 0 0 0 0 0 \n2 0 0 0 0 0 \n3 0 0 0 0 0 \n4 0 0 0 0 0 \n\n[5 rows x 784 columns]\n0 1\n1 0\n2 1\n3 4\n4 0\nName: label, dtype: int64\n(42000, 784)\n(42000,)\n"
  15. ]
  16. }
  17. ],
  18. "source": [
  19. "import numpy as np\n",
  20. "import pandas as pd\n",
  21. "import matplotlib.pyplot as plt\n",
  22. "\n",
  23. "# Read the training CSV into a DataFrame.\n",
  24. "d0 = pd.read_csv('mnist_train.csv')\n",
  25. "\n",
  26. "# Split the frame: l is the 'label' column, d is every other column.\n",
  27. "l = d0.loc[:, 'label']\n",
  28. "d = d0.drop(labels=['label'], axis=1)\n",
  29. "\n",
  30. "# Preview the first rows of each piece, then print both shapes\n",
  31. "# (d and l are expected to have the same number of rows).\n",
  32. "previews = (d.head(), l.head(), d.shape, l.shape)\n",
  33. "for preview in previews:\n",
  34. "    print(preview)"
  35. ]
  36. },
  37. {
  38. "cell_type": "code",
  39. "execution_count": 2,
  40. "metadata": {},
  41. "outputs": [
  42. {
  43. "name": "stdout",
  44. "output_type": "stream",
  45. "text": [
  46. "(42000, 784)\n"
  47. ]
  48. }
  49. ],
  50. "source": [
  51. "from sklearn.preprocessing import StandardScaler\n",
  52. "standardised_data = StandardScaler().fit(d).transform(d)  # learn per-column statistics on d, then scale d with them\n",
  53. "print(np.shape(standardised_data))"
  54. ]
  55. },
  56. {
  57. "cell_type": "code",
  58. "execution_count": 21,
  59. "metadata": {},
  60. "outputs": [],
  61. "source": [
  62. "from sklearn import decomposition\n",
  63. "# Reduce the standardised data to two PCA components.\n",
  64. "sample_data = standardised_data\n",
  65. "pca = decomposition.PCA(n_components=2)\n",
  66. "pca_data = pca.fit_transform(sample_data)"
  67. ]
  68. },
  69. {
  70. "cell_type": "code",
  71. "execution_count": 23,
  72. "metadata": {},
  73. "outputs": [
  74. {
  75. "name": "stdout",
  76. "output_type": "stream",
  77. "text": [
  78. "(42000, 2)\n"
  79. ]
  80. }
  81. ],
  82. "source": [
  83. "print(np.shape(pca_data))  # dimensions of the PCA output"
  84. ]
  85. },
  86. {
  87. "cell_type": "code",
  88. "execution_count": 24,
  89. "metadata": {},
  90. "outputs": [],
  91. "source": [
  92. "pca_data = np.vstack((pca_data[:, :2].T, l)).T  # slice to the 2 components first so re-running this cell cannot stack a second label column\n"
  93. ]
  94. },
  95. {
  96. "cell_type": "code",
  97. "execution_count": 25,
  98. "metadata": {},
  99. "outputs": [
  100. {
  101. "name": "stdout",
  102. "output_type": "stream",
  103. "text": [
  104. "(42000, 3)\n"
  105. ]
  106. }
  107. ],
  108. "source": [
  109. "print(np.shape(pca_data))  # dimensions after the label column was appended"
  110. ]
  111. },
  112. {
  113. "cell_type": "code",
  114. "execution_count": 26,
  115. "metadata": {},
  116. "outputs": [],
  117. "source": [
  118. "import seaborn as sn\n",
  119. "\n",
  120. "pca_dataframe = pd.DataFrame(pca_data, columns=(\"1st_principal\", \"2nd_principal\", \"Labels\"))  # third column holds the appended labels\n",
  121. "(sn.FacetGrid(pca_dataframe, hue=\"Labels\", height=6)  # one colour per label; the legend title is the hue column name\n",
  122. "   .map(plt.scatter, \"1st_principal\", \"2nd_principal\")\n",
  123. "   .add_legend())\n",
  124. "plt.show()"
  125. ]
  126. },
  127. {
  128. "cell_type": "code",
  129. "execution_count": 31,
  130. "metadata": {},
  131. "outputs": [
  132. {
  133. "name": "stdout",
  134. "output_type": "stream",
  135. "text": [
  136. "t-SNE done! Time elapsed: 4151.372444868088 seconds\n"
  137. ]
  138. }
  139. ],
  140. "source": [
  141. "from sklearn.manifold import TSNE\n",
  142. "import time\n",
  143. "\n",
  144. "# Configure t-SNE with 2 output components and a fixed random_state; every other\n",
  145. "# hyper-parameter is left at the library default. The defaults noted when this was\n",
  146. "# written: perplexity = 30, learning rate = 200, max optimisation iterations = 1000\n",
  147. "# (defaults can differ between scikit-learn versions - confirm for yours).\n",
  148. "time_start = time.time()\n",
  149. "model = TSNE(random_state=0, n_components=2)\n",
  150. "\n",
  151. "# Embed the standardised data, then report the elapsed wall-clock time.\n",
  152. "tsne_data = model.fit_transform(standardised_data)\n",
  153. "print('t-SNE done! Time elapsed: {} seconds'.format(time.time() - time_start))"
  154. ]
  155. },
  156. {
  157. "cell_type": "code",
  158. "execution_count": 32,
  159. "metadata": {},
  160. "outputs": [
  161. {
  162. "name": "stdout",
  163. "output_type": "stream",
  164. "text": [
  165. "(42000, 2)\n(42000, 3)\n"
  166. ]
  167. }
  168. ],
  169. "source": [
  170. "print(tsne_data.shape)  # shape before the label column is attached\n",
  171. "tsne_data = np.vstack((tsne_data[:, :2].T, l)).T  # slice to the 2 embedding columns first so a re-run cannot stack a second label column\n",
  172. "print(tsne_data.shape)"
  173. ]
  174. },
  175. {
  176. "cell_type": "code",
  177. "execution_count": 38,
  178. "metadata": {},
  179. "outputs": [],
  180. "source": [
  181. "tsne_dataframe = pd.DataFrame(tsne_data, columns=(\"Dim_1\", \"Dim_2\", \"label\"))  # name the two embedding columns and the label column\n",
  182. "(sn.FacetGrid(tsne_dataframe, hue=\"label\", height=15)  # one colour per label value\n",
  183. "   .map(plt.scatter, \"Dim_1\", \"Dim_2\")\n",
  184. "   .add_legend())\n",
  185. "plt.show()\n"
  186. ]
  187. },
  188. {
  189. "cell_type": "code",
  190. "execution_count": null,
  191. "metadata": {},
  192. "outputs": [],
  193. "source": []
  194. }
  195. ],
  196. "metadata": {
  197. "kernelspec": {
  198. "display_name": "Python 2",
  199. "language": "python",
  200. "name": "python2"
  201. },
  202. "language_info": {
  203. "codemirror_mode": {
  204. "name": "ipython",
  205. "version": 2
  206. },
  207. "file_extension": ".py",
  208. "mimetype": "text/x-python",
  209. "name": "python",
  210. "nbconvert_exporter": "python",
  211. "pygments_lexer": "ipython2",
  212. "version": "2.7.6"
  213. }
  214. },
  215. "nbformat": 4,
  216. "nbformat_minor": 0
  217. }
Add Comment
Please, Sign In to add comment