Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- {
- "cells": [
- {
- "cell_type": "code",
- "execution_count": 1,
- "metadata": {
- "collapsed": true
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- " pixel0 pixel1 pixel2 pixel3 pixel4 pixel5 pixel6 pixel7 pixel8 \\\n0 0 0 0 0 0 0 0 0 0 \n1 0 0 0 0 0 0 0 0 0 \n2 0 0 0 0 0 0 0 0 0 \n3 0 0 0 0 0 0 0 0 0 \n4 0 0 0 0 0 0 0 0 0 \n\n pixel9 ... pixel774 pixel775 pixel776 pixel777 pixel778 \\\n0 0 ... 0 0 0 0 0 \n1 0 ... 0 0 0 0 0 \n2 0 ... 0 0 0 0 0 \n3 0 ... 0 0 0 0 0 \n4 0 ... 0 0 0 0 0 \n\n pixel779 pixel780 pixel781 pixel782 pixel783 \n0 0 0 0 0 0 \n1 0 0 0 0 0 \n2 0 0 0 0 0 \n3 0 0 0 0 0 \n4 0 0 0 0 0 \n\n[5 rows x 784 columns]\n0 1\n1 0\n2 1\n3 4\n4 0\nName: label, dtype: int64\n(42000, 784)\n(42000,)\n"
- ]
- }
- ],
- "source": [
- "import numpy as np\n",
- "import pandas as pd\n",
- "import matplotlib.pyplot as plt\n",
- "\n",
- "# Read the training CSV. After the 'label' column is removed below, the\n",
- "# recorded output shows 784 pixel columns (pixel0 .. pixel783).\n",
- "d0 = pd.read_csv('mnist_train.csv')\n",
- "\n",
- "# Separate the target column (`l`) from the pixel features (`d`).\n",
- "l = d0['label']\n",
- "d = d0.drop('label', axis=1)\n",
- "\n",
- "# Preview both pieces, then report their dimensions (same order as before).\n",
- "for preview in (d.head(), l.head(), d.shape, l.shape):\n",
- "    print(preview)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 2,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "(42000, 784)\n"
- ]
- }
- ],
- "source": [
- "from sklearn.preprocessing import StandardScaler\n",
- "\n",
- "# Standardise every feature column with StandardScaler's default settings.\n",
- "scaler = StandardScaler()\n",
- "standardised_data = scaler.fit_transform(d)\n",
- "print(standardised_data.shape)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 21,
- "metadata": {},
- "outputs": [],
- "source": [
- "from sklearn import decomposition\n",
- "\n",
- "# Alias kept so the name `sample_data` remains available to other cells.\n",
- "sample_data = standardised_data\n",
- "\n",
- "# Configure the estimator through its constructor rather than assigning\n",
- "# `n_components` afterwards. The seed is fixed (same value as the t-SNE cell)\n",
- "# because PCA may select a randomized SVD solver for data of this size, which\n",
- "# would otherwise make the projection vary slightly between runs.\n",
- "pca = decomposition.PCA(n_components=2, random_state=0)\n",
- "pca_data = pca.fit_transform(sample_data)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 23,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "(42000, 2)\n"
- ]
- }
- ],
- "source": [
- "# Sanity check on the projection: one row per sample, one column per component.\n",
- "projected_shape = pca_data.shape\n",
- "print(projected_shape)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 24,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Append the labels as a third column. Slicing to the first two columns first\n",
- "# makes this cell safe to re-run: without it, each extra execution would stack\n",
- "# another copy of the labels onto the already-labelled array.\n",
- "pca_data = np.column_stack((pca_data[:, :2], l))\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 25,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "(42000, 3)\n"
- ]
- }
- ],
- "source": [
- "# Confirm the label column was appended (the column count should now be 3).\n",
- "labelled_shape = pca_data.shape\n",
- "print(labelled_shape)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 26,
- "metadata": {},
- "outputs": [],
- "source": [
- "import seaborn as sn\n",
- "\n",
- "# Wrap the projected points and their labels in a frame for seaborn.\n",
- "# The third column was previously misspelled \"Lables\", which also showed up\n",
- "# as the legend title.\n",
- "pca_dataframe = pd.DataFrame(pca_data, columns=(\"1st_principal\", \"2nd_principal\", \"Labels\"))\n",
- "\n",
- "# Stacking the integer labels next to float coordinates converted them to\n",
- "# floats; cast back so the legend reads 0..9 rather than 0.0..9.0.\n",
- "pca_dataframe[\"Labels\"] = pca_dataframe[\"Labels\"].astype(int)\n",
- "\n",
- "# One scatter layer per label value, coloured by hue.\n",
- "grid = sn.FacetGrid(pca_dataframe, hue=\"Labels\", height=6)\n",
- "grid.map(plt.scatter, \"1st_principal\", \"2nd_principal\").add_legend()\n",
- "\n",
- "plt.show()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 31,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "t-SNE done! Time elapsed: 4151.372444868088 seconds\n"
- ]
- }
- ],
- "source": [
- "from sklearn.manifold import TSNE\n",
- "import time\n",
- "\n",
- "time_start = time.time()\n",
- "\n",
- "# t-SNE configuration: two output dimensions and a fixed seed; every other\n",
- "# parameter is left at the library default. The original notes list those\n",
- "# defaults as perplexity = 30, learning rate = 200 and at most 1000\n",
- "# optimisation iterations (defaults can differ between scikit-learn releases).\n",
- "model = TSNE(n_components=2, random_state=0)\n",
- "\n",
- "# Slow step: the recorded run on the full data took roughly 4151 seconds.\n",
- "tsne_data = model.fit_transform(standardised_data)\n",
- "elapsed = time.time() - time_start\n",
- "print('t-SNE done! Time elapsed: {} seconds'.format(elapsed))"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 32,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "(42000, 2)\n(42000, 3)\n"
- ]
- }
- ],
- "source": [
- "# Work from the two embedding columns only, so re-running this cell does not\n",
- "# stack the labels onto an already-labelled array (which would force the slow\n",
- "# t-SNE fit to be repeated to recover a clean embedding).\n",
- "tsne_embedding = tsne_data[:, :2]\n",
- "print(tsne_embedding.shape)\n",
- "\n",
- "# Append the labels as a third column.\n",
- "tsne_data = np.column_stack((tsne_embedding, l))\n",
- "print(tsne_data.shape)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 38,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Wrap the embedded points and their labels in a frame for seaborn.\n",
- "tsne_dataframe = pd.DataFrame(data=tsne_data, columns=(\"Dim_1\", \"Dim_2\", \"label\"))\n",
- "\n",
- "# Stacking the integer labels next to float coordinates converted them to\n",
- "# floats; cast back so the legend reads 0..9 rather than 0.0..9.0.\n",
- "tsne_dataframe[\"label\"] = tsne_dataframe[\"label\"].astype(int)\n",
- "\n",
- "# One scatter layer per label value, coloured by hue.\n",
- "grid = sn.FacetGrid(data=tsne_dataframe, hue=\"label\", height=15)\n",
- "grid.map(plt.scatter, \"Dim_1\", \"Dim_2\").add_legend()\n",
- "\n",
- "plt.show()\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 2",
- "language": "python",
- "name": "python2"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 2
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython2",
- "version": "2.7.6"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 0
- }
Add Comment
Please, Sign In to add comment