Guest User

Untitled

a guest
Sep 28th, 2018
185
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 11.38 KB | None | 0 0
  1. {
  2. "cells": [
  3. {
  4. "cell_type": "markdown",
  5. "metadata": {},
  6. "source": [
  7. "Importing libraries and loading the data into a DataFrame"
  8. ]
  9. },
  10. {
  11. "cell_type": "code",
  12. "execution_count": 14,
  13. "metadata": {},
  14. "outputs": [],
  15. "source": [
  16. "import pandas as pd\n",
  17. "import numpy as np\n",
  18. "\n",
  19. "df = pd.read_csv('data.csv', index_col = 'email')"
  20. ]
  21. },
  22. {
  23. "cell_type": "code",
  24. "execution_count": 15,
  25. "metadata": {},
  26. "outputs": [
  27. {
  28. "data": {
  29. "text/html": [
  30. "<div>\n",
  31. "<style scoped>\n",
  32. " .dataframe tbody tr th:only-of-type {\n",
  33. " vertical-align: middle;\n",
  34. " }\n",
  35. "\n",
  36. " .dataframe tbody tr th {\n",
  37. " vertical-align: top;\n",
  38. " }\n",
  39. "\n",
  40. " .dataframe thead th {\n",
  41. " text-align: right;\n",
  42. " }\n",
  43. "</style>\n",
  44. "<table border=\"1\" class=\"dataframe\">\n",
  45. " <thead>\n",
  46. " <tr style=\"text-align: right;\">\n",
  47. " <th></th>\n",
  48. " <th>satisfaction</th>\n",
  49. " <th>score_rep</th>\n",
  50. " <th>score_ps</th>\n",
  51. " <th>score_rank</th>\n",
  52. " <th>score_activity</th>\n",
  53. " <th>score_followers</th>\n",
  54. " <th>score_contribution</th>\n",
  55. " </tr>\n",
  56. " <tr>\n",
  57. " <th>email</th>\n",
  58. " <th></th>\n",
  59. " <th></th>\n",
  60. " <th></th>\n",
  61. " <th></th>\n",
  62. " <th></th>\n",
  63. " <th></th>\n",
  64. " <th></th>\n",
  65. " </tr>\n",
  66. " </thead>\n",
  67. " <tbody>\n",
  68. " <tr>\n",
  69. " <th>poke19962008@gmail.com</th>\n",
  70. " <td>Y</td>\n",
  71. " <td>20</td>\n",
  72. " <td>15</td>\n",
  73. " <td>14</td>\n",
  74. " <td>5</td>\n",
  75. " <td>5</td>\n",
  76. " <td>5</td>\n",
  77. " </tr>\n",
  78. " <tr>\n",
  79. " <th>aaa@gmail.com</th>\n",
  80. " <td>N</td>\n",
  81. " <td>21</td>\n",
  82. " <td>20</td>\n",
  83. " <td>13</td>\n",
  84. " <td>2</td>\n",
  85. " <td>11</td>\n",
  86. " <td>4</td>\n",
  87. " </tr>\n",
  88. " <tr>\n",
  89. " <th>bbb@gmail.com</th>\n",
  90. " <td>N</td>\n",
  91. " <td>15</td>\n",
  92. " <td>19</td>\n",
  93. " <td>11</td>\n",
  94. " <td>3</td>\n",
  95. " <td>4</td>\n",
  96. " <td>3</td>\n",
  97. " </tr>\n",
  98. " <tr>\n",
  99. " <th>ccc@gmail.com</th>\n",
  100. " <td>N</td>\n",
  101. " <td>18</td>\n",
  102. " <td>14</td>\n",
  103. " <td>9</td>\n",
  104. " <td>4</td>\n",
  105. " <td>8</td>\n",
  106. " <td>4</td>\n",
  107. " </tr>\n",
  108. " <tr>\n",
  109. " <th>ddd@gmail.com</th>\n",
  110. " <td>Y</td>\n",
  111. " <td>20</td>\n",
  112. " <td>16</td>\n",
  113. " <td>7</td>\n",
  114. " <td>4</td>\n",
  115. " <td>6</td>\n",
  116. " <td>1</td>\n",
  117. " </tr>\n",
  118. " </tbody>\n",
  119. "</table>\n",
  120. "</div>"
  121. ],
  122. "text/plain": [
  123. " satisfaction score_rep score_ps score_rank \\\n",
  124. "email \n",
  125. "poke19962008@gmail.com Y 20 15 14 \n",
  126. "aaa@gmail.com N 21 20 13 \n",
  127. "bbb@gmail.com N 15 19 11 \n",
  128. "ccc@gmail.com N 18 14 9 \n",
  129. "ddd@gmail.com Y 20 16 7 \n",
  130. "\n",
  131. " score_activity score_followers score_contribution \n",
  132. "email \n",
  133. "poke19962008@gmail.com 5 5 5 \n",
  134. "aaa@gmail.com 2 11 4 \n",
  135. "bbb@gmail.com 3 4 3 \n",
  136. "ccc@gmail.com 4 8 4 \n",
  137. "ddd@gmail.com 4 6 1 "
  138. ]
  139. },
  140. "execution_count": 15,
  141. "metadata": {},
  142. "output_type": "execute_result"
  143. }
  144. ],
  145. "source": [
  146. "df.head()"
  147. ]
  148. },
  149. {
  150. "cell_type": "markdown",
  151. "metadata": {},
  152. "source": [
  153. "Next, we'll convert the text column to numbers. <br>\n",
  154. "LabelEncoder is used to convert each category to an integer code (here, N becomes 0 and Y becomes 1)."
  155. ]
  156. },
  157. {
  158. "cell_type": "code",
  159. "execution_count": 16,
  160. "metadata": {},
  161. "outputs": [
  162. {
  163. "data": {
  164. "text/plain": [
  165. "Index(['satisfaction'], dtype='object')"
  166. ]
  167. },
  168. "execution_count": 16,
  169. "metadata": {},
  170. "output_type": "execute_result"
  171. }
  172. ],
  173. "source": [
  174. "from sklearn.preprocessing import LabelEncoder\n",
  175. "categorical_variables = df.dtypes[df.dtypes == 'object'].index\n",
  176. "categorical_variables"
  177. ]
  178. },
  179. {
  180. "cell_type": "code",
  181. "execution_count": 17,
  182. "metadata": {},
  183. "outputs": [
  184. {
  185. "data": {
  186. "text/html": [
  187. "<div>\n",
  188. "<style scoped>\n",
  189. " .dataframe tbody tr th:only-of-type {\n",
  190. " vertical-align: middle;\n",
  191. " }\n",
  192. "\n",
  193. " .dataframe tbody tr th {\n",
  194. " vertical-align: top;\n",
  195. " }\n",
  196. "\n",
  197. " .dataframe thead th {\n",
  198. " text-align: right;\n",
  199. " }\n",
  200. "</style>\n",
  201. "<table border=\"1\" class=\"dataframe\">\n",
  202. " <thead>\n",
  203. " <tr style=\"text-align: right;\">\n",
  204. " <th></th>\n",
  205. " <th>satisfaction</th>\n",
  206. " <th>score_rep</th>\n",
  207. " <th>score_ps</th>\n",
  208. " <th>score_rank</th>\n",
  209. " <th>score_activity</th>\n",
  210. " <th>score_followers</th>\n",
  211. " <th>score_contribution</th>\n",
  212. " </tr>\n",
  213. " <tr>\n",
  214. " <th>email</th>\n",
  215. " <th></th>\n",
  216. " <th></th>\n",
  217. " <th></th>\n",
  218. " <th></th>\n",
  219. " <th></th>\n",
  220. " <th></th>\n",
  221. " <th></th>\n",
  222. " </tr>\n",
  223. " </thead>\n",
  224. " <tbody>\n",
  225. " <tr>\n",
  226. " <th>poke19962008@gmail.com</th>\n",
  227. " <td>1</td>\n",
  228. " <td>20</td>\n",
  229. " <td>15</td>\n",
  230. " <td>14</td>\n",
  231. " <td>5</td>\n",
  232. " <td>5</td>\n",
  233. " <td>5</td>\n",
  234. " </tr>\n",
  235. " <tr>\n",
  236. " <th>aaa@gmail.com</th>\n",
  237. " <td>0</td>\n",
  238. " <td>21</td>\n",
  239. " <td>20</td>\n",
  240. " <td>13</td>\n",
  241. " <td>2</td>\n",
  242. " <td>11</td>\n",
  243. " <td>4</td>\n",
  244. " </tr>\n",
  245. " <tr>\n",
  246. " <th>bbb@gmail.com</th>\n",
  247. " <td>0</td>\n",
  248. " <td>15</td>\n",
  249. " <td>19</td>\n",
  250. " <td>11</td>\n",
  251. " <td>3</td>\n",
  252. " <td>4</td>\n",
  253. " <td>3</td>\n",
  254. " </tr>\n",
  255. " <tr>\n",
  256. " <th>ccc@gmail.com</th>\n",
  257. " <td>0</td>\n",
  258. " <td>18</td>\n",
  259. " <td>14</td>\n",
  260. " <td>9</td>\n",
  261. " <td>4</td>\n",
  262. " <td>8</td>\n",
  263. " <td>4</td>\n",
  264. " </tr>\n",
  265. " <tr>\n",
  266. " <th>ddd@gmail.com</th>\n",
  267. " <td>1</td>\n",
  268. " <td>20</td>\n",
  269. " <td>16</td>\n",
  270. " <td>7</td>\n",
  271. " <td>4</td>\n",
  272. " <td>6</td>\n",
  273. " <td>1</td>\n",
  274. " </tr>\n",
  275. " </tbody>\n",
  276. "</table>\n",
  277. "</div>"
  278. ],
  279. "text/plain": [
  280. " satisfaction score_rep score_ps score_rank \\\n",
  281. "email \n",
  282. "poke19962008@gmail.com 1 20 15 14 \n",
  283. "aaa@gmail.com 0 21 20 13 \n",
  284. "bbb@gmail.com 0 15 19 11 \n",
  285. "ccc@gmail.com 0 18 14 9 \n",
  286. "ddd@gmail.com 1 20 16 7 \n",
  287. "\n",
  288. " score_activity score_followers score_contribution \n",
  289. "email \n",
  290. "poke19962008@gmail.com 5 5 5 \n",
  291. "aaa@gmail.com 2 11 4 \n",
  292. "bbb@gmail.com 3 4 3 \n",
  293. "ccc@gmail.com 4 8 4 \n",
  294. "ddd@gmail.com 4 6 1 "
  295. ]
  296. },
  297. "execution_count": 17,
  298. "metadata": {},
  299. "output_type": "execute_result"
  300. }
  301. ],
  302. "source": [
  303. "#Convert them using Label Encoder\n",
  304. "le = LabelEncoder()\n",
  305. "for var in categorical_variables:\n",
  306. " df[var] = le.fit_transform(df[var])\n",
  307. "\n",
  308. "df.head()"
  309. ]
  310. },
  311. {
  312. "cell_type": "markdown",
  313. "metadata": {},
  314. "source": [
  315. "Setting up the feature matrix X (the score columns) and the target variable y (satisfaction)"
  316. ]
  317. },
  318. {
  319. "cell_type": "code",
  320. "execution_count": 18,
  321. "metadata": {},
  322. "outputs": [],
  323. "source": [
  324. "X = df.iloc[:, 1:]\n",
  325. "y = df.iloc[:, 0]"
  326. ]
  327. },
  328. {
  329. "cell_type": "markdown",
  330. "metadata": {},
  331. "source": [
  332. "Splitting the dataset into train (70%) and test (30%) sets"
  333. ]
  334. },
  335. {
  336. "cell_type": "code",
  337. "execution_count": 19,
  338. "metadata": {},
  339. "outputs": [],
  340. "source": [
  341. "from sklearn.model_selection import train_test_split\n",
  342. "X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.3,random_state=42)"
  343. ]
  344. },
  345. {
  346. "cell_type": "markdown",
  347. "metadata": {},
  348. "source": [
  349. "Training a k-nearest neighbors (KNN) classifier and predicting on the test set"
  350. ]
  351. },
  352. {
  353. "cell_type": "code",
  354. "execution_count": 20,
  355. "metadata": {},
  356. "outputs": [],
  357. "source": [
  358. "from sklearn.neighbors import KNeighborsClassifier\n",
  359. "classifier = KNeighborsClassifier()\n",
  360. "knnClassifier = classifier.fit(X_train, y_train.ravel())\n",
  361. "y_pred = knnClassifier.predict(X_test)"
  362. ]
  363. },
  364. {
  365. "cell_type": "code",
  366. "execution_count": 21,
  367. "metadata": {},
  368. "outputs": [
  369. {
  370. "data": {
  371. "text/plain": [
  372. "66.66666666666666"
  373. ]
  374. },
  375. "execution_count": 21,
  376. "metadata": {},
  377. "output_type": "execute_result"
  378. }
  379. ],
  380. "source": [
  381. "from sklearn.metrics import accuracy_score\n",
  382. "result = accuracy_score(y_test, y_pred)*100\n",
  383. "result"
  384. ]
  385. }
  386. ],
  387. "metadata": {
  388. "kernelspec": {
  389. "display_name": "Python 3",
  390. "language": "python",
  391. "name": "python3"
  392. },
  393. "language_info": {
  394. "codemirror_mode": {
  395. "name": "ipython",
  396. "version": 3
  397. },
  398. "file_extension": ".py",
  399. "mimetype": "text/x-python",
  400. "name": "python",
  401. "nbconvert_exporter": "python",
  402. "pygments_lexer": "ipython3",
  403. "version": "3.6.6"
  404. }
  405. },
  406. "nbformat": 4,
  407. "nbformat_minor": 2
  408. }
Add Comment
Please, Sign In to add comment