Advertisement
Guest User

Untitled

a guest
Oct 16th, 2019
106
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 9.73 KB | None | 0 0
  1. {
  2. "cells": [
  3. {
  4. "cell_type": "markdown",
  5. "metadata": {},
  6. "source": [
  7. "### Finding categorical variables"
  8. ]
  9. },
  10. {
  11. "cell_type": "code",
  12. "execution_count": 2,
  13. "metadata": {},
  14. "outputs": [
  15. {
  16. "data": {
  17. "text/html": [
  18. "<div>\n",
  19. "<style scoped>\n",
  20. " .dataframe tbody tr th:only-of-type {\n",
  21. " vertical-align: middle;\n",
  22. " }\n",
  23. "\n",
  24. " .dataframe tbody tr th {\n",
  25. " vertical-align: top;\n",
  26. " }\n",
  27. "\n",
  28. " .dataframe thead th {\n",
  29. " text-align: right;\n",
  30. " }\n",
  31. "</style>\n",
  32. "<table border=\"1\" class=\"dataframe\">\n",
  33. " <thead>\n",
  34. " <tr style=\"text-align: right;\">\n",
  35. " <th></th>\n",
  36. " <th>satisfaction</th>\n",
  37. " <th>evaluation</th>\n",
  38. " <th>number_of_projects</th>\n",
  39. " <th>average_montly_hours</th>\n",
  40. " <th>time_spend_company</th>\n",
  41. " <th>work_accident</th>\n",
  42. " <th>churn</th>\n",
  43. " <th>promotion</th>\n",
  44. " <th>department</th>\n",
  45. " <th>salary</th>\n",
  46. " </tr>\n",
  47. " </thead>\n",
  48. " <tbody>\n",
  49. " <tr>\n",
  50. " <th>0</th>\n",
  51. " <td>0.38</td>\n",
  52. " <td>0.53</td>\n",
  53. " <td>2</td>\n",
  54. " <td>157</td>\n",
  55. " <td>3</td>\n",
  56. " <td>0</td>\n",
  57. " <td>1</td>\n",
  58. " <td>0</td>\n",
  59. " <td>sales</td>\n",
  60. " <td>low</td>\n",
  61. " </tr>\n",
  62. " <tr>\n",
  63. " <th>1</th>\n",
  64. " <td>0.80</td>\n",
  65. " <td>0.86</td>\n",
  66. " <td>5</td>\n",
  67. " <td>262</td>\n",
  68. " <td>6</td>\n",
  69. " <td>0</td>\n",
  70. " <td>1</td>\n",
  71. " <td>0</td>\n",
  72. " <td>sales</td>\n",
  73. " <td>medium</td>\n",
  74. " </tr>\n",
  75. " <tr>\n",
  76. " <th>2</th>\n",
  77. " <td>0.11</td>\n",
  78. " <td>0.88</td>\n",
  79. " <td>7</td>\n",
  80. " <td>272</td>\n",
  81. " <td>4</td>\n",
  82. " <td>0</td>\n",
  83. " <td>1</td>\n",
  84. " <td>0</td>\n",
  85. " <td>sales</td>\n",
  86. " <td>medium</td>\n",
  87. " </tr>\n",
  88. " <tr>\n",
  89. " <th>3</th>\n",
  90. " <td>0.72</td>\n",
  91. " <td>0.87</td>\n",
  92. " <td>5</td>\n",
  93. " <td>223</td>\n",
  94. " <td>5</td>\n",
  95. " <td>0</td>\n",
  96. " <td>1</td>\n",
  97. " <td>0</td>\n",
  98. " <td>sales</td>\n",
  99. " <td>low</td>\n",
  100. " </tr>\n",
  101. " <tr>\n",
  102. " <th>4</th>\n",
  103. " <td>0.37</td>\n",
  104. " <td>0.52</td>\n",
  105. " <td>2</td>\n",
  106. " <td>159</td>\n",
  107. " <td>3</td>\n",
  108. " <td>0</td>\n",
  109. " <td>1</td>\n",
  110. " <td>0</td>\n",
  111. " <td>sales</td>\n",
  112. " <td>low</td>\n",
  113. " </tr>\n",
  114. " </tbody>\n",
  115. "</table>\n",
  116. "</div>"
  117. ],
  118. "text/plain": [
  119. " satisfaction evaluation number_of_projects average_montly_hours \\\n",
  120. "0 0.38 0.53 2 157 \n",
  121. "1 0.80 0.86 5 262 \n",
  122. "2 0.11 0.88 7 272 \n",
  123. "3 0.72 0.87 5 223 \n",
  124. "4 0.37 0.52 2 159 \n",
  125. "\n",
  126. " time_spend_company work_accident churn promotion department salary \n",
  127. "0 3 0 1 0 sales low \n",
  128. "1 6 0 1 0 sales medium \n",
  129. "2 4 0 1 0 sales medium \n",
  130. "3 5 0 1 0 sales low \n",
  131. "4 3 0 1 0 sales low "
  132. ]
  133. },
  134. "execution_count": 2,
  135. "metadata": {},
  136. "output_type": "execute_result"
  137. }
  138. ],
  139. "source": [
  140. "# Import pandas (as pd) to read the data\n",
  141. "import pandas as pd\n",
  142. "\n",
  143. "# Read \"turnover.csv\" and save it in a DataFrame called data\n",
  144. "data = pd.read_csv(\"turnover.csv\")\n",
  145. "\n",
  146. "# Take a quick look at the first 5 rows of data\n",
  147. "data.head()"
  148. ]
  149. },
  150. {
  151. "cell_type": "markdown",
  152. "metadata": {},
  153. "source": [
  154. "### Encoding categories"
  155. ]
  156. },
  157. {
  158. "cell_type": "code",
  159. "execution_count": 8,
  160. "metadata": {},
  161. "outputs": [],
  162. "source": [
  163. "# Change the type of the \"salary\" column to categorical\n",
  164. "data.salary = data.salary.astype('category')\n",
  165. "\n",
  166. "# Provide the correct order of categories\n",
  167. "data.salary = data.salary.cat.reorder_categories(['low', 'medium', 'high'])\n",
  168. "\n",
  169. "# Encode categories as integer codes in category order (low=0, medium=1, high=2)\n",
  170. "data.salary = data.salary.cat.codes"
  171. ]
  172. },
  173. {
  174. "cell_type": "markdown",
  175. "metadata": {},
  176. "source": [
  177. "### Getting dummies"
  178. ]
  179. },
  180. {
  181. "cell_type": "code",
  182. "execution_count": 9,
  183. "metadata": {},
  184. "outputs": [
  185. {
  186. "data": {
  187. "text/html": [
  188. "<div>\n",
  189. "<style scoped>\n",
  190. " .dataframe tbody tr th:only-of-type {\n",
  191. " vertical-align: middle;\n",
  192. " }\n",
  193. "\n",
  194. " .dataframe tbody tr th {\n",
  195. " vertical-align: top;\n",
  196. " }\n",
  197. "\n",
  198. " .dataframe thead th {\n",
  199. " text-align: right;\n",
  200. " }\n",
  201. "</style>\n",
  202. "<table border=\"1\" class=\"dataframe\">\n",
  203. " <thead>\n",
  204. " <tr style=\"text-align: right;\">\n",
  205. " <th></th>\n",
  206. " <th>IT</th>\n",
  207. " <th>RandD</th>\n",
  208. " <th>accounting</th>\n",
  209. " <th>hr</th>\n",
  210. " <th>management</th>\n",
  211. " <th>marketing</th>\n",
  212. " <th>product_mng</th>\n",
  213. " <th>sales</th>\n",
  214. " <th>support</th>\n",
  215. " <th>technical</th>\n",
  216. " </tr>\n",
  217. " </thead>\n",
  218. " <tbody>\n",
  219. " <tr>\n",
  220. " <th>0</th>\n",
  221. " <td>0</td>\n",
  222. " <td>0</td>\n",
  223. " <td>0</td>\n",
  224. " <td>0</td>\n",
  225. " <td>0</td>\n",
  226. " <td>0</td>\n",
  227. " <td>0</td>\n",
  228. " <td>1</td>\n",
  229. " <td>0</td>\n",
  230. " <td>0</td>\n",
  231. " </tr>\n",
  232. " <tr>\n",
  233. " <th>1</th>\n",
  234. " <td>0</td>\n",
  235. " <td>0</td>\n",
  236. " <td>0</td>\n",
  237. " <td>0</td>\n",
  238. " <td>0</td>\n",
  239. " <td>0</td>\n",
  240. " <td>0</td>\n",
  241. " <td>1</td>\n",
  242. " <td>0</td>\n",
  243. " <td>0</td>\n",
  244. " </tr>\n",
  245. " <tr>\n",
  246. " <th>2</th>\n",
  247. " <td>0</td>\n",
  248. " <td>0</td>\n",
  249. " <td>0</td>\n",
  250. " <td>0</td>\n",
  251. " <td>0</td>\n",
  252. " <td>0</td>\n",
  253. " <td>0</td>\n",
  254. " <td>1</td>\n",
  255. " <td>0</td>\n",
  256. " <td>0</td>\n",
  257. " </tr>\n",
  258. " <tr>\n",
  259. " <th>3</th>\n",
  260. " <td>0</td>\n",
  261. " <td>0</td>\n",
  262. " <td>0</td>\n",
  263. " <td>0</td>\n",
  264. " <td>0</td>\n",
  265. " <td>0</td>\n",
  266. " <td>0</td>\n",
  267. " <td>1</td>\n",
  268. " <td>0</td>\n",
  269. " <td>0</td>\n",
  270. " </tr>\n",
  271. " <tr>\n",
  272. " <th>4</th>\n",
  273. " <td>0</td>\n",
  274. " <td>0</td>\n",
  275. " <td>0</td>\n",
  276. " <td>0</td>\n",
  277. " <td>0</td>\n",
  278. " <td>0</td>\n",
  279. " <td>0</td>\n",
  280. " <td>1</td>\n",
  281. " <td>0</td>\n",
  282. " <td>0</td>\n",
  283. " </tr>\n",
  284. " </tbody>\n",
  285. "</table>\n",
  286. "</div>"
  287. ],
  288. "text/plain": [
  289. " IT RandD accounting hr management marketing product_mng sales \\\n",
  290. "0 0 0 0 0 0 0 0 1 \n",
  291. "1 0 0 0 0 0 0 0 1 \n",
  292. "2 0 0 0 0 0 0 0 1 \n",
  293. "3 0 0 0 0 0 0 0 1 \n",
  294. "4 0 0 0 0 0 0 0 1 \n",
  295. "\n",
  296. " support technical \n",
  297. "0 0 0 \n",
  298. "1 0 0 \n",
  299. "2 0 0 \n",
  300. "3 0 0 \n",
  301. "4 0 0 "
  302. ]
  303. },
  304. "execution_count": 9,
  305. "metadata": {},
  306. "output_type": "execute_result"
  307. }
  308. ],
  309. "source": [
  310. "# Get dummies and save them inside a new DataFrame\n",
  311. "departments = pd.get_dummies(data.department)\n",
  312. "\n",
  313. "# Take a quick look at the first 5 rows of the new DataFrame called departments\n",
  314. "departments.head()"
  315. ]
  316. }
  317. ],
  318. "metadata": {
  319. "kernelspec": {
  320. "display_name": "Python 3",
  321. "language": "python",
  322. "name": "python3"
  323. },
  324. "language_info": {
  325. "codemirror_mode": {
  326. "name": "ipython",
  327. "version": 3
  328. },
  329. "file_extension": ".py",
  330. "mimetype": "text/x-python",
  331. "name": "python",
  332. "nbconvert_exporter": "python",
  333. "pygments_lexer": "ipython3",
  334. "version": "3.7.3"
  335. }
  336. },
  337. "nbformat": 4,
  338. "nbformat_minor": 2
  339. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement