Guest User

Untitled

a guest
Mar 23rd, 2018
70
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 40.81 KB | None | 0 0
  1. {
  2. "cells": [
  3. {
  4. "cell_type": "markdown",
  5. "metadata": {},
  6. "source": [
  7. "### Import libraries"
  8. ]
  9. },
  10. {
  11. "cell_type": "code",
  12. "execution_count": 1,
  13. "metadata": {
  14. "collapsed": true
  15. },
  16. "outputs": [],
  17. "source": [
  18. "import pandas as pd\n",
  19. "import numpy as np"
  20. ]
  21. },
  22. {
  23. "cell_type": "markdown",
  24. "metadata": {},
  25. "source": [
  26. "### Load dataset"
  27. ]
  28. },
  29. {
  30. "cell_type": "code",
  31. "execution_count": 2,
  32. "metadata": {
  33. "collapsed": true
  34. },
  35. "outputs": [],
  36. "source": [
  37. "data = pd.read_csv('gapminder.csv', low_memory=False)"
  38. ]
  39. },
  40. {
  41. "cell_type": "markdown",
  42. "metadata": {},
  43. "source": [
  44. "### Explore dataset"
  45. ]
  46. },
  47. {
  48. "cell_type": "code",
  49. "execution_count": 3,
  50. "metadata": {},
  51. "outputs": [
  52. {
  53. "data": {
  54. "text/html": [
  55. "<div>\n",
  56. "<table border=\"1\" class=\"dataframe\">\n",
  57. " <thead>\n",
  58. " <tr style=\"text-align: right;\">\n",
  59. " <th></th>\n",
  60. " <th>country</th>\n",
  61. " <th>incomeperperson</th>\n",
  62. " <th>alcconsumption</th>\n",
  63. " <th>armedforcesrate</th>\n",
  64. " <th>breastcancerper100th</th>\n",
  65. " <th>co2emissions</th>\n",
  66. " <th>femaleemployrate</th>\n",
  67. " <th>hivrate</th>\n",
  68. " <th>internetuserate</th>\n",
  69. " <th>lifeexpectancy</th>\n",
  70. " <th>oilperperson</th>\n",
  71. " <th>polityscore</th>\n",
  72. " <th>relectricperperson</th>\n",
  73. " <th>suicideper100th</th>\n",
  74. " <th>employrate</th>\n",
  75. " <th>urbanrate</th>\n",
  76. " </tr>\n",
  77. " </thead>\n",
  78. " <tbody>\n",
  79. " <tr>\n",
  80. " <th>0</th>\n",
  81. " <td>Afghanistan</td>\n",
  82. " <td></td>\n",
  83. " <td>.03</td>\n",
  84. " <td>.5696534</td>\n",
  85. " <td>26.8</td>\n",
  86. " <td>75944000</td>\n",
  87. " <td>25.6000003814697</td>\n",
  88. " <td></td>\n",
  89. " <td>3.65412162280064</td>\n",
  90. " <td>48.673</td>\n",
  91. " <td></td>\n",
  92. " <td>0</td>\n",
  93. " <td></td>\n",
  94. " <td>6.68438529968262</td>\n",
  95. " <td>55.7000007629394</td>\n",
  96. " <td>24.04</td>\n",
  97. " </tr>\n",
  98. " <tr>\n",
  99. " <th>1</th>\n",
  100. " <td>Albania</td>\n",
  101. " <td>1914.99655094922</td>\n",
  102. " <td>7.29</td>\n",
  103. " <td>1.0247361</td>\n",
  104. " <td>57.4</td>\n",
  105. " <td>223747333.333333</td>\n",
  106. " <td>42.0999984741211</td>\n",
  107. " <td></td>\n",
  108. " <td>44.9899469578783</td>\n",
  109. " <td>76.918</td>\n",
  110. " <td></td>\n",
  111. " <td>9</td>\n",
  112. " <td>636.341383366604</td>\n",
  113. " <td>7.69932985305786</td>\n",
  114. " <td>51.4000015258789</td>\n",
  115. " <td>46.72</td>\n",
  116. " </tr>\n",
  117. " <tr>\n",
  118. " <th>2</th>\n",
  119. " <td>Algeria</td>\n",
  120. " <td>2231.99333515006</td>\n",
  121. " <td>.69</td>\n",
  122. " <td>2.306817</td>\n",
  123. " <td>23.5</td>\n",
  124. " <td>2932108666.66667</td>\n",
  125. " <td>31.7000007629394</td>\n",
  126. " <td>.1</td>\n",
  127. " <td>12.5000733055148</td>\n",
  128. " <td>73.131</td>\n",
  129. " <td>.42009452521537</td>\n",
  130. " <td>2</td>\n",
  131. " <td>590.509814347428</td>\n",
  132. " <td>4.8487696647644</td>\n",
  133. " <td>50.5</td>\n",
  134. " <td>65.22</td>\n",
  135. " </tr>\n",
  136. " <tr>\n",
  137. " <th>3</th>\n",
  138. " <td>Andorra</td>\n",
  139. " <td>21943.3398976022</td>\n",
  140. " <td>10.17</td>\n",
  141. " <td></td>\n",
  142. " <td></td>\n",
  143. " <td></td>\n",
  144. " <td></td>\n",
  145. " <td></td>\n",
  146. " <td>81</td>\n",
  147. " <td></td>\n",
  148. " <td></td>\n",
  149. " <td></td>\n",
  150. " <td></td>\n",
  151. " <td>5.36217880249023</td>\n",
  152. " <td></td>\n",
  153. " <td>88.92</td>\n",
  154. " </tr>\n",
  155. " <tr>\n",
  156. " <th>4</th>\n",
  157. " <td>Angola</td>\n",
  158. " <td>1381.00426770244</td>\n",
  159. " <td>5.57</td>\n",
  160. " <td>1.4613288</td>\n",
  161. " <td>23.1</td>\n",
  162. " <td>248358000</td>\n",
  163. " <td>69.4000015258789</td>\n",
  164. " <td>2</td>\n",
  165. " <td>9.99995388324075</td>\n",
  166. " <td>51.093</td>\n",
  167. " <td></td>\n",
  168. " <td>-2</td>\n",
  169. " <td>172.999227388199</td>\n",
  170. " <td>14.5546770095825</td>\n",
  171. " <td>75.6999969482422</td>\n",
  172. " <td>56.7</td>\n",
  173. " </tr>\n",
  174. " </tbody>\n",
  175. "</table>\n",
  176. "</div>"
  177. ],
  178. "text/plain": [
  179. " country incomeperperson alcconsumption armedforcesrate \\\n",
  180. "0 Afghanistan .03 .5696534 \n",
  181. "1 Albania 1914.99655094922 7.29 1.0247361 \n",
  182. "2 Algeria 2231.99333515006 .69 2.306817 \n",
  183. "3 Andorra 21943.3398976022 10.17 \n",
  184. "4 Angola 1381.00426770244 5.57 1.4613288 \n",
  185. "\n",
  186. " breastcancerper100th co2emissions femaleemployrate hivrate \\\n",
  187. "0 26.8 75944000 25.6000003814697 \n",
  188. "1 57.4 223747333.333333 42.0999984741211 \n",
  189. "2 23.5 2932108666.66667 31.7000007629394 .1 \n",
  190. "3 \n",
  191. "4 23.1 248358000 69.4000015258789 2 \n",
  192. "\n",
  193. " internetuserate lifeexpectancy oilperperson polityscore \\\n",
  194. "0 3.65412162280064 48.673 0 \n",
  195. "1 44.9899469578783 76.918 9 \n",
  196. "2 12.5000733055148 73.131 .42009452521537 2 \n",
  197. "3 81 \n",
  198. "4 9.99995388324075 51.093 -2 \n",
  199. "\n",
  200. " relectricperperson suicideper100th employrate urbanrate \n",
  201. "0 6.68438529968262 55.7000007629394 24.04 \n",
  202. "1 636.341383366604 7.69932985305786 51.4000015258789 46.72 \n",
  203. "2 590.509814347428 4.8487696647644 50.5 65.22 \n",
  204. "3 5.36217880249023 88.92 \n",
  205. "4 172.999227388199 14.5546770095825 75.6999969482422 56.7 "
  206. ]
  207. },
  208. "execution_count": 3,
  209. "metadata": {},
  210. "output_type": "execute_result"
  211. }
  212. ],
  213. "source": [
  214. "data.head(5)"
  215. ]
  216. },
  217. {
  218. "cell_type": "code",
  219. "execution_count": 4,
  220. "metadata": {},
  221. "outputs": [
  222. {
  223. "name": "stdout",
  224. "output_type": "stream",
  225. "text": [
  226. "(213, 16)\n",
  227. "213\n",
  228. "16\n"
  229. ]
  230. }
  231. ],
  232. "source": [
  233. "print(data.shape) # dimension of dataframe\n",
  234. "print (len(data)) # number of observations (rows)\n",
  235. "print (len(data.columns)) # number of variables (columns)"
  236. ]
  237. },
  238. {
  239. "cell_type": "code",
  240. "execution_count": 5,
  241. "metadata": {},
  242. "outputs": [
  243. {
  244. "data": {
  245. "text/plain": [
  246. "Index(['country', 'incomeperperson', 'alcconsumption', 'armedforcesrate',\n",
  247. " 'breastcancerper100th', 'co2emissions', 'femaleemployrate', 'hivrate',\n",
  248. " 'internetuserate', 'lifeexpectancy', 'oilperperson', 'polityscore',\n",
  249. " 'relectricperperson', 'suicideper100th', 'employrate', 'urbanrate'],\n",
  250. " dtype='object')"
  251. ]
  252. },
  253. "execution_count": 5,
  254. "metadata": {},
  255. "output_type": "execute_result"
  256. }
  257. ],
  258. "source": [
  259. "# Column names\n",
  260. "data.columns"
  261. ]
  262. },
  263. {
  264. "cell_type": "markdown",
  265. "metadata": {},
  266. "source": [
  267. "### Convert the variables used in the analysis to numeric"
  268. ]
  269. },
  270. {
  271. "cell_type": "code",
  272. "execution_count": 6,
  273. "metadata": {
  274. "collapsed": true
  275. },
  276. "outputs": [],
  277. "source": [
  278. "# Convert the variables used in the analysis to numeric (unparseable values become NaN)\n",
  279. "data['suicideper100th'] =pd.to_numeric(data['suicideper100th'], errors='coerce')\n",
  280. "data['incomeperperson'] =pd.to_numeric(data['incomeperperson'], errors='coerce')\n",
  281. "data['internetuserate'] =pd.to_numeric(data['internetuserate'], errors='coerce')\n",
  282. "data['urbanrate'] =pd.to_numeric(data['urbanrate'], errors='coerce')"
  283. ]
  284. },
  285. {
  286. "cell_type": "code",
  287. "execution_count": 7,
  288. "metadata": {},
  289. "outputs": [
  290. {
  291. "name": "stdout",
  292. "output_type": "stream",
  293. "text": [
  294. "<class 'pandas.core.frame.DataFrame'>\n",
  295. "RangeIndex: 213 entries, 0 to 212\n",
  296. "Data columns (total 16 columns):\n",
  297. "country 213 non-null object\n",
  298. "incomeperperson 190 non-null float64\n",
  299. "alcconsumption 213 non-null object\n",
  300. "armedforcesrate 213 non-null object\n",
  301. "breastcancerper100th 213 non-null object\n",
  302. "co2emissions 213 non-null object\n",
  303. "femaleemployrate 213 non-null object\n",
  304. "hivrate 213 non-null object\n",
  305. "internetuserate 192 non-null float64\n",
  306. "lifeexpectancy 213 non-null object\n",
  307. "oilperperson 213 non-null object\n",
  308. "polityscore 213 non-null object\n",
  309. "relectricperperson 213 non-null object\n",
  310. "suicideper100th 191 non-null float64\n",
  311. "employrate 213 non-null object\n",
  312. "urbanrate 203 non-null float64\n",
  313. "dtypes: float64(4), object(12)\n",
  314. "memory usage: 26.7+ KB\n"
  315. ]
  316. }
  317. ],
  318. "source": [
  319. "# Check type of data after converting\n",
  320. "data.info()"
  321. ]
  322. },
  323. {
  324. "cell_type": "markdown",
  325. "metadata": {},
  326. "source": [
  327. "### Make subset of the data"
  328. ]
  329. },
  330. {
  331. "cell_type": "code",
  332. "execution_count": 8,
  333. "metadata": {
  334. "collapsed": true
  335. },
  336. "outputs": [],
  337. "source": [
  338. "sub_data = data[['country','suicideper100th', 'incomeperperson', 'internetuserate', 'urbanrate']].copy()"
  339. ]
  340. },
  341. {
  342. "cell_type": "code",
  343. "execution_count": 9,
  344. "metadata": {},
  345. "outputs": [
  346. {
  347. "data": {
  348. "text/html": [
  349. "<div>\n",
  350. "<table border=\"1\" class=\"dataframe\">\n",
  351. " <thead>\n",
  352. " <tr style=\"text-align: right;\">\n",
  353. " <th></th>\n",
  354. " <th>country</th>\n",
  355. " <th>suicideper100th</th>\n",
  356. " <th>incomeperperson</th>\n",
  357. " <th>internetuserate</th>\n",
  358. " <th>urbanrate</th>\n",
  359. " </tr>\n",
  360. " </thead>\n",
  361. " <tbody>\n",
  362. " <tr>\n",
  363. " <th>106</th>\n",
  364. " <td>Lesotho</td>\n",
  365. " <td>7.858619</td>\n",
  366. " <td>495.734247</td>\n",
  367. " <td>3.860565</td>\n",
  368. " <td>25.46</td>\n",
  369. " </tr>\n",
  370. " <tr>\n",
  371. " <th>92</th>\n",
  372. " <td>Italy</td>\n",
  373. " <td>4.930045</td>\n",
  374. " <td>18982.269285</td>\n",
  375. " <td>53.740217</td>\n",
  376. " <td>68.08</td>\n",
  377. " </tr>\n",
  378. " <tr>\n",
  379. " <th>131</th>\n",
  380. " <td>Mozambique</td>\n",
  381. " <td>10.550375</td>\n",
  382. " <td>389.763634</td>\n",
  383. " <td>4.170136</td>\n",
  384. " <td>36.84</td>\n",
  385. " </tr>\n",
  386. " <tr>\n",
  387. " <th>23</th>\n",
  388. " <td>Bosnia and Herzegovina</td>\n",
  389. " <td>11.836716</td>\n",
  390. " <td>2183.344867</td>\n",
  391. " <td>52.002061</td>\n",
  392. " <td>47.44</td>\n",
  393. " </tr>\n",
  394. " <tr>\n",
  395. " <th>199</th>\n",
  396. " <td>Uganda</td>\n",
  397. " <td>12.289122</td>\n",
  398. " <td>377.421113</td>\n",
  399. " <td>12.500255</td>\n",
  400. " <td>12.98</td>\n",
  401. " </tr>\n",
  402. " <tr>\n",
  403. " <th>122</th>\n",
  404. " <td>Mauritania</td>\n",
  405. " <td>6.882952</td>\n",
  406. " <td>609.131206</td>\n",
  407. " <td>2.999803</td>\n",
  408. " <td>41.00</td>\n",
  409. " </tr>\n",
  410. " <tr>\n",
  411. " <th>68</th>\n",
  412. " <td>Georgia</td>\n",
  413. " <td>1.574350</td>\n",
  414. " <td>1258.762596</td>\n",
  415. " <td>26.297251</td>\n",
  416. " <td>52.74</td>\n",
  417. " </tr>\n",
  418. " <tr>\n",
  419. " <th>21</th>\n",
  420. " <td>Bhutan</td>\n",
  421. " <td>15.542603</td>\n",
  422. " <td>1324.194906</td>\n",
  423. " <td>13.598876</td>\n",
  424. " <td>34.48</td>\n",
  425. " </tr>\n",
  426. " <tr>\n",
  427. " <th>133</th>\n",
  428. " <td>Namibia</td>\n",
  429. " <td>8.021970</td>\n",
  430. " <td>2667.246710</td>\n",
  431. " <td>6.500823</td>\n",
  432. " <td>36.84</td>\n",
  433. " </tr>\n",
  434. " <tr>\n",
  435. " <th>30</th>\n",
  436. " <td>Cambodia</td>\n",
  437. " <td>4.961071</td>\n",
  438. " <td>557.947513</td>\n",
  439. " <td>1.259934</td>\n",
  440. " <td>21.56</td>\n",
  441. " </tr>\n",
  442. " </tbody>\n",
  443. "</table>\n",
  444. "</div>"
  445. ],
  446. "text/plain": [
  447. " country suicideper100th incomeperperson \\\n",
  448. "106 Lesotho 7.858619 495.734247 \n",
  449. "92 Italy 4.930045 18982.269285 \n",
  450. "131 Mozambique 10.550375 389.763634 \n",
  451. "23 Bosnia and Herzegovina 11.836716 2183.344867 \n",
  452. "199 Uganda 12.289122 377.421113 \n",
  453. "122 Mauritania 6.882952 609.131206 \n",
  454. "68 Georgia 1.574350 1258.762596 \n",
  455. "21 Bhutan 15.542603 1324.194906 \n",
  456. "133 Namibia 8.021970 2667.246710 \n",
  457. "30 Cambodia 4.961071 557.947513 \n",
  458. "\n",
  459. " internetuserate urbanrate \n",
  460. "106 3.860565 25.46 \n",
  461. "92 53.740217 68.08 \n",
  462. "131 4.170136 36.84 \n",
  463. "23 52.002061 47.44 \n",
  464. "199 12.500255 12.98 \n",
  465. "122 2.999803 41.00 \n",
  466. "68 26.297251 52.74 \n",
  467. "21 13.598876 34.48 \n",
  468. "133 6.500823 36.84 \n",
  469. "30 1.259934 21.56 "
  470. ]
  471. },
  472. "execution_count": 9,
  473. "metadata": {},
  474. "output_type": "execute_result"
  475. }
  476. ],
  477. "source": [
  478. "sub_data.sample(10)"
  479. ]
  480. },
  481. {
  482. "cell_type": "markdown",
  483. "metadata": {},
  484. "source": [
  485. "### Check for missing values"
  486. ]
  487. },
  488. {
  489. "cell_type": "code",
  490. "execution_count": 10,
  491. "metadata": {},
  492. "outputs": [
  493. {
  494. "data": {
  495. "text/plain": [
  496. "country 0\n",
  497. "suicideper100th 22\n",
  498. "incomeperperson 23\n",
  499. "internetuserate 21\n",
  500. "urbanrate 10\n",
  501. "dtype: int64"
  502. ]
  503. },
  504. "execution_count": 10,
  505. "metadata": {},
  506. "output_type": "execute_result"
  507. }
  508. ],
  509. "source": [
  510. "sub_data.isnull().sum()"
  511. ]
  512. },
  513. {
  514. "cell_type": "code",
  515. "execution_count": 11,
  516. "metadata": {},
  517. "outputs": [
  518. {
  519. "data": {
  520. "text/plain": [
  521. "(213, 5)"
  522. ]
  523. },
  524. "execution_count": 11,
  525. "metadata": {},
  526. "output_type": "execute_result"
  527. }
  528. ],
  529. "source": [
  530. "sub_data.shape"
  531. ]
  532. },
  533. {
  534. "cell_type": "markdown",
  535. "metadata": {},
  536. "source": [
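  537. "### Group the quantities into appropriate bins\n\n**Note:** the binning loop in this section recodes `sub_data` in place, so it is not idempotent: re-create `sub_data` (re-run the subset cell) before running the loop a second time, otherwise the already-assigned bin codes get binned again."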
  538. ]
  539. },
  540. {
  541. "cell_type": "code",
  542. "execution_count": 12,
  543. "metadata": {},
  544. "outputs": [
  545. {
  546. "data": {
  547. "text/html": [
  548. "<div>\n",
  549. "<table border=\"1\" class=\"dataframe\">\n",
  550. " <thead>\n",
  551. " <tr style=\"text-align: right;\">\n",
  552. " <th></th>\n",
  553. " <th>suicideper100th</th>\n",
  554. " <th>incomeperperson</th>\n",
  555. " <th>internetuserate</th>\n",
  556. " <th>urbanrate</th>\n",
  557. " </tr>\n",
  558. " </thead>\n",
  559. " <tbody>\n",
  560. " <tr>\n",
  561. " <th>count</th>\n",
  562. " <td>191.000000</td>\n",
  563. " <td>190.000000</td>\n",
  564. " <td>192.000000</td>\n",
  565. " <td>203.000000</td>\n",
  566. " </tr>\n",
  567. " <tr>\n",
  568. " <th>mean</th>\n",
  569. " <td>9.640839</td>\n",
  570. " <td>8740.966076</td>\n",
  571. " <td>35.632716</td>\n",
  572. " <td>56.769360</td>\n",
  573. " </tr>\n",
  574. " <tr>\n",
  575. " <th>std</th>\n",
  576. " <td>6.300178</td>\n",
  577. " <td>14262.809083</td>\n",
  578. " <td>27.780285</td>\n",
  579. " <td>23.844933</td>\n",
  580. " </tr>\n",
  581. " <tr>\n",
  582. " <th>min</th>\n",
  583. " <td>0.201449</td>\n",
  584. " <td>103.775857</td>\n",
  585. " <td>0.210066</td>\n",
  586. " <td>10.400000</td>\n",
  587. " </tr>\n",
  588. " <tr>\n",
  589. " <th>25%</th>\n",
  590. " <td>4.988449</td>\n",
  591. " <td>748.245151</td>\n",
  592. " <td>9.999604</td>\n",
  593. " <td>36.830000</td>\n",
  594. " </tr>\n",
  595. " <tr>\n",
  596. " <th>50%</th>\n",
  597. " <td>8.262893</td>\n",
  598. " <td>2553.496056</td>\n",
  599. " <td>31.810121</td>\n",
  600. " <td>57.940000</td>\n",
  601. " </tr>\n",
  602. " <tr>\n",
  603. " <th>75%</th>\n",
  604. " <td>12.328551</td>\n",
  605. " <td>9379.891165</td>\n",
  606. " <td>56.416046</td>\n",
  607. " <td>74.210000</td>\n",
  608. " </tr>\n",
  609. " <tr>\n",
  610. " <th>max</th>\n",
  611. " <td>35.752872</td>\n",
  612. " <td>105147.437697</td>\n",
  613. " <td>95.638113</td>\n",
  614. " <td>100.000000</td>\n",
  615. " </tr>\n",
  616. " </tbody>\n",
  617. "</table>\n",
  618. "</div>"
  619. ],
  620. "text/plain": [
  621. " suicideper100th incomeperperson internetuserate urbanrate\n",
  622. "count 191.000000 190.000000 192.000000 203.000000\n",
  623. "mean 9.640839 8740.966076 35.632716 56.769360\n",
  624. "std 6.300178 14262.809083 27.780285 23.844933\n",
  625. "min 0.201449 103.775857 0.210066 10.400000\n",
  626. "25% 4.988449 748.245151 9.999604 36.830000\n",
  627. "50% 8.262893 2553.496056 31.810121 57.940000\n",
  628. "75% 12.328551 9379.891165 56.416046 74.210000\n",
  629. "max 35.752872 105147.437697 95.638113 100.000000"
  630. ]
  631. },
  632. "execution_count": 12,
  633. "metadata": {},
  634. "output_type": "execute_result"
  635. }
  636. ],
  637. "source": [
  638. "sub_data.describe()"
  639. ]
  640. },
  641. {
  642. "cell_type": "code",
  643. "execution_count": 13,
  644. "metadata": {
  645. "collapsed": true
  646. },
  647. "outputs": [],
  648. "source": [
  649. "for col in ('suicideper100th', 'incomeperperson', 'internetuserate', 'urbanrate'): \n",
  650. " if col == 'suicideper100th':\n",
  651. " sub_data.loc[sub_data[col] <= 5, col] = 1\n",
  652. " sub_data.loc[( sub_data[col] > 5) & ( sub_data[col] <= 10), col] = 2\n",
  653. " sub_data.loc[( sub_data[col] > 10) & ( sub_data[col] <= 15), col] = 3\n",
  654. " sub_data.loc[( sub_data[col] > 15) & ( sub_data[col] <= 20), col] = 4\n",
  655. " sub_data.loc[( sub_data[col] > 20) & ( sub_data[col] <= 25), col] = 5\n",
  656. " sub_data.loc[( sub_data[col] > 25) & ( sub_data[col] <= 30), col] = 6\n",
  657. " sub_data.loc[( sub_data[col] > 30) & ( sub_data[col] <= 35), col] = 7\n",
  658. " sub_data.loc[( sub_data[col] > 35) & ( sub_data[col] <= 40), col] = 8\n",
  659. " \n",
  660. " \n",
  661. " if col == 'incomeperperson':\n",
  662. " sub_data.loc[sub_data[col] <= 1000, col] = 1\n",
  663. " sub_data.loc[( sub_data[col] > 1000) & ( sub_data[col] <= 2000), col] = 2\n",
  664. " sub_data.loc[( sub_data[col] > 2000) & ( sub_data[col] <= 3000), col] = 3\n",
  665. " sub_data.loc[( sub_data[col] > 3000) & ( sub_data[col] <= 4000), col] = 4\n",
  666. " sub_data.loc[( sub_data[col] > 4000) & ( sub_data[col] <= 5000), col] = 5\n",
  667. " sub_data.loc[( sub_data[col] > 5000) & ( sub_data[col] <= 6000), col] = 6\n",
  668. " sub_data.loc[( sub_data[col] > 6000) & ( sub_data[col] <= 7000), col] = 7\n",
  669. " sub_data.loc[( sub_data[col] > 7000) & ( sub_data[col] <= 8000), col] = 8\n",
  670. " sub_data.loc[( sub_data[col] > 8000) & ( sub_data[col] <= 9000), col] = 9\n",
  671. " sub_data.loc[( sub_data[col] > 9000) & ( sub_data[col] <= 10000), col] = 10\n",
  672. " sub_data.loc[( sub_data[col] > 10000) & ( sub_data[col] <= 11000), col] =11\n",
  673. " sub_data.loc[( sub_data[col] > 11000) & ( sub_data[col] <= 12000), col] = 12\n",
  674. " sub_data.loc[( sub_data[col] > 12000) & ( sub_data[col] <= 13000), col] = 13\n",
  675. " sub_data.loc[( sub_data[col] > 13000) & ( sub_data[col] <= 14000), col] = 14\n",
  676. " sub_data.loc[( sub_data[col] > 14000) & ( sub_data[col] <= 15000), col] = 15\n",
  677. " sub_data.loc[( sub_data[col] > 15000), col] = 16\n",
  678. " \n",
  679. " \n",
  680. " \n",
  681. " if col == 'internetuserate':\n",
  682. " sub_data.loc[sub_data[col] <= 10, col] = 1\n",
  683. " sub_data.loc[( sub_data[col] > 10) & ( sub_data[col] <= 20), col] = 2\n",
  684. " sub_data.loc[( sub_data[col] > 20) & ( sub_data[col] <= 30), col] = 3\n",
  685. " sub_data.loc[( sub_data[col] > 30) & ( sub_data[col] <= 40), col] = 4\n",
  686. " sub_data.loc[( sub_data[col] > 40) & ( sub_data[col] <= 50), col] = 5\n",
  687. " sub_data.loc[( sub_data[col] > 50) & ( sub_data[col] <= 60), col] = 6\n",
  688. " sub_data.loc[( sub_data[col] > 60) & ( sub_data[col] <= 70), col] = 7\n",
  689. " sub_data.loc[( sub_data[col] > 70) & ( sub_data[col] <= 80), col] = 8\n",
  690. " sub_data.loc[( sub_data[col] > 80) & ( sub_data[col] <= 90), col] = 9\n",
  691. " sub_data.loc[( sub_data[col] > 90) & ( sub_data[col] <= 100), col] = 10\n",
  692. " sub_data.loc[( sub_data[col] > 100) & ( sub_data[col] <= 110), col] =11\n",
  693. " sub_data.loc[( sub_data[col] > 110), col] = 12\n",
  694. " \n",
  695. " \n",
  696. " if col == 'urbanrate':\n",
  697. " sub_data.loc[sub_data[col] <= 10, col] = 1\n",
  698. " sub_data.loc[( sub_data[col] > 10) & ( sub_data[col] <= 20), col] = 2\n",
  699. " sub_data.loc[( sub_data[col] > 20) & ( sub_data[col] <= 30), col] = 3\n",
  700. " sub_data.loc[( sub_data[col] > 30) & ( sub_data[col] <= 40), col] = 4\n",
  701. " sub_data.loc[( sub_data[col] > 40) & ( sub_data[col] <= 50), col] = 5\n",
  702. " sub_data.loc[( sub_data[col] > 50) & ( sub_data[col] <= 60), col] = 6\n",
  703. " sub_data.loc[( sub_data[col] > 60) & ( sub_data[col] <= 70), col] = 7\n",
  704. " sub_data.loc[( sub_data[col] > 70) & ( sub_data[col] <= 80), col] = 8\n",
  705. " sub_data.loc[( sub_data[col] > 80) & ( sub_data[col] <= 90), col] = 9\n",
  706. " sub_data.loc[( sub_data[col] > 90) & ( sub_data[col] <= 100), col] = 10\n",
  707. " sub_data.loc[( sub_data[col] > 100) & ( sub_data[col] <= 110), col] =11\n",
  708. " sub_data.loc[( sub_data[col] > 110), col] = 12\n",
  709. " \n",
  710. " \n",
  711. " \n",
  712. " "
  713. ]
  714. },
  715. {
  716. "cell_type": "markdown",
  717. "metadata": {},
  718. "source": [
  719. "### Counts and percentages (i.e. frequency distributions) for each variable"
  720. ]
  721. },
  722. {
  723. "cell_type": "code",
  724. "execution_count": 14,
  725. "metadata": {},
  726. "outputs": [
  727. {
  728. "data": {
  729. "text/html": [
  730. "<div>\n",
  731. "<table border=\"1\" class=\"dataframe\">\n",
  732. " <thead>\n",
  733. " <tr style=\"text-align: right;\">\n",
  734. " <th></th>\n",
  735. " <th>country</th>\n",
  736. " <th>suicideper100th</th>\n",
  737. " <th>incomeperperson</th>\n",
  738. " <th>internetuserate</th>\n",
  739. " <th>urbanrate</th>\n",
  740. " </tr>\n",
  741. " </thead>\n",
  742. " <tbody>\n",
  743. " <tr>\n",
  744. " <th>47</th>\n",
  745. " <td>Cuba</td>\n",
  746. " <td>3.0</td>\n",
  747. " <td>5.0</td>\n",
  748. " <td>2.0</td>\n",
  749. " <td>8.0</td>\n",
  750. " </tr>\n",
  751. " <tr>\n",
  752. " <th>119</th>\n",
  753. " <td>Malta</td>\n",
  754. " <td>1.0</td>\n",
  755. " <td>12.0</td>\n",
  756. " <td>7.0</td>\n",
  757. " <td>10.0</td>\n",
  758. " </tr>\n",
  759. " <tr>\n",
  760. " <th>171</th>\n",
  761. " <td>Seychelles</td>\n",
  762. " <td>2.0</td>\n",
  763. " <td>9.0</td>\n",
  764. " <td>5.0</td>\n",
  765. " <td>6.0</td>\n",
  766. " </tr>\n",
  767. " <tr>\n",
  768. " <th>142</th>\n",
  769. " <td>Nigeria</td>\n",
  770. " <td>2.0</td>\n",
  771. " <td>1.0</td>\n",
  772. " <td>3.0</td>\n",
  773. " <td>5.0</td>\n",
  774. " </tr>\n",
  775. " <tr>\n",
  776. " <th>16</th>\n",
  777. " <td>Belarus</td>\n",
  778. " <td>6.0</td>\n",
  779. " <td>3.0</td>\n",
  780. " <td>4.0</td>\n",
  781. " <td>8.0</td>\n",
  782. " </tr>\n",
  783. " <tr>\n",
  784. " <th>61</th>\n",
  785. " <td>Faeroe Islands</td>\n",
  786. " <td>NaN</td>\n",
  787. " <td>NaN</td>\n",
  788. " <td>8.0</td>\n",
  789. " <td>5.0</td>\n",
  790. " </tr>\n",
  791. " <tr>\n",
  792. " <th>68</th>\n",
  793. " <td>Georgia</td>\n",
  794. " <td>1.0</td>\n",
  795. " <td>2.0</td>\n",
  796. " <td>3.0</td>\n",
  797. " <td>6.0</td>\n",
  798. " </tr>\n",
  799. " <tr>\n",
  800. " <th>5</th>\n",
  801. " <td>Antigua and Barbuda</td>\n",
  802. " <td>1.0</td>\n",
  803. " <td>12.0</td>\n",
  804. " <td>9.0</td>\n",
  805. " <td>4.0</td>\n",
  806. " </tr>\n",
  807. " <tr>\n",
  808. " <th>117</th>\n",
  809. " <td>Maldives</td>\n",
  810. " <td>5.0</td>\n",
  811. " <td>5.0</td>\n",
  812. " <td>3.0</td>\n",
  813. " <td>4.0</td>\n",
  814. " </tr>\n",
  815. " <tr>\n",
  816. " <th>110</th>\n",
  817. " <td>Lithuania</td>\n",
  818. " <td>7.0</td>\n",
  819. " <td>6.0</td>\n",
  820. " <td>7.0</td>\n",
  821. " <td>7.0</td>\n",
  822. " </tr>\n",
  823. " <tr>\n",
  824. " <th>85</th>\n",
  825. " <td>Iceland</td>\n",
  826. " <td>3.0</td>\n",
  827. " <td>16.0</td>\n",
  828. " <td>10.0</td>\n",
  829. " <td>10.0</td>\n",
  830. " </tr>\n",
  831. " <tr>\n",
  832. " <th>72</th>\n",
  833. " <td>Greece</td>\n",
  834. " <td>1.0</td>\n",
  835. " <td>14.0</td>\n",
  836. " <td>5.0</td>\n",
  837. " <td>7.0</td>\n",
  838. " </tr>\n",
  839. " <tr>\n",
  840. " <th>94</th>\n",
  841. " <td>Japan</td>\n",
  842. " <td>4.0</td>\n",
  843. " <td>16.0</td>\n",
  844. " <td>8.0</td>\n",
  845. " <td>7.0</td>\n",
  846. " </tr>\n",
  847. " <tr>\n",
  848. " <th>21</th>\n",
  849. " <td>Bhutan</td>\n",
  850. " <td>4.0</td>\n",
  851. " <td>2.0</td>\n",
  852. " <td>2.0</td>\n",
  853. " <td>4.0</td>\n",
  854. " </tr>\n",
  855. " <tr>\n",
  856. " <th>137</th>\n",
  857. " <td>Netherlands Antilles</td>\n",
  858. " <td>NaN</td>\n",
  859. " <td>NaN</td>\n",
  860. " <td>NaN</td>\n",
  861. " <td>10.0</td>\n",
  862. " </tr>\n",
  863. " <tr>\n",
  864. " <th>139</th>\n",
  865. " <td>New Zealand</td>\n",
  866. " <td>3.0</td>\n",
  867. " <td>15.0</td>\n",
  868. " <td>9.0</td>\n",
  869. " <td>9.0</td>\n",
  870. " </tr>\n",
  871. " <tr>\n",
  872. " <th>88</th>\n",
  873. " <td>Iran</td>\n",
  874. " <td>2.0</td>\n",
  875. " <td>3.0</td>\n",
  876. " <td>2.0</td>\n",
  877. " <td>7.0</td>\n",
  878. " </tr>\n",
  879. " <tr>\n",
  880. " <th>60</th>\n",
  881. " <td>Ethiopia</td>\n",
  882. " <td>3.0</td>\n",
  883. " <td>1.0</td>\n",
  884. " <td>1.0</td>\n",
  885. " <td>2.0</td>\n",
  886. " </tr>\n",
  887. " <tr>\n",
  888. " <th>13</th>\n",
  889. " <td>Bahrain</td>\n",
  890. " <td>1.0</td>\n",
  891. " <td>13.0</td>\n",
  892. " <td>6.0</td>\n",
  893. " <td>9.0</td>\n",
  894. " </tr>\n",
  895. " <tr>\n",
  896. " <th>116</th>\n",
  897. " <td>Malaysia</td>\n",
  898. " <td>2.0</td>\n",
  899. " <td>6.0</td>\n",
  900. " <td>6.0</td>\n",
  901. " <td>8.0</td>\n",
  902. " </tr>\n",
  903. " </tbody>\n",
  904. "</table>\n",
  905. "</div>"
  906. ],
  907. "text/plain": [
  908. " country suicideper100th incomeperperson internetuserate \\\n",
  909. "47 Cuba 3.0 5.0 2.0 \n",
  910. "119 Malta 1.0 12.0 7.0 \n",
  911. "171 Seychelles 2.0 9.0 5.0 \n",
  912. "142 Nigeria 2.0 1.0 3.0 \n",
  913. "16 Belarus 6.0 3.0 4.0 \n",
  914. "61 Faeroe Islands NaN NaN 8.0 \n",
  915. "68 Georgia 1.0 2.0 3.0 \n",
  916. "5 Antigua and Barbuda 1.0 12.0 9.0 \n",
  917. "117 Maldives 5.0 5.0 3.0 \n",
  918. "110 Lithuania 7.0 6.0 7.0 \n",
  919. "85 Iceland 3.0 16.0 10.0 \n",
  920. "72 Greece 1.0 14.0 5.0 \n",
  921. "94 Japan 4.0 16.0 8.0 \n",
  922. "21 Bhutan 4.0 2.0 2.0 \n",
  923. "137 Netherlands Antilles NaN NaN NaN \n",
  924. "139 New Zealand 3.0 15.0 9.0 \n",
  925. "88 Iran 2.0 3.0 2.0 \n",
  926. "60 Ethiopia 3.0 1.0 1.0 \n",
  927. "13 Bahrain 1.0 13.0 6.0 \n",
  928. "116 Malaysia 2.0 6.0 6.0 \n",
  929. "\n",
  930. " urbanrate \n",
  931. "47 8.0 \n",
  932. "119 10.0 \n",
  933. "171 6.0 \n",
  934. "142 5.0 \n",
  935. "16 8.0 \n",
  936. "61 5.0 \n",
  937. "68 6.0 \n",
  938. "5 4.0 \n",
  939. "117 4.0 \n",
  940. "110 7.0 \n",
  941. "85 10.0 \n",
  942. "72 7.0 \n",
  943. "94 7.0 \n",
  944. "21 4.0 \n",
  945. "137 10.0 \n",
  946. "139 9.0 \n",
  947. "88 7.0 \n",
  948. "60 2.0 \n",
  949. "13 9.0 \n",
  950. "116 8.0 "
  951. ]
  952. },
  953. "execution_count": 14,
  954. "metadata": {},
  955. "output_type": "execute_result"
  956. }
  957. ],
  958. "source": [
  959. "sub_data.sample(20)"
  960. ]
  961. },
  962. {
  963. "cell_type": "code",
  964. "execution_count": 15,
  965. "metadata": {},
  966. "outputs": [
  967. {
  968. "name": "stdout",
  969. "output_type": "stream",
  970. "text": [
  971. "counts for suicideper100th\n",
  972. " 2.0 65\n",
  973. " 3.0 51\n",
  974. " 1.0 49\n",
  975. "NaN 22\n",
  976. " 4.0 12\n",
  977. " 5.0 6\n",
  978. " 6.0 6\n",
  979. " 7.0 1\n",
  980. " 8.0 1\n",
  981. "Name: suicideper100th, dtype: int64\n",
  982. "percentages for suicideper100th\n",
  983. " 2.0 0.305164\n",
  984. " 3.0 0.239437\n",
  985. " 1.0 0.230047\n",
  986. "NaN 0.103286\n",
  987. " 4.0 0.056338\n",
  988. " 5.0 0.028169\n",
  989. " 6.0 0.028169\n",
  990. " 7.0 0.004695\n",
  991. " 8.0 0.004695\n",
  992. "Name: suicideper100th, dtype: float64\n"
  993. ]
  994. }
  995. ],
  996. "source": [
  997. "print ('counts for suicideper100th')\n",
  998. "c1 = sub_data['suicideper100th'].value_counts(sort=True, dropna=False)\n",
  999. "print (c1)\n",
  1000. "\n",
  1001. "\n",
  1002. "print ('percentages for suicideper100th')\n",
  1003. "p1 = sub_data['suicideper100th'].value_counts(sort=True, dropna=False, normalize=True)\n",
  1004. "print (p1)"
  1005. ]
  1006. },
  1007. {
  1008. "cell_type": "code",
  1009. "execution_count": 16,
  1010. "metadata": {},
  1011. "outputs": [
  1012. {
  1013. "name": "stdout",
  1014. "output_type": "stream",
  1015. "text": [
  1016. "counts for incomeperperson\n",
  1017. " 1.0 54\n",
  1018. " 16.0 37\n",
  1019. " 2.0 26\n",
  1020. "NaN 23\n",
  1021. " 3.0 22\n",
  1022. " 6.0 11\n",
  1023. " 7.0 8\n",
  1024. " 5.0 7\n",
  1025. " 4.0 6\n",
  1026. " 12.0 4\n",
  1027. " 10.0 4\n",
  1028. " 9.0 3\n",
  1029. " 11.0 2\n",
  1030. " 13.0 2\n",
  1031. " 8.0 2\n",
  1032. " 14.0 1\n",
  1033. " 15.0 1\n",
  1034. "Name: incomeperperson, dtype: int64\n",
  1035. "percentages for incomeperperson\n",
  1036. " 1.0 0.253521\n",
  1037. " 16.0 0.173709\n",
  1038. " 2.0 0.122066\n",
  1039. "NaN 0.107981\n",
  1040. " 3.0 0.103286\n",
  1041. " 6.0 0.051643\n",
  1042. " 7.0 0.037559\n",
  1043. " 5.0 0.032864\n",
  1044. " 4.0 0.028169\n",
  1045. " 12.0 0.018779\n",
  1046. " 10.0 0.018779\n",
  1047. " 9.0 0.014085\n",
  1048. " 11.0 0.009390\n",
  1049. " 13.0 0.009390\n",
  1050. " 8.0 0.009390\n",
  1051. " 14.0 0.004695\n",
  1052. " 15.0 0.004695\n",
  1053. "Name: incomeperperson, dtype: float64\n"
  1054. ]
  1055. }
  1056. ],
  1057. "source": [
  1058. "print ('counts for incomeperperson')\n",
  1059. "c2 = sub_data['incomeperperson'].value_counts(sort=True, dropna=False)\n",
  1060. "print (c2)\n",
  1061. "\n",
  1062. "\n",
  1063. "print ('percentages for incomeperperson')\n",
  1064. "p2= sub_data['incomeperperson'].value_counts(sort=True, dropna=False, normalize=True)\n",
  1065. "print (p2)"
  1066. ]
  1067. },
  1068. {
  1069. "cell_type": "code",
  1070. "execution_count": 17,
  1071. "metadata": {},
  1072. "outputs": [
  1073. {
  1074. "name": "stdout",
  1075. "output_type": "stream",
  1076. "text": [
  1077. "counts for internetuserate\n",
  1078. " 1.0 49\n",
  1079. " 2.0 27\n",
  1080. " 5.0 25\n",
  1081. "NaN 21\n",
  1082. " 4.0 18\n",
  1083. " 3.0 17\n",
  1084. " 8.0 16\n",
  1085. " 7.0 14\n",
  1086. " 9.0 12\n",
  1087. " 6.0 9\n",
  1088. " 10.0 5\n",
  1089. "Name: internetuserate, dtype: int64\n",
  1090. "percentages for internetuserate\n",
  1091. " 1.0 0.230047\n",
  1092. " 2.0 0.126761\n",
  1093. " 5.0 0.117371\n",
  1094. "NaN 0.098592\n",
  1095. " 4.0 0.084507\n",
  1096. " 3.0 0.079812\n",
  1097. " 8.0 0.075117\n",
  1098. " 7.0 0.065728\n",
  1099. " 9.0 0.056338\n",
  1100. " 6.0 0.042254\n",
  1101. " 10.0 0.023474\n",
  1102. "Name: internetuserate, dtype: float64\n"
  1103. ]
  1104. }
  1105. ],
  1106. "source": [
  1107. "# Tabulate internetuserate: dropna=False keeps NaN as its own row; normalize=True gives fractions of all rows.\n",
  1108. "print('counts for internetuserate')\n",
  1109. "internet_values = sub_data['internetuserate']\n",
  1110. "c3 = internet_values.value_counts(sort=True, dropna=False)\n",
  1111. "print(c3)\n",
  1112. "print('percentages for internetuserate')\n",
  1113. "p3 = internet_values.value_counts(sort=True, dropna=False, normalize=True)\n",
  1114. "print(p3)"
  1115. ]
  1116. },
  1117. {
  1118. "cell_type": "code",
  1119. "execution_count": 18,
  1120. "metadata": {},
  1121. "outputs": [
  1122. {
  1123. "name": "stdout",
  1124. "output_type": "stream",
  1125. "text": [
  1126. "counts for urbanrate\n",
  1127. " 7.0 34\n",
  1128. " 8.0 24\n",
  1129. " 4.0 24\n",
  1130. " 6.0 24\n",
  1131. " 5.0 22\n",
  1132. " 3.0 22\n",
  1133. " 9.0 21\n",
  1134. " 10.0 19\n",
  1135. " 2.0 13\n",
  1136. "NaN 10\n",
  1137. "Name: urbanrate, dtype: int64\n",
  1138. "percentages for urbanrate\n",
  1139. " 7.0 0.159624\n",
  1140. " 8.0 0.112676\n",
  1141. " 4.0 0.112676\n",
  1142. " 6.0 0.112676\n",
  1143. " 5.0 0.103286\n",
  1144. " 3.0 0.103286\n",
  1145. " 9.0 0.098592\n",
  1146. " 10.0 0.089202\n",
  1147. " 2.0 0.061033\n",
  1148. "NaN 0.046948\n",
  1149. "Name: urbanrate, dtype: float64\n"
  1150. ]
  1151. }
  1152. ],
  1153. "source": [
  1154. "# Tabulate urbanrate: dropna=False keeps NaN as its own row; normalize=True gives fractions of all rows.\n",
  1155. "print('counts for urbanrate')\n",
  1156. "urban_values = sub_data['urbanrate']\n",
  1157. "c4 = urban_values.value_counts(sort=True, dropna=False)\n",
  1158. "print(c4)\n",
  1159. "print('percentages for urbanrate')\n",
  1160. "p4 = urban_values.value_counts(sort=True, dropna=False, normalize=True)\n",
  1161. "print(p4)"
  1162. ]
  1163. },
  1164. {
  1165. "cell_type": "code",
  1166. "execution_count": 19,
  1167. "metadata": {},
  1168. "outputs": [
  1169. {
  1170. "data": {
  1171. "text/html": [
  1172. "<div>\n",
  1173. "<table border=\"1\" class=\"dataframe\">\n",
  1174. " <thead>\n",
  1175. " <tr style=\"text-align: right;\">\n",
  1176. " <th></th>\n",
  1177. " <th>suicideper100th</th>\n",
  1178. " <th>incomeperperson</th>\n",
  1179. " <th>internetuserate</th>\n",
  1180. " <th>urbanrate</th>\n",
  1181. " </tr>\n",
  1182. " </thead>\n",
  1183. " <tbody>\n",
  1184. " <tr>\n",
  1185. " <th>count</th>\n",
  1186. " <td>191.000000</td>\n",
  1187. " <td>190.000000</td>\n",
  1188. " <td>192.000000</td>\n",
  1189. " <td>203.000000</td>\n",
  1190. " </tr>\n",
  1191. " <tr>\n",
  1192. " <th>mean</th>\n",
  1193. " <td>2.413613</td>\n",
  1194. " <td>6.068421</td>\n",
  1195. " <td>4.109375</td>\n",
  1196. " <td>6.162562</td>\n",
  1197. " </tr>\n",
  1198. " <tr>\n",
  1199. " <th>std</th>\n",
  1200. " <td>1.310533</td>\n",
  1201. " <td>5.745074</td>\n",
  1202. " <td>2.789944</td>\n",
  1203. " <td>2.377322</td>\n",
  1204. " </tr>\n",
  1205. " <tr>\n",
  1206. " <th>min</th>\n",
  1207. " <td>1.000000</td>\n",
  1208. " <td>1.000000</td>\n",
  1209. " <td>1.000000</td>\n",
  1210. " <td>2.000000</td>\n",
  1211. " </tr>\n",
  1212. " <tr>\n",
  1213. " <th>25%</th>\n",
  1214. " <td>1.000000</td>\n",
  1215. " <td>1.000000</td>\n",
  1216. " <td>1.000000</td>\n",
  1217. " <td>4.000000</td>\n",
  1218. " </tr>\n",
  1219. " <tr>\n",
  1220. " <th>50%</th>\n",
  1221. " <td>2.000000</td>\n",
  1222. " <td>3.000000</td>\n",
  1223. " <td>4.000000</td>\n",
  1224. " <td>6.000000</td>\n",
  1225. " </tr>\n",
  1226. " <tr>\n",
  1227. " <th>75%</th>\n",
  1228. " <td>3.000000</td>\n",
  1229. " <td>10.000000</td>\n",
  1230. " <td>6.000000</td>\n",
  1231. " <td>8.000000</td>\n",
  1232. " </tr>\n",
  1233. " <tr>\n",
  1234. " <th>max</th>\n",
  1235. " <td>8.000000</td>\n",
  1236. " <td>16.000000</td>\n",
  1237. " <td>10.000000</td>\n",
  1238. " <td>10.000000</td>\n",
  1239. " </tr>\n",
  1240. " </tbody>\n",
  1241. "</table>\n",
  1242. "</div>"
  1243. ],
  1244. "text/plain": [
  1245. " suicideper100th incomeperperson internetuserate urbanrate\n",
  1246. "count 191.000000 190.000000 192.000000 203.000000\n",
  1247. "mean 2.413613 6.068421 4.109375 6.162562\n",
  1248. "std 1.310533 5.745074 2.789944 2.377322\n",
  1249. "min 1.000000 1.000000 1.000000 2.000000\n",
  1250. "25% 1.000000 1.000000 1.000000 4.000000\n",
  1251. "50% 2.000000 3.000000 4.000000 6.000000\n",
  1252. "75% 3.000000 10.000000 6.000000 8.000000\n",
  1253. "max 8.000000 16.000000 10.000000 10.000000"
  1254. ]
  1255. },
  1256. "execution_count": 19,
  1257. "metadata": {},
  1258. "output_type": "execute_result"
  1259. }
  1260. ],
  1261. "source": [
  1262. "sub_data.describe()  # per-column count, mean, std, min, quartiles and max"
  1263. ]
  1264. },
  1265. {
  1266. "cell_type": "code",
  1267. "execution_count": null,
  1268. "metadata": {
  1269. "collapsed": true
  1270. },
  1271. "outputs": [],
  1272. "source": []
  1273. }
  1274. ],
  1275. "metadata": {
  1276. "kernelspec": {
  1277. "display_name": "Python3",
  1278. "language": "python",
  1279. "name": "python3"
  1280. },
  1281. "language_info": {
  1282. "codemirror_mode": {
  1283. "name": "ipython",
  1284. "version": 3
  1285. },
  1286. "file_extension": ".py",
  1287. "mimetype": "text/x-python",
  1288. "name": "python",
  1289. "nbconvert_exporter": "python",
  1290. "pygments_lexer": "ipython3",
  1291. "version": "3.6.3"
  1292. }
  1293. },
  1294. "nbformat": 4,
  1295. "nbformat_minor": 2
  1296. }
Add Comment
Please, Sign In to add comment