Advertisement
Guest User

Untitled

a guest
May 27th, 2015
256
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 12.12 KB | None | 0 0
  1. {
  2. "cells": [
  3. {
  4. "cell_type": "markdown",
  5. "metadata": {},
  6. "source": [
  7. "## The analysis of variance (ANOVA)\n",
  8. "***\n",
  9. "- Grand mean\n",
  10. " - $a$ = number of groups\n",
  11. "\n",
  12. " - $n$ = number of observations within each group\n",
  13. "\n",
  14. " - $Y$ = single observation\n",
  15. " \n",
  16. "$\\overline{\\overline{Y}}=\\Large\\frac{1}{an}\\sum \\limits_{a}\\sum \\limits_{n}Y$\n",
  17. "\n",
  18. "- *Sum of squares among groups*\n",
  19. " - (estimate of the variation among groups)\n",
  20. " \n",
  21. "$SS_{among}=n\\sum \\limits_{a}(\\overline{Y}-\\overline{\\overline{Y}})^{2}$\n",
  22. "\n",
  23. "- *Sum of squares within groups*\n",
  24. " - (estimate of the variation among observations within groups)\n",
  25. " \n",
  26. "$SS_{within}=\\sum \\limits_{a}\\sum \\limits_{n}(Y-\\overline{Y})^{2}$\n"
  27. ]
  28. },
  29. {
  30. "cell_type": "code",
  31. "execution_count": 1,
  32. "metadata": {
  33. "collapsed": false
  34. },
  35. "outputs": [
  36. {
  37. "data": {
  38. "text/html": [
  39. "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
  40. "<table border=\"1\" class=\"dataframe\">\n",
  41. " <thead>\n",
  42. " <tr style=\"text-align: right;\">\n",
  43. " <th></th>\n",
  44. " <th>Group</th>\n",
  45. " <th>Seed 1</th>\n",
  46. " <th>Seed 2</th>\n",
  47. " <th>Ancestor</th>\n",
  48. " <th>Dominant</th>\n",
  49. " </tr>\n",
  50. " </thead>\n",
  51. " <tbody>\n",
  52. " <tr>\n",
  53. " <th>0 </th>\n",
  54. " <td> 20k</td>\n",
  55. " <td> 1000</td>\n",
  56. " <td> 1100</td>\n",
  57. " <td> 1.615690</td>\n",
  58. " <td> 406.0690</td>\n",
  59. " </tr>\n",
  60. " <tr>\n",
  61. " <th>1 </th>\n",
  62. " <td> 20k</td>\n",
  63. " <td> 1000</td>\n",
  64. " <td> 1101</td>\n",
  65. " <td> 1.615690</td>\n",
  66. " <td> 455.7070</td>\n",
  67. " </tr>\n",
  68. " <tr>\n",
  69. " <th>2 </th>\n",
  70. " <td> 20k</td>\n",
  71. " <td> 1000</td>\n",
  72. " <td> 1102</td>\n",
  73. " <td> 1.615690</td>\n",
  74. " <td> 438.2780</td>\n",
  75. " </tr>\n",
  76. " <tr>\n",
  77. " <th>3 </th>\n",
  78. " <td> 20k</td>\n",
  79. " <td> 1000</td>\n",
  80. " <td> 1103</td>\n",
  81. " <td> 1.615690</td>\n",
  82. " <td> 453.4130</td>\n",
  83. " </tr>\n",
  84. " <tr>\n",
  85. " <th>4 </th>\n",
  86. " <td> 20k</td>\n",
  87. " <td> 1000</td>\n",
  88. " <td> 1104</td>\n",
  89. " <td> 1.615690</td>\n",
  90. " <td> 437.8480</td>\n",
  91. " </tr>\n",
  92. " <tr>\n",
  93. " <th>5 </th>\n",
  94. " <td> 20k</td>\n",
  95. " <td> 1001</td>\n",
  96. " <td> 1200</td>\n",
  97. " <td> 0.417722</td>\n",
  98. " <td> 455.8450</td>\n",
  99. " </tr>\n",
  100. " <tr>\n",
  101. " <th>6 </th>\n",
  102. " <td> 20k</td>\n",
  103. " <td> 1001</td>\n",
  104. " <td> 1201</td>\n",
  105. " <td> 0.417722</td>\n",
  106. " <td> 414.4760</td>\n",
  107. " </tr>\n",
  108. " <tr>\n",
  109. " <th>7 </th>\n",
  110. " <td> 20k</td>\n",
  111. " <td> 1001</td>\n",
  112. " <td> 1202</td>\n",
  113. " <td> 0.417722</td>\n",
  114. " <td> 484.0730</td>\n",
  115. " </tr>\n",
  116. " <tr>\n",
  117. " <th>8 </th>\n",
  118. " <td> 20k</td>\n",
  119. " <td> 1001</td>\n",
  120. " <td> 1203</td>\n",
  121. " <td> 0.417722</td>\n",
  122. " <td> 477.8950</td>\n",
  123. " </tr>\n",
  124. " <tr>\n",
  125. " <th>9 </th>\n",
  126. " <td> 20k</td>\n",
  127. " <td> 1001</td>\n",
  128. " <td> 1204</td>\n",
  129. " <td> 0.417722</td>\n",
  130. " <td> 459.4160</td>\n",
  131. " </tr>\n",
  132. " <tr>\n",
  133. " <th>10</th>\n",
  134. " <td> 20k</td>\n",
  135. " <td> 1002</td>\n",
  136. " <td> 1300</td>\n",
  137. " <td> 8.099260</td>\n",
  138. " <td> 18.6589</td>\n",
  139. " </tr>\n",
  140. " <tr>\n",
  141. " <th>11</th>\n",
  142. " <td> 20k</td>\n",
  143. " <td> 1002</td>\n",
  144. " <td> 1301</td>\n",
  145. " <td> 8.099260</td>\n",
  146. " <td> 18.8127</td>\n",
  147. " </tr>\n",
  148. " <tr>\n",
  149. " <th>12</th>\n",
  150. " <td> 20k</td>\n",
  151. " <td> 1002</td>\n",
  152. " <td> 1302</td>\n",
  153. " <td> 8.099260</td>\n",
  154. " <td> 18.7135</td>\n",
  155. " </tr>\n",
  156. " <tr>\n",
  157. " <th>13</th>\n",
  158. " <td> 20k</td>\n",
  159. " <td> 1002</td>\n",
  160. " <td> 1303</td>\n",
  161. " <td> 8.099260</td>\n",
  162. " <td> 19.0716</td>\n",
  163. " </tr>\n",
  164. " <tr>\n",
  165. " <th>14</th>\n",
  166. " <td> 20k</td>\n",
  167. " <td> 1002</td>\n",
  168. " <td> 1304</td>\n",
  169. " <td> 8.099260</td>\n",
  170. " <td> 18.6282</td>\n",
  171. " </tr>\n",
  172. " <tr>\n",
  173. " <th>15</th>\n",
  174. " <td> 20k</td>\n",
  175. " <td> 1003</td>\n",
  176. " <td> 1400</td>\n",
  177. " <td> 0.382609</td>\n",
  178. " <td> 462.7690</td>\n",
  179. " </tr>\n",
  180. " <tr>\n",
  181. " <th>16</th>\n",
  182. " <td> 20k</td>\n",
  183. " <td> 1003</td>\n",
  184. " <td> 1401</td>\n",
  185. " <td> 0.382609</td>\n",
  186. " <td> 461.3630</td>\n",
  187. " </tr>\n",
  188. " <tr>\n",
  189. " <th>17</th>\n",
  190. " <td> 20k</td>\n",
  191. " <td> 1003</td>\n",
  192. " <td> 1402</td>\n",
  193. " <td> 0.382609</td>\n",
  194. " <td> 368.4690</td>\n",
  195. " </tr>\n",
  196. " <tr>\n",
  197. " <th>18</th>\n",
  198. " <td> 20k</td>\n",
  199. " <td> 1003</td>\n",
  200. " <td> 1403</td>\n",
  201. " <td> 0.382609</td>\n",
  202. " <td> 29.5652</td>\n",
  203. " </tr>\n",
  204. " <tr>\n",
  205. " <th>19</th>\n",
  206. " <td> 20k</td>\n",
  207. " <td> 1003</td>\n",
  208. " <td> 1404</td>\n",
  209. " <td> 0.382609</td>\n",
  210. " <td> 30.4262</td>\n",
  211. " </tr>\n",
  212. " <tr>\n",
  213. " <th>20</th>\n",
  214. " <td> 20k</td>\n",
  215. " <td> 1004</td>\n",
  216. " <td> 1500</td>\n",
  217. " <td> 0.217002</td>\n",
  218. " <td> 24.2383</td>\n",
  219. " </tr>\n",
  220. " <tr>\n",
  221. " <th>21</th>\n",
  222. " <td> 20k</td>\n",
  223. " <td> 1004</td>\n",
  224. " <td> 1501</td>\n",
  225. " <td> 0.217002</td>\n",
  226. " <td> 246.3490</td>\n",
  227. " </tr>\n",
  228. " <tr>\n",
  229. " <th>22</th>\n",
  230. " <td> 20k</td>\n",
  231. " <td> 1004</td>\n",
  232. " <td> 1502</td>\n",
  233. " <td> 0.217002</td>\n",
  234. " <td> 27.7736</td>\n",
  235. " </tr>\n",
  236. " <tr>\n",
  237. " <th>23</th>\n",
  238. " <td> 20k</td>\n",
  239. " <td> 1004</td>\n",
  240. " <td> 1503</td>\n",
  241. " <td> 0.217002</td>\n",
  242. " <td> 28.6697</td>\n",
  243. " </tr>\n",
  244. " <tr>\n",
  245. " <th>24</th>\n",
  246. " <td> 20k</td>\n",
  247. " <td> 1004</td>\n",
  248. " <td> 1504</td>\n",
  249. " <td> 0.217002</td>\n",
  250. " <td> 23.5402</td>\n",
  251. " </tr>\n",
  252. " </tbody>\n",
  253. "</table>\n",
  254. "<p>25 rows × 5 columns</p>\n",
  255. "</div>"
  256. ],
  257. "text/plain": [
  258. " Group Seed 1 Seed 2 Ancestor Dominant\n",
  259. "0 20k 1000 1100 1.615690 406.0690\n",
  260. "1 20k 1000 1101 1.615690 455.7070\n",
  261. "2 20k 1000 1102 1.615690 438.2780\n",
  262. "3 20k 1000 1103 1.615690 453.4130\n",
  263. "4 20k 1000 1104 1.615690 437.8480\n",
  264. "5 20k 1001 1200 0.417722 455.8450\n",
  265. "6 20k 1001 1201 0.417722 414.4760\n",
  266. "7 20k 1001 1202 0.417722 484.0730\n",
  267. "8 20k 1001 1203 0.417722 477.8950\n",
  268. "9 20k 1001 1204 0.417722 459.4160\n",
  269. "10 20k 1002 1300 8.099260 18.6589\n",
  270. "11 20k 1002 1301 8.099260 18.8127\n",
  271. "12 20k 1002 1302 8.099260 18.7135\n",
  272. "13 20k 1002 1303 8.099260 19.0716\n",
  273. "14 20k 1002 1304 8.099260 18.6282\n",
  274. "15 20k 1003 1400 0.382609 462.7690\n",
  275. "16 20k 1003 1401 0.382609 461.3630\n",
  276. "17 20k 1003 1402 0.382609 368.4690\n",
  277. "18 20k 1003 1403 0.382609 29.5652\n",
  278. "19 20k 1003 1404 0.382609 30.4262\n",
  279. "20 20k 1004 1500 0.217002 24.2383\n",
  280. "21 20k 1004 1501 0.217002 246.3490\n",
  281. "22 20k 1004 1502 0.217002 27.7736\n",
  282. "23 20k 1004 1503 0.217002 28.6697\n",
  283. "24 20k 1004 1504 0.217002 23.5402\n",
  284. "\n",
  285. "[25 rows x 5 columns]"
  286. ]
  287. },
  288. "execution_count": 1,
  289. "metadata": {},
  290. "output_type": "execute_result"
  291. }
  292. ],
  293. "source": [
  294. "import numpy as np\n",
  295. "import pandas as pd\n",
  296. "import matplotlib.pyplot as plt\n",
  297. "\n",
  298. "from pandas import *\n",
  299. "\n",
  300. "df = pd.read_csv('data.csv', index_col=False)  # read_csv replaces the deprecated DataFrame.from_csv\n",
  301. "df[:25]  # display the first 25 rows"
  302. ]
  303. },
  304. {
  305. "cell_type": "code",
  306. "execution_count": 2,
  307. "metadata": {
  308. "collapsed": false
  309. },
  310. "outputs": [
  311. {
  312. "name": "stdout",
  313. "output_type": "stream",
  314. "text": [
  315. "Grand mean = [251.2027240000001, 203.75499199999999, 243.52518800000007]\n"
  316. ]
  317. }
  318. ],
  319. "source": [
  320. "a = 5.0  # number of groups\n",
  321. "n = 5.0  # observations within each group\n",
  322. "# Grand mean of numeric column 3 (Dominant) for each of the three 25-row blocks.\n",
  323. "block_sums = [df[(i*25):25+(i*25)].sum(numeric_only=True)[3] for i in range(3)]\n",
  324. "grand_mean = [(1.0/(a*n))*block_sum for block_sum in block_sums]\n",
  325. "print 'Grand mean =',grand_mean"
  326. ]
  327. },
  328. {
  329. "cell_type": "code",
  330. "execution_count": 3,
  331. "metadata": {
  332. "collapsed": false
  333. },
  334. "outputs": [
  335. {
  336. "name": "stdout",
  337. "output_type": "stream",
  338. "text": [
  339. "SS among = [825428.52578417759, 881710.70758277329, 826902.13975996012]\n"
  340. ]
  341. }
  342. ],
  343. "source": [
  344. "ss_among = []  # sum of squares among groups, one value per 25-row block\n",
  345. "for i in range(3):\n",
  346. "    group = 0  # running sum of squared (group mean - grand mean) for block i\n",
  347. "    for j in range(5):  # the i*25 offset makes each block use its own group means\n",
  348. "        group += (df[(j*5)+(i*25):5+(j*5)+(i*25)].mean()[3] - grand_mean[i])**2\n",
  349. "    ss_among.append(n*(group))\n",
  350. "print 'SS among =',ss_among"
  351. ]
  352. },
  353. {
  354. "cell_type": "code",
  355. "execution_count": 4,
  356. "metadata": {
  357. "collapsed": false
  358. },
  359. "outputs": [
  360. {
  361. "name": "stdout",
  362. "output_type": "stream",
  363. "text": [
  364. "SS within = [1210341.92795164, 3114449.8073218209, 4240090.8150265003]\n"
  365. ]
  366. }
  367. ],
  368. "source": [
  369. "ss_within = []  # sum of squares within groups, one value per 25-row block\n",
  370. "for i in range(3):\n",
  371. "    group = 0  # running sum of squared (observation - its group mean) for block i\n",
  372. "    for j in range(5):\n",
  373. "        for k in range(5):\n",
  374. "            group += (df.iloc[k+(j*5)+(i*25),4] - df[(j*5)+(i*25):5+(j*5)+(i*25)].mean()[3])**2\n",
  375. "    ss_within.append(group)  # plain double sum: SS within carries no factor of n\n",
  376. "print 'SS within =',ss_within"
  377. ]
  378. },
  379. {
  380. "cell_type": "code",
  381. "execution_count": 5,
  382. "metadata": {
  383. "collapsed": false
  384. },
  385. "outputs": [
  386. {
  387. "name": "stdout",
  388. "output_type": "stream",
  389. "text": [
  390. "SS total = [2035770.4537358177, 3996160.514904594, 5066992.9547864608]\n"
  391. ]
  392. }
  393. ],
  394. "source": [
  395. "# Total sum of squares per block: SS among plus SS within.\n",
  396. "ss_total = [ss_among[i] + ss_within[i]\n",
  397. "            for i in range(3)]\n",
  398. "print 'SS total =', ss_total"
  399. ]
  400. },
  401. {
  402. "cell_type": "code",
  403. "execution_count": null,
  404. "metadata": {
  405. "collapsed": true
  406. },
  407. "outputs": [],
  408. "source": []
  409. }
  410. ],
  411. "metadata": {
  412. "kernelspec": {
  413. "display_name": "Python 2",
  414. "language": "python",
  415. "name": "python2"
  416. },
  417. "language_info": {
  418. "codemirror_mode": {
  419. "name": "ipython",
  420. "version": 2
  421. },
  422. "file_extension": ".py",
  423. "mimetype": "text/x-python",
  424. "name": "python",
  425. "nbconvert_exporter": "python",
  426. "pygments_lexer": "ipython2",
  427. "version": "2.7.6"
  428. }
  429. },
  430. "nbformat": 4,
  431. "nbformat_minor": 0
  432. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement