Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- {
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## The analysis of variance (ANOVA)\n",
- "***\n",
- "- Grand mean\n",
- " - $a$ = number of groups\n",
- "\n",
- " - $n$ = number of observations within each group\n",
- "\n",
- " - $Y$ = single observation\n",
- "\n",
- " - $\\overline{Y}$ = mean of the observations within one group\n",
- " \n",
- "$\\overline{\\overline{Y}}=\\Large\\frac{1}{an}\\sum \\limits_{a}\\sum \\limits_{n}Y$\n",
- "\n",
- "- *Sum of squares among groups*\n",
- " - (estimate of the variation among groups)\n",
- " \n",
- "$SS_{among}=n\\sum \\limits_{a}(\\overline{Y}-\\overline{\\overline{Y}})^{2}$\n",
- "\n",
- "- *Sum of squares within groups*\n",
- " - (estimate of the variation among observations within groups)\n",
- " \n",
- "$SS_{within}=\\sum \\limits_{a}\\sum \\limits_{n}(Y-\\overline{Y})^{2}$\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 1,
- "metadata": {
- "collapsed": false
- },
- "outputs": [
- {
- "data": {
- "text/html": [
- "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
- "<table border=\"1\" class=\"dataframe\">\n",
- " <thead>\n",
- " <tr style=\"text-align: right;\">\n",
- " <th></th>\n",
- " <th>Group</th>\n",
- " <th>Seed 1</th>\n",
- " <th>Seed 2</th>\n",
- " <th>Ancestor</th>\n",
- " <th>Dominant</th>\n",
- " </tr>\n",
- " </thead>\n",
- " <tbody>\n",
- " <tr>\n",
- " <th>0 </th>\n",
- " <td> 20k</td>\n",
- " <td> 1000</td>\n",
- " <td> 1100</td>\n",
- " <td> 1.615690</td>\n",
- " <td> 406.0690</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>1 </th>\n",
- " <td> 20k</td>\n",
- " <td> 1000</td>\n",
- " <td> 1101</td>\n",
- " <td> 1.615690</td>\n",
- " <td> 455.7070</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>2 </th>\n",
- " <td> 20k</td>\n",
- " <td> 1000</td>\n",
- " <td> 1102</td>\n",
- " <td> 1.615690</td>\n",
- " <td> 438.2780</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>3 </th>\n",
- " <td> 20k</td>\n",
- " <td> 1000</td>\n",
- " <td> 1103</td>\n",
- " <td> 1.615690</td>\n",
- " <td> 453.4130</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>4 </th>\n",
- " <td> 20k</td>\n",
- " <td> 1000</td>\n",
- " <td> 1104</td>\n",
- " <td> 1.615690</td>\n",
- " <td> 437.8480</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>5 </th>\n",
- " <td> 20k</td>\n",
- " <td> 1001</td>\n",
- " <td> 1200</td>\n",
- " <td> 0.417722</td>\n",
- " <td> 455.8450</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>6 </th>\n",
- " <td> 20k</td>\n",
- " <td> 1001</td>\n",
- " <td> 1201</td>\n",
- " <td> 0.417722</td>\n",
- " <td> 414.4760</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>7 </th>\n",
- " <td> 20k</td>\n",
- " <td> 1001</td>\n",
- " <td> 1202</td>\n",
- " <td> 0.417722</td>\n",
- " <td> 484.0730</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>8 </th>\n",
- " <td> 20k</td>\n",
- " <td> 1001</td>\n",
- " <td> 1203</td>\n",
- " <td> 0.417722</td>\n",
- " <td> 477.8950</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>9 </th>\n",
- " <td> 20k</td>\n",
- " <td> 1001</td>\n",
- " <td> 1204</td>\n",
- " <td> 0.417722</td>\n",
- " <td> 459.4160</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>10</th>\n",
- " <td> 20k</td>\n",
- " <td> 1002</td>\n",
- " <td> 1300</td>\n",
- " <td> 8.099260</td>\n",
- " <td> 18.6589</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>11</th>\n",
- " <td> 20k</td>\n",
- " <td> 1002</td>\n",
- " <td> 1301</td>\n",
- " <td> 8.099260</td>\n",
- " <td> 18.8127</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>12</th>\n",
- " <td> 20k</td>\n",
- " <td> 1002</td>\n",
- " <td> 1302</td>\n",
- " <td> 8.099260</td>\n",
- " <td> 18.7135</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>13</th>\n",
- " <td> 20k</td>\n",
- " <td> 1002</td>\n",
- " <td> 1303</td>\n",
- " <td> 8.099260</td>\n",
- " <td> 19.0716</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>14</th>\n",
- " <td> 20k</td>\n",
- " <td> 1002</td>\n",
- " <td> 1304</td>\n",
- " <td> 8.099260</td>\n",
- " <td> 18.6282</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>15</th>\n",
- " <td> 20k</td>\n",
- " <td> 1003</td>\n",
- " <td> 1400</td>\n",
- " <td> 0.382609</td>\n",
- " <td> 462.7690</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>16</th>\n",
- " <td> 20k</td>\n",
- " <td> 1003</td>\n",
- " <td> 1401</td>\n",
- " <td> 0.382609</td>\n",
- " <td> 461.3630</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>17</th>\n",
- " <td> 20k</td>\n",
- " <td> 1003</td>\n",
- " <td> 1402</td>\n",
- " <td> 0.382609</td>\n",
- " <td> 368.4690</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>18</th>\n",
- " <td> 20k</td>\n",
- " <td> 1003</td>\n",
- " <td> 1403</td>\n",
- " <td> 0.382609</td>\n",
- " <td> 29.5652</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>19</th>\n",
- " <td> 20k</td>\n",
- " <td> 1003</td>\n",
- " <td> 1404</td>\n",
- " <td> 0.382609</td>\n",
- " <td> 30.4262</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>20</th>\n",
- " <td> 20k</td>\n",
- " <td> 1004</td>\n",
- " <td> 1500</td>\n",
- " <td> 0.217002</td>\n",
- " <td> 24.2383</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>21</th>\n",
- " <td> 20k</td>\n",
- " <td> 1004</td>\n",
- " <td> 1501</td>\n",
- " <td> 0.217002</td>\n",
- " <td> 246.3490</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>22</th>\n",
- " <td> 20k</td>\n",
- " <td> 1004</td>\n",
- " <td> 1502</td>\n",
- " <td> 0.217002</td>\n",
- " <td> 27.7736</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>23</th>\n",
- " <td> 20k</td>\n",
- " <td> 1004</td>\n",
- " <td> 1503</td>\n",
- " <td> 0.217002</td>\n",
- " <td> 28.6697</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>24</th>\n",
- " <td> 20k</td>\n",
- " <td> 1004</td>\n",
- " <td> 1504</td>\n",
- " <td> 0.217002</td>\n",
- " <td> 23.5402</td>\n",
- " </tr>\n",
- " </tbody>\n",
- "</table>\n",
- "<p>25 rows × 5 columns</p>\n",
- "</div>"
- ],
- "text/plain": [
- " Group Seed 1 Seed 2 Ancestor Dominant\n",
- "0 20k 1000 1100 1.615690 406.0690\n",
- "1 20k 1000 1101 1.615690 455.7070\n",
- "2 20k 1000 1102 1.615690 438.2780\n",
- "3 20k 1000 1103 1.615690 453.4130\n",
- "4 20k 1000 1104 1.615690 437.8480\n",
- "5 20k 1001 1200 0.417722 455.8450\n",
- "6 20k 1001 1201 0.417722 414.4760\n",
- "7 20k 1001 1202 0.417722 484.0730\n",
- "8 20k 1001 1203 0.417722 477.8950\n",
- "9 20k 1001 1204 0.417722 459.4160\n",
- "10 20k 1002 1300 8.099260 18.6589\n",
- "11 20k 1002 1301 8.099260 18.8127\n",
- "12 20k 1002 1302 8.099260 18.7135\n",
- "13 20k 1002 1303 8.099260 19.0716\n",
- "14 20k 1002 1304 8.099260 18.6282\n",
- "15 20k 1003 1400 0.382609 462.7690\n",
- "16 20k 1003 1401 0.382609 461.3630\n",
- "17 20k 1003 1402 0.382609 368.4690\n",
- "18 20k 1003 1403 0.382609 29.5652\n",
- "19 20k 1003 1404 0.382609 30.4262\n",
- "20 20k 1004 1500 0.217002 24.2383\n",
- "21 20k 1004 1501 0.217002 246.3490\n",
- "22 20k 1004 1502 0.217002 27.7736\n",
- "23 20k 1004 1503 0.217002 28.6697\n",
- "24 20k 1004 1504 0.217002 23.5402\n",
- "\n",
- "[25 rows x 5 columns]"
- ]
- },
- "execution_count": 1,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "import numpy as np\n",
- "import pandas as pd\n",
- "import matplotlib.pyplot as plt\n",
- "\n",
- "from pandas import *\n",
- "\n",
- "# DataFrame.from_csv is deprecated (removed in pandas 1.0); pd.read_csv is the\n",
- "# supported reader. index_col=False keeps the first CSV column as data rather\n",
- "# than turning it into the row index.\n",
- "df = pd.read_csv('data.csv', index_col=False)\n",
- "# Preview the first 25 rows.\n",
- "df[:25]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 2,
- "metadata": {
- "collapsed": false
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Grand mean = [251.2027240000001, 203.75499199999999, 243.52518800000007]\n"
- ]
- }
- ],
- "source": [
- "# Layout assumed by every cell below: a groups, each with n observations.\n",
- "a = 5.0\n",
- "n = 5.0\n",
- "# One grand mean per consecutive block of 25 rows. Position 3 of the\n",
- "# numeric-only column sums is the Dominant column.\n",
- "block_sums = [df[start:25+start].sum(numeric_only=True)[3] for start in (0, 25, 50)]\n",
- "grand_mean = [(1.0/(a*n))*(total) for total in block_sums]\n",
- "print 'Grand mean =',grand_mean"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 3,
- "metadata": {
- "collapsed": false
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "SS among = [825428.52578417759, 881710.70758277329, 826902.13975996012]\n"
- ]
- }
- ],
- "source": [
- "# SS_among = n * sum over groups of (group mean - grand mean)^2, computed\n",
- "# separately for each 25-row block.\n",
- "ss_among = []\n",
- "for i in range(3):\n",
- "    block_start = i*25\n",
- "    group = 0\n",
- "    for j in range(5):\n",
- "        # Offset by block_start so the group means come from the same block as\n",
- "        # grand_mean[i]; without it every block reused the first 25 rows.\n",
- "        group_start = block_start + (j*5)\n",
- "        group += (df[group_start:group_start+5].mean()[3] - grand_mean[i])**2\n",
- "    ss_among.append(n*(group))\n",
- "print 'SS among =',ss_among"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 4,
- "metadata": {
- "collapsed": false
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "SS within = [1210341.92795164, 3114449.8073218209, 4240090.8150265003]\n"
- ]
- }
- ],
- "source": [
- "# SS_within = sum over groups and observations of (Y - group mean)^2,\n",
- "# computed separately for each 25-row block.\n",
- "ss_within = []\n",
- "for i in range(3):\n",
- "    group = 0\n",
- "    for j in range(5):\n",
- "        group_start = (j*5)+(i*25)\n",
- "        # The group mean does not depend on k, so compute it once per group.\n",
- "        group_mean = df[group_start:5+group_start].mean()[3]\n",
- "        for k in range(5):\n",
- "            group += (df.iloc[k+group_start,4] - group_mean)**2\n",
- "    # No factor of n here: unlike SS_among, this sum already runs over every\n",
- "    # observation, so multiplying by n overstated the result n-fold.\n",
- "    ss_within.append(group)\n",
- "print 'SS within =',ss_within"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 5,
- "metadata": {
- "collapsed": false
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "SS total = [2035770.4537358177, 3996160.514904594, 5066992.9547864608]\n"
- ]
- }
- ],
- "source": [
- "# Total sum of squares per block: among-group plus within-group.\n",
- "ss_total = [ss_among[block] + ss_within[block] for block in range(3)]\n",
- "print 'SS total =', ss_total"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": []
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 2",
- "language": "python",
- "name": "python2"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 2
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython2",
- "version": "2.7.6"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 0
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement