daily pastebin goal
11%
SHARE
TWEET

Untitled

a guest Jul 12th, 2018 64 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. {
  2.  "cells": [
  3.   {
  4.    "cell_type": "code",
  5.    "execution_count": 102,
  6.    "metadata": {},
  7.    "outputs": [],
  8.    "source": [
  9.     "# TODO: add t-tests\n",
  10.     "# TODO: add categories of exposure and for centre"
  11.    ]
  12.   },
  13.   {
  14.    "cell_type": "code",
  15.    "execution_count": 1,
  16.    "metadata": {},
  17.    "outputs": [
  18.     {
  19.      "name": "stderr",
  20.      "output_type": "stream",
  21.      "text": [
  22.       "/home/drcjar/anaconda3/envs/ipfjes/lib/python3.5/site-packages/statsmodels/compat/pandas.py:56: FutureWarning: The pandas.core.datetools module is deprecated and will be removed in a future version. Please use the pandas.tseries module instead.\n",
  23.       "  from pandas.core import datetools\n"
  24.      ]
  25.     }
  26.    ],
  27.    "source": [
  28.     "import pandas as pd\n",
  29.     "import statsmodels.api as sm\n",
  30.     "import seaborn as sns; sns.set(color_codes=True)"
  31.    ]
  32.   },
  33.   {
  34.    "cell_type": "code",
  35.    "execution_count": 78,
  36.    "metadata": {},
  37.    "outputs": [],
  38.    "source": [
  39.     "df = pd.read_csv('for_cosetta.csv')"
  40.    ]
  41.   },
  42.   {
  43.    "cell_type": "code",
  44.    "execution_count": 79,
  45.    "metadata": {},
  46.    "outputs": [
  47.     {
  48.      "data": {
  49.       "text/html": [
  50.        "<div>\n",
  51.        "<style scoped>\n",
  52.        "    .dataframe tbody tr th:only-of-type {\n",
  53.        "        vertical-align: middle;\n",
  54.        "    }\n",
  55.        "\n",
  56.        "    .dataframe tbody tr th {\n",
  57.        "        vertical-align: top;\n",
  58.        "    }\n",
  59.        "\n",
  60.        "    .dataframe thead th {\n",
  61.        "        text-align: right;\n",
  62.        "    }\n",
  63.        "</style>\n",
  64.        "<table border=\"1\" class=\"dataframe\">\n",
  65.        "  <thead>\n",
  66.        "    <tr style=\"text-align: right;\">\n",
  67.        "      <th></th>\n",
  68.        "      <th>participant_id</th>\n",
  69.        "      <th>soc_job_ft</th>\n",
  70.        "      <th>job_tasks</th>\n",
  71.        "      <th>start_year</th>\n",
  72.        "      <th>end_year</th>\n",
  73.        "      <th>pt</th>\n",
  74.        "      <th>soc90</th>\n",
  75.        "      <th>soc2000</th>\n",
  76.        "      <th>ssec</th>\n",
  77.        "      <th>years</th>\n",
  78.        "      <th>...</th>\n",
  79.        "      <th>es</th>\n",
  80.        "      <th>cs</th>\n",
  81.        "      <th>centre</th>\n",
  82.        "      <th>gp_coords</th>\n",
  83.        "      <th>centre_coords</th>\n",
  84.        "      <th>distfromcentre</th>\n",
  85.        "      <th>exposed</th>\n",
  86.        "      <th>duration</th>\n",
  87.        "      <th>risk</th>\n",
  88.        "      <th>jobcat</th>\n",
  89.        "    </tr>\n",
  90.        "  </thead>\n",
  91.        "  <tbody>\n",
  92.        "    <tr>\n",
  93.        "      <th>0</th>\n",
  94.        "      <td>80024</td>\n",
  95.        "      <td>Assistant, laboratory</td>\n",
  96.        "      <td>tiles\\nglaziers\\n\\njust colouring \\n\\ntiles fo...</td>\n",
  97.        "      <td>1962</td>\n",
  98.        "      <td>1982</td>\n",
  99.        "      <td>case</td>\n",
  100.        "      <td>864</td>\n",
  101.        "      <td>8138</td>\n",
  102.        "      <td>3.0</td>\n",
  103.        "      <td>72</td>\n",
  104.        "      <td>...</td>\n",
  105.        "      <td>Yes</td>\n",
  106.        "      <td>No</td>\n",
  107.        "      <td>8</td>\n",
  108.        "      <td>NaN</td>\n",
  109.        "      <td>(53.46633849590757, -2.9311029200152485)</td>\n",
  110.        "      <td>5932.734447</td>\n",
  111.        "      <td>False</td>\n",
  112.        "      <td>20</td>\n",
  113.        "      <td>0</td>\n",
  114.        "      <td>3.0</td>\n",
  115.        "    </tr>\n",
  116.        "    <tr>\n",
  117.        "      <th>1</th>\n",
  118.        "      <td>80024</td>\n",
  119.        "      <td>Joiner</td>\n",
  120.        "      <td>1st and 2nd fix\\nincluding fitting kitchems</td>\n",
  121.        "      <td>1982</td>\n",
  122.        "      <td>1992</td>\n",
  123.        "      <td>case</td>\n",
  124.        "      <td>859</td>\n",
  125.        "      <td>8139</td>\n",
  126.        "      <td>7.0</td>\n",
  127.        "      <td>72</td>\n",
  128.        "      <td>...</td>\n",
  129.        "      <td>Yes</td>\n",
  130.        "      <td>No</td>\n",
  131.        "      <td>8</td>\n",
  132.        "      <td>NaN</td>\n",
  133.        "      <td>(53.46633849590757, -2.9311029200152485)</td>\n",
  134.        "      <td>5932.734447</td>\n",
  135.        "      <td>False</td>\n",
  136.        "      <td>10</td>\n",
  137.        "      <td>0</td>\n",
  138.        "      <td>3.0</td>\n",
  139.        "    </tr>\n",
  140.        "    <tr>\n",
  141.        "      <th>2</th>\n",
  142.        "      <td>100009</td>\n",
  143.        "      <td>Soldier</td>\n",
  144.        "      <td>Infantry Soldier in the army: trained in comba...</td>\n",
  145.        "      <td>1955</td>\n",
  146.        "      <td>1958</td>\n",
  147.        "      <td>case</td>\n",
  148.        "      <td>600</td>\n",
  149.        "      <td>3311</td>\n",
  150.        "      <td>3.0</td>\n",
  151.        "      <td>81</td>\n",
  152.        "      <td>...</td>\n",
  153.        "      <td>No</td>\n",
  154.        "      <td>NaN</td>\n",
  155.        "      <td>10</td>\n",
  156.        "      <td>(51.410693661901455, -0.161964476739319)</td>\n",
  157.        "      <td>(51.516971016915214, -0.173544641038758)</td>\n",
  158.        "      <td>11.851470</td>\n",
  159.        "      <td>False</td>\n",
  160.        "      <td>3</td>\n",
  161.        "      <td>0</td>\n",
  162.        "      <td>4.0</td>\n",
  163.        "    </tr>\n",
  164.        "    <tr>\n",
  165.        "      <th>3</th>\n",
  166.        "      <td>80003</td>\n",
  167.        "      <td>Assistant, catering</td>\n",
  168.        "      <td>working in the kitchen, restaurants etc. on th...</td>\n",
  169.        "      <td>1970</td>\n",
  170.        "      <td>1972</td>\n",
  171.        "      <td>case</td>\n",
  172.        "      <td>953</td>\n",
  173.        "      <td>9223</td>\n",
  174.        "      <td>6.0</td>\n",
  175.        "      <td>63</td>\n",
  176.        "      <td>...</td>\n",
  177.        "      <td>Yes</td>\n",
  178.        "      <td>No</td>\n",
  179.        "      <td>8</td>\n",
  180.        "      <td>NaN</td>\n",
  181.        "      <td>(53.46633849590757, -2.9311029200152485)</td>\n",
  182.        "      <td>5932.734447</td>\n",
  183.        "      <td>False</td>\n",
  184.        "      <td>2</td>\n",
  185.        "      <td>0</td>\n",
  186.        "      <td>1.0</td>\n",
  187.        "    </tr>\n",
  188.        "    <tr>\n",
  189.        "      <th>4</th>\n",
  190.        "      <td>100009</td>\n",
  191.        "      <td>Hand, bacon</td>\n",
  192.        "      <td>Bacon Hand - working in the supermarket; cutti...</td>\n",
  193.        "      <td>1958</td>\n",
  194.        "      <td>1972</td>\n",
  195.        "      <td>case</td>\n",
  196.        "      <td>720</td>\n",
  197.        "      <td>7111</td>\n",
  198.        "      <td>6.0</td>\n",
  199.        "      <td>81</td>\n",
  200.        "      <td>...</td>\n",
  201.        "      <td>No</td>\n",
  202.        "      <td>NaN</td>\n",
  203.        "      <td>10</td>\n",
  204.        "      <td>(51.410693661901455, -0.161964476739319)</td>\n",
  205.        "      <td>(51.516971016915214, -0.173544641038758)</td>\n",
  206.        "      <td>11.851470</td>\n",
  207.        "      <td>False</td>\n",
  208.        "      <td>14</td>\n",
  209.        "      <td>0</td>\n",
  210.        "      <td>5.0</td>\n",
  211.        "    </tr>\n",
  212.        "  </tbody>\n",
  213.        "</table>\n",
  214.        "<p>5 rows × 22 columns</p>\n",
  215.        "</div>"
  216.       ],
  217.       "text/plain": [
  218.        "   participant_id             soc_job_ft  \\\n",
  219.        "0           80024  Assistant, laboratory   \n",
  220.        "1           80024                 Joiner   \n",
  221.        "2          100009                Soldier   \n",
  222.        "3           80003    Assistant, catering   \n",
  223.        "4          100009            Hand, bacon   \n",
  224.        "\n",
  225.        "                                           job_tasks  start_year  end_year  \\\n",
  226.        "0  tiles\\nglaziers\\n\\njust colouring \\n\\ntiles fo...        1962      1982   \n",
  227.        "1        1st and 2nd fix\\nincluding fitting kitchems        1982      1992   \n",
  228.        "2  Infantry Soldier in the army: trained in comba...        1955      1958   \n",
  229.        "3  working in the kitchen, restaurants etc. on th...        1970      1972   \n",
  230.        "4  Bacon Hand - working in the supermarket; cutti...        1958      1972   \n",
  231.        "\n",
  232.        "     pt  soc90  soc2000  ssec  years   ...     es   cs centre  \\\n",
  233.        "0  case    864     8138   3.0     72   ...    Yes   No      8   \n",
  234.        "1  case    859     8139   7.0     72   ...    Yes   No      8   \n",
  235.        "2  case    600     3311   3.0     81   ...     No  NaN     10   \n",
  236.        "3  case    953     9223   6.0     63   ...    Yes   No      8   \n",
  237.        "4  case    720     7111   6.0     81   ...     No  NaN     10   \n",
  238.        "\n",
  239.        "                                  gp_coords  \\\n",
  240.        "0                                       NaN   \n",
  241.        "1                                       NaN   \n",
  242.        "2  (51.410693661901455, -0.161964476739319)   \n",
  243.        "3                                       NaN   \n",
  244.        "4  (51.410693661901455, -0.161964476739319)   \n",
  245.        "\n",
  246.        "                              centre_coords distfromcentre exposed  duration  \\\n",
  247.        "0  (53.46633849590757, -2.9311029200152485)    5932.734447   False        20   \n",
  248.        "1  (53.46633849590757, -2.9311029200152485)    5932.734447   False        10   \n",
  249.        "2  (51.516971016915214, -0.173544641038758)      11.851470   False         3   \n",
  250.        "3  (53.46633849590757, -2.9311029200152485)    5932.734447   False         2   \n",
  251.        "4  (51.516971016915214, -0.173544641038758)      11.851470   False        14   \n",
  252.        "\n",
  253.        "   risk  jobcat  \n",
  254.        "0     0     3.0  \n",
  255.        "1     0     3.0  \n",
  256.        "2     0     4.0  \n",
  257.        "3     0     1.0  \n",
  258.        "4     0     5.0  \n",
  259.        "\n",
  260.        "[5 rows x 22 columns]"
  261.       ]
  262.      },
  263.      "execution_count": 79,
  264.      "metadata": {},
  265.      "output_type": "execute_result"
  266.     }
  267.    ],
  268.    "source": [
  269.     "df.head()"
  270.    ]
  271.   },
  272.   {
  273.    "cell_type": "code",
  274.    "execution_count": 80,
  275.    "metadata": {},
  276.    "outputs": [],
  277.    "source": [
  278.     "df = df[df.gp_coords.notna()] # get rid of rows which lack gp coords"
  279.    ]
  280.   },
  281.   {
  282.    "cell_type": "code",
  283.    "execution_count": 81,
  284.    "metadata": {},
  285.    "outputs": [],
  286.    "source": [
  287.     "cumrisk = df.groupby('participant_id').risk.sum().reset_index() "
  288.    ]
  289.   },
  290.   {
  291.    "cell_type": "code",
  292.    "execution_count": 82,
  293.    "metadata": {},
  294.    "outputs": [],
  295.    "source": [
  296.     "maxcat = df.groupby('participant_id').jobcat.min().reset_index() "
  297.    ]
  298.   },
  299.   {
  300.    "cell_type": "code",
  301.    "execution_count": 83,
  302.    "metadata": {},
  303.    "outputs": [],
  304.    "source": [
  305.     "df = df[['participant_id', 'pt', 'years', 'agegroup', 'ethnicity', 'es', 'cs', 'distfromcentre', 'centre']]"
  306.    ]
  307.   },
  308.   {
  309.    "cell_type": "code",
  310.    "execution_count": 84,
  311.    "metadata": {},
  312.    "outputs": [
  313.     {
  314.      "data": {
  315.       "text/html": [
  316.        "<div>\n",
  317.        "<style scoped>\n",
  318.        "    .dataframe tbody tr th:only-of-type {\n",
  319.        "        vertical-align: middle;\n",
  320.        "    }\n",
  321.        "\n",
  322.        "    .dataframe tbody tr th {\n",
  323.        "        vertical-align: top;\n",
  324.        "    }\n",
  325.        "\n",
  326.        "    .dataframe thead th {\n",
  327.        "        text-align: right;\n",
  328.        "    }\n",
  329.        "</style>\n",
  330.        "<table border=\"1\" class=\"dataframe\">\n",
  331.        "  <thead>\n",
  332.        "    <tr style=\"text-align: right;\">\n",
  333.        "      <th></th>\n",
  334.        "      <th>participant_id</th>\n",
  335.        "      <th>pt</th>\n",
  336.        "      <th>years</th>\n",
  337.        "      <th>agegroup</th>\n",
  338.        "      <th>ethnicity</th>\n",
  339.        "      <th>es</th>\n",
  340.        "      <th>cs</th>\n",
  341.        "      <th>distfromcentre</th>\n",
  342.        "      <th>centre</th>\n",
  343.        "    </tr>\n",
  344.        "  </thead>\n",
  345.        "  <tbody>\n",
  346.        "    <tr>\n",
  347.        "      <th>2</th>\n",
  348.        "      <td>100009</td>\n",
  349.        "      <td>case</td>\n",
  350.        "      <td>81</td>\n",
  351.        "      <td>80 to 84</td>\n",
  352.        "      <td>White</td>\n",
  353.        "      <td>No</td>\n",
  354.        "      <td>NaN</td>\n",
  355.        "      <td>11.851470</td>\n",
  356.        "      <td>10</td>\n",
  357.        "    </tr>\n",
  358.        "    <tr>\n",
  359.        "      <th>4</th>\n",
  360.        "      <td>100009</td>\n",
  361.        "      <td>case</td>\n",
  362.        "      <td>81</td>\n",
  363.        "      <td>80 to 84</td>\n",
  364.        "      <td>White</td>\n",
  365.        "      <td>No</td>\n",
  366.        "      <td>NaN</td>\n",
  367.        "      <td>11.851470</td>\n",
  368.        "      <td>10</td>\n",
  369.        "    </tr>\n",
  370.        "    <tr>\n",
  371.        "      <th>5</th>\n",
  372.        "      <td>70003</td>\n",
  373.        "      <td>case</td>\n",
  374.        "      <td>55</td>\n",
  375.        "      <td>55 to 59</td>\n",
  376.        "      <td>White</td>\n",
  377.        "      <td>Yes</td>\n",
  378.        "      <td>Yes</td>\n",
  379.        "      <td>30.403741</td>\n",
  380.        "      <td>7</td>\n",
  381.        "    </tr>\n",
  382.        "    <tr>\n",
  383.        "      <th>6</th>\n",
  384.        "      <td>80001</td>\n",
  385.        "      <td>case</td>\n",
  386.        "      <td>87</td>\n",
  387.        "      <td>85 to 90</td>\n",
  388.        "      <td>White</td>\n",
  389.        "      <td>Yes</td>\n",
  390.        "      <td>No</td>\n",
  391.        "      <td>5.549752</td>\n",
  392.        "      <td>8</td>\n",
  393.        "    </tr>\n",
  394.        "    <tr>\n",
  395.        "      <th>7</th>\n",
  396.        "      <td>10002</td>\n",
  397.        "      <td>case</td>\n",
  398.        "      <td>75</td>\n",
  399.        "      <td>75 to 79</td>\n",
  400.        "      <td>White</td>\n",
  401.        "      <td>Yes</td>\n",
  402.        "      <td>No</td>\n",
  403.        "      <td>6.779659</td>\n",
  404.        "      <td>1</td>\n",
  405.        "    </tr>\n",
  406.        "  </tbody>\n",
  407.        "</table>\n",
  408.        "</div>"
  409.       ],
  410.       "text/plain": [
  411.        "   participant_id    pt  years  agegroup ethnicity   es   cs  distfromcentre  \\\n",
  412.        "2          100009  case     81  80 to 84     White   No  NaN       11.851470   \n",
  413.        "4          100009  case     81  80 to 84     White   No  NaN       11.851470   \n",
  414.        "5           70003  case     55  55 to 59     White  Yes  Yes       30.403741   \n",
  415.        "6           80001  case     87  85 to 90     White  Yes   No        5.549752   \n",
  416.        "7           10002  case     75  75 to 79     White  Yes   No        6.779659   \n",
  417.        "\n",
  418.        "   centre  \n",
  419.        "2      10  \n",
  420.        "4      10  \n",
  421.        "5       7  \n",
  422.        "6       8  \n",
  423.        "7       1  "
  424.       ]
  425.      },
  426.      "execution_count": 84,
  427.      "metadata": {},
  428.      "output_type": "execute_result"
  429.     }
  430.    ],
  431.    "source": [
  432.     "df.head()"
  433.    ]
  434.   },
  435.   {
  436.    "cell_type": "code",
  437.    "execution_count": 85,
  438.    "metadata": {},
  439.    "outputs": [],
  440.    "source": [
  441.     "df = pd.merge(df, cumrisk, on='participant_id')\n",
  442.     "df = pd.merge(df, maxcat, on='participant_id')\n"
  443.    ]
  444.   },
  445.   {
  446.    "cell_type": "code",
  447.    "execution_count": 86,
  448.    "metadata": {},
  449.    "outputs": [
  450.     {
  451.      "data": {
  452.       "text/html": [
  453.        "<div>\n",
  454.        "<style scoped>\n",
  455.        "    .dataframe tbody tr th:only-of-type {\n",
  456.        "        vertical-align: middle;\n",
  457.        "    }\n",
  458.        "\n",
  459.        "    .dataframe tbody tr th {\n",
  460.        "        vertical-align: top;\n",
  461.        "    }\n",
  462.        "\n",
  463.        "    .dataframe thead th {\n",
  464.        "        text-align: right;\n",
  465.        "    }\n",
  466.        "</style>\n",
  467.        "<table border=\"1\" class=\"dataframe\">\n",
  468.        "  <thead>\n",
  469.        "    <tr style=\"text-align: right;\">\n",
  470.        "      <th></th>\n",
  471.        "      <th>participant_id</th>\n",
  472.        "      <th>pt</th>\n",
  473.        "      <th>years</th>\n",
  474.        "      <th>agegroup</th>\n",
  475.        "      <th>ethnicity</th>\n",
  476.        "      <th>es</th>\n",
  477.        "      <th>cs</th>\n",
  478.        "      <th>distfromcentre</th>\n",
  479.        "      <th>centre</th>\n",
  480.        "      <th>risk</th>\n",
  481.        "      <th>jobcat</th>\n",
  482.        "    </tr>\n",
  483.        "  </thead>\n",
  484.        "  <tbody>\n",
  485.        "    <tr>\n",
  486.        "      <th>0</th>\n",
  487.        "      <td>100009</td>\n",
  488.        "      <td>case</td>\n",
  489.        "      <td>81</td>\n",
  490.        "      <td>80 to 84</td>\n",
  491.        "      <td>White</td>\n",
  492.        "      <td>No</td>\n",
  493.        "      <td>NaN</td>\n",
  494.        "      <td>11.85147</td>\n",
  495.        "      <td>10</td>\n",
  496.        "      <td>0</td>\n",
  497.        "      <td>1.0</td>\n",
  498.        "    </tr>\n",
  499.        "    <tr>\n",
  500.        "      <th>1</th>\n",
  501.        "      <td>100009</td>\n",
  502.        "      <td>case</td>\n",
  503.        "      <td>81</td>\n",
  504.        "      <td>80 to 84</td>\n",
  505.        "      <td>White</td>\n",
  506.        "      <td>No</td>\n",
  507.        "      <td>NaN</td>\n",
  508.        "      <td>11.85147</td>\n",
  509.        "      <td>10</td>\n",
  510.        "      <td>0</td>\n",
  511.        "      <td>1.0</td>\n",
  512.        "    </tr>\n",
  513.        "    <tr>\n",
  514.        "      <th>2</th>\n",
  515.        "      <td>100009</td>\n",
  516.        "      <td>case</td>\n",
  517.        "      <td>81</td>\n",
  518.        "      <td>80 to 84</td>\n",
  519.        "      <td>White</td>\n",
  520.        "      <td>No</td>\n",
  521.        "      <td>NaN</td>\n",
  522.        "      <td>11.85147</td>\n",
  523.        "      <td>10</td>\n",
  524.        "      <td>0</td>\n",
  525.        "      <td>1.0</td>\n",
  526.        "    </tr>\n",
  527.        "    <tr>\n",
  528.        "      <th>3</th>\n",
  529.        "      <td>100009</td>\n",
  530.        "      <td>case</td>\n",
  531.        "      <td>81</td>\n",
  532.        "      <td>80 to 84</td>\n",
  533.        "      <td>White</td>\n",
  534.        "      <td>No</td>\n",
  535.        "      <td>NaN</td>\n",
  536.        "      <td>11.85147</td>\n",
  537.        "      <td>10</td>\n",
  538.        "      <td>0</td>\n",
  539.        "      <td>1.0</td>\n",
  540.        "    </tr>\n",
  541.        "    <tr>\n",
  542.        "      <th>4</th>\n",
  543.        "      <td>100009</td>\n",
  544.        "      <td>case</td>\n",
  545.        "      <td>81</td>\n",
  546.        "      <td>80 to 84</td>\n",
  547.        "      <td>White</td>\n",
  548.        "      <td>No</td>\n",
  549.        "      <td>NaN</td>\n",
  550.        "      <td>11.85147</td>\n",
  551.        "      <td>10</td>\n",
  552.        "      <td>0</td>\n",
  553.        "      <td>1.0</td>\n",
  554.        "    </tr>\n",
  555.        "  </tbody>\n",
  556.        "</table>\n",
  557.        "</div>"
  558.       ],
  559.       "text/plain": [
  560.        "   participant_id    pt  years  agegroup ethnicity  es   cs  distfromcentre  \\\n",
  561.        "0          100009  case     81  80 to 84     White  No  NaN        11.85147   \n",
  562.        "1          100009  case     81  80 to 84     White  No  NaN        11.85147   \n",
  563.        "2          100009  case     81  80 to 84     White  No  NaN        11.85147   \n",
  564.        "3          100009  case     81  80 to 84     White  No  NaN        11.85147   \n",
  565.        "4          100009  case     81  80 to 84     White  No  NaN        11.85147   \n",
  566.        "\n",
  567.        "   centre  risk  jobcat  \n",
  568.        "0      10     0     1.0  \n",
  569.        "1      10     0     1.0  \n",
  570.        "2      10     0     1.0  \n",
  571.        "3      10     0     1.0  \n",
  572.        "4      10     0     1.0  "
  573.       ]
  574.      },
  575.      "execution_count": 86,
  576.      "metadata": {},
  577.      "output_type": "execute_result"
  578.     }
  579.    ],
  580.    "source": [
  581.     "df.head()"
  582.    ]
  583.   },
  584.   {
  585.    "cell_type": "code",
  586.    "execution_count": 87,
  587.    "metadata": {},
  588.    "outputs": [],
  589.    "source": [
  590.     "# define a participant as exposed if their lowest jobcat is below 5\n",
  591.     "\n",
  592.     "ev_exposed = df.groupby('participant_id').jobcat.min() < 5\n",
  593.     "ev_exposed = ev_exposed.reset_index() \n",
  594.     "ev_exposed.columns = ['participant_id', 'exposed']"
  595.    ]
  596.   },
  597.   {
  598.    "cell_type": "code",
  599.    "execution_count": 88,
  600.    "metadata": {},
  601.    "outputs": [],
  602.    "source": [
  603.     "df = pd.merge(df, ev_exposed, on='participant_id')"
  604.    ]
  605.   },
  606.   {
  607.    "cell_type": "code",
  608.    "execution_count": 89,
  609.    "metadata": {},
  610.    "outputs": [
  611.     {
  612.      "data": {
  613.       "text/html": [
  614.        "<div>\n",
  615.        "<style scoped>\n",
  616.        "    .dataframe tbody tr th:only-of-type {\n",
  617.        "        vertical-align: middle;\n",
  618.        "    }\n",
  619.        "\n",
  620.        "    .dataframe tbody tr th {\n",
  621.        "        vertical-align: top;\n",
  622.        "    }\n",
  623.        "\n",
  624.        "    .dataframe thead th {\n",
  625.        "        text-align: right;\n",
  626.        "    }\n",
  627.        "</style>\n",
  628.        "<table border=\"1\" class=\"dataframe\">\n",
  629.        "  <thead>\n",
  630.        "    <tr style=\"text-align: right;\">\n",
  631.        "      <th></th>\n",
  632.        "      <th>participant_id</th>\n",
  633.        "      <th>pt</th>\n",
  634.        "      <th>years</th>\n",
  635.        "      <th>agegroup</th>\n",
  636.        "      <th>ethnicity</th>\n",
  637.        "      <th>es</th>\n",
  638.        "      <th>cs</th>\n",
  639.        "      <th>distfromcentre</th>\n",
  640.        "      <th>centre</th>\n",
  641.        "      <th>risk</th>\n",
  642.        "      <th>jobcat</th>\n",
  643.        "      <th>exposed</th>\n",
  644.        "    </tr>\n",
  645.        "  </thead>\n",
  646.        "  <tbody>\n",
  647.        "    <tr>\n",
  648.        "      <th>0</th>\n",
  649.        "      <td>100009</td>\n",
  650.        "      <td>case</td>\n",
  651.        "      <td>81</td>\n",
  652.        "      <td>80 to 84</td>\n",
  653.        "      <td>White</td>\n",
  654.        "      <td>No</td>\n",
  655.        "      <td>NaN</td>\n",
  656.        "      <td>11.85147</td>\n",
  657.        "      <td>10</td>\n",
  658.        "      <td>0</td>\n",
  659.        "      <td>1.0</td>\n",
  660.        "      <td>True</td>\n",
  661.        "    </tr>\n",
  662.        "    <tr>\n",
  663.        "      <th>1</th>\n",
  664.        "      <td>100009</td>\n",
  665.        "      <td>case</td>\n",
  666.        "      <td>81</td>\n",
  667.        "      <td>80 to 84</td>\n",
  668.        "      <td>White</td>\n",
  669.        "      <td>No</td>\n",
  670.        "      <td>NaN</td>\n",
  671.        "      <td>11.85147</td>\n",
  672.        "      <td>10</td>\n",
  673.        "      <td>0</td>\n",
  674.        "      <td>1.0</td>\n",
  675.        "      <td>True</td>\n",
  676.        "    </tr>\n",
  677.        "    <tr>\n",
  678.        "      <th>2</th>\n",
  679.        "      <td>100009</td>\n",
  680.        "      <td>case</td>\n",
  681.        "      <td>81</td>\n",
  682.        "      <td>80 to 84</td>\n",
  683.        "      <td>White</td>\n",
  684.        "      <td>No</td>\n",
  685.        "      <td>NaN</td>\n",
  686.        "      <td>11.85147</td>\n",
  687.        "      <td>10</td>\n",
  688.        "      <td>0</td>\n",
  689.        "      <td>1.0</td>\n",
  690.        "      <td>True</td>\n",
  691.        "    </tr>\n",
  692.        "    <tr>\n",
  693.        "      <th>3</th>\n",
  694.        "      <td>100009</td>\n",
  695.        "      <td>case</td>\n",
  696.        "      <td>81</td>\n",
  697.        "      <td>80 to 84</td>\n",
  698.        "      <td>White</td>\n",
  699.        "      <td>No</td>\n",
  700.        "      <td>NaN</td>\n",
  701.        "      <td>11.85147</td>\n",
  702.        "      <td>10</td>\n",
  703.        "      <td>0</td>\n",
  704.        "      <td>1.0</td>\n",
  705.        "      <td>True</td>\n",
  706.        "    </tr>\n",
  707.        "    <tr>\n",
  708.        "      <th>4</th>\n",
  709.        "      <td>100009</td>\n",
  710.        "      <td>case</td>\n",
  711.        "      <td>81</td>\n",
  712.        "      <td>80 to 84</td>\n",
  713.        "      <td>White</td>\n",
  714.        "      <td>No</td>\n",
  715.        "      <td>NaN</td>\n",
  716.        "      <td>11.85147</td>\n",
  717.        "      <td>10</td>\n",
  718.        "      <td>0</td>\n",
  719.        "      <td>1.0</td>\n",
  720.        "      <td>True</td>\n",
  721.        "    </tr>\n",
  722.        "  </tbody>\n",
  723.        "</table>\n",
  724.        "</div>"
  725.       ],
  726.       "text/plain": [
  727.        "   participant_id    pt  years  agegroup ethnicity  es   cs  distfromcentre  \\\n",
  728.        "0          100009  case     81  80 to 84     White  No  NaN        11.85147   \n",
  729.        "1          100009  case     81  80 to 84     White  No  NaN        11.85147   \n",
  730.        "2          100009  case     81  80 to 84     White  No  NaN        11.85147   \n",
  731.        "3          100009  case     81  80 to 84     White  No  NaN        11.85147   \n",
  732.        "4          100009  case     81  80 to 84     White  No  NaN        11.85147   \n",
  733.        "\n",
  734.        "   centre  risk  jobcat  exposed  \n",
  735.        "0      10     0     1.0     True  \n",
  736.        "1      10     0     1.0     True  \n",
  737.        "2      10     0     1.0     True  \n",
  738.        "3      10     0     1.0     True  \n",
  739.        "4      10     0     1.0     True  "
  740.       ]
  741.      },
  742.      "execution_count": 89,
  743.      "metadata": {},
  744.      "output_type": "execute_result"
  745.     }
  746.    ],
  747.    "source": [
  748.     "df.head()"
  749.    ]
  750.   },
  751.   {
  752.    "cell_type": "code",
  753.    "execution_count": 90,
  754.    "metadata": {},
  755.    "outputs": [],
  756.    "source": [
  757.     "df.cs = df.cs.fillna('No')"
  758.    ]
  759.   },
  760.   {
  761.    "cell_type": "code",
  762.    "execution_count": 91,
  763.    "metadata": {},
  764.    "outputs": [],
  765.    "source": [
  766.     "# pandas has a get_dummies function for this, which would be more elegant\n",
  767.     "df['pt'] = df['pt'].str.replace('case', '1')\n",
  768.     "df['pt'] = df['pt'].str.replace('control', '0')\n",
  769.     "df['pt'] = df['pt'].astype(int)\n",
  770.     "\n",
  771.     "df['es'] = df['es'].str.replace('Yes', '1')\n",
  772.     "df['es'] = df['es'].str.replace('No', '0')\n",
  773.     "df['es'] = df['es'].astype(int)\n",
  774.     "\n",
  775.     "df['exposed'] = df['exposed'].astype(str)\n",
  776.     "df['exposed'] = df['exposed'].str.replace('True', '1')\n",
  777.     "df['exposed'] = df['exposed'].str.replace('False', '0')\n",
  778.     "df['exposed'] = df['exposed'].astype(int)"
  779.    ]
  780.   },
  781.   {
  782.    "cell_type": "code",
  783.    "execution_count": 92,
  784.    "metadata": {},
  785.    "outputs": [
  786.     {
  787.      "data": {
  788.       "text/html": [
  789.        "<div>\n",
  790.        "<style scoped>\n",
  791.        "    .dataframe tbody tr th:only-of-type {\n",
  792.        "        vertical-align: middle;\n",
  793.        "    }\n",
  794.        "\n",
  795.        "    .dataframe tbody tr th {\n",
  796.        "        vertical-align: top;\n",
  797.        "    }\n",
  798.        "\n",
  799.        "    .dataframe thead th {\n",
  800.        "        text-align: right;\n",
  801.        "    }\n",
  802.        "</style>\n",
  803.        "<table border=\"1\" class=\"dataframe\">\n",
  804.        "  <thead>\n",
  805.        "    <tr style=\"text-align: right;\">\n",
  806.        "      <th></th>\n",
  807.        "      <th>participant_id</th>\n",
  808.        "      <th>pt</th>\n",
  809.        "      <th>years</th>\n",
  810.        "      <th>agegroup</th>\n",
  811.        "      <th>ethnicity</th>\n",
  812.        "      <th>es</th>\n",
  813.        "      <th>cs</th>\n",
  814.        "      <th>distfromcentre</th>\n",
  815.        "      <th>centre</th>\n",
  816.        "      <th>risk</th>\n",
  817.        "      <th>jobcat</th>\n",
  818.        "      <th>exposed</th>\n",
  819.        "    </tr>\n",
  820.        "  </thead>\n",
  821.        "  <tbody>\n",
  822.        "    <tr>\n",
  823.        "      <th>0</th>\n",
  824.        "      <td>100009</td>\n",
  825.        "      <td>1</td>\n",
  826.        "      <td>81</td>\n",
  827.        "      <td>80 to 84</td>\n",
  828.        "      <td>White</td>\n",
  829.        "      <td>0</td>\n",
  830.        "      <td>No</td>\n",
  831.        "      <td>11.85147</td>\n",
  832.        "      <td>10</td>\n",
  833.        "      <td>0</td>\n",
  834.        "      <td>1.0</td>\n",
  835.        "      <td>1</td>\n",
  836.        "    </tr>\n",
  837.        "    <tr>\n",
  838.        "      <th>1</th>\n",
  839.        "      <td>100009</td>\n",
  840.        "      <td>1</td>\n",
  841.        "      <td>81</td>\n",
  842.        "      <td>80 to 84</td>\n",
  843.        "      <td>White</td>\n",
  844.        "      <td>0</td>\n",
  845.        "      <td>No</td>\n",
  846.        "      <td>11.85147</td>\n",
  847.        "      <td>10</td>\n",
  848.        "      <td>0</td>\n",
  849.        "      <td>1.0</td>\n",
  850.        "      <td>1</td>\n",
  851.        "    </tr>\n",
  852.        "    <tr>\n",
  853.        "      <th>2</th>\n",
  854.        "      <td>100009</td>\n",
  855.        "      <td>1</td>\n",
  856.        "      <td>81</td>\n",
  857.        "      <td>80 to 84</td>\n",
  858.        "      <td>White</td>\n",
  859.        "      <td>0</td>\n",
  860.        "      <td>No</td>\n",
  861.        "      <td>11.85147</td>\n",
  862.        "      <td>10</td>\n",
  863.        "      <td>0</td>\n",
  864.        "      <td>1.0</td>\n",
  865.        "      <td>1</td>\n",
  866.        "    </tr>\n",
  867.        "    <tr>\n",
  868.        "      <th>3</th>\n",
  869.        "      <td>100009</td>\n",
  870.        "      <td>1</td>\n",
  871.        "      <td>81</td>\n",
  872.        "      <td>80 to 84</td>\n",
  873.        "      <td>White</td>\n",
  874.        "      <td>0</td>\n",
  875.        "      <td>No</td>\n",
  876.        "      <td>11.85147</td>\n",
  877.        "      <td>10</td>\n",
  878.        "      <td>0</td>\n",
  879.        "      <td>1.0</td>\n",
  880.        "      <td>1</td>\n",
  881.        "    </tr>\n",
  882.        "    <tr>\n",
  883.        "      <th>4</th>\n",
  884.        "      <td>100009</td>\n",
  885.        "      <td>1</td>\n",
  886.        "      <td>81</td>\n",
  887.        "      <td>80 to 84</td>\n",
  888.        "      <td>White</td>\n",
  889.        "      <td>0</td>\n",
  890.        "      <td>No</td>\n",
  891.        "      <td>11.85147</td>\n",
  892.        "      <td>10</td>\n",
  893.        "      <td>0</td>\n",
  894.        "      <td>1.0</td>\n",
  895.        "      <td>1</td>\n",
  896.        "    </tr>\n",
  897.        "  </tbody>\n",
  898.        "</table>\n",
  899.        "</div>"
  900.       ],
  901.       "text/plain": [
  902.        "   participant_id  pt  years  agegroup ethnicity  es  cs  distfromcentre  \\\n",
  903.        "0          100009   1     81  80 to 84     White   0  No        11.85147   \n",
  904.        "1          100009   1     81  80 to 84     White   0  No        11.85147   \n",
  905.        "2          100009   1     81  80 to 84     White   0  No        11.85147   \n",
  906.        "3          100009   1     81  80 to 84     White   0  No        11.85147   \n",
  907.        "4          100009   1     81  80 to 84     White   0  No        11.85147   \n",
  908.        "\n",
  909.        "   centre  risk  jobcat  exposed  \n",
  910.        "0      10     0     1.0        1  \n",
  911.        "1      10     0     1.0        1  \n",
  912.        "2      10     0     1.0        1  \n",
  913.        "3      10     0     1.0        1  \n",
  914.        "4      10     0     1.0        1  "
  915.       ]
  916.      },
  917.      "execution_count": 92,
  918.      "metadata": {},
  919.      "output_type": "execute_result"
  920.     }
  921.    ],
  922.    "source": [
  923.     "df.head()"
  924.    ]
  925.   },
  926.   {
  927.    "cell_type": "code",
  928.    "execution_count": 96,
  929.    "metadata": {},
  930.    "outputs": [],
  931.    "source": [
  932.     "df.columns = ['participant_id', 'case', 'age', 'agegroup', 'ethnicity', 'ever_smoked',  'current_smoker', 'distfromcentre', 'centre', 'exp_cum', 'jobcat', 'exp_bin']"
  933.    ]
  934.   },
  935.   {
  936.    "cell_type": "code",
  937.    "execution_count": 97,
  938.    "metadata": {},
  939.    "outputs": [],
  940.    "source": [
  941.     "df = df.drop_duplicates(subset='participant_id')"
  942.    ]
  943.   },
  944.   {
  945.    "cell_type": "code",
  946.    "execution_count": 98,
  947.    "metadata": {},
  948.    "outputs": [
  949.     {
  950.      "data": {
  951.       "text/html": [
  952.        "<div>\n",
  953.        "<style scoped>\n",
  954.        "    .dataframe tbody tr th:only-of-type {\n",
  955.        "        vertical-align: middle;\n",
  956.        "    }\n",
  957.        "\n",
  958.        "    .dataframe tbody tr th {\n",
  959.        "        vertical-align: top;\n",
  960.        "    }\n",
  961.        "\n",
  962.        "    .dataframe thead th {\n",
  963.        "        text-align: right;\n",
  964.        "    }\n",
  965.        "</style>\n",
  966.        "<table border=\"1\" class=\"dataframe\">\n",
  967.        "  <thead>\n",
  968.        "    <tr style=\"text-align: right;\">\n",
  969.        "      <th></th>\n",
  970.        "      <th>participant_id</th>\n",
  971.        "      <th>case</th>\n",
  972.        "      <th>age</th>\n",
  973.        "      <th>agegroup</th>\n",
  974.        "      <th>ethnicity</th>\n",
  975.        "      <th>ever_smoked</th>\n",
  976.        "      <th>current_smoker</th>\n",
  977.        "      <th>distfromcentre</th>\n",
  978.        "      <th>centre</th>\n",
  979.        "      <th>exp_cum</th>\n",
  980.        "      <th>jobcat</th>\n",
  981.        "      <th>exp_bin</th>\n",
  982.        "    </tr>\n",
  983.        "  </thead>\n",
  984.        "  <tbody>\n",
  985.        "    <tr>\n",
  986.        "      <th>0</th>\n",
  987.        "      <td>100009</td>\n",
  988.        "      <td>1</td>\n",
  989.        "      <td>81</td>\n",
  990.        "      <td>80 to 84</td>\n",
  991.        "      <td>White</td>\n",
  992.        "      <td>0</td>\n",
  993.        "      <td>No</td>\n",
  994.        "      <td>11.851470</td>\n",
  995.        "      <td>10</td>\n",
  996.        "      <td>0</td>\n",
  997.        "      <td>1.0</td>\n",
  998.        "      <td>1</td>\n",
  999.        "    </tr>\n",
  1000.        "    <tr>\n",
  1001.        "      <th>5</th>\n",
  1002.        "      <td>70003</td>\n",
  1003.        "      <td>1</td>\n",
  1004.        "      <td>55</td>\n",
  1005.        "      <td>55 to 59</td>\n",
  1006.        "      <td>White</td>\n",
  1007.        "      <td>1</td>\n",
  1008.        "      <td>Yes</td>\n",
  1009.        "      <td>30.403741</td>\n",
  1010.        "      <td>7</td>\n",
  1011.        "      <td>47</td>\n",
  1012.        "      <td>1.0</td>\n",
  1013.        "      <td>1</td>\n",
  1014.        "    </tr>\n",
  1015.        "    <tr>\n",
  1016.        "      <th>8</th>\n",
  1017.        "      <td>80001</td>\n",
  1018.        "      <td>1</td>\n",
  1019.        "      <td>87</td>\n",
  1020.        "      <td>85 to 90</td>\n",
  1021.        "      <td>White</td>\n",
  1022.        "      <td>1</td>\n",
  1023.        "      <td>No</td>\n",
  1024.        "      <td>5.549752</td>\n",
  1025.        "      <td>8</td>\n",
  1026.        "      <td>43</td>\n",
  1027.        "      <td>2.2</td>\n",
  1028.        "      <td>1</td>\n",
  1029.        "    </tr>\n",
  1030.        "    <tr>\n",
  1031.        "      <th>13</th>\n",
  1032.        "      <td>10002</td>\n",
  1033.        "      <td>1</td>\n",
  1034.        "      <td>75</td>\n",
  1035.        "      <td>75 to 79</td>\n",
  1036.        "      <td>White</td>\n",
  1037.        "      <td>1</td>\n",
  1038.        "      <td>No</td>\n",
  1039.        "      <td>6.779659</td>\n",
  1040.        "      <td>1</td>\n",
  1041.        "      <td>44</td>\n",
  1042.        "      <td>3.0</td>\n",
  1043.        "      <td>1</td>\n",
  1044.        "    </tr>\n",
  1045.        "    <tr>\n",
  1046.        "      <th>16</th>\n",
  1047.        "      <td>80010</td>\n",
  1048.        "      <td>1</td>\n",
  1049.        "      <td>77</td>\n",
  1050.        "      <td>75 to 79</td>\n",
  1051.        "      <td>White</td>\n",
  1052.        "      <td>1</td>\n",
  1053.        "      <td>No</td>\n",
  1054.        "      <td>33.762778</td>\n",
  1055.        "      <td>8</td>\n",
  1056.        "      <td>23</td>\n",
  1057.        "      <td>2.1</td>\n",
  1058.        "      <td>1</td>\n",
  1059.        "    </tr>\n",
  1060.        "  </tbody>\n",
  1061.        "</table>\n",
  1062.        "</div>"
  1063.       ],
  1064.       "text/plain": [
  1065.        "    participant_id  case  age  agegroup ethnicity  ever_smoked current_smoker  \\\n",
  1066.        "0           100009     1   81  80 to 84     White            0             No   \n",
  1067.        "5            70003     1   55  55 to 59     White            1            Yes   \n",
  1068.        "8            80001     1   87  85 to 90     White            1             No   \n",
  1069.        "13           10002     1   75  75 to 79     White            1             No   \n",
  1070.        "16           80010     1   77  75 to 79     White            1             No   \n",
  1071.        "\n",
  1072.        "    distfromcentre  centre  exp_cum  jobcat  exp_bin  \n",
  1073.        "0        11.851470      10        0     1.0        1  \n",
  1074.        "5        30.403741       7       47     1.0        1  \n",
  1075.        "8         5.549752       8       43     2.2        1  \n",
  1076.        "13        6.779659       1       44     3.0        1  \n",
  1077.        "16       33.762778       8       23     2.1        1  "
  1078.       ]
  1079.      },
  1080.      "execution_count": 98,
  1081.      "metadata": {},
  1082.      "output_type": "execute_result"
  1083.     }
  1084.    ],
  1085.    "source": [
  1086.     "df.head() # NOTE: the exp_cum column is currently buggy"
  1087.    ]
  1088.   },
  1089.   {
  1090.    "cell_type": "code",
  1091.    "execution_count": 47,
  1092.    "metadata": {},
  1093.    "outputs": [
  1094.     {
  1095.      "data": {
  1096.       "text/plain": [
  1097.        "<matplotlib.axes._subplots.AxesSubplot at 0x7f97790adc50>"
  1098.       ]
  1099.      },
  1100.      "execution_count": 47,
  1101.      "metadata": {},
  1102.      "output_type": "execute_result"
  1103.     },
  1104.     {
  1105.      "data": {
  1106.       "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYsAAAEGCAYAAACUzrmNAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4xLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvAOZPmwAAF45JREFUeJzt3XuUZWV55/FvUUUDDRW6hUPMgiZi0jzSoIIKeJnlZWTkYhaNTkaBYIIazSAkYzS4NJk1YZFxFkpQyQiKIoMwKkOMQzoEBK/xAjqdGG8tPEyHIDQolE4BRTdN091n/ti7oShO9XvqVO2uU3W+n7V6Ve193r3P8+7eVb/at/cMtdttJEnamd3muwBJUv8zLCRJRYaFJKnIsJAkFRkWkqSikfkuYKbGxiZ6un1r+fKljI9vmutyFgz7b/8Huf/gNmi1Rodms/zAHFmMjAzPdwnzyv7b/0HnNpidgQkLSVLvDAtJUpFhIUkqMiwkSUWGhSSpyLCQJBUZFpKkosYeyouIK4DfAh7IzCM6vD4EXAycBGwCzszM7zVVjySpd00+wX0l8FHgqmlePxFYWf87FvhY/XXOPbJpC5dd8V2+u+7nTay+UUPAdI+sj+w2BEPQ3tZmWxfrWjIyxG7tNpunaXzW6w7jsBX78Yk1P+a2nz7IdmDfpUt4zxlH8czle3dd8yObtnD1zXcw9uCjtJbtxZuOPxTa8D9uvJ3b7x5ny5bt7D4Cz/n1Z/CW1x4GbZ7S/jVHH8Sl161j46OPs/eeu/OO1x/O39/yU+6450FgiFixjDe/9jnss9cSfv7LjVx4zfd5eONjbN8OI8ND7LXHCLS3s/nx9hPLX3Hj7ay/e5z7xx+lDew+PMR7zjiK3/i1ZR3r3WevJV33d1dZKHUuBm7rpxtq8sOPIuJZwPXTHFlcBnw9Mz9XTyfwysz82c7W2ctwHx+77sesvf2BmS42kI5+zgFP21bLR/fgorNf1vU6pm7vo59zAEDH/4NOr+0+PMTj29rTTu9Y7qxTjuDdl3yb8YnHdlpPp+V3zL/s3Fd1rPesU562y8672dTZao0yNjbRVGkLwky2wULZJ2ZitsN9zOfYUAcC90ya3lDP22lYLF++dMaP7T+4ccuMixtUnbbVps2P02qN9ryOnW3/Tq9tnfKLfer0juVarVE2bX68WE+n5XfMb7VGO9Y7k/7uKrOtsx/7tKt1uw0Wyj6xK81nWHRKueJRQy8DgS3be7APH2ei07ZauufuM/qrdOo6drb9O702MuVIYOr0juXGxiZYusfuPPb4zo8sOi2/Y/7Y2ETHevvxr/DZ1OmRxcy2wULZJ2ZitmE3n2GxAVgxafog4L4m3uhNxx/K8PBufGexXbMYrvJ2RtcsaLN5a+fXz3rdYRx28H48unnLk9cs9l7CuacfOaOa33T8oQBPvWYBbN22/WnXLHa8Nrn9a449iEu/UF+z2Ku+ZvHtSdcsDl72xHLn/s6RXPjZKdcs9hyB7fU1i3r5f/j+zztes9hZvf1modS5GLitn24+r1m8FjiH6m6oY4G/ysxjSuvsdYjyQf/Lyv7b/0HuP7gN+vaaRUR8DnglsH9EbAD+HNgdIDM/DtxAFRTrqW6dfXNTtUiSZqexsMjM0wqvt4Gzm3p/SdLc8QluSVKRYSFJKjIsJElFhoUkqciwkCQVGRaSpCLDQpJUZFhIkooMC0lSkWEhSSoyLCRJRYaFJKnIsJAkFRkWkqQiw0KSVGRYSJKKDAtJUpFhIUkqMiwkSUWGhSSpyLCQJBUZFpKkIsNCklRkWEiSigwLSVKRYSFJKjIsJElFhoUkqciwkCQVGRaSpKKRJlceEScAFwPDwOWZecGU1w8GPg0sq9u8NzNvaLImSdLMNXZkERHDwCXAicAq4LSIWDWl2X8Grs3Mo4BTgUubqkeS1LsmT0MdA6zPzDszcwtwDbB6
Sps28Cv19/sC9zVYjySpR02ehjoQuGfS9Abg2CltzgNujog/BPYGjiutdPnypYyMDPdUUKs12tNyi4X9t/+Dzm3QuybDYqjDvPaU6dOAKzPzooh4CXB1RByRmdunW+n4+Kaeimm1Rhkbm+hp2cXA/tv/Qe4/uA1mG5RNnobaAKyYNH0QTz/N9FbgWoDMvBXYE9i/wZokST1oMizWAisj4pCIWEJ1AXvNlDZ3A68GiIjDqMJirMGaJEk9aCwsMnMrcA5wE3Ab1V1P6yLi/Ig4uW72buBtEfED4HPAmZk59VSVJGmeDbXbC+t389jYRE8Fe77S/tv/we0/uA1ardFO15G75hPckqQiw0KSVGRYSJKKDAtJUpFhIUkqMiwkSUWGhSSpyLCQJBUZFpKkIsNCklRkWEiSigwLSVKRYSFJKjIsJElFhoUkqciwkCQVGRaSpCLDQpJUZFhIkooMC0lSkWEhSSoyLCRJRYaFJKnIsJAkFRkWkqQiw0KSVGRYSJKKDAtJUpFhIUkqMiwkSUUjTa48Ik4ALgaGgcsz84IObd4AnAe0gR9k5ulN1iRJmrnGjiwiYhi4BDgRWAWcFhGrprRZCbwPeFlmHg68s6l6JEm9a/I01DHA+sy8MzO3ANcAq6e0eRtwSWaOA2TmAw3WI0nqUZOnoQ4E7pk0vQE4dkqbQwEi4ttUp6rOy8wv7myly5cvZWRkuKeCWq3RnpZbLOy//R90boPeNRkWQx3mtTu8/0rglcBBwDcj4ojMfHC6lY6Pb+qpmFZrlLGxiZ6WXQzsv/0f5P6D22C2QdnkaagNwIpJ0wcB93Vo87eZ+Xhm/iuQVOEhSeojTYbFWmBlRBwSEUuAU4E1U9pcB7wKICL2pzotdWeDNUmSetBYWGTmVuAc4CbgNuDazFwXEedHxMl1s5uAX0bET4CvAedm5i+bqkmS1JuhdnvqZYT+NjY20VPBnq+0//Z/cPsPboNWa7TTdeSu+QS3JKnIsJAkFRkWkqQiw0KSVNTVQ3kRcQDwIeDgzHx5RDwPeGlmfrzR6iRJfaHbI4tPAt8CltXTtwPvaKQiSVLf6TYsDqyPIrYB1AMDbm+sKklSX+k2LLZOnoiIZXQe+0mStAh1GxZ/ExGXAaMRcSZwM3BFY1VJkvpKV2GRmRcC3wD+CTgJ+KvMvLjJwiRJ/aPrIcoz8zPAZxqsRZLUp7q9dfYi4HxgI9WAfy8A/iAz/2eDtUmS+kS31yyOy8yHgOOBe6mGEv+TxqqSJPWVmT7B/XLgC5l5L0//1DtJ0iLVbVg8EBGfpPoAoy9FxAjNfiSrJKmPdBsWpwPrgDdm5jjVR6Re1FhVkqS+0tXRQWaOAR+ZNH0XcGUzJUmS+k23d0OtAD4IPB/Yc8f8zHx2Q3VJkvpIt6ehrgC+TDXEx+9QDSr46aaKkiT1l27DYv/M/BSwNTNvBc4EXtFYVZKkvtJtWGypvz4SEQcDuwO/3kxJkqR+0+3tr9+IiGcAl1KND/UY8NeNVSVJ6ivdHlncBGzLzKuBF1I9b/F3jVUlSeor3YbFhcDDAJl5N3AL8JdNFSVJ6i/dhsVQZj4xvEdmbgeGmylJktRvug2LiYg4dsdE/f3GZkqSJPWbbi9wvwe4LiLW1dOrgNc3U5Ikqd90O9zHrRGxCngJ1YN5t9RjREmSBsBMPilvHLihwVokSX2q0WHGI+IE4GKqi+GXZ+YF07T7barnNo7OzH9ssiZJ0szN9MOPuhYRw8AlwIlU1zhOq09lTW03CvwR8N2mapEkzU5jYQEcA6zPzDszcwtwDbC6Q7u/oBrRdnODtUiSZqHJ01AHAvdMmt4AHDu5QUQcBazIzOsjoqvP9F6+fCkjI7094tFqjfa03GJh/+3/oHMb9K7JsBjqMO+JB/siYjfgw1Qj2HZtfHxTT8W0WqOMjU30tOxiYP/t/yD3H9wGsw3KJk9DbQBWTJo+CLhv0vQocATw9Yi4C3gxsCYiXtRgTZKkHjR5ZLEW
WBkRhwD3Ug0+ePqOFzPzIWD/HdMR8XXgT7wbSpL6T2NHFpm5FTiHasTa24BrM3NdRJwfESc39b6SpLk31G63y636yNjYRE8Fe77S/tv/we0/uA1ardFO15G71uQ1C0nSImFYSJKKDAtJUpFhIUkqMiwkSUWGhSSpyLCQJBUZFpKkIsNCklRkWEiSigwLSVKRYSFJKjIsJElFhoUkqciwkCQVGRaSpCLDQpJUZFhIkooMC0lSkWEhSSoyLCRJRYaFJKnIsJAkFRkWkqQiw0KSVGRYSJKKDAtJUpFhIUkqMiwkSUWGhSSpaKTJlUfECcDFwDBweWZeMOX1dwG/D2wFxoC3ZOZPm6xJkjRzjR1ZRMQwcAlwIrAKOC0iVk1p9s/AizLzecDngQ82VY8kqXdNHlkcA6zPzDsBIuIaYDXwkx0NMvNrk9p/BzijwXokST1qMiwOBO6ZNL0BOHYn7d8K3Fha6fLlSxkZGe6poFZrtKflFgv7b/8Hndugd02GxVCHee1ODSPiDOBFwCtKKx0f39RTMa3WKGNjEz0tuxjYf/s/yP0Ht8Fsg7LJsNgArJg0fRBw39RGEXEc8GfAKzLzsQbrkST1qMmwWAusjIhDgHuBU4HTJzeIiKOAy4ATMvOBBmuRJM1CY3dDZeZW4BzgJuA24NrMXBcR50fEyXWzC4F9gL+OiO9HxJqm6pEk9W6o3e54GaFvjY1N9FSw5yvtv/0f3P6D26DVGu10HblrPsEtSSoyLCRJRYaFJKnIsJAkFRkWkqQiw0KSVGRYSJKKDAtJUpFhIUkqMiwkSUWGhSSpyLCQJBUZFpKkIsNCklRkWEiSigwLSVKRYSFJKjIsJElFhoUkqciwkCQVGRaSpCLDQpJUZFhIkooMC0lSkWEhSSoyLCRJRYaFJKnIsJAkFRkWkqQiw0KSVDTS5Moj4gTgYmAYuDwzL5jy+h7AVcALgV8Cb8zMu5qsSZI0c42FRUQMA5cA/w7YAKyNiDWZ+ZNJzd4KjGfmb0bEqcAHgDfOdS0/Wj/Ghy/46lyv9in2GAEYYtv2Ntu2Q7uev9sQ7LN0d/ZduoRn7rc3bzr+UGjD1TffwdiDj9JathdvOv5Q9tlryYze75FNW7j65jv4+S838sjmrYwuHeFXl+/dcV2PbNrCFVetZcP9E0+831zUIMGT+6L7UnP6YRs3eWRxDLA+M+8EiIhrgNXA5LBYDZxXf/954KMRMZSZbebQRz7/o7lcXUePbYUnI+JJ29vw8MbHeXjj49wztvGJ+WtvfwCAu34+AcBZpxwxo/e7+uY7nlgHwPjEY9x9/8aO65rcdsf7zUUNEnTev9yX5lY/bOMmw+JA4J5J0xuAY6drk5lbI+IhYD/gF9OtdPnypYyMDM+okDlNnll6cOOWjvNardFZr2e6dU1tO1c1LDSLvX8lTfW/0/7Vr9u6X+sq6Ydt3GRYDHWYN/X3djdtnmJ8fFNPhfRLYCzb++mHjsv2XsLY2ESH1jNbz3Trmtp2rmpYSFqt0UXdv5Im+99p/+rHbb2Q94G52MazDZcmw2IDsGLS9EHAfdO02RARI8C+wP+b60L++NTn8qFrmj0VtccIMDTEtm1dXLOoTT7/OFM7lul0zaJT2z32GHnqNYs5qEGCJ/dF96Xm9MM2Hmq3m/mbu/7lfwfwauBeYC1wemaum9TmbOC5mfkf6wvcr8/MN+xsvWNjEz0VvJD/qpgL9t/+D3L/wW3Qao12OpPTtcaes8jMrcA5wE3AbcC1mbkuIs6PiJPrZp8C9ouI9cC7gPc2VY8kqXeNHVk0xSOL3th/+z/I/Qe3Qd8eWUiSFg/DQpJUZFhIkooMC0lSkWEhSSoyLCRJRQvu1llJ0q7nkYUkqciwkCQVGRaSpCLDQpJUZFhIkooMC0lSkWEhSSpq8pPy+kZEnABcDAwDl2fmBfNcUuMi4i5gAtgGbM3MF0XEM4D/BTwLuAt4Q2aOz1OJcyoirgB+C3ggM4+o53Xsb0QM
Ue0PJwGbgDMz83vzUfdcmab/5wFvA8bqZn+amTfUr70PeCvV/vFHmXnTLi96DkXECuAq4JnAduATmXnxoOwDO+n/eczRPrDojywiYhi4BDgRWAWcFhGr5reqXeZVmXlkZr6onn4v8JXMXAl8hcX1YVNXAidMmTddf08EVtb/3g58bBfV2KQreXr/AT5c7wNHTvolsQo4FTi8XubS+udkIdsKvDszDwNeDJxd93NQ9oHp+g9ztA8s+rAAjgHWZ+admbkFuAZYPc81zZfVwKfr7z8NnDKPtcypzPwGT//89un6uxq4KjPbmfkdYFlE/NquqbQZ0/R/OquBazLzscz8V2A91c/JgpWZP9txZJCZE1SfznkgA7IP7KT/05nxPjAIYXEgcM+k6Q3sfCMuFm3g5oj4p4h4ez3vVzPzZ1DtXMAB81bdrjFdfwdpnzgnIn4YEVdExPJ63qLuf0Q8CzgK+C4DuA9M6T/M0T4wCGHR6aMEB2FArJdl5guoDrfPjoiXz3dBfWRQ9omPAb8BHAn8DLionr9o+x8R+wB/A7wzMx/eSdNFuQ069H/O9oFBCIsNwIpJ0wcB981TLbtMZt5Xf30A+N9Uh5j37zjUrr8+MH8V7hLT9Xcg9onMvD8zt2XmduCTPHmaYVH2PyJ2p/pF+ZnM/EI9e2D2gU79n8t9YBDCYi2wMiIOiYglVBd11sxzTY2KiL0jYnTH98BrgB9T9fv36ma/B/zt/FS4y0zX3zXA70bEUES8GHhox6mKxWTKOfjXUe0DUPX/1IjYIyIOobrI+392dX1zqb676VPAbZn5oUkvDcQ+MF3/53IfGIghyiPiJOAjVLfOXpGZ75/nkhoVEc+mOpqA6vboz2bm+yNiP+Ba4GDgbuA/ZGa3F0X7WkR8DnglsD9wP/DnwHV06G/9g/VRqrtANgFvzsx/nI+658o0/X8l1emHNtVto3+w4xdiRPwZ8Baqu2jemZk37vKi51BE/Bvgm8CPqG4dBfhTqvP2i34f2En/T2OO9oGBCAtJ0uwMwmkoSdIsGRaSpCLDQpJUZFhIkooMC0lS0UCMOqvFKyLawCjwLeAlmfnoNO2WAW/PzA9OmreS6rZKgL/MzM80Xe9ciIhTgPsyc0E/G6GFxSMLLQr1iJodg6K2DHjPlHmvB27JzKOmBkVE9PMfUqewk0HfFsEIsupDPmehBSUiXg/8N6oRVm8A/oLqyGKi/rqJ6mGrfws8BjySmS+LiL8Hjqd6gnUT1bD1F1H9wXQf8O+pnoC9BTgW2JyZr42I3wXOpXqo6V+oHmp6ICLOBE4HHgSeB9wL/CFwIdXTsGuBMzKzHRH7Ah8GjqZ6YOqbmXlOPaLA+4FXAEuoHqg6KzMfiYgrgc3AoVTDMtxK9QTya4DP1X34BfAhqofNPkL1UNbRwH8FvlG/9jxgT+BrwLsyc9us/gM0sDyy0IIREQdQjW+zOjNfCmzp0Oz5wHHAqsx8PtUHAgGcDTxYH4G8tD6S+DjVMNVHZua/1O2OAI6vg+II4ALgNZn5PKqg+e+T3utoql/AzwEeBT5LFSCrgOcCr67bfQTYCDy/rum8ev57qIaZOCYzj6QKrfdNWv8RVB/OczjwQuC4+gNq1gAX1HVfVbd9LtWT+i/OzOupguIfMvMYqid4D6B6WlfqST8faktTvRj4XmZmPf0J4ANT2txJNazLpyLiq8D1M3yPz2bm1vr7VwE3TBoz6DLgB5PafjszN9Tf/zNwV2Y+BBARPwB+E/gyVWC9sB7Mjcz8Rb3MycCvRMRv19N7TFn/dZm5uV7f96hGD/3SNHX/38y8ddL0ycAxEfHuenop1eBxUk8MCy0knYZVforMfCgiDqcaF+nVwAci4gUzeI9Hprzf1PO0k6c3T/p+W4fp0s/XEPCOzPzqNK/PZH2PTJkeAk7JzDsLNUhd8TSUFpJbgaPqu5gAfn9qg4hoAXtl5hepPkLzIeDZwMPA0hleuP4KcFJEPLOefhvVkcJM
XQ+cWw9eR0TsX89fA7wrIvaq549GxGFdrO9hYN9CmzXAe3dc7I6I/evRRaWeGBZaMOrP5ng78HcRcQvVaJlTrQC+XJ8G+iFwI/CdenTdzwA/qpft5v3WUV1D+FJE/JDqesh/6qH0P6a6+P7juq7/Us+/gOq009p6/d8CugmLq4HTI+L79QX4Tt5JdTTyg4j4EfBFFsknwWl+eDeUJKnIIwtJUpFhIUkqMiwkSUWGhSSpyLCQJBUZFpKkIsNCklT0/wGHFq8tBiN6sgAAAABJRU5ErkJggg==\n",
  1107.       "text/plain": [
  1108.        "<matplotlib.figure.Figure at 0x7f977909c630>"
  1109.       ]
  1110.      },
  1111.      "metadata": {},
  1112.      "output_type": "display_data"
  1113.     }
  1114.    ],
  1115.    "source": [
  1116.     "df.plot(x='distfromcentre', y='case', kind='scatter') # being a case is associated with being further from the centre"
  1117.    ]
  1118.   },
  1119.   {
  1120.    "cell_type": "code",
  1121.    "execution_count": 44,
  1122.    "metadata": {},
  1123.    "outputs": [
  1124.     {
  1125.      "data": {
  1126.       "text/plain": [
  1127.        "<matplotlib.axes._subplots.AxesSubplot at 0x7f97791cf080>"
  1128.       ]
  1129.      },
  1130.      "execution_count": 44,
  1131.      "metadata": {},
  1132.      "output_type": "execute_result"
  1133.     },
  1134.     {
  1135.      "data": {
  1136.       "image/png": "iVBORw0KGgoAAAANSUhEUgAAAUUAAAFYCAYAAADEPO4YAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4xLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvAOZPmwAAFxtJREFUeJzt3XmYZ1V95/F30y07htY0IMiihHxH1IC4YQQlzGjEUWGCERERQguiiEMgcUFFnAFtYqIwKEaFDktYFXBQMagwAya4YIgaAb8ICDY2QgNtaGgabOj8ce4PvhRV1VVNVf1qeb+epx+q7u8u59xb9alz7z3nMGvVqlVIkpq1+l0ASZpMDEVJKgxFSSoMRUkqDEVJKgxFSSoMxRkgIk6PiOP6XY5+G+48RMSBEfHPE10mTT5z+l2AmSQibgU2BR4BfgdcDRyamYv6WKwniIhVwHaZeVO/yyL1gy3FiffGzNwQeBZwJ3Byn8szbiJiVkT4M6YpxZZin2Tmioj4CnBib1lE/B4tJPcAlgNfAj6RmY9GxOeBeZn55m7dE4CXAP8NeDXwj8ApwJHA/cCHM/PswY4dEQcDHwCeAfwzrbW6OCKu6lb5SddinJ+Z5w/YdjbwN8ABwDLg77oyPy0zV0bE/wf+BdgN2Al4YUQsB/4e2AW4FzghM7/U7e904PbM/Ej3/W7AP2bms7vvbwW+AOxP+0PyVeDdmbmi+/wNwHHANsD1XV1+2n32IuA0YDvgUmB1w7dmRcTJwDuAO4DDMvPyiPhz4IOZ+eJyHo4Cds3MvQY5v8/ozsufAusBV2bmXhExFzgLeDntd+9fuvLe3m13IHAMMA+4G/hI7xpGxEHAXwObAT8EDsnM21ZTH60B/4r3SUSsD+wDfL8sPhn4PeC5tKB7B/AX3WdHAX/UPfvaFZgPHJCZvV/0zYDfB7agBdYXIyIGOe7uwCeBt9BC5jbgPIDMfFW32g6ZueHAQOwcTAvtHWmh96RQoAXYIcBG3f7PBW4HNgfeDHwiIv7rkCfnyfajBcy2wB8CvQDdCVgIvAt4Ji08L4mIdSJibVqAnkUL/y8De6/mOC8HbqGdx48BF3UBdwnwnIh4Xln37d2+B3MWsD7wfGAT4DPd8rWAfwC2BrYCHgQ+29VlA+D/AHtk5kbAHwM/7j7bCzga+DNaYH6Xdk41DgzFiffViPgtcB/wGuBT8FgLbB/gQ5m5LDNvpbU29gfIzOW0X8RP01qFh/daGMVHM/OhzLwS+AYt+AbaD1iYmddm5kPAh4BXRMQ2Iyz/W4CTMvP2zFwKLBhkndMz87rMXEkL612AD2Tmisz8MXBqr14j9NnMXJSZ9wLHA/t2yw8GvpCZP8jMRzLzDOAhYOfu39OAEzPzd5n5FeCa1RznrrL++UAC/707T+fTzj8R8Xxay/TrA3cQEc+i/dE4NDOXdvu6EiAz78nMCzNzeWYu6+ry6rL5o8ALImK9zLwjM6/rlr8L+GRm3tCd008AO0bE1iM8fxoFQ3Hi7ZWZGwPrAO8FroyIXitvbVrLquc2WssPgMz8Ia0lMwu4YMB+l2bmAwO23XyQ429ej5GZ9wP31OOsxuZAfTE02Euiumxz4N4uBGrZRnq8gfur9doaOCoiftv7B2zZfb458OvSku5tO5zB1u8d6wzgbRExixboF3RhOdCWtPouHfhBRKwfEV+IiNsi4j7gKmDjiJjdXbt9gEOBOyLiGxHxX0o9Typ1vJf2MzCac6gRMhT7pGvZXER7E70L7RnS72i/AD1bAb/ufRMRh9HCdDHw/gG7nNvdgtVtFw9y6MX1GN02z6zHWY07gGeX77ccZJ0aLIuBZ0TERgPK1jveA7RbzZ7NBtlfPUat1yLg+MzcuPxbPzPP7cq5RRdiddvhDLb+YoDM/D7wMLAr8DaGvnVeRKvvxoN8dhQQwMsz8+lA73HFrO4Yl2Xm
a2iPNX5Oe6bc2+e7BtRzvcy8ejX10RrwRUufdL98bwLmAjdk5iMRcQFwfES8g/Yc7Ejgb7v1/5D2QmE32kuYH0bEN7vb0Z6PR8TRtGdjb6A9FxvoHOC8iDgHuIF2K/aD7nYd2hvx5wJDdcm5APifEfENWqB9YLh6ZuaiiLga+GRE/BXtmeB8ultR2nOzo7r+g2sDRwyym8Mi4utdvY+m3cpCC42LI+I7tJcP69POz1XA94CVwPsi4nO0c/0y4P8NU9xNuvVPoT0rfR7tBU3PmbRngCszc9A+jZl5R0R8Ezil+yN2P/CKzLyK9oz1QeC33bPKx65PRGxKu26Xd+vcT/uDCe0l1f+OiB9n5nXdC7nXZuaXh6mL1pAtxYn3tYi4n/ZM8Xjay5Les6PDaUFzC+2t8DnAwoiYQ3uOeEJm/iQzf0ELh7MiYp1u298AS2ktm7Npz7R+PvDgmXk58FHgQlpralvgrWWVY4Ezulu1wZ5Jfgn4FvBT4N9oobGSx3+BB7Mv7RncYuBi4GOZ+e3us7OAnwC3dvsd7OXOOd1nt3T/juvq8iPac8XPdnW/CTiw++xh2ouJA7vP9gEuGqaMAD+gvam+m3Zt3pyZ95TPzwJewNCtxJ79aa3+n9OeU/aC/kTa2+i7aS/Y/qlssxatJbmYdnv8auA9XV0uBk6g/TG7D/gZ7bmlxsEsJ5md+gZ2Y5ngY+8B/H1mjstD/65Lzjsz8zvjsf/RiIj1aCG3U/eHSdOQt88alS4Y/oTWctuUdgt4cV8LNXHeDVxjIE5vhqJGaxbwcdpt7oO0rj/H9LVEE6Brsc5i8H6Zmka8fZ6heiNJgMuAUzPzSR29R7CPTWmdol8EfDEzjxrTQkp9YEtxhsvM79K6iQwrIo4F/iAz314WH0J7afD0Af37phQnwVDl22c9FVsD1w8ViN1b8ylvutRDI+Pt8wwxxOQINwHf4YkTMHwAeB/wdFr3kPfQhstdQnum9hBwM607zn7dfh6mPWvbhdZlZQWtX+CRtO4rJ/D4kMMLaEP+Huq9NaeN+f0rWreed3f7O5E2yudvM/MTXdlm0/pFzqf1KbyRNkJoUTf642TgxcAS2pDHC7rtTqd1ddqG1mH6euBtmXlzNwnGrrQ+kKu6fd/Zletk4C+Bb2fm/sNNPqHpw5biDDDSyRG6CSTeC7y0m5TgT4FbM/OfaJ28z+8mitghMw+k9Yf8m25Zr8vMnsBXgI27zz9MG4e8I7ADrQP1R8phNwPWpQ1ZO4bWD/LttHDbFTgmIp7brXskrc/j62mhfRCwvBuV821af8ZNunVO6cYo9+xLe0E0l/bH4HgYdhKMzbpztTVwyHCTTwxx2jVFeVswM9TJEVYBX4mIIwdZ7xHaMMLtI2JJGeUyGt/LzK92Xz8YEfvRJq+4CyAiPk4LlI926/yONlTvkYg4D/gibcKJZcB1EXEd8Ee0TtvvBN6fmdlt+5Nun/vQwvsfuuXXRsSFtBl5eh3jL+rGjhMRZ9Mm1hjOo7RO5g912zw2+UT3+Rnd6KGdgStHc4I0uRmKM8OIJkfIzJsi4gjaqJbnR8RlwJGZOdgY6qEMnCDiCRNQ8OSJKu7JzN5omAe7/95ZPn8Q2LD7ekvarftAWwMv7yZL6JnDE0ee/KZ8vbzscyhLspuzsRzjgIg4vCxbm8En3dAUZijODI9NjlCCcSsGCZjMPAc4JyKeTmvRnUAbtjbSh88D1+tNQNFrsQ01UcVILKINS/zZIMuv7CZTGCsD69GbfOL4MTyGJiFDcWYY0eQI3TPFLWgzQq+gtdJ6z53vBF4TEWtl5qOjOPa5wEci4hpa0BxDe4mxJk6lTYxwPe254Atps+18HVgQEfvTTZhLe4Z5f2beMIL9rm4SDBhm8okB06JpivNFywwwiskR1qFNGns37XZzE9rEE9BezgDcExHXjuLwxwE/ok0g8e/Atd2yNfFp2tvrb9Em1DgNWK8LpdfSJrZY3JX9hK4+I3Esw0+C
MezkE5pe7JIjSYUtRUkqDEVJKgxFSSoMRUkqDEVJKsaln+KSJctG9Up77tz1Wbp0+XgUpe+s29Rk3aamuXPXZ86c2bNWv+bQJkVLcc6c2f0uwrixblOTdZuaxqJukyIUJWmyMBQlqTAUJakwFCWpMBQlqTAUJakwFCWpMBQlqTAUJakwFCWpMBQlqTAUJakwFCWpMBQlqTAUJakwFCWpMBQlqTAUJakwFCWpMBQlqTAUJakwFCWpMBQlqTAUJakwFCWpMBQlqTAUJakwFCWpMBQlqTAUJakwFCWpMBQlqTAUJakwFCWpMBQlqTAUJakwFCWpMBQlqTAUJakwFCWpMBQlqTAUJakwFCWpMBQlqTAUJakwFCWpMBQlqTAUJakwFCWpMBQlqTAUJakwFCWpMBQlqTAUJakwFCWpmNPvAozE4SdexQMrVo5qm/m/uoTTtnrTmBx/g3XncPIRrxqTfUma3KZEKD6wYiULP7j7qLa58Z1njnqboRy04Iox2Y+kyc/bZ0kqDEVJKgxFSSomTSjuvfcb+12EacdzKo3eal+0RMRC4A3AXZn5gvEqyM03/2K8dj1jeU6l0RtJS/F04HXjXA5JmhRWG4qZeRVw7wSURZL6btI8U5SkyWBcOm/Pnbs+c+bMHvV2w3WSnjdvo1Ht68Y12GY4U7UD91ieg37sv5+s28w0LqG4dOnyUa3fu0BDjUA5aMEVLFmybNTlWJNthrKmo2PmzdtoTMsxGjueN7bnYKB+1m28WbepaSzC3ttnSSpWG4oRcS7wvfZl3B4R88e/WJLUH6u9fc7MfSeiIJI0GUya2+dtt92u30WYdjyn0uhNmlC88MKv9bsI047nVBq9SROKkjQZGIqSVBiKklRMif8dAYx+RMn8tTdmwRiNQtlg3SlzmiQ9RVPit33NRpPszivHvCSSpjtvnyWpMBQlqTAUJakwFCWpMBQlqTAUJakwFCWpMBQlqTAUJakwFCWpMBQlqTAUJakwFCWpMBQlqTAUJakwFCWpMBQlqTAUJakwFCWpMBQlqTAUJakwFCWpMBQlqTAUJakwFCWpMBQlqTAUJakwFCWpMBQlqTAUJakwFCWpMBQlqTAUJakwFCWpMBQlqTAUJakwFCWpMBQlqTAUJakwFCWpMBQlqTAUJakwFCWpMBQlqTAUJakwFCWpMBQlqTAUJakwFCWpMBQlqTAUJakwFCWpMBQlqTAUJakwFCWpMBQlqTAUJakwFCWpMBQlqTAUJakwFCWpMBQlqTAUJakwFCWpMBQlqTAUJakwFCWpMBQlqTAUJakwFCWpMBQlqTAUJakwFCWpMBQlqTAUJakwFCWpMBQlqTAUJakwFCWpMBQlqTAUJakwFCWpMBQlqTAUJakwFCWpMBQlqTAUJakwFCWpMBQlqTAUJakwFCWpMBQlqTAUJakwFCWpMBQlqTAUJakwFCWpMBQlqTAUJakwFCWpMBQlqTAUJakwFCWpMBQlqTAUJakwFCWpMBQlqTAUJakwFCWpMBQlqTAUJakwFCWpMBQlqZjT7wIM5vATr+KBFSsBmP+rSzhtqzcNut4G687h5CNeNZFFkzTNTcpQfGDFShZ+cHcAbnznmY99PdBBC66YyGJJmgG8fZakwlCUpMJQlKRi0oTi3nu/cVodR9LUNKIXLRHxOuAkYDZwamYuGOuC3HzzL8Z6l309jqSpabUtxYiYDXwO2APYHtg3IrYf74JJUj+M5Pb5ZcBNmXlLZj4MnAfsOb7FkqT+GEkobgEsKt/f3i2TpGlnJM8UZw2ybNVwG8yduz5z5swedWFqZ+x58zYC4Mby9eq2Ganh9jceJvp4E8m6TU3TuW5P1UhC8XZgy/L9s4HFw22wdOnyURWid4F6I1cOWnAFS5Yse+zz+vVAQ412GcqO5w2/v7E2b95GE3q8iWTdpqbpXrenaiSheA2wXUQ8B/g18FbgbU/5yJI0Ca32mWJmrgTeC1wG3ABckJnXjXfBJKkfRtRP
MTMvBS4d57JIUt9NmhEt22673bQ6jqSpadKE4oUXfm1aHUfS1DRpQlGSJgNDUZIKQ1GSikn5vyOAx0eqzF97YxYMMWplg3UnbfElTVGTMlWeOEpld17Zt5JImmm8fZakwlCUpMJQlKTCUJSkwlCUpMJQlKTCUJSkwlCUpMJQlKTCUJSkwlCUpMJQlKTCUJSkwlCUpMJQlKTCUJSkwlCUpMJQlKTCUJSkwlCUpMJQlKTCUJSkwlCUpMJQlKTCUJSkwlCUpMJQlKTCUJSkwlCUpMJQlKTCUJSkwlCUpMJQlKTCUJSkwlCUpMJQlKTCUJSkwlCUpMJQlKTCUJSkwlCUpMJQlKTCUJSkwlCUpMJQlKTCUJSkwlCUpMJQlKTCUJSkwlCUpMJQlKTCUJSkwlCUpMJQlKTCUJSkwlCUpGLWqlWr+l0GSZo0bClKUmEoSlJhKEpSYShKUmEoSlJhKEpSMaefB4+I1wEnAbOBUzNzQT/L81RFxK3AMuARYGVmviQingGcD2wD3Aq8JTOX9qmIoxIRC4E3AHdl5gu6ZYPWJyJm0a7l64HlwIGZeW0/yj0SQ9TtWOBgYEm32tGZeWn32YeA+bRr+77MvGzCCz1CEbElcCawGfAo8MXMPGk6XLth6nYsY3Tt+tZSjIjZwOeAPYDtgX0jYvt+lWcM/Ulm7piZL+m+/yBweWZuB1zefT9VnA68bsCyoeqzB7Bd9+8Q4PMTVMY1dTpPrhvAZ7rrt2P5pdoeeCvw/G6bU7qf38lqJXBUZj4P2Bk4rKvDdLh2Q9UNxuja9fP2+WXATZl5S2Y+DJwH7NnH8oyXPYEzuq/PAPbqY1lGJTOvAu4dsHio+uwJnJmZqzLz+8DGEfGsiSnp6A1Rt6HsCZyXmQ9l5i+Bm2g/v5NSZt7Ra+ll5jLgBmALpsG1G6ZuQxn1tetnKG4BLCrf387wlZsKVgHfioh/jYhDumWbZuYd0C4osEnfSjc2hqrPdLme742In0bEwoiY2y2bsnWLiG2AFwE/YJpduwF1gzG6dv0MxVmDLJvqYw5fmZk70W5HDouIV/W7QBNoOlzPzwPbAjsCdwB/1y2fknWLiA2BC4EjMvO+YVadcvUbpG5jdu36GYq3A1uW758NLO5TWcZEZi7u/nsXcDGtmX5n71ak++9d/SvhmBiqPlP+embmnZn5SGY+CnyJx2+zplzdIuJptNA4OzMv6hZPi2s3WN3G8tr1MxSvAbaLiOdExNq0h6GX9LE8T0lEbBARG/W+Bl4L/IxWpwO61Q4A/m9/SjhmhqrPJcA7ImJWROwM/EfvVm2qGPAc7X/Qrh+0ur01ItaJiOfQXkj8cKLLN1Ld2+TTgBsy89Ployl/7Yaq21heu77OkhMRrwdOpHXJWZiZx/etME9RRDyX1jqE1tXpnMw8PiKeCVwAbAX8CvjzzBzpA/6+iohzgd2A3wfuBD4GfJVB6tP9sH6W9oZvOfAXmfmjfpR7JIao2260269VtC4r7+qFQ0R8GDiI9vbziMz85oQXeoQiYhfgu8C/07qtABxNe/Y2pa/dMHXblzG6dk4dJkmFI1okqTAUJakwFCWpMBQlqTAUJakwFCWpMBQlqejrfIqaGSLiFcCngI26RX9NG/HzamBt4G7goMy8LSI2Ac4BNu3W/U5m/mW3n/cDb6b93P4aODgzfzNhFdGMYEtR46qb2PRi4P2ZuQOwE22I54LMfGm37FzghG6T/YDbMvOFmflC4H91+3k78AfAzt2kG5fy+KB/aczYUtR4ewVwfWZeDZCZjwBLI2L/iDgM2JAn/hx+HzgyIj4FXAn0Zkl+E/AS4NqIoNvmPyamCppJDEWNtydN3RQRWwOfAV6amb+MiD+m3TKTmd+LiB2B1wD702aH3qXbz3GZuXDCSq4Zydtnjberge2754q9/w3FVsDDwG8iYi3g0N7K3Uwm92XmecCRwIu7dS4B3tObPLSb9WSHia2KZgJDUeOqmxHoz4BPR8RPgX8F
1gG+DFwHXAH8smyyG/BvEfFj4JvAoZn5aGaeBZwNXFn288oJq4hmDGfJkaTClqIkFYaiJBWGoiQVhqIkFYaiJBWGoiQVhqIkFYaiJBX/CbgIf/5qcC6XAAAAAElFTkSuQmCC\n",
  1137.       "text/plain": [
  1138.        "<matplotlib.figure.Figure at 0x7f97791cdbe0>"
  1139.       ]
  1140.      },
  1141.      "metadata": {},
  1142.      "output_type": "display_data"
  1143.     }
  1144.    ],
  1145.    "source": [
  1146.     "df.boxplot(column='distfromcentre', by='case', vert=False, figsize=(5,5))"
  1147.    ]
  1148.   },
  1149.   {
  1150.    "cell_type": "code",
  1151.    "execution_count": 101,
  1152.    "metadata": {},
  1153.    "outputs": [
  1154.     {
  1155.      "data": {
  1156.       "text/plain": [
  1157.        "case\n",
  1158.        "0    13.149346\n",
  1159.        "1    27.014471\n",
  1160.        "Name: distfromcentre, dtype: float64"
  1161.       ]
  1162.      },
  1163.      "execution_count": 101,
  1164.      "metadata": {},
  1165.      "output_type": "execute_result"
  1166.     }
  1167.    ],
  1168.    "source": [
  1169.     "df.groupby('case').distfromcentre.mean()"
  1170.    ]
  1171.   },
  1172.   {
  1173.    "cell_type": "code",
  1174.    "execution_count": 24,
  1175.    "metadata": {},
  1176.    "outputs": [
  1177.     {
  1178.      "data": {
  1179.       "text/plain": [
  1180.        "<matplotlib.axes._subplots.AxesSubplot at 0x7f977cbf3cc0>"
  1181.       ]
  1182.      },
  1183.      "execution_count": 24,
  1184.      "metadata": {},
  1185.      "output_type": "execute_result"
  1186.     },
  1187.     {
  1188.      "data": {
  1189.       "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYwAAAEGCAYAAAB2EqL0AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4xLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvAOZPmwAAGTVJREFUeJzt3XuUnXV97/H3MJOLiSNJZaMI6QI1fDWCJgpBsctL4chFF1FPaxMuFbV6tNIeUWFhe84ptbUrwrHKaVFrkcOlSrTWaupBoV4qVEBjuRhBvxgBS4iaQQMOCbmyzx/PTpxMZjK/vWfvzGW/X2tlZZ69f8/zfL+ZZ+aT57p76vU6kiSN5aCJLkCSNDUYGJKkIgaGJKmIgSFJKmJgSJKK9E10Aa0YGBhs6dKu+fPnsGnTlnaXM2XYv/3bf/f2D1Cr9feMZ/6u2sPo6+ud6BImlP3bfzfr9v7boasCQ5LUOgNDklTEwJAkFTEwJElFDAxJUhEDQ5JUxMCQJBXp6I17EXEl8BpgY2YeM8L7PcBlwOnAFuDczLy9kzVJklrT6Tu9rwL+FrhmlPdPAxY2/pwAfKzxd9u9eeXXO7HYaefJM+Gx7fu+/vqXLeA1Jy4sXs4373iQq2/40Z7pN52+kCXPfhp//y/3cM9PNvFEvU7/k2Zw0Tkv5Onz5/KzX2zm0lV3svnxHcydPYPaU3q4d8PWPfM/+2kzWffzXxfW29vDRWcv4VmHzeOGb9/PZ75x/573Vpx8FMcedehey7vgrMXUav2svOaWvZb7nCNmc+HZJ/Lj9Y9wyXV3sGNXnRm9PVzYWPZk88Wbf8QXv/Xgnulmvy+dsnbdAB/53FrqQA9w/vJjOebI2kSXNS5TZZs4kDp6SCozbwJ+uZ8hy4BrMrOembcB8yLisE7WpP0bKSwAPn/TgyO/MYqhYQHwf6//EdfeeC9r7/8lu56oU6/Dr7bs4NJP3wnApavuZNPgNrbvfIJNj23b65c6sFdYAOzaVeeSf7gDYK+wALjuq/fvs7zd6xm+3B+ur6Z3/2IA2DFk2ZPN0LCA5r8vnbI7LADqwIdXrZ3IctpiqmwTB9JEP0vqcGDoFr++8dpP9zfT/PlzvM1/AtRq/eOa/5HN+6bRlq07qNX62bJ1R9PL27mrPmpNw5e3v+XXav3s3LX348n2t+zJppk6O9XT8Ie71Tu4rvFopqapvE10ykQHxkgPwhrzwYLd/gCxiTIwMDiu+efNnbnPa3Nmz2BgYJA5s2awbce2ppbX19szak3Dlzdn9oxRlzMwMEhfb8+e/02OtezJprTOWq2/Yz31sPcPbg/j317ardn+p/I2MZrxBt5EXyW1HlgwZPoIYMME1SKqcxgjef3LFoz8xijedPrCfabPOeVojn3mb9B7UA89PfCUuTO44MzFAFxw1mLm989iZt9BzO+fxXOOmL3X/M8+bO/CehvHlKE6ZzHUipOP2md5u9czfLm7py88ewkzeqv/v8wYsuzJZvj3odnvS6ecv/zYPf/7230OY6qbKtvEgdRTr7f0pPBiEXEk8KVRrpJ6NXAe1VVSJwD/JzOXjrXMVh9v3sn/YU0F9m//9t+9/cP4H2/e6ctqrwNeARwSEeuBPwNmAGTmx4HrqcJiHdVltW/qZD2SpNZ1NDAyc8UY79eBd3ayBklSe0z0OQxJ0hRhYEiSihgYkqQiBoYkqYiBIUkqYmBIkooYGJKkIgaGJKmIgSFJKmJgSJKKGBiSpCIGhiSpiIEhSSpiYEiSihgYkqQiBoYkqYiBIUkqYmBIkooYGJKkIgaGJKmIgSFJKmJgSJKKGBiSpCIGhiSpiIEhSSpiYEiSihgYkqQiBoYkqYiBIUkqYmBIkor0dXoFEXEqcBnQC1yRmSuHvf+bwNXAvMaYizLz+k7XJUlqTkf3MCKiF7gcOA1YBKyIiEXDhv0P4LOZuQRYDny0
kzVJklrT6UNSS4F1mXlfZm4HVgHLho2pA09pfH0wsKHDNUmSWtDpQ1KHAw8OmV4PnDBszMXAjRHxR8Bc4OSxFjp//hz6+npbKqhW629pvunC/u2/m3V7/+PV6cDoGeG1+rDpFcBVmfmhiHgJcG1EHJOZT4y20E2btrRUTK3Wz8DAYEvzTgf2b//23739w/gDs9OHpNYDC4ZMH8G+h5zeAnwWIDNvBWYDh3S4LklSkzodGGuAhRFxVETMpDqpvXrYmP8ETgKIiOdSBcZAh+uSJDWpo4GRmTuB84AbgB9QXQ11d0S8PyLOaAx7D/DWiLgLuA44NzOHH7aSJE2wnnp96v1uHhgYbKnobj+Gaf/2b//d2z9ArdY/0nnlYt7pLUkqYmBIkooYGJKkIgaGJKmIgSFJKmJgSJKKGBiSpCIGhiSpiIEhSSpiYEiSihgYkqQiBoYkqYiBIUkqYmBIkooYGJKkIgaGJKmIgSFJKmJgSJKKGBiSpCIGhiSpiIEhSSpiYEiSihgYkqQiBoYkqYiBIUkqYmBIkooYGJKkIgaGJKmIgSFJKmJgSJKK9HV6BRFxKnAZ0AtckZkrRxjzBuBioA7clZlndrouSVJzOrqHERG9wOXAacAiYEVELBo2ZiHwPuClmfk84F2drEmS1JpOH5JaCqzLzPsyczuwClg2bMxbgcszcxNAZm7scE2SpBY0fUgqImYOnS8zt+xn+OHAg0Om1wMnDBtzdGO536I6bHVxZn5lfzXMnz+Hvr7eZsreo1brb2m+6cL+7b+bdXv/41UcGBHxOuBvgMMaL/VQnXPY32/unhFeq49Qw0LgFcARwM0RcUxmPjLaQjdt2l9Gja5W62dgYLCleacD+7d/++/e/mH8gdnMHsalwBuA2zLzicJ51gMLhkwfAWwYYcxtmbkDuD8ikipA1jRRmySpw5oJjF9m5i1NLn8NsDAijgIeApYDw6+A+gKwArgqIg6hOkR1X5PrkSR1WDOB8c8R8Q7gM8DW3S/u7xxGZu6MiPOAG6gOXV2ZmXdHxPuB72bm6sZ7r4qIe4BdwAWZ+YsWepEkdVBPvT78lMLIImLoYag6jXMYmdna2edxGBgYLCt6mG4/hmn/9m//3ds/QK3WP9J55WLFexiZ6V3hktTFDAFJUpEx9zAi4muZeVJEDLD3JbG7D0kd2rHqJEmTRskhqbMbfx/XyUIkSZPbmIekMvOnjb9/QnUPRT/wZOChxmuSpC5QfA4jIn6L6v6If6K6d+K+iDixU4VJkiaXZk56Xw6clZmRmQuBs4CPdaYsSdJk00xgbM3Mm3ZPZObNwOPtL0mSNBk1Exg3RcRZuyci4kzgy+0vSZI0GZVcVrv7ctoe4KkRcUXjrVnAw8Cfd648SdJkUXJZrZfTSpLGDozSS2cj4juZuXT8JUmSJqN2PhpkRhuXJUmaZNoZGC09QVaSNDX48EFJUpF2Bsa4nrMuSZrc2hkYt7VxWZKkSab4A5Qiog94G/DbVOcrvgF8IjN3AmTmOzpSoSRpUmjmM70vB44ErqY6/HQ28Hzg7e0vS5I02TQTGC8DnpeZTwBExGeAtR2pSpI06TRzDmM91eNAdpsBPNjeciRJk1Uzexj3A7c29iwAfpfqgYR/CJCZH213cZKkyaOZwJgB3AEc3Zi+CzgYOB5v2pOkaa84MDLzTZ0sRJI0uTXzEa1/FRE9Q6bnRsSnOlOWJGmyaeak9zOAmyPi8IhYDHwXWNeZsiRJk00zh6TOjYhzgNuBbcAbM/MbHatMkjSpNHNIqh84A/g+VWD4wUqS1EWaOST1H8CdwMlUV0adGBF+prckdYlmLqt9K9VltUsy83bgdRFx/lgzRcSpwGVAL3BFZq4cZdzvAP8IHJ+Z322iLknSAdDMHsZc4G7g8wARcRzVgwhHFRG9VM+gOg1YBKyIiEUjjOsH/hj4dhP1SJIOoGYC48+pDkVtAmjsBTxzjHmWAusy877M3A6sApaNMO4vgEuArU3U
I0k6gJo5JEVm/iwihr60fYxZDmfv502tB04YOiAilgALMvNLEfHekjrmz59DX19vydB91Gr9Lc03Xdi//Xezbu9/vJoJjMGIeBqNx4BExCuAR8aYZ6RP4dvzGJGIOAj4MHBuE3WwadOWZobvUav1MzAw2NK804H927/9d2//MP7AbOaQ1EXAl4GjIuLfgE8BY+0RrAcWDJk+AtgwZLofOAb4t4h4AHgxsLpxfkSSNIk0c+PedyLilcCJVHsOt2TmWHsYa4CFEXEU8BCwHDhzyDIfBQ7ZPd0Iovd6lZQkTT7NnsN4lGovo3T8zog4D7iB6rLaKzPz7oh4P/DdzFzdVLWSpAnTU69PvSeTDwwMtlR0tx/DtH/7t//u7R+gVusf6bxysWbOYUiSupiBIUkqYmBIkooYGJKkIgaGJKmIgSFJKmJgSJKKGBiSpCIGhiSpiIEhSSpiYEiSihgYkqQiBoYkqYiBIUkqYmBIkooYGJKkIgaGJKmIgSFJKmJgSJKKGBiSpCIGhiSpiIEhSSpiYEiSihgYkqQiBoYkqYiBIUkqYmBIkooYGJKkIgaGJKmIgSFJKtLX6RVExKnAZUAvcEVmrhz2/ruBPwB2AgPAmzPzJ52uS5LUnI7uYUREL3A5cBqwCFgREYuGDbsDOC4znw98DrikkzVJklrT6T2MpcC6zLwPICJWAcuAe3YPyMxvDBl/G3B2h2uSJLWg04FxOPDgkOn1wAn7Gf8W4MtjLXT+/Dn09fW2VFCt1t/SfNOF/dt/N+v2/ser04HRM8Jr9ZEGRsTZwHHAy8da6KZNW1oqplbrZ2BgsKV5pwP7t3/7797+YfyB2enAWA8sGDJ9BLBh+KCIOBn4U+DlmbmtwzVJklrQ6cBYAyyMiKOAh4DlwJlDB0TEEuDvgFMzc2OH65EktaijV0ll5k7gPOAG4AfAZzPz7oh4f0Sc0Rh2KfBk4B8j4s6IWN3JmiRJremp10c8pTCpDQwMtlR0tx/DtH/7t//u7R+gVusf6bxyMe/0liQVMTAkSUUMDElSEQNDklTEwJAkFTEwJElFDAxJUhEDQ5JUxMCQJBUxMCRJRQwMSVIRA0OSVMTAkCQVMTAkSUUMDElSEQNDklTEwJAkFTEwJElFDAxJUhEDQ5JUxMCQJBUxMCRJRQwMSVIRA0OSVMTAkCQVMTAkSUUMDElSEQNDklTEwJAkFTEwJElF+jq9gog4FbgM6AWuyMyVw96fBVwDvAj4BfB7mflAp+uSJDWno4EREb3A5cB/AdYDayJidWbeM2TYW4BNmfnsiFgOfBD4vU7Us37jY7z3sm+yeeuuti/7oB54or73a+9efixHHnow1954Lxse3szGTVuo1+v0z5nFBWct5unz5/LYlu1ce+O9DDzyOLV5T+KcU47myU+a2fT6164b4COfW0t9jHEzenu48OwlPOuweQBtW38zfvaLzVy66k42P76DubNn7Pm3aNWP1z/CJdfdwY5dv+5++Pfj9Jc8g+/c80t+tXl7W9YpHWjt/rlpRacPSS0F1mXmfZm5HVgFLBs2ZhlwdePrzwEnRURPJ4r5nx//VkfCAvYNC4APr1rLtTfey5ofbuShhzezY1ednU/Apse2cemn7wTY8/4DPxtkzQ83cu0N97a0/pKwANixq84l/3DHnul2rb8Zl666k02D29i+84m9/i1aNTwsYN/vx/W3buDhR7e2bZ3Sgdbun5tWdPqQ1OHAg0Om1wMnjDYmM3dGxKPAU4GHR1vo/Plz6OvrbbqYwS3bm55nPOrAI5tHXueWrTuo1fr3ef+Rzdup1fpbWlepnbvqe9bRrvU3Y8vWHftMj2edO3c103171jlVdWPPQ03l/tv9c9OKTgfGSHsKw3+6S8bsZdOmLS0V0z9nJtse3drSvK3oAebNHfnwzpzZMxgYGNzn/XlzZzIwMNjSukp/bfb19uxZR7vW34w5s2awbce2X083/i1a1dfbs88expg1jHOdU1Gt1t91
PQ811ftvx8/NeAOm04ek1gMLhkwfAWwYbUxE9AEHA7/sRDF/+faXMnd283smJQ4aIfbOX34s55xyNMc/51AOP2QuM3p76DsI5vfP4oIzFwPsef/Ip/dz/HMO5ZxTjm5p/ecvP3bE5B1u9zmM3dq1/mZccNZi5vfPYmbfQXv9W7TqwrOXMKN37+6Hfz9e/ZJncMjBs9u2TulAa/fPTSt66vXmd+dLNQLgXuAk4CFgDXBmZt49ZMw7gWMz8+2Nk96vz8w37G+5AwODLRU91f+HMV72b//23739A9Rq/eM6P9zRPYzM3AmcB9wA/AD4bGbeHRHvj4gzGsM+CTw1ItYB7wYu6mRNkqTWdHQPo1Pcw2iN/du//Xdv/zDJ9zAkSdOHgSFJKmJgSJKKGBiSpCIGhiSpiIEhSSoyJS+rlSQdeO5hSJKKGBiSpCIGhiSpiIEhSSpiYEiSihgYkqQiBoYkqUinP6J10oiIU4HLgF7gisxcOcEldVxEPAAMAruAnZl5XET8BvAZ4EjgAeANmblpgkpsq4i4EngNsDEzj2m8NmK/EdFDtT2cDmwBzs3M2yei7nYZpf+LgbcCA41hf5KZ1zfeex/wFqrt448z84YDXnQbRcQC4Brg6cATwCcy87Ju2Qb20//FtGkb6Io9jIjoBS4HTgMWASsiYtHEVnXAvDIzF2fmcY3pi4CvZeZC4GtMrw+sugo4ddhro/V7GrCw8edtwMcOUI2ddBX79g/w4cY2sHjIL4pFwHLgeY15Ptr4OZnKdgLvycznAi8G3tnos1u2gdH6hzZtA10RGMBSYF1m3peZ24FVwLIJrmmiLAOubnx9NfDaCaylrTLzJvb9PPjR+l0GXJOZ9cy8DZgXEYcdmEo7Y5T+R7MMWJWZ2zLzfmAd1c/JlJWZP929h5CZg1Sf8nk4XbIN7Kf/0TS9DXRLYBwOPDhkej37/4ecLurAjRHxHxHxtsZrT8vMn0K1gQGHTlh1B8Zo/XbTNnFeRHwvIq6MiPmN16Z1/xFxJLAE+DZduA0M6x/atA10S2CM9LGE3fAQrZdm5gupdr3fGREvm+iCJpFu2SY+BjwLWAz8FPhQ4/Vp239EPBn4J+Bdmfmr/Qydlv8GI/Tftm2gWwJjPbBgyPQRwIYJquWAycwNjb83Av9Mtbv589273Y2/N05chQfEaP12xTaRmT/PzF2Z+QTw9/z6kMO07D8iZlD9svxUZn6+8XLXbAMj9d/ObaBbAmMNsDAijoqImVQnelZPcE0dFRFzI6J/99fAq4DvU/X9xsawNwJfnJgKD5jR+l0N/H5E9ETEi4FHdx+2mE6GHZN/HdU2AFX/yyNiVkQcRXXi9zsHur52alz19EngB5n510Pe6optYLT+27kNdM3jzSPidOAjVJfVXpmZH5jgkjoqIp5JtVcB1eXTn87MD0TEU4HPAr8J/Cfwu5lZeqJ0UouI64BXAIcAPwf+DPgCI/Tb+OH6W6qrQ7YAb8rM705E3e0ySv+voDoUUae6pPS/7f6lGBF/CryZ6uqad2Xmlw940W0UEb8F3AyspbqsFOBPqI7jT/ttYD/9r6BN20DXBIYkaXy65ZCUJGmcDAxJUhEDQ5JUxMCQJBUxMCRJRbrmabWaviKiDvQD/w68JDMfH2XcPOBtmXnJkNcWUl1yCfC/M/NTna63HSLitcCGzJzS905oanEPQ9NG40mcI4ZFwzzgwmGvvR64JTOXDA+LiJjM/6F6Lft5UNw0ePKsJiHvw9CUExGvB/6K6sms1wN/QbWHMdj4ewvVDVm/DWwDHsvMl0bE/wNOobrTdQvVI+8/RPUfpw3Af6W6U/YW4ARga2a+OiJ+H7iA6sanH1Pd+LQxIs4FzgQeAZ4PPAT8EXAp1V2za4CzM7MeEQcDHwaOp7qp6ubMPK/x5IEPAC8HZlLddPWOzHwsIq4CtgJHUz3C4VaqO5VfBVzX6OFh4K+pbkj7CNWNW8cDfwnc1Hjv+cBs4BvAuzNz17i+
Aepa7mFoSomIQ6meh7MsM08Eto8w7AXAycCizHwB1YcKAbwTeKSxJ3JiY4/i41SPuF6cmT9ujDsGOKURFscAK4FXZebzqcLmb4as63iqX8LPAR4HPk0VIouAY4GTGuM+AmwGXtCo6eLG6xdSPZJiaWYupgqu9w1Z/jFUH/DzPOBFwMmND7lZDaxs1H1NY+yxVHf0vzgzv0QVFt/MzKVUd/oeSnVXr9SSybzLLY3kxcDtmZmN6U8AHxw25j6qR8B8MiK+DnypyXV8OjN3Nr5+JXD9kGcM/R1w15Cx38rM9Y2v7wAeyMxHASLiLuDZwFepQutFjQfAkZkPN+Y5A3hKRPxOY3rWsOV/ITO3NpZ3O9VTR/91lLp/lJm3Dpk+A1gaEe9pTM+heuCc1BIDQ1PNSI9k3ktmPhoRz6N6jtJJwAcj4oVNrOOxYesbftx26PTWIV/vGmF6rJ+xHuAPM/Pro7zfzPIeGzbdA7w2M+8bowapiIekNNXcCixpXN0E8AfDB0REDXhSZn6F6uM4HwWeCfwKmNPkyeyvAadHxNMb02+l2mNo1peACxoPvCMiDmm8vhp4d0Q8qfF6f0Q8t2B5vwIOHmPMauCi3SfAI+KQxlNJpZYYGJpSGp/t8TbgXyLiFqqnbA63APhq45DQ94AvA7c1nsr7KWBtY96S9d1NdU7hXyPie1TnR/57C6WfT3VC/vuNuv5X4/WVVIeg1jSW/+9ASWBcC5wZEXc2TsqP5F1UeyV3RcRa4CtMk0+U08TwKilJUhH3MCRJRQwMSVIRA0OSVMTAkCQVMTAkSUUMDElSEQNDklTk/wMTV9IC6Zn9gwAAAABJRU5ErkJggg==\n",
  1190.       "text/plain": [
  1191.        "<matplotlib.figure.Figure at 0x7f977cc4d5c0>"
  1192.       ]
  1193.      },
  1194.      "metadata": {},
  1195.      "output_type": "display_data"
  1196.     }
  1197.    ],
  1198.    "source": [
  1199.     "df.plot(x='distfromcentre', y='exp_bin', kind='scatter') # being exposed is assoc with being closer to centre"
  1200.    ]
  1201.   },
  1202.   {
  1203.    "cell_type": "code",
  1204.    "execution_count": 46,
  1205.    "metadata": {},
  1206.    "outputs": [
  1207.     {
  1208.      "data": {
  1209.       "text/plain": [
  1210.        "<matplotlib.axes._subplots.AxesSubplot at 0x7f97790ea8d0>"
  1211.       ]
  1212.      },
  1213.      "execution_count": 46,
  1214.      "metadata": {},
  1215.      "output_type": "execute_result"
  1216.     },
  1217.     {
  1218.      "data": {
  1219.       "image/png": "iVBORw0KGgoAAAANSUhEUgAAAUUAAAFZCAYAAAAPYD29AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4xLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvAOZPmwAAFz1JREFUeJzt3XmYplWZ3/FvT7eAIEgDDciOiHcEZ8SdRCBIohGiYoILi4BDI0YBw8BkRFHES3GaiTo4ICoCAso6gga3uOAEnKgsIeIMkhuBoGAjtNAzQjcNNvT8cZ7Cm+Ktrfutequqv5/r6qvrfdZznlP1q/Nsp+asWrUKSVLzR4MugCRNJ4aiJBWGoiQVhqIkFYaiJBWGoiQVhuIMFhHnR8THBl2OQRvtOETEOyLi76e6TP0wVvtGxMMR8dypLNPaYN6gCzAbRMRdwBbA48DvgR8B/yUz7x5gsZ4iIlYBO2fm7YMui/ojM5816DLMRvYU++cN3Tfpc4D7gDMGXJ5JExFzIsLvHc1K9hT7LDNXRMRXgNOHpkXEs2khuS+wHPgC8PHMfCIiPgssyMw3d8ueBrwM+PfAvwW+DJwFHA88DJyUmRf12ndEvBN4H7AJ8Pe03uriiLi2W+Tmrse4MDMvG7buXOCvgMOBh4BPdmV+RmaujIj/BfxvYG/gJcAfR8Ry4HPAHsCDwGmZ+YVue+cD92TmB7vPewNfzsxtus93AZ8HDqX9Ivka8O7MXNHNfz3wMWAH4OddXX7WzXsxcC6wM/AtYKzXsuZExBnAYcC9wNGZeXVEvAU4MTNfWo7DCcCemfmmHsf32cCngP2AJ4AvAh/OzMf72Y7DbBYR3wN2B24CDsvMX3b7eLL33x3vZd3x2qs7Zgdn5h3j2IcKf9v3WUSsD7wN+EmZfAbwbOC5tB+Qw4A/7eadAPxJd+1rT2AhcHhmDv2gbwlsBmxNC6yzIyJ67Hcf4C+Bt9JC5pfApQCZuVe32Isy81nDA7HzTlpo70YLvaeFAi3AjgI27LZ/CXAPsBXwZuDjEfHvRjw4T3cI8B+AnYDnA0MB+hLgPOBdwKa08LwqItaNiHVoAfolWvj/LXDAGPt5JXAn7Th+GLgyIjYBrgJ2jIgXlGXf3m27lwuAlcDzgBcDrwWO7Ob1pR17OAT4aLfuT4HRgvQg4CPAfOB24NRxbF/DGIr987WI+Cfgd8BrgP8OT/bA3ga8PzMfysy7aL2wQwEyczntB/FTtN7EsZl5z7BtfygzH83Ma4Bv0oJvuEOA8zLzpsx8FHg/8K8jYodxlv+twKcz857MXAos6rHM+Zl5S2aupP2Q7wG8LzNXZOZPgXOG6jVOZ2bm3Zn5IO0H+KBu+juBz2fmdZn5eGZeADxK6y3tDjwDOD0zf5+ZXwFuGGM/95flLwMS+I/dcbqMdvyJiF1pPa1vDN9ARGxB+6VxXGYuy8z7gb8GDoS+tuNw38zMa7uynkRr021HWPbKzLy+a5+LaL/gNEGGYv+8KTM3BtYFjgGuiYih3sE6tJ7VkF/SegwAZOb1tJ7MHODyYdtdmpnLhq27VY/9b1X3kZkPAw/U/YxhK6DeGOp1k6hO2wp4MDMfGla28e5v+PZqvbYHToiIfxr6B2zbzd8K+HXpgQ2tO5peyw/t6wLg4IiYQwv0y7sAGm57WhjfW8r0eWDzoQX61I7DPXmMujZ9cJT1flO+Xg54I2Y1GIp91vVsrqTdid4D+C3tjvT2ZbHtgF8PfYiIo2lhuhj4i2GbnB8RGwxbd3GPXS+u++jW2bTuZwz3AtuUz716IzVYFgObRMSGw8o2tL9lwPpl3pY9tlf3Uet1N3BqZm5c/q2fmZd05dy6C7G67mh6Lb8YIDN/AjwG7AkczMinznfTequblTJtlJm7Di3Qp3Yc7sljFBHPol0yGM96Wk3eaOmz7ofvjbTrOrd2F+EvB06NiMNo
39THA5/oln8+7YbC3rTf7tdHxLe709EhH4mID9Cujb2edl1suIuBSyPiYuBW4OPAdd3pOrQ74s+lXWvq5XLgv0bEN2mB9r7R6pmZd0fEj4C/jIg/p10TXEh3Kkq7/nVC95zdOsBxPTZzdER8o6v3B2instBuRH01Ir4PXE8L172Ba4Ef067rvTciPkM71q8A/m6U4m7eLX8W7VrpC2g3aIZcCJwJrMzMns80Zua9EfFd4JMR8SHazZIdgW0y85o+tuNw+0XEHt1x+CitTafNo16zkT3F/vl6RDxMu6Z4Ku0i+y3dvGNpQXMn7a7wxcB5ETGPdv3ptMy8OTN/QQuHL0XEut26vwGW0noHF9Huwv6/4TvPzKuBDwFX0HpTO9Fd7+qcAlzQnfr1upb1BeC7wM+A/0sLjZW0Hu9IDqJdg1sMfJV2J/Z73bwvATcDd3Xb7XVz5+Ju3p3dv491dbmRdl3xzK7utwPv6OY9Bvzn7vNS2vXaK0cpI8B1tDvVv6W1zZsz84Ey/0vACxm5lzjkMFrA/7zb91eA5/SzHXu4mBaeDwIvpV071iSa4yCz09fwx1imeN/7Ap/LzO3HXHj1tn8XcGRmfn8ytj8REfFM2s2Yl3SB1u/t782A2lET5+mzgCeD4dW0ntsWtN7JVwdaqKnzbuCGyQhEzTyGoobMoT3jdhnwCO2RkZMHWqIp0PVY59D7ucyp2P8tPPUm3JB3jfPhbvWZp89rqaE3ToDvAOdk5ngeJB6+jS1oD0+/GDg7M0/oayGlAbCnuJbLzB8CYwZiRJwCPC8z314mH0W7ebHRsOcAZxQHy1Dl3Wetie2Bn48UiN1d2RlvttRD4+Pp81pihEEUbge+z1MHangf8F5gI9rjI++hvclxFe3a26PAHbTHdg7ptvMY7ZrcHrRHW1bQnh88nvaYy2n84ZW2y2mvBj46dFcW+Bvgz2mP/7y7297ptLeBPpGZH+/KNpf2/ORC2rOHt9HeJLo7Iv4V7R3zlwJLaK/UXd6tdz4jDJbQDZaxJ+3ZwlXdtu/rynUG8GfA9zLz0NEGqdDsYU9xLTDeQRS6AQqOAV6emRvSBmu4KzP/J+1h8Mu6ASVelJnvoD1v91fdtKFHa/anPb+3cTf/JNr7yrsBL6I9aP3BststgfVorweeTHte8u20cNsTOLkMpHo87dnI/WihfQSwvHtT5Hu0Z/o275Y5q3uXeUjPwRJGGSxjy+5YbQ8cNdogFSMcds1QnhasHeogCquAr0TE8T2We5z2mtouEbGkvA0zET/OzK91Xz8SEYfQBke4HyAiPkILlA91y/ye9krf4xFxKXA2bWCKh4Bburuzf0J7uPtI4C8yM7t1b+62+TZaeH+xm35TRFxBG7ln6AH6K7t3k4mIi2gDN4zmCdrD6I926zw5SEU3/4Lu7ZTdgWsmcoA0vRmKa4dxDaKQbVy+42hvv+waEd8Bjs/MibxrO/wVtKcMVMHTB0J4IDOH3pp5pPv/vjL/Ef4wsMG2tFP34bYHXtkN0jBkHk99Q2WigyUsyW5sx7KPwyPi2DJtHcY3qINmEENx7fDkIAolGLejR8Bk5sXAxRGxEa1Hdxpt9JjxXnwevtzQQBVDPbbxDoTQy9201xf/scf0azLzNau53V6G12NokArHKJzlDMW1w7gGUeiuKW5NG2F7Ba2XNnTd+T7gNRHxR5n5xAT2fQnwwYi4gRY0J9NuYqyOc4CPRsTPadcF/5g2Ks83gEURcSjdwLq0a5gPZ+at49juWINlwCiDVAwbPk0znDda1gITGERhXdrgsr+lnW5uThvYANrNGYAHIuKmCez+Y8CNtIEm/oE2pP7q/gXCT9HuXn+XNvDGucAzu1B6LW0AjMVd2U/r6jMepzD6YBmjDlKh2cVHciSpsKcoSYWhKEmFoShJhaEoSYWhKEnFpDynuGTJQxO6pT1//vosXbp8MooycNZtZrJuM9P8+eszb97cOWMvObJp0VOcN2/uoIswaazb
zGTdZqZ+1G1ahKIkTReGoiQVhqIkFYaiJBWGoiQVhqIkFYaiJBWGoiQVhqIkFYaiJBWGoiQVhqIkFYaiJBWGoiQVhqIkFYaiJBWGoiQVhqIkFYaiJBWGoiQVhqIkFYaiJBWGoiQVhqIkFYaiJBWGoiQVhqIkFYaiJBWGoiQVhqIkFYaiJBWGoiQVhqIkFYaiJBWGoiQVhqIkFYaiJBWGoiQVhqIkFYaiJBWGoiQVhqIkFYaiJBWGoiQVhqIkFYaiJBWGoiQVhqIkFYaiJBWGoiQVhqIkFYaiJBWGoiQVhqIkFYaiJBXzBl2A0Rx7+rUsW7Fy1GUW/uoqzt3ujau9jw3Wm8cZx+212utLml2mdSguW7GS807cZ9RlbjvywjGXGc0Ri36w2utKmn08fZakwlCUpMJQlKRi2oTiAQe8YdBFGDiPgTR4Y95oiYjzgNcD92fmCyerIHfc8YvJ2vSM4TGQBm88PcXzgddNcjkkaVoYMxQz81rgwSkoiyQN3LS5pihJ08GkPLw9f/76zJs3d8Lr9XqQesGCDUdd57ZxLLM6+x2UNa3LVJtp5Z0I67Z2mpRQXLp0+YSWH2qg4W+mHLHoByxZ8tCY649nmdGsyRsxY1mwYMNxl2+3S9e8LlNpInWbaazbzNSPsPf0WZKKMUMxIi4Bfty+jHsiYuHkF0uSBmPM0+fMPGgqCiJJ08G0OX3eaaedB12EgfMYSIM3bULxiiu+PugiDJzHQBq8aROKkjQdGIqSVBiKklQYipJUTOu/0QJjv4K3cJ2NWbQGr+ltsN60PwSSptC0ToTxvX63D6+a9JJIWlt4+ixJhaEoSYWhKEmFoShJhaEoSYWhKEmFoShJhaEoSYWhKEmFoShJhaEoSYWhKEmFoShJhaEoSYWhKEmFoShJhaEoSYWhKEmFoShJhaEoSYWhKEmFoShJhaEoSYWhKEmFoShJhaEoSYWhKEmFoShJhaEoSYWhKEmFoShJhaEoSYWhKEmFoShJhaEoSYWhKEmFoShJhaEoSYWhKEmFoShJhaEoSYWhKEmFoShJhaEoSYWhKEmFoShJhaEoSYWhKEmFoShJhaEoSYWhKEmFoShJhaEoSYWhKEmFoShJhaEoSYWhKEmFoShJhaEoSYWhKEmFoShJhaEoSYWhKEmFoShJhaEoSYWhKEmFoShJhaEoSYWhKEmFoShJhaEoSYWhKEmFoShJhaEoSYWhKEmFoShJhaEoSYWhKEmFoShJhaEoSYWhKEmFoShJhaEoSYWhKEmFoShJhaEoSYWhKEmFoShJhaEoSYWhKEmFoShJhaEoSYWhKEmFoShJhaEoSYWhKEmFoShJhaEoSYWhKEmFoShJhaEoSYWhKEmFoShJhaEoSYWhKEmFoShJhaEoSYWhKEmFoShJhaEoSYWhKEmFoShJxbxBF2A0x55+LctWrOw5b+GvruLc7d44xSUa2QbrzeOM4/YadDEkraFpHYrLVqzkvBP36TnvtiMvHHHeIByx6AeDLoKkPvD0WZIKQ1GSCkNRkoppE4oHHPCGQRdBq8F202wzrhstEfE64NPAXOCczFzU74Lccccv+r1JTQHbTbPNmD3FiJgLfAbYF9gFOCgidpnsgknSIIzn9PkVwO2ZeWdmPgZcCuw/ucWSpMEYTyhuDdxdPt/TTZOkWWc81xTn9Ji2arQV5s9fn3nz5k64ML0egF6wYMOey942yrxBWVsf4J5u7dAvs7VeMLvrtqbGE4r3ANuWz9sAi0dbYenS5RMqxFADDX9D5YhFP2DJkodGXG+0eYPQ6w2bBQs2nHbl7JcFCzZk60vfMyvrN9vbbTbXbU2NJxRvAHaOiB2BXwMHAgev8Z4laRoa85piZq4EjgG+A9wKXJ6Zt0x2wSRpEMb1nGJmfgv41iSXRZIGbtq80bLTTjsPughaDbabZptpE4pXXPH1QRdBq8F202wzbUJRkqYDQ1GSCkNRkgpDUZKKaf03WmDkV+cWrrMxi6bRa3Ub
rDftD6WkcZjWP8mj/2GqfXjVlJVE0trC02dJKgxFSSoMRUkqDEVJKgxFSSoMRUkqDEVJKgxFSSoMRUkqDEVJKgxFSSoMRUkqDEVJKgxFSSoMRUkqDEVJKgxFSSoMRUkqDEVJKgxFSSoMRUkqDEVJKgxFSSoMRUkqDEVJKgxFSSoMRUkqDEVJKgxFSSoMRUkqDEVJKgxFSSoMRUkqDEVJKgxFSSoMRUkqDEVJKgxFSSoMRUkqDEVJKgxFSSoMRUkqDEVJKgxFSSoMRUkqDEVJKgxFSSoMRUkqDEVJKgxFSSoMRUkqDEVJKgxFSSoMRUkqDEVJKuasWrVq0GWQpGnDnqIkFYaiJBWGoiQVhqIkFYaiJBWGoiQV8wa584h4HfBpYC5wTmYuGmR51lRE3AU8BDwOrMzMl0XEJsBlwA7AXcBbM3PpgIo4IRFxHvB64P7MfGE3rWd9ImIOrS33A5YD78jMmwZR7vEYoW6nAO8ElnSLfSAzv9XNez+wkNa2783M70x5occpIrYFLgS2BJ4Azs7MT8+GthulbqfQp7YbWE8xIuYCnwH2BXYBDoqIXQZVnj56dWbulpkv6z6fCFydmTsDV3efZ4rzgdcNmzZSffYFdu7+HQV8dorKuLrO5+l1A/jrrv12Kz9UuwAHArt265zVff9OVyuBEzLzBcDuwNFdHWZD241UN+hT2w3y9PkVwO2ZeWdmPgZcCuw/wPJMlv2BC7qvLwDeNMCyTEhmXgs8OGzySPXZH7gwM1dl5k+AjSPiOVNT0okboW4j2R+4NDMfzcz/D9xO+/6dljLz3qGeXmY+BNwKbM0saLtR6jaSCbfdIENxa+Du8vkeRq/cTLAK+G5E/J+IOKqbtkVm3gutQYHNB1a6/hipPrOlPY+JiJ9FxHkRMb+bNmPrFhE7AC8GrmOWtd2wukGf2m6QoTinx7SZ/s7hqzLzJbTTkaMjYq9BF2gKzYb2/CywE7AbcC/wyW76jKxbRDwLuAI4LjN/N8qiM65+PerWt7YbZCjeA2xbPm8DLB5QWfoiMxd3/98PfJXWTb9v6FSk+//+wZWwL0aqz4xvz8y8LzMfz8wngC/wh9OsGVe3iHgGLTQuyswru8mzou161a2fbTfIULwB2DkidoyIdWgXQ68aYHnWSERsEBEbDn0NvBb4R1qdDu8WOxz4H4MpYd+MVJ+rgMMiYk5E7A7889Cp2kwx7Draf6K1H7S6HRgR60bEjrQbEtdPdfnGq7ubfC5wa2Z+qsya8W03Ut362XYDHSUnIvYDTqc9knNeZp46sMKsoYh4Lq13CO1Rp4sz89SI2BS4HNgO+BXwlswc7wX+gYqIS4C9gc2A+4APA1+jR326b9YzaXf4lgN/mpk3DqLc4zFC3famnX6toj2y8q6hcIiIk4AjaHc/j8vMb095occpIvYAfgj8A+2xFYAP0K69zei2G6VuB9GntnPoMEkqfKNFkgpDUZIKQ1GSCkNRkgpDUZIKQ1GSCkNR015E7BARvx1h3lYR8XdTXSbNXgMdT1FaU92rla8edDk0exiKWmMR8UpgEbBRN+lkYBntHdSXZeY/R8T5wG8y88Tu698DO9LeS70WOLobQm60/XwC2At4JvCezPxhN1LKjZm5WbfMKuAk2qtemwL/LTOv6GN1Nct5+qw1EhEbA58DDs7Ml9JGs/488FPaCMnnRsRhwPOBD5ZVX0kbz29XYHva4Kaj2RT4WWa+AjgGuCQi1h1h2d9l5suBQ4G/Wa2Kaa1lKGpN/Rtaj+/bEfFT4Nu090+fB5wKbEIbxunAzFxZ1rssMx/upl0A7DPGfh4DvgyQmdcAjwAxwrKXdv//BNgqItabcK201vL0WWtqDq0H97SxI7uBPrcDHqX19H41yjYm+hL+aOusAMjMxyMC/D7XBNhT1Jr6EW0IuCdvdkTEy7uRV74InEMbpuqSoaHVOm/phlubB7wdGOsO8jrAwd329wTWA7J/1ZAaQ1FrpPvLhG8EPhwR
N0fErcApwJ/RboiclplXA38LnF1WvZY2DNkttOHi67xeHqCF73XAWcBBY92YkVaHQ4dpynV3n2/MzDMHXRZpOHuKklTYU9S0ERGfo/0t32pl+Rva0qQzFCWp8PRZkgpDUZIKQ1GSCkNRkgpDUZKKfwF+n8BTrDNK/QAAAABJRU5ErkJggg==\n",
  1220.       "text/plain": [
  1221.        "<matplotlib.figure.Figure at 0x7f97790e1978>"
  1222.       ]
  1223.      },
  1224.      "metadata": {},
  1225.      "output_type": "display_data"
  1226.     }
  1227.    ],
  1228.    "source": [
  1229.     "df.boxplot(column='distfromcentre', by='exp_bin', vert=False, figsize=(5,5))"
  1230.    ]
  1231.   },
  1232.   {
  1233.    "cell_type": "code",
  1234.    "execution_count": 100,
  1235.    "metadata": {},
  1236.    "outputs": [
  1237.     {
  1238.      "data": {
  1239.       "text/plain": [
  1240.        "exp_bin\n",
  1241.        "0    29.437325\n",
  1242.        "1    21.816693\n",
  1243.        "Name: distfromcentre, dtype: float64"
  1244.       ]
  1245.      },
  1246.      "execution_count": 100,
  1247.      "metadata": {},
  1248.      "output_type": "execute_result"
  1249.     }
  1250.    ],
  1251.    "source": [
  1252.     "df.groupby('exp_bin').distfromcentre.mean()"
  1253.    ]
  1254.   },
  1255.   {
  1256.    "cell_type": "code",
  1257.    "execution_count": 104,
  1258.    "metadata": {},
  1259.    "outputs": [
  1260.     {
  1261.      "name": "stdout",
  1262.      "output_type": "stream",
  1263.      "text": [
  1264.       "Optimization terminated successfully.\n",
  1265.       "         Current function value: 0.568095\n",
  1266.       "         Iterations 6\n",
  1267.       "                           Logit Regression Results                           \n",
  1268.       "==============================================================================\n",
  1269.       "Dep. Variable:                   case   No. Observations:                  375\n",
  1270.       "Model:                          Logit   Df Residuals:                      371\n",
  1271.       "Method:                           MLE   Df Model:                            3\n",
  1272.       "Date:                Thu, 12 Jul 2018   Pseudo R-squ.:                 0.06828\n",
  1273.       "Time:                        15:51:50   Log-Likelihood:                -213.04\n",
  1274.       "converged:                       True   LL-Null:                       -228.65\n",
  1275.       "                                        LLR p-value:                 7.617e-07\n",
  1276.       "==================================================================================\n",
  1277.       "                     coef    std err          z      P>|z|      [0.025      0.975]\n",
  1278.       "----------------------------------------------------------------------------------\n",
  1279.       "distfromcentre     0.0318      0.008      4.091      0.000       0.017       0.047\n",
  1280.       "age                0.0007      0.005      0.150      0.880      -0.009       0.010\n",
  1281.       "ever_smoked        0.1899      0.260      0.730      0.465      -0.320       0.699\n",
  1282.       "exp_bin            0.1372      0.342      0.401      0.688      -0.533       0.807\n",
  1283.       "==================================================================================\n"
  1284.      ]
  1285.     }
  1286.    ],
  1287.    "source": [
  1288.     "import statsmodels.api as sm\n",
  1289.     "import numpy as np\n",
  1290.     "from scipy import stats\n",
  1291.     "stats.chisqprob = lambda chisq, df: stats.chi2.sf(chisq, df)\n",
  1292.     "y=df['case']\n",
  1293.     "X=df[['distfromcentre', 'age', 'ever_smoked', 'exp_bin']]\n",
  1294.     "logit_model=sm.Logit(y,X)\n",
  1295.     "result=logit_model.fit()\n",
  1296.     "print(result.summary())"
  1297.    ]
  1298.   },
  1299.   {
  1300.    "cell_type": "code",
  1301.    "execution_count": 105,
  1302.    "metadata": {},
  1303.    "outputs": [
  1304.     {
  1305.      "name": "stdout",
  1306.      "output_type": "stream",
  1307.      "text": [
  1308.       "                    2.5%     97.5%        OR\n",
  1309.       "distfromcentre  1.016707  1.048178  1.032323\n",
  1310.       "age             0.991344  1.010191  1.000723\n",
  1311.       "ever_smoked     0.726397  2.012467  1.209070\n",
  1312.       "exp_bin         0.586808  2.242103  1.147033\n"
  1313.      ]
  1314.     }
  1315.    ],
  1316.    "source": [
  1317.     "params = result.params\n",
  1318.     "conf = result.conf_int()\n",
  1319.     "conf['OR'] = params\n",
  1320.     "conf.columns = ['2.5%', '97.5%', 'OR']\n",
  1321.     "print(np.exp(conf))"
  1322.    ]
  1323.   },
  1324.   {
  1325.    "cell_type": "code",
  1326.    "execution_count": null,
  1327.    "metadata": {},
  1328.    "outputs": [],
  1329.    "source": []
  1330.   }
  1331.  ],
  1332.  "metadata": {
  1333.   "kernelspec": {
  1334.    "display_name": "Python 3",
  1335.    "language": "python",
  1336.    "name": "python3"
  1337.   },
  1338.   "language_info": {
  1339.    "codemirror_mode": {
  1340.     "name": "ipython",
  1341.     "version": 3
  1342.    },
  1343.    "file_extension": ".py",
  1344.    "mimetype": "text/x-python",
  1345.    "name": "python",
  1346.    "nbconvert_exporter": "python",
  1347.    "pygments_lexer": "ipython3",
  1348.    "version": "3.5.4"
  1349.   }
  1350.  },
  1351.  "nbformat": 4,
  1352.  "nbformat_minor": 2
  1353. }
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top