Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- {
- "cells": [
- {
- "cell_type": "code",
- "execution_count": 6,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "# cases \n",
- "\n",
- "wood\n",
- "(-0.027365794640202726, 0.6329200219533746)\n",
- "metal\n",
- "(0.02705067814646989, 0.6368417634645179)\n",
- "\n",
- "# controls\n",
- "wood\n",
- "(0.09576879095559523, 0.09393193478421226)\n",
- "metal\n",
- "(-0.039643310289723156, 0.4889087597244508)\n"
- ]
- }
- ],
- "source": [
- "import pandas as pd\n",
- "import numpy as np\n",
- "import scipy.stats as stats\n",
- "import statsmodels.formula.api as smf\n",
- "stats.chisqprob = lambda chisq, df: stats.chi2.sf(chisq, df)\n",
- "\n",
- "df = pd.read_csv('genotyped_subset_audrey.csv') # read Audrey data\n",
- "\n",
- "genotype_transforms = {\"(G;G)\":0, \"(G;T)\":1, \"(T;T)\":2}\n",
- "df.genotype = df.genotype.map(genotype_transforms) # change genotype to 0,1,2 (assumes additive model)\n",
- "\n",
- "def correlations(df):\n",
- " \"\"\"\n",
- " check for correlations between genotype and wood or metal exposure\n",
- " using scipy stats.pearsonr and print the result (coefficient, p-value)\n",
- " \"\"\"\n",
- " print('wood')\n",
- " print(stats.pearsonr(df['genotype'], df['exposed_wood']))\n",
- " print('metal')\n",
- " print(stats.pearsonr(df['genotype'], df['exposed_metal']))\n",
- "\n",
- "print(\"# cases \\n\")\n",
- "correlations(df[df['case'] == 1])\n",
- "\n",
- "print(\"\\n# controls\")\n",
- "correlations(df[df['case'] == 0])"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 7,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Optimization terminated successfully.\n",
- " Current function value: 0.690774\n",
- " Iterations 4\n",
- " Logit Regression Results \n",
- "==============================================================================\n",
- "Dep. Variable: case No. Observations: 614\n",
- "Model: Logit Df Residuals: 612\n",
- "Method: MLE Df Model: 1\n",
- "Date: Thu, 08 Aug 2019 Pseudo R-squ.: 0.003424\n",
- "Time: 11:25:46 Log-Likelihood: -424.14\n",
- "converged: True LL-Null: -425.59\n",
- " LLR p-value: 0.08781\n",
- "=================================================================================\n",
- " coef std err z P>|z| [0.025 0.975]\n",
- "---------------------------------------------------------------------------------\n",
- "Intercept -0.0574 0.087 -0.656 0.512 -0.229 0.114\n",
- "exposed_metal 0.3901 0.230 1.697 0.090 -0.060 0.841\n",
- "=================================================================================\n",
- "Odds Ratios\n",
- "======================\n",
- " 2.5% 97.5% OR\n",
- "Intercept 0.795445 1.120864 0.944238\n",
- "exposed_metal 0.941345 2.317783 1.477103\n"
- ]
- }
- ],
- "source": [
- "model = smf.logit('case ~ exposed_metal', data=df) \n",
- "result = model.fit()\n",
- "print(result.summary())\n",
- "print (\"Odds Ratios\")\n",
- "print (\"======================\")\n",
- "params = result.params\n",
- "conf = result.conf_int()\n",
- "conf['OR'] = params\n",
- "conf.columns = ['2.5%', '97.5%', 'OR']\n",
- "print(np.exp(conf))"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 8,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Optimization terminated successfully.\n",
- " Current function value: 0.692971\n",
- " Iterations 3\n",
- " Logit Regression Results \n",
- "==============================================================================\n",
- "Dep. Variable: case No. Observations: 614\n",
- "Model: Logit Df Residuals: 612\n",
- "Method: MLE Df Model: 1\n",
- "Date: Thu, 08 Aug 2019 Pseudo R-squ.: 0.0002537\n",
- "Time: 11:26:27 Log-Likelihood: -425.48\n",
- "converged: True LL-Null: -425.59\n",
- " LLR p-value: 0.6421\n",
- "================================================================================\n",
- " coef std err z P>|z| [0.025 0.975]\n",
- "--------------------------------------------------------------------------------\n",
- "Intercept 0.0105 0.084 0.126 0.900 -0.154 0.175\n",
- "exposed_wood -0.1441 0.310 -0.464 0.642 -0.752 0.464\n",
- "================================================================================\n",
- "Odds Ratios\n",
- "======================\n",
- " 2.5% 97.5% OR\n",
- "Intercept 0.857453 1.191102 1.010601\n",
- "exposed_wood 0.471259 1.590732 0.865822\n"
- ]
- }
- ],
- "source": [
- "model = smf.logit('case ~ exposed_wood', data=df) \n",
- "result = model.fit()\n",
- "print(result.summary())\n",
- "print (\"Odds Ratios\")\n",
- "print (\"======================\")\n",
- "params = result.params\n",
- "conf = result.conf_int()\n",
- "conf['OR'] = params\n",
- "conf.columns = ['2.5%', '97.5%', 'OR']\n",
- "print(np.exp(conf))"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 9,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Optimization terminated successfully.\n",
- " Current function value: 0.625035\n",
- " Iterations 5\n",
- " Logit Regression Results \n",
- "==============================================================================\n",
- "Dep. Variable: case No. Observations: 614\n",
- "Model: Logit Df Residuals: 612\n",
- "Method: MLE Df Model: 1\n",
- "Date: Thu, 08 Aug 2019 Pseudo R-squ.: 0.09826\n",
- "Time: 11:26:46 Log-Likelihood: -383.77\n",
- "converged: True LL-Null: -425.59\n",
- " LLR p-value: 5.932e-20\n",
- "==============================================================================\n",
- " coef std err z P>|z| [0.025 0.975]\n",
- "------------------------------------------------------------------------------\n",
- "Intercept -0.6188 0.110 -5.631 0.000 -0.834 -0.403\n",
- "genotype 1.3742 0.162 8.469 0.000 1.056 1.692\n",
- "==============================================================================\n",
- "Odds Ratios\n",
- "======================\n",
- " 2.5% 97.5% OR\n",
- "Intercept 0.434221 0.668037 0.538587\n",
- "genotype 2.875396 5.431683 3.951992\n"
- ]
- }
- ],
- "source": [
- "model = smf.logit('case ~ genotype', data=df) \n",
- "result = model.fit()\n",
- "print(result.summary())\n",
- "print (\"Odds Ratios\")\n",
- "print (\"======================\")\n",
- "params = result.params\n",
- "conf = result.conf_int()\n",
- "conf['OR'] = params\n",
- "conf.columns = ['2.5%', '97.5%', 'OR']\n",
- "print(np.exp(conf))"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 11,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Optimization terminated successfully.\n",
- " Current function value: 0.681147\n",
- " Iterations 5\n",
- " Logit Regression Results \n",
- "==============================================================================\n",
- "Dep. Variable: case No. Observations: 614\n",
- "Model: Logit Df Residuals: 610\n",
- "Method: MLE Df Model: 3\n",
- "Date: Thu, 08 Aug 2019 Pseudo R-squ.: 0.01731\n",
- "Time: 11:27:46 Log-Likelihood: -418.22\n",
- "converged: True LL-Null: -425.59\n",
- " LLR p-value: 0.002057\n",
- "=================================================================================\n",
- " coef std err z P>|z| [0.025 0.975]\n",
- "---------------------------------------------------------------------------------\n",
- "Intercept -1.8501 0.776 -2.383 0.017 -3.372 -0.328\n",
- "exposed_metal 0.4286 0.232 1.849 0.065 -0.026 0.883\n",
- "age 0.0217 0.010 2.116 0.034 0.002 0.042\n",
- "packyrs 0.0100 0.004 2.772 0.006 0.003 0.017\n",
- "=================================================================================\n",
- "Odds Ratios\n",
- "======================\n",
- " 2.5% 97.5% OR\n",
- "Intercept 0.034321 0.720217 0.157221\n",
- "exposed_metal 0.974511 2.418135 1.535090\n",
- "age 1.001600 1.042627 1.021908\n",
- "packyrs 1.002933 1.017220 1.010051\n"
- ]
- }
- ],
- "source": [
- "model = smf.logit('case ~ exposed_metal + age + packyrs', data=df) \n",
- "result = model.fit()\n",
- "print(result.summary())\n",
- "print (\"Odds Ratios\")\n",
- "print (\"======================\")\n",
- "params = result.params\n",
- "conf = result.conf_int()\n",
- "conf['OR'] = params\n",
- "conf.columns = ['2.5%', '97.5%', 'OR']\n",
- "print(np.exp(conf))"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 12,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Optimization terminated successfully.\n",
- " Current function value: 0.614869\n",
- " Iterations 6\n",
- " Logit Regression Results \n",
- "==============================================================================\n",
- "Dep. Variable: case No. Observations: 614\n",
- "Model: Logit Df Residuals: 608\n",
- "Method: MLE Df Model: 5\n",
- "Date: Thu, 08 Aug 2019 Pseudo R-squ.: 0.1129\n",
- "Time: 11:28:03 Log-Likelihood: -377.53\n",
- "converged: True LL-Null: -425.59\n",
- " LLR p-value: 3.461e-19\n",
- "==========================================================================================\n",
- " coef std err z P>|z| [0.025 0.975]\n",
- "------------------------------------------------------------------------------------------\n",
- "Intercept -2.3354 0.842 -2.773 0.006 -3.986 -0.684\n",
- "exposed_metal 0.3302 0.309 1.067 0.286 -0.276 0.936\n",
- "genotype 1.3246 0.175 7.559 0.000 0.981 1.668\n",
- "exposed_metal:genotype 0.3011 0.494 0.610 0.542 -0.667 1.269\n",
- "age 0.0202 0.011 1.831 0.067 -0.001 0.042\n",
- "packyrs 0.0095 0.004 2.523 0.012 0.002 0.017\n",
- "==========================================================================================\n",
- "Odds Ratios\n",
- "======================\n",
- " 2.5% 97.5% OR\n",
- "Intercept 0.018568 0.504364 0.096773\n",
- "exposed_metal 0.758763 2.550909 1.391235\n",
- "genotype 2.667556 5.301737 3.760676\n",
- "exposed_metal:genotype 0.513502 3.556446 1.351385\n",
- "age 0.998572 1.042785 1.020439\n",
- "packyrs 1.002120 1.017013 1.009539\n"
- ]
- }
- ],
- "source": [
- "model = smf.logit('case ~ exposed_metal*genotype + age + packyrs', data=df) \n",
- "result = model.fit()\n",
- "print(result.summary())\n",
- "print (\"Odds Ratios\")\n",
- "print (\"======================\")\n",
- "params = result.params\n",
- "conf = result.conf_int()\n",
- "conf['OR'] = params\n",
- "conf.columns = ['2.5%', '97.5%', 'OR']\n",
- "print(np.exp(conf))"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 13,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Optimization terminated successfully.\n",
- " Current function value: 0.615811\n",
- " Iterations 5\n",
- " Logit Regression Results \n",
- "==============================================================================\n",
- "Dep. Variable: case No. Observations: 614\n",
- "Model: Logit Df Residuals: 608\n",
- "Method: MLE Df Model: 5\n",
- "Date: Thu, 08 Aug 2019 Pseudo R-squ.: 0.1116\n",
- "Time: 11:28:29 Log-Likelihood: -378.11\n",
- "converged: True LL-Null: -425.59\n",
- " LLR p-value: 6.061e-19\n",
- "=========================================================================================\n",
- " coef std err z P>|z| [0.025 0.975]\n",
- "-----------------------------------------------------------------------------------------\n",
- "Intercept -2.2892 0.838 -2.731 0.006 -3.932 -0.646\n",
- "exposed_wood 0.2269 0.442 0.513 0.608 -0.640 1.094\n",
- "genotype 1.4316 0.171 8.355 0.000 1.096 1.767\n",
- "exposed_wood:genotype -0.8695 0.601 -1.447 0.148 -2.047 0.308\n",
- "age 0.0201 0.011 1.823 0.068 -0.002 0.042\n",
- "packyrs 0.0095 0.004 2.516 0.012 0.002 0.017\n",
- "=========================================================================================\n",
- "Odds Ratios\n",
- "======================\n",
- " 2.5% 97.5% OR\n",
- "Intercept 0.019603 0.523984 0.101349\n",
- "exposed_wood 0.527371 2.984926 1.254656\n",
- "genotype 2.991419 5.855585 4.185273\n",
- "exposed_wood:genotype 0.129108 1.360924 0.419173\n",
- "age 0.998487 1.042558 1.020285\n",
- "packyrs 1.002109 1.017109 1.009581\n"
- ]
- }
- ],
- "source": [
- "model = smf.logit('case ~ exposed_wood*genotype + age + packyrs', data=df) \n",
- "result = model.fit()\n",
- "print(result.summary())\n",
- "print (\"Odds Ratios\")\n",
- "print (\"======================\")\n",
- "params = result.params\n",
- "conf = result.conf_int()\n",
- "conf['OR'] = params\n",
- "conf.columns = ['2.5%', '97.5%', 'OR']\n",
- "print(np.exp(conf))"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.5.4"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 2
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement