Untitled

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "from sklearn.metrics import roc_auc_score, average_precision_score"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "df = []\n",
    "for i in range(3):\n",
    "    df.append(pd.read_csv('vw.{}.model_predictions'.format(i),\n",
    "                          delimiter=' ',\n",
    "                          header=None,\n",
    "                          names=['score', 'label']))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Dataset 0\n",
      "AUROC : 0.40218855218855215\n",
      "AUPR  : 0.1827077652269048\n",
      "==========\n",
      "Dataset 1\n",
      "AUROC : 0.5784233496999454\n",
      "AUPR  : 0.017244651483781917\n",
      "==========\n",
      "Dataset 2\n",
      "AUROC : 0.5091074681238617\n",
      "AUPR  : 0.0019083969465648854\n",
      "==========\n"
     ]
    }
   ],
   "source": [
    "for i in range(3):\n",
    "    print('Dataset {}'.format(i))\n",
    "    print('AUROC : {}'.format(roc_auc_score(df[i].label,\n",
    "                                            df[i].score)))\n",
    "    print('AUPR  : {}'.format(average_precision_score(df[i].label,\n",
    "                                                      df[i].score)))\n",
    "    print('=' * 10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "thresholds = \"\"\"0.0002\n",
    "0.0003\n",
    "0.0016\n",
    "0.0029\n",
    "0.0035\n",
    "0.0364\n",
    "0.2701\n",
    "0.3376\n",
    "0.6688\n",
    "0.9669\n",
    "0.9967\"\"\".split()\n",
    "thresholds = list(map(float, thresholds))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Dataset 0\n",
      "Threshold: 0.000, Precision: 0.008, recall: 0.400\n",
      "Threshold: 0.000, Precision: 0.008, recall: 0.400\n",
      "Threshold: 0.002, Precision: 0.120, recall: 0.300\n",
      "Threshold: 0.003, Precision: 0.125, recall: 0.300\n",
      "Threshold: 0.004, Precision: 0.125, recall: 0.300\n",
      "Threshold: 0.036, Precision: nan, recall: 0.000\n",
      "Threshold: 0.270, Precision: nan, recall: 0.000\n",
      "Threshold: 0.338, Precision: nan, recall: 0.000\n",
      "Threshold: 0.669, Precision: nan, recall: 0.000\n",
      "Threshold: 0.967, Precision: nan, recall: 0.000\n",
      "Threshold: 0.997, Precision: nan, recall: 0.000\n",
      "==========\n",
      "Dataset 1\n",
      "Threshold: 0.000, Precision: 0.010, recall: 1.000\n",
      "Threshold: 0.000, Precision: 0.010, recall: 1.000\n",
      "Threshold: 0.002, Precision: 0.024, recall: 0.167\n",
      "Threshold: 0.003, Precision: 0.036, recall: 0.167\n",
      "Threshold: 0.004, Precision: 0.036, recall: 0.167\n",
      "Threshold: 0.036, Precision: nan, recall: 0.000\n",
      "Threshold: 0.270, Precision: nan, recall: 0.000\n",
      "Threshold: 0.338, Precision: nan, recall: 0.000\n",
      "Threshold: 0.669, Precision: nan, recall: 0.000\n",
      "Threshold: 0.967, Precision: nan, recall: 0.000\n",
      "Threshold: 0.997, Precision: nan, recall: 0.000\n",
      "==========\n",
      "Dataset 2\n",
      "Threshold: 0.000, Precision: 0.004, recall: 1.000\n",
      "Threshold: 0.000, Precision: 0.004, recall: 1.000\n",
      "Threshold: 0.002, Precision: 0.004, recall: 1.000\n",
      "Threshold: 0.003, Precision: 0.004, recall: 1.000\n",
      "Threshold: 0.004, Precision: 0.004, recall: 1.000\n",
      "Threshold: 0.036, Precision: 0.000, recall: 0.000\n",
      "Threshold: 0.270, Precision: nan, recall: 0.000\n",
      "Threshold: 0.338, Precision: nan, recall: 0.000\n",
      "Threshold: 0.669, Precision: nan, recall: 0.000\n",
      "Threshold: 0.967, Precision: nan, recall: 0.000\n",
      "Threshold: 0.997, Precision: nan, recall: 0.000\n",
      "==========\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/amir.ziai/anaconda/lib/python3.5/site-packages/ipykernel/__main__.py:8: RuntimeWarning: invalid value encountered in long_scalars\n"
     ]
    }
   ],
   "source": [
    "for i in range(3):\n",
    "    print('Dataset {}'.format(i))\n",
    "    for threshold in thresholds:\n",
    "        p_predicted = df[i].score >= threshold\n",
    "        p_actual = df[i].label == 1\n",
    "        tp = len(df[i][p_predicted & p_actual])\n",
    "        print('Threshold: {:.3f}, Precision: {:.3f}, recall: {:.3f}'.format(threshold,\n",
    "                                                                           tp / p_predicted.sum(),\n",
    "                                                                           tp / p_actual.sum()))\n",
    "        \n",
    "    print('=' * 10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python [default]",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}