Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- {
- "cells": [
- {
- "cell_type": "code",
- "execution_count": 28,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": [
- "import pandas as pd\n",
- "from sklearn.metrics import roc_auc_score, average_precision_score"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 29,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": [
- "# Read the three prediction files into a list of DataFrames.\n",
- "# header=None: the first line of each file is read as data, and the two\n",
- "# space-separated columns are named 'score' and 'label'.\n",
- "df = [\n",
- "    pd.read_csv('vw.{}.model_predictions'.format(i),\n",
- "                delimiter=' ',\n",
- "                header=None,\n",
- "                names=['score', 'label'])\n",
- "    for i in range(3)\n",
- "]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 30,
- "metadata": {
- "collapsed": false
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Dataset 0\n",
- "AUROC : 0.40218855218855215\n",
- "AUPR : 0.1827077652269048\n",
- "==========\n",
- "Dataset 1\n",
- "AUROC : 0.5784233496999454\n",
- "AUPR : 0.017244651483781917\n",
- "==========\n",
- "Dataset 2\n",
- "AUROC : 0.5091074681238617\n",
- "AUPR : 0.0019083969465648854\n",
- "==========\n"
- ]
- }
- ],
- "source": [
- "# Print AUROC and average precision (AUPR) for each prediction set.\n",
- "for i in range(3):\n",
- "    print('Dataset {}'.format(i))\n",
- "    labels, scores = df[i].label, df[i].score\n",
- "    auroc = roc_auc_score(labels, scores)\n",
- "    print('AUROC : {}'.format(auroc))\n",
- "    aupr = average_precision_score(labels, scores)\n",
- "    print('AUPR : {}'.format(aupr))\n",
- "    print('=' * 10)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 37,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": [
- "# Score cut-offs used for the per-threshold precision/recall report.\n",
- "thresholds = [\n",
- "    0.0002, 0.0003, 0.0016, 0.0029, 0.0035, 0.0364,\n",
- "    0.2701, 0.3376, 0.6688, 0.9669, 0.9967,\n",
- "]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 79,
- "metadata": {
- "collapsed": false
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Dataset 0\n",
- "Threshold: 0.000, Precision: 0.008, recall: 0.400\n",
- "Threshold: 0.000, Precision: 0.008, recall: 0.400\n",
- "Threshold: 0.002, Precision: 0.120, recall: 0.300\n",
- "Threshold: 0.003, Precision: 0.125, recall: 0.300\n",
- "Threshold: 0.004, Precision: 0.125, recall: 0.300\n",
- "Threshold: 0.036, Precision: nan, recall: 0.000\n",
- "Threshold: 0.270, Precision: nan, recall: 0.000\n",
- "Threshold: 0.338, Precision: nan, recall: 0.000\n",
- "Threshold: 0.669, Precision: nan, recall: 0.000\n",
- "Threshold: 0.967, Precision: nan, recall: 0.000\n",
- "Threshold: 0.997, Precision: nan, recall: 0.000\n",
- "==========\n",
- "Dataset 1\n",
- "Threshold: 0.000, Precision: 0.010, recall: 1.000\n",
- "Threshold: 0.000, Precision: 0.010, recall: 1.000\n",
- "Threshold: 0.002, Precision: 0.024, recall: 0.167\n",
- "Threshold: 0.003, Precision: 0.036, recall: 0.167\n",
- "Threshold: 0.004, Precision: 0.036, recall: 0.167\n",
- "Threshold: 0.036, Precision: nan, recall: 0.000\n",
- "Threshold: 0.270, Precision: nan, recall: 0.000\n",
- "Threshold: 0.338, Precision: nan, recall: 0.000\n",
- "Threshold: 0.669, Precision: nan, recall: 0.000\n",
- "Threshold: 0.967, Precision: nan, recall: 0.000\n",
- "Threshold: 0.997, Precision: nan, recall: 0.000\n",
- "==========\n",
- "Dataset 2\n",
- "Threshold: 0.000, Precision: 0.004, recall: 1.000\n",
- "Threshold: 0.000, Precision: 0.004, recall: 1.000\n",
- "Threshold: 0.002, Precision: 0.004, recall: 1.000\n",
- "Threshold: 0.003, Precision: 0.004, recall: 1.000\n",
- "Threshold: 0.004, Precision: 0.004, recall: 1.000\n",
- "Threshold: 0.036, Precision: 0.000, recall: 0.000\n",
- "Threshold: 0.270, Precision: nan, recall: 0.000\n",
- "Threshold: 0.338, Precision: nan, recall: 0.000\n",
- "Threshold: 0.669, Precision: nan, recall: 0.000\n",
- "Threshold: 0.967, Precision: nan, recall: 0.000\n",
- "Threshold: 0.997, Precision: nan, recall: 0.000\n",
- "==========\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/Users/amir.ziai/anaconda/lib/python3.5/site-packages/ipykernel/__main__.py:8: RuntimeWarning: invalid value encountered in long_scalars\n"
- ]
- }
- ],
- "source": [
- "# Precision and recall of the rule 'score >= threshold' against label == 1,\n",
- "# printed for every dataset and every threshold.\n",
- "for i in range(3):\n",
- "    print('Dataset {}'.format(i))\n",
- "    # The actual-positive mask does not depend on the threshold, so build\n",
- "    # it once per dataset.\n",
- "    p_actual = df[i].label == 1\n",
- "    n_actual = p_actual.sum()\n",
- "    for threshold in thresholds:\n",
- "        p_predicted = df[i].score >= threshold\n",
- "        n_predicted = p_predicted.sum()\n",
- "        tp = (p_predicted & p_actual).sum()\n",
- "        # When nothing is predicted positive (or there are no actual\n",
- "        # positives) the ratio is 0/0. Report nan explicitly instead of\n",
- "        # dividing, which emitted a RuntimeWarning.\n",
- "        precision = tp / n_predicted if n_predicted else float('nan')\n",
- "        recall = tp / n_actual if n_actual else float('nan')\n",
- "        # NOTE: {:.3f} prints thresholds below 0.0005 as 0.000, so the\n",
- "        # first two thresholds look identical in the output.\n",
- "        print('Threshold: {:.3f}, Precision: {:.3f}, recall: {:.3f}'.format(\n",
- "            threshold, precision, recall))\n",
- "    print('=' * 10)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": []
- }
- ],
- "metadata": {
- "anaconda-cloud": {},
- "kernelspec": {
- "display_name": "Python [default]",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.5.4"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 2
- }
Add Comment
Please, Sign In to add comment