Guest User

Untitled

a guest
Oct 21st, 2017
77
0
Never
Not a member of Pastebin yet? Sign Up — it unlocks many cool features!
text 5.72 KB | None | 0 0
  1. {
  2. "cells": [
  3. {
  4. "cell_type": "code",
  5. "execution_count": 28,
  6. "metadata": {
  7. "collapsed": true
  8. },
  9. "outputs": [],
  10. "source": [
  11. "import pandas as pd\n",
  12. "from sklearn.metrics import roc_auc_score, average_precision_score"
  13. ]
  14. },
  15. {
  16. "cell_type": "code",
  17. "execution_count": 29,
  18. "metadata": {
  19. "collapsed": true
  20. },
  21. "outputs": [],
  22. "source": [
  23. "df = []\n",
  24. "for i in range(3):\n",
  25. " df.append(pd.read_csv('vw.{}.model_predictions'.format(i),\n",
  26. " delimiter=' ',\n",
  27. " header=None,\n",
  28. " names=['score', 'label']))"
  29. ]
  30. },
  31. {
  32. "cell_type": "code",
  33. "execution_count": 30,
  34. "metadata": {
  35. "collapsed": false
  36. },
  37. "outputs": [
  38. {
  39. "name": "stdout",
  40. "output_type": "stream",
  41. "text": [
  42. "Dataset 0\n",
  43. "AUROC : 0.40218855218855215\n",
  44. "AUPR : 0.1827077652269048\n",
  45. "==========\n",
  46. "Dataset 1\n",
  47. "AUROC : 0.5784233496999454\n",
  48. "AUPR : 0.017244651483781917\n",
  49. "==========\n",
  50. "Dataset 2\n",
  51. "AUROC : 0.5091074681238617\n",
  52. "AUPR : 0.0019083969465648854\n",
  53. "==========\n"
  54. ]
  55. }
  56. ],
  57. "source": [
  58. "for i in range(3):\n",
  59. " print('Dataset {}'.format(i))\n",
  60. " print('AUROC : {}'.format(roc_auc_score(df[i].label,\n",
  61. " df[i].score)))\n",
  62. " print('AUPR : {}'.format(average_precision_score(df[i].label,\n",
  63. " df[i].score)))\n",
  64. " print('=' * 10)"
  65. ]
  66. },
  67. {
  68. "cell_type": "code",
  69. "execution_count": 37,
  70. "metadata": {
  71. "collapsed": true
  72. },
  73. "outputs": [],
  74. "source": [
  75. "thresholds = \"\"\"0.0002\n",
  76. "0.0003\n",
  77. "0.0016\n",
  78. "0.0029\n",
  79. "0.0035\n",
  80. "0.0364\n",
  81. "0.2701\n",
  82. "0.3376\n",
  83. "0.6688\n",
  84. "0.9669\n",
  85. "0.9967\"\"\".split()\n",
  86. "thresholds = list(map(float, thresholds))"
  87. ]
  88. },
  89. {
  90. "cell_type": "code",
  91. "execution_count": 79,
  92. "metadata": {
  93. "collapsed": false
  94. },
  95. "outputs": [
  96. {
  97. "name": "stdout",
  98. "output_type": "stream",
  99. "text": [
  100. "Dataset 0\n",
  101. "Threshold: 0.000, Precision: 0.008, recall: 0.400\n",
  102. "Threshold: 0.000, Precision: 0.008, recall: 0.400\n",
  103. "Threshold: 0.002, Precision: 0.120, recall: 0.300\n",
  104. "Threshold: 0.003, Precision: 0.125, recall: 0.300\n",
  105. "Threshold: 0.004, Precision: 0.125, recall: 0.300\n",
  106. "Threshold: 0.036, Precision: nan, recall: 0.000\n",
  107. "Threshold: 0.270, Precision: nan, recall: 0.000\n",
  108. "Threshold: 0.338, Precision: nan, recall: 0.000\n",
  109. "Threshold: 0.669, Precision: nan, recall: 0.000\n",
  110. "Threshold: 0.967, Precision: nan, recall: 0.000\n",
  111. "Threshold: 0.997, Precision: nan, recall: 0.000\n",
  112. "==========\n",
  113. "Dataset 1\n",
  114. "Threshold: 0.000, Precision: 0.010, recall: 1.000\n",
  115. "Threshold: 0.000, Precision: 0.010, recall: 1.000\n",
  116. "Threshold: 0.002, Precision: 0.024, recall: 0.167\n",
  117. "Threshold: 0.003, Precision: 0.036, recall: 0.167\n",
  118. "Threshold: 0.004, Precision: 0.036, recall: 0.167\n",
  119. "Threshold: 0.036, Precision: nan, recall: 0.000\n",
  120. "Threshold: 0.270, Precision: nan, recall: 0.000\n",
  121. "Threshold: 0.338, Precision: nan, recall: 0.000\n",
  122. "Threshold: 0.669, Precision: nan, recall: 0.000\n",
  123. "Threshold: 0.967, Precision: nan, recall: 0.000\n",
  124. "Threshold: 0.997, Precision: nan, recall: 0.000\n",
  125. "==========\n",
  126. "Dataset 2\n",
  127. "Threshold: 0.000, Precision: 0.004, recall: 1.000\n",
  128. "Threshold: 0.000, Precision: 0.004, recall: 1.000\n",
  129. "Threshold: 0.002, Precision: 0.004, recall: 1.000\n",
  130. "Threshold: 0.003, Precision: 0.004, recall: 1.000\n",
  131. "Threshold: 0.004, Precision: 0.004, recall: 1.000\n",
  132. "Threshold: 0.036, Precision: 0.000, recall: 0.000\n",
  133. "Threshold: 0.270, Precision: nan, recall: 0.000\n",
  134. "Threshold: 0.338, Precision: nan, recall: 0.000\n",
  135. "Threshold: 0.669, Precision: nan, recall: 0.000\n",
  136. "Threshold: 0.967, Precision: nan, recall: 0.000\n",
  137. "Threshold: 0.997, Precision: nan, recall: 0.000\n",
  138. "==========\n"
  139. ]
  140. },
  141. {
  142. "name": "stderr",
  143. "output_type": "stream",
  144. "text": [
  145. "/Users/amir.ziai/anaconda/lib/python3.5/site-packages/ipykernel/__main__.py:8: RuntimeWarning: invalid value encountered in long_scalars\n"
  146. ]
  147. }
  148. ],
  149. "source": [
  150. "for i in range(3):\n",
  151. " print('Dataset {}'.format(i))\n",
  152. " for threshold in thresholds:\n",
  153. " p_predicted = df[i].score >= threshold\n",
  154. " p_actual = df[i].label == 1\n",
  155. " tp = len(df[i][p_predicted & p_actual])\n",
  156. " print('Threshold: {:.3f}, Precision: {:.3f}, recall: {:.3f}'.format(threshold,\n",
  157. " tp / p_predicted.sum(),\n",
  158. " tp / p_actual.sum()))\n",
  159. " \n",
  160. " print('=' * 10)"
  161. ]
  162. },
  163. {
  164. "cell_type": "code",
  165. "execution_count": null,
  166. "metadata": {
  167. "collapsed": true
  168. },
  169. "outputs": [],
  170. "source": []
  171. }
  172. ],
  173. "metadata": {
  174. "anaconda-cloud": {},
  175. "kernelspec": {
  176. "display_name": "Python [default]",
  177. "language": "python",
  178. "name": "python3"
  179. },
  180. "language_info": {
  181. "codemirror_mode": {
  182. "name": "ipython",
  183. "version": 3
  184. },
  185. "file_extension": ".py",
  186. "mimetype": "text/x-python",
  187. "name": "python",
  188. "nbconvert_exporter": "python",
  189. "pygments_lexer": "ipython3",
  190. "version": "3.5.4"
  191. }
  192. },
  193. "nbformat": 4,
  194. "nbformat_minor": 2
  195. }
Add Comment
Please Sign In to add a comment.