Guest User

Untitled

a guest
Jul 17th, 2018
98
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 17.15 KB | None | 0 0
  1. {
  2. "cells": [
  3. {
  4. "metadata": {
  5. "trusted": true
  6. },
  7. "cell_type": "code",
  8. "source": "import csv\nimport shlex\nfile2 = open(\"/Users/zainab/Dropbox/DREU/Code/data/conditions-master.csv\", \"r\", encoding='utf-8', errors='ignore')\nreader2 = csv.reader(file2, delimiter=',')\ncond_list = {}\nfor row in reader2:\n cond = row[0]\n count = row[1]\n if len(cond) > 0:\n cond_list[cond]=count\nfile2.close()",
  9. "execution_count": 3,
  10. "outputs": []
  11. },
  12. {
  13. "metadata": {
  14. "trusted": true
  15. },
  16. "cell_type": "code",
  17. "source": "len(cond_list)",
  18. "execution_count": 4,
  19. "outputs": [
  20. {
  21. "output_type": "execute_result",
  22. "execution_count": 4,
  23. "data": {
  24. "text/plain": "69136"
  25. },
  26. "metadata": {}
  27. }
  28. ]
  29. },
  30. {
  31. "metadata": {
  32. "trusted": true
  33. },
  34. "cell_type": "code",
  35. "source": "import os\npath_ann = \"/Users/zainab/Desktop/ebm_nlp/annotations/hierarchical_labels/participants/\"\npath_abs = \"/Users/zainab/Desktop/ebm_nlp/documents/\"\nabstracts = []\ngold = []\npmids = []\n\nfor filepath in os.listdir(path_ann):\n if filepath.endswith('DAWID_SKENE.ann'):\n pmid = filepath.strip('_DAWID_SKENE.ann')\n pmids.append(pmid)\n labels = open(path_ann+filepath).read().split(',')\n for abs_filepath in os.listdir(path_abs):\n if abs_filepath == pmid+\".text\":\n abstract = open(path_abs+abs_filepath).read()\n abstracts.append(abstract)\n if abs_filepath == pmid+\".tokens\":\n tokens = open(path_abs+abs_filepath).read().split(' ')\n if len(labels) == len(tokens):\n ann = []\n s = \"\"\n for i in range(len(tokens)):\n if labels[i] == \"4\":\n s = s+tokens[i]+\" \"\n if labels[i] != \"4\":\n if len(s)>0:\n ann.append(s.strip())\n s = \"\"\n gold.append(ann)",
  36. "execution_count": 5,
  37. "outputs": []
  38. },
  39. {
  40. "metadata": {
  41. "trusted": true
  42. },
  43. "cell_type": "code",
  44. "source": "import os\npath_ann = \"/Users/zainab/Desktop/ebm_nlp/annotations/hierarchical_labels/participants_experts/\"\npath_abs = \"/Users/zainab/Desktop/ebm_nlp/documents/\"\nabstracts_2 = []\ngold_2 = []\npmids = []\n\nfor filepath in os.listdir(path_ann):\n if filepath.endswith('DAWID_SKENE.ann'):\n pmid = filepath.strip('_DAWID_SKENE.ann')\n pmids.append(pmid)\n labels = open(path_ann+filepath).read().split(',')\n for abs_filepath in os.listdir(path_abs):\n if abs_filepath == pmid+\".text\":\n abstract = open(path_abs+abs_filepath).read()\n abstracts_2.append(abstract)\n if abs_filepath == pmid+\".tokens\":\n tokens = open(path_abs+abs_filepath).read().split(' ')\n if len(labels) == len(tokens):\n ann = []\n s = \"\"\n for i in range(len(tokens)):\n if labels[i] == \"4\":\n s = s+tokens[i]+\" \"\n if labels[i] != \"4\":\n if len(s)>0:\n ann.append(s.strip())\n s = \"\"\n gold_2.append(ann)",
  45. "execution_count": 107,
  46. "outputs": []
  47. },
  48. {
  49. "metadata": {
  50. "trusted": true
  51. },
  52. "cell_type": "code",
  53. "source": "long_predictions = []\nfor abstract in abstracts_2:\n abstract = abstract.lower()\n anns = []\n for key in cond_list.keys():\n if key in abstract:\n anns.append(key)\n long_predictions.append(anns)",
  54. "execution_count": 164,
  55. "outputs": []
  56. },
  57. {
  58. "metadata": {
  59. "trusted": true
  60. },
  61. "cell_type": "code",
  62. "source": "print(\"Abstract: \", abstracts[8])\nprint(\"What is annotated: \", gold[8])\nprint(\"What we found: \", long_predictions[8])",
  63. "execution_count": null,
  64. "outputs": []
  65. },
  66. {
  67. "metadata": {
  68. "trusted": true
  69. },
  70. "cell_type": "code",
  71. "source": "import itertools\n# collapse gold list \ngold_merged_2 = list(itertools.chain.from_iterable(gold_2))",
  72. "execution_count": 165,
  73. "outputs": []
  74. },
  75. {
  76. "metadata": {
  77. "trusted": true
  78. },
  79. "cell_type": "code",
  80. "source": "# full match\ncounter = 0\nfor term in gold_merged_2:\n if term in cond_list.keys():\n counter+= 1",
  81. "execution_count": 169,
  82. "outputs": []
  83. },
  84. {
  85. "metadata": {
  86. "trusted": true
  87. },
  88. "cell_type": "code",
  89. "source": "counter/len(gold_merged_2)",
  90. "execution_count": 170,
  91. "outputs": [
  92. {
  93. "output_type": "execute_result",
  94. "execution_count": 170,
  95. "data": {
  96. "text/plain": "0.4726277372262774"
  97. },
  98. "metadata": {}
  99. }
  100. ]
  101. },
  102. {
  103. "metadata": {
  104. "trusted": true
  105. },
  106. "cell_type": "code",
  107. "source": "# partial match\ncounter_partial = 0\nfor term in gold_merged_2:\n for key in cond_list.keys():\n if term in key:\n counter_partial+= 1\n break;",
  108. "execution_count": 172,
  109. "outputs": []
  110. },
  111. {
  112. "metadata": {
  113. "trusted": true
  114. },
  115. "cell_type": "code",
  116. "source": "counter_partial/len(gold_merged_2)",
  117. "execution_count": 173,
  118. "outputs": [
  119. {
  120. "output_type": "execute_result",
  121. "execution_count": 173,
  122. "data": {
  123. "text/plain": "0.5291970802919708"
  124. },
  125. "metadata": {}
  126. }
  127. ]
  128. },
  129. {
  130. "metadata": {
  131. "trusted": true
  132. },
  133. "cell_type": "code",
  134. "source": "#at paper level full match\natleast_one = 0\nfor condition in gold_2:\n boo = False\n for cond in condition:\n if cond in cond_list.keys():\n boo = True\n if boo == True:\n atleast_one+=1",
  135. "execution_count": 174,
  136. "outputs": []
  137. },
  138. {
  139. "metadata": {
  140. "trusted": true
  141. },
  142. "cell_type": "code",
  143. "source": "atleast_one/len(gold_2)",
  144. "execution_count": 176,
  145. "outputs": [
  146. {
  147. "output_type": "execute_result",
  148. "execution_count": 176,
  149. "data": {
  150. "text/plain": "0.705"
  151. },
  152. "metadata": {}
  153. }
  154. ]
  155. },
  156. {
  157. "metadata": {
  158. "trusted": true
  159. },
  160. "cell_type": "code",
  161. "source": "#at paper level partial match\natleast_one_p = 0\nfor condition in gold_2:\n boo = False\n for cond in condition:\n for key in cond_list.keys():\n if cond in key:\n boo = True\n if boo == True:\n atleast_one_p+=1",
  162. "execution_count": 178,
  163. "outputs": []
  164. },
  165. {
  166. "metadata": {
  167. "trusted": true
  168. },
  169. "cell_type": "code",
  170. "source": "atleast_one_p/len(gold_2)",
  171. "execution_count": 179,
  172. "outputs": [
  173. {
  174. "output_type": "execute_result",
  175. "execution_count": 179,
  176. "data": {
  177. "text/plain": "0.755"
  178. },
  179. "metadata": {}
  180. }
  181. ]
  182. },
  183. {
  184. "metadata": {
  185. "trusted": true
  186. },
  187. "cell_type": "code",
  188. "source": "#find example where there isn't a full match\nno_match = []\nfor i in range(len(gold_2)):\n boo = False\n condition = gold_2[i]\n for cond in condition:\n if cond in cond_list.keys():\n boo = True\n if boo == False:\n no_match.append(i)",
  189. "execution_count": 180,
  190. "outputs": []
  191. },
  192. {
  193. "metadata": {
  194. "trusted": true
  195. },
  196. "cell_type": "code",
  197. "source": "no_match[0:20]",
  198. "execution_count": 181,
  199. "outputs": [
  200. {
  201. "output_type": "execute_result",
  202. "execution_count": 181,
  203. "data": {
  204. "text/plain": "[5, 9, 14, 22, 24, 27, 28, 35, 39, 42, 44, 53, 54, 55, 59, 63, 72, 75, 76, 83]"
  205. },
  206. "metadata": {}
  207. }
  208. ]
  209. },
  210. {
  211. "metadata": {
  212. "trusted": true
  213. },
  214. "cell_type": "code",
  215. "source": "k = 3\nprint(\"Abstract: \", abstracts[k])\nprint(\"What is annotated: \", gold[k])\nprint(\"What we found: \", long_predictions[k])",
  216. "execution_count": 214,
  217. "outputs": [
  218. {
  219. "output_type": "stream",
  220. "text": "Abstract: In vitro biocompatibility tests of glass ionomer cements impregnated with collagen or bioactive glass to fibroblasts.\n\nAIM AND DESIGN To evaluate the biocompatibility of glass ionomer cement (GIC) impregnated with collagen or bioactive glass to BHK-21 fibroblasts in vitro. Mineral Trioxide Aggregate was used as the standard for comparison. Human maxillary central incisors (n = 70) were instrumented with a rotary NiTi system and filled. Following resection of the apical 3mm, root end cavities were prepared and restored with conventional GIC (group 1) or GIC with 0.01%, 0.1% or 1% collagen (groups 2, 3, 4 respectively) or, 10%, 30% or 50% bioactive glass (groups 5, 6, 7 respectively), or Mineral Trioxide Aggregate (group 8). The root slices were incubated in tissue culture plates with BHK-21 fibroblast cell line. Phase contrast and scanning electron microscopes were used to score cell quantity, morphology and cell attachment. The data were statistically analyzed by one way ANOVA with Post Hoc Tukey HSD test (p = 0.05).\nRESULTS AND CONCLUSIONS Group 5 showed the highest scores which was significantly higher than all other groups (p < 0.05) except group 8, with which there was no significant difference (p > 0.05). Glass ionomer cement with 10% bioactive glass showed better adhesion and spreading of cells than glass ionomer cement with 0.01% collagen. The biocompatibility of collagen and bioactive glass was concentration dependent. 
The addition of bioactive glass improved the biocompatibility of glass ionomer cement to fibroblasts better than addition of collagen.\n\n\nWhat is annotated: ['Human maxillary central incisors', 'root end cavities', 'root slices']\nWhat we found: ['healthy', 'aged', 'healthy adults', 'behavior', 'adult', 'diet', 'osa', 'health', 'cognitive performance', 'attention', 'cerebral blood flow', 'men', 'control', 'performance', 'outcome', 'age', 'brain function', 'healthy young adults', 'art', 'placebo', 'ect', 'hemoglobin', 'outcomes', 'behavioral', 'brain', 'supplementation', 'healthy adult', 'adults', 'young adult', 'sah', 'ad', 'ct', 'ph', 'ami', 'blood flow', 'fatty acids', 'osah', 'young adults', 'function', 'acc', 'stent', 'young', 'ra', 'dietary supplement', 'dietary supplementation', 'ms', 'pe', 'blood', 'cts', 'hemodynamic', 'hemodynamic response', 'par', 'iol', 'near-infrared spectroscopy', 'healthy young', 'ich', 'oic', 'blind', 'spect', 'ed', 'act', 'fatty acid', 'concentration', 'he', 'ifi', 'controlled', 'supplement', 'computer', 'er', 'ain', 'prefrontal cortex', 'ic', 'double-blind', 'alt', 'omega-3 polyunsaturated fatty acids', 'mpa', 'ee', 'mic', 'polyunsaturated fatty acids', 'as', 'os']\n",
  221. "name": "stdout"
  222. }
  223. ]
  224. },
  225. {
  226. "metadata": {
  227. "trusted": true
  228. },
  229. "cell_type": "code",
  230. "source": "no_match_p = []\nfor i in range(len(gold)):\n boo = False\n condition = gold[i]\n for cond in condition:\n for key in cond_list.keys():\n if cond in key:\n boo = True\n if boo == False and len(condition) > 0:\n no_match_p.append(i)",
  231. "execution_count": 189,
  232. "outputs": [
  233. {
  234. "output_type": "error",
  235. "ename": "KeyboardInterrupt",
  236. "evalue": "",
  237. "traceback": [
  238. "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
  239. "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
  240. "\u001b[0;32m<ipython-input-189-7ee41a36ec56>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mcond\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mcondition\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mkey\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mcond_list\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mkeys\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 7\u001b[0;31m \u001b[0;32mif\u001b[0m \u001b[0mcond\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mkey\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 8\u001b[0m \u001b[0mboo\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 9\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mboo\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;32mFalse\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcondition\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
  241. "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
  242. ]
  243. }
  244. ]
  245. },
  246. {
  247. "metadata": {
  248. "trusted": true
  249. },
  250. "cell_type": "code",
  251. "source": "print(len(no_match_p))\nprint(len(no_match))\nprint(no_match_p[0:30])",
  252. "execution_count": 196,
  253. "outputs": [
  254. {
  255. "output_type": "stream",
  256. "text": "180\n59\n[3, 12, 14, 16, 24, 36, 44, 48, 58, 61, 62, 67, 68, 69, 95, 96, 99, 100, 103, 105, 107, 110, 122, 124, 126, 132, 133, 136, 138, 141]\n",
  257. "name": "stdout"
  258. }
  259. ]
  260. },
  261. {
  262. "metadata": {
  263. "trusted": true
  264. },
  265. "cell_type": "code",
  266. "source": "k = 141\nprint(\"Abstract: \", abstracts[k])\nprint(\"What is annotated: \", gold[k])\nprint(\"What we found: \", long_predictions[k])",
  267. "execution_count": 201,
  268. "outputs": [
  269. {
  270. "output_type": "stream",
  271. "text": "Abstract: Randomized Phase II trial assessing estramustine and vinblastine combination chemotherapy vs estramustine alone in patients with progressive hormone-escaped metastatic prostate cancer.\n\nBased on the results of combined data from three North American Phase II studies, a randomised Phase II study in the same patient population was performed, using combination chemotherapy with estramustine phosphate (EMP) and vinblastine (VBL) in hormone refractory prostate cancer patients. In all, 92 patients were randomised into a Phase II study of oral EMP (10 mg kg day continuously) or oral EMP in combination with intravenous VBL (4 mg m(2) week for 6 weeks, followed by 2 weeks rest). The end points were toxicity and PSA response in both groups, with the option to continue the trial as a Phase III study with time to progression and survival as end points, if sufficient responses were observed. Toxicity was unexpectedly high in both treatment arms and led to treatment withdrawal or refusal in 49% of all patients, predominantly already during the first treatment cycle. The mean treatment duration was 10 and 14 weeks, median time to PSA progression was 27.2 and 30.8 weeks, median survival time was 44 and 50.9 weeks, and PSA response rate was only 24.6 and 28.9% in the EMP/VBL and EMP arms, respectively. There was no correlation between PSA response and survival. While the PSA response in the patients tested was less than half that recorded in the North American studies, the toxicity of EMP monotherapy or in combination with VBL was much higher than expected. 
Further research on more effective and less toxic treatment strategies for hormone refractory prostate cancer is mandatory.\n\n\nWhat is annotated: ['progressive hormone-escaped metastatic prostate cancer']\nWhat we found: ['hepatitis c', 'infection', 'chronic hepatitis c', 'hepatitis', 'treatment', 'hcv', 'clinical trial', 'tia', 'chronic hepatitis', 'men', 'outcome', 'age', 'therapy', 'chronic', 'patients', 'genotype', 'genotype 1', 'ect', 'uti', 'outcomes', 'exposure', 'ct', 'ph', 'ami', 'acc', 'ra', 'pe', 'cts', 'practice', 'drug', 'sid', 'spect', 'achievement', 'tic', 'ed', 'net', 'act', 'ict', 'he', 'ifi', 'clinical', 'trial', 'tha', 'type 1', 'interferon alpha', 'er', 'ain', 'ic', 'stai', 'rop', 'mpa', 'tis', 'ee', 'chi', 'mic', 'evaluated', 'naive patients', 'as', 'os']\n",
  272. "name": "stdout"
  273. }
  274. ]
  275. },
  276. {
  277. "metadata": {
  278. "trusted": true
  279. },
  280. "cell_type": "code",
  281. "source": "# NOTE(review): long_predictions_2 is not assigned in any cell visible in this notebook;\n# on a fresh kernel this line presumably raises NameError - confirm where the name should come from.\nlen(long_predictions_2)",
  282. "execution_count": 127,
  283. "outputs": [
  284. {
  285. "output_type": "execute_result",
  286. "execution_count": 127,
  287. "data": {
  288. "text/plain": "0"
  289. },
  290. "metadata": {}
  291. }
  292. ]
  293. },
  294. {
  295. "metadata": {
  296. "trusted": true
  297. },
  298. "cell_type": "code",
  299. "source": "cond_list['pathology']",
  300. "execution_count": 147,
  301. "outputs": [
  302. {
  303. "output_type": "execute_result",
  304. "execution_count": 147,
  305. "data": {
  306. "text/plain": "'13'"
  307. },
  308. "metadata": {}
  309. }
  310. ]
  311. },
  312. {
  313. "metadata": {
  314. "trusted": true
  315. },
  316. "cell_type": "code",
  317. "source": "\n",
  318. "execution_count": 163,
  319. "outputs": [
  320. {
  321. "output_type": "execute_result",
  322. "execution_count": 163,
  323. "data": {
  324. "text/plain": "False"
  325. },
  326. "metadata": {}
  327. }
  328. ]
  329. },
  330. {
  331. "metadata": {
  332. "trusted": true
  333. },
  334. "cell_type": "code",
  335. "source": "",
  336. "execution_count": null,
  337. "outputs": []
  338. }
  339. ],
  340. "metadata": {
  341. "kernelspec": {
  342. "name": "python3",
  343. "display_name": "Python 3",
  344. "language": "python"
  345. },
  346. "language_info": {
  347. "name": "python",
  348. "version": "3.6.0",
  349. "mimetype": "text/x-python",
  350. "codemirror_mode": {
  351. "name": "ipython",
  352. "version": 3
  353. },
  354. "pygments_lexer": "ipython3",
  355. "nbconvert_exporter": "python",
  356. "file_extension": ".py"
  357. }
  358. },
  359. "nbformat": 4,
  360. "nbformat_minor": 2
  361. }
Add Comment
Please, Sign In to add comment