Advertisement
Guest User

Untitled

a guest
Jun 27th, 2017
549
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 12.13 KB | None | 0 0
  1. {
  2. "cells": [
  3. {
  4. "cell_type": "code",
  5. "execution_count": 6,
  6. "metadata": {
  7. "collapsed": false,
  8. "scrolled": true
  9. },
  10. "outputs": [
  11. {
  12. "name": "stdout",
  13. "output_type": "stream",
  14. "text": [
  15. "From: stephen.marquard@uct.ac.za\n",
  16. "From: louis@media.berkeley.edu\n",
  17. "From: zqian@umich.edu\n",
  18. "From: rjlowe@iupui.edu\n",
  19. "From: zqian@umich.edu\n",
  20. "From: rjlowe@iupui.edu\n",
  21. "From: cwen@iupui.edu\n",
  22. "From: cwen@iupui.edu\n",
  23. "From: gsilver@umich.edu\n",
  24. "From: gsilver@umich.edu\n",
  25. "From: zqian@umich.edu\n",
  26. "From: gsilver@umich.edu\n",
  27. "From: wagnermr@iupui.edu\n",
  28. "From: zqian@umich.edu\n",
  29. "From: antranig@caret.cam.ac.uk\n",
  30. "From: gopal.ramasammycook@gmail.com\n",
  31. "From: david.horwitz@uct.ac.za\n",
  32. "From: david.horwitz@uct.ac.za\n",
  33. "From: david.horwitz@uct.ac.za\n",
  34. "From: david.horwitz@uct.ac.za\n",
  35. "From: stephen.marquard@uct.ac.za\n",
  36. "From: louis@media.berkeley.edu\n",
  37. "From: louis@media.berkeley.edu\n",
  38. "From: ray@media.berkeley.edu\n",
  39. "From: cwen@iupui.edu\n",
  40. "From: cwen@iupui.edu\n",
  41. "From: cwen@iupui.edu\n"
  42. ]
  43. }
  44. ],
  45. "source": [
  46. "hand = open('mbox-short.txt')\n",
  47. "for line in hand:\n",
  48. "    line = line.rstrip()\n",
  49. "    if line.find('From:') >= 0:\n",
  50. "    # if line.startswith('From:'):\n",
  51. "        print (line)"
  52. ]
  53. },
  54. {
  55. "cell_type": "code",
  56. "execution_count": 2,
  57. "metadata": {
  58. "collapsed": false
  59. },
  60. "outputs": [
  61. {
  62. "name": "stdout",
  63. "output_type": "stream",
  64. "text": [
  65. "From: stephen.marquard@uct.ac.za\n",
  66. "From: louis@media.berkeley.edu\n",
  67. "From: zqian@umich.edu\n",
  68. "From: rjlowe@iupui.edu\n",
  69. "From: zqian@umich.edu\n",
  70. "From: rjlowe@iupui.edu\n",
  71. "From: cwen@iupui.edu\n",
  72. "From: cwen@iupui.edu\n",
  73. "From: gsilver@umich.edu\n",
  74. "From: gsilver@umich.edu\n",
  75. "From: zqian@umich.edu\n",
  76. "From: gsilver@umich.edu\n",
  77. "From: wagnermr@iupui.edu\n",
  78. "From: zqian@umich.edu\n",
  79. "From: antranig@caret.cam.ac.uk\n",
  80. "From: gopal.ramasammycook@gmail.com\n",
  81. "From: david.horwitz@uct.ac.za\n",
  82. "From: david.horwitz@uct.ac.za\n",
  83. "From: david.horwitz@uct.ac.za\n",
  84. "From: david.horwitz@uct.ac.za\n",
  85. "From: stephen.marquard@uct.ac.za\n",
  86. "From: louis@media.berkeley.edu\n",
  87. "From: louis@media.berkeley.edu\n",
  88. "From: ray@media.berkeley.edu\n",
  89. "From: cwen@iupui.edu\n",
  90. "From: cwen@iupui.edu\n",
  91. "From: cwen@iupui.edu\n"
  92. ]
  93. }
  94. ],
  95. "source": [
  96. "import re\n",
  97. "\n",
  98. "hand = open('mbox-short.txt')\n",
  99. "for line in hand:\n",
  100. "    line = line.strip()\n",
  101. "    if re.search('From:', line):\n",
  102. "        print (line)"
  103. ]
  104. },
  105. {
  106. "cell_type": "code",
  107. "execution_count": 7,
  108. "metadata": {
  109. "collapsed": false
  110. },
  111. "outputs": [
  112. {
  113. "name": "stdout",
  114. "output_type": "stream",
  115. "text": [
  116. "From stephen.marquard@uct.ac.za Sat Jan 5 09:14:16 2008\n",
  117. "From: stephen.marquard@uct.ac.za\n",
  118. "From louis@media.berkeley.edu Fri Jan 4 18:10:48 2008\n",
  119. "From: louis@media.berkeley.edu\n",
  120. "From zqian@umich.edu Fri Jan 4 16:10:39 2008\n",
  121. "From: zqian@umich.edu\n",
  122. "From rjlowe@iupui.edu Fri Jan 4 15:46:24 2008\n",
  123. "From: rjlowe@iupui.edu\n",
  124. "From zqian@umich.edu Fri Jan 4 15:03:18 2008\n",
  125. "From: zqian@umich.edu\n",
  126. "From rjlowe@iupui.edu Fri Jan 4 14:50:18 2008\n",
  127. "From: rjlowe@iupui.edu\n",
  128. "From cwen@iupui.edu Fri Jan 4 11:37:30 2008\n",
  129. "From: cwen@iupui.edu\n",
  130. "From cwen@iupui.edu Fri Jan 4 11:35:08 2008\n",
  131. "From: cwen@iupui.edu\n",
  132. "From gsilver@umich.edu Fri Jan 4 11:12:37 2008\n",
  133. "From: gsilver@umich.edu\n",
  134. "From gsilver@umich.edu Fri Jan 4 11:11:52 2008\n",
  135. "From: gsilver@umich.edu\n",
  136. "From zqian@umich.edu Fri Jan 4 11:11:03 2008\n",
  137. "From: zqian@umich.edu\n",
  138. "From gsilver@umich.edu Fri Jan 4 11:10:22 2008\n",
  139. "From: gsilver@umich.edu\n",
  140. "From wagnermr@iupui.edu Fri Jan 4 10:38:42 2008\n",
  141. "From: wagnermr@iupui.edu\n",
  142. "From zqian@umich.edu Fri Jan 4 10:17:43 2008\n",
  143. "From: zqian@umich.edu\n",
  144. "From antranig@caret.cam.ac.uk Fri Jan 4 10:04:14 2008\n",
  145. "From: antranig@caret.cam.ac.uk\n",
  146. "From gopal.ramasammycook@gmail.com Fri Jan 4 09:05:31 2008\n",
  147. "From: gopal.ramasammycook@gmail.com\n",
  148. "From david.horwitz@uct.ac.za Fri Jan 4 07:02:32 2008\n",
  149. "From: david.horwitz@uct.ac.za\n",
  150. "From david.horwitz@uct.ac.za Fri Jan 4 06:08:27 2008\n",
  151. "From: david.horwitz@uct.ac.za\n",
  152. "From david.horwitz@uct.ac.za Fri Jan 4 04:49:08 2008\n",
  153. "From: david.horwitz@uct.ac.za\n",
  154. "From david.horwitz@uct.ac.za Fri Jan 4 04:33:44 2008\n",
  155. "From: david.horwitz@uct.ac.za\n",
  156. "From stephen.marquard@uct.ac.za Fri Jan 4 04:07:34 2008\n",
  157. "From: stephen.marquard@uct.ac.za\n",
  158. "From louis@media.berkeley.edu Thu Jan 3 19:51:21 2008\n",
  159. "From: louis@media.berkeley.edu\n",
  160. "From louis@media.berkeley.edu Thu Jan 3 17:18:23 2008\n",
  161. "From: louis@media.berkeley.edu\n",
  162. "From ray@media.berkeley.edu Thu Jan 3 17:07:00 2008\n",
  163. "From: ray@media.berkeley.edu\n",
  164. "From cwen@iupui.edu Thu Jan 3 16:34:40 2008\n",
  165. "From: cwen@iupui.edu\n",
  166. "From cwen@iupui.edu Thu Jan 3 16:29:07 2008\n",
  167. "From: cwen@iupui.edu\n",
  168. "From cwen@iupui.edu Thu Jan 3 16:23:48 2008\n",
  169. "From: cwen@iupui.edu\n"
  170. ]
  171. }
  172. ],
  173. "source": [
  174. "import re\n",
  175. "\n",
  176. "hand = open('mbox-short.txt')\n",
  177. "for line in hand:\n",
  178. "    line = line.rstrip()\n",
  179. "    # if re.search('^From:', line):\n",
  180. "    if re.search('^F.*:', line):\n",
  181. "        print (line)"
  182. ]
  183. },
  184. {
  185. "cell_type": "code",
  186. "execution_count": 12,
  187. "metadata": {
  188. "collapsed": false
  189. },
  190. "outputs": [
  191. {
  192. "name": "stdout",
  193. "output_type": "stream",
  194. "text": [
  195. "From stephen.marquard@uct.ac.za Sat Jan 5 09:14:16 2008\n",
  196. "From: stephen.marquard@uct.ac.za\n",
  197. "From louis@media.berkeley.edu Fri Jan 4 18:10:48 2008\n",
  198. "From: louis@media.berkeley.edu\n",
  199. "From zqian@umich.edu Fri Jan 4 16:10:39 2008\n",
  200. "From: zqian@umich.edu\n",
  201. "From rjlowe@iupui.edu Fri Jan 4 15:46:24 2008\n",
  202. "From: rjlowe@iupui.edu\n",
  203. "From zqian@umich.edu Fri Jan 4 15:03:18 2008\n",
  204. "From: zqian@umich.edu\n",
  205. "From rjlowe@iupui.edu Fri Jan 4 14:50:18 2008\n",
  206. "From: rjlowe@iupui.edu\n",
  207. "From cwen@iupui.edu Fri Jan 4 11:37:30 2008\n",
  208. "From: cwen@iupui.edu\n",
  209. "From cwen@iupui.edu Fri Jan 4 11:35:08 2008\n",
  210. "From: cwen@iupui.edu\n",
  211. "From gsilver@umich.edu Fri Jan 4 11:12:37 2008\n",
  212. "From: gsilver@umich.edu\n",
  213. "From gsilver@umich.edu Fri Jan 4 11:11:52 2008\n",
  214. "From: gsilver@umich.edu\n",
  215. "From zqian@umich.edu Fri Jan 4 11:11:03 2008\n",
  216. "From: zqian@umich.edu\n",
  217. "From gsilver@umich.edu Fri Jan 4 11:10:22 2008\n",
  218. "From: gsilver@umich.edu\n",
  219. "From wagnermr@iupui.edu Fri Jan 4 10:38:42 2008\n",
  220. "From: wagnermr@iupui.edu\n",
  221. "From zqian@umich.edu Fri Jan 4 10:17:43 2008\n",
  222. "From: zqian@umich.edu\n",
  223. "From antranig@caret.cam.ac.uk Fri Jan 4 10:04:14 2008\n",
  224. "From: antranig@caret.cam.ac.uk\n",
  225. "From gopal.ramasammycook@gmail.com Fri Jan 4 09:05:31 2008\n",
  226. "From: gopal.ramasammycook@gmail.com\n",
  227. "From david.horwitz@uct.ac.za Fri Jan 4 07:02:32 2008\n",
  228. "From: david.horwitz@uct.ac.za\n",
  229. "From david.horwitz@uct.ac.za Fri Jan 4 06:08:27 2008\n",
  230. "From: david.horwitz@uct.ac.za\n",
  231. "From david.horwitz@uct.ac.za Fri Jan 4 04:49:08 2008\n",
  232. "From: david.horwitz@uct.ac.za\n",
  233. "From david.horwitz@uct.ac.za Fri Jan 4 04:33:44 2008\n",
  234. "From: david.horwitz@uct.ac.za\n",
  235. "From stephen.marquard@uct.ac.za Fri Jan 4 04:07:34 2008\n",
  236. "From: stephen.marquard@uct.ac.za\n",
  237. "From louis@media.berkeley.edu Thu Jan 3 19:51:21 2008\n",
  238. "From: louis@media.berkeley.edu\n",
  239. "From louis@media.berkeley.edu Thu Jan 3 17:18:23 2008\n",
  240. "From: louis@media.berkeley.edu\n",
  241. "From ray@media.berkeley.edu Thu Jan 3 17:07:00 2008\n",
  242. "From: ray@media.berkeley.edu\n",
  243. "From cwen@iupui.edu Thu Jan 3 16:34:40 2008\n",
  244. "From: cwen@iupui.edu\n",
  245. "From cwen@iupui.edu Thu Jan 3 16:29:07 2008\n",
  246. "From: cwen@iupui.edu\n",
  247. "From cwen@iupui.edu Thu Jan 3 16:23:48 2008\n",
  248. "From: cwen@iupui.edu\n"
  249. ]
  250. }
  251. ],
  252. "source": [
  253. "import re\n",
  254. "\n",
  255. "hand = open('mbox-short.txt')\n",
  256. "for line in hand:\n",
  257. "    line = line.rstrip()\n",
  258. "    if re.findall('^F.+?:', line):\n",
  259. "        print (line)"
  260. ]
  261. },
  262. {
  263. "cell_type": "code",
  264. "execution_count": 5,
  265. "metadata": {
  266. "collapsed": false
  267. },
  268. "outputs": [
  269. {
  270. "name": "stdout",
  271. "output_type": "stream",
  272. "text": [
  273. "['2', '19', '42']\n"
  274. ]
  275. }
  276. ],
  277. "source": [
  278. "import re\n",
  279. "\n",
  280. "hand = open('mbox-short.txt')\n",
  281. "# for line in hand:\n",
  282. "line = '2 19 42'\n",
  283. "y = re.findall('[0-9]+', line)\n",
  284. "print (y)"
  285. ]
  286. },
  287. {
  288. "cell_type": "code",
  289. "execution_count": 16,
  290. "metadata": {
  291. "collapsed": false
  292. },
  293. "outputs": [
  294. {
  295. "name": "stdout",
  296. "output_type": "stream",
  297. "text": [
  298. "['stephen.marquard@uct.ac.za']\n",
  299. "['stephen.marquard@uct.ac.za']\n",
  300. "['From stephen.marquard@uct.ac.za']\n"
  301. ]
  302. }
  303. ],
  304. "source": [
  305. "x ='From stephen.marquard@uct.ac.za Sat Jan 5 09:14:16 2008'\n",
  306. "y = re.findall('\\S+@\\S+',x)\n",
  307. "print (y)\n",
  308. "\n",
  309. "y = re.findall('^From (\\S+@\\S+)',x)\n",
  310. "print (y)\n",
  311. "\n",
  312. "y = re.findall('^From \\S+@\\S+',x)\n",
  313. "print (y)\n"
  314. ]
  315. },
  316. {
  317. "cell_type": "code",
  318. "execution_count": 28,
  319. "metadata": {
  320. "collapsed": false
  321. },
  322. "outputs": [
  323. {
  324. "name": "stdout",
  325. "output_type": "stream",
  326. "text": [
  327. "uct.ac.za\n",
  328. "uct.ac.za\n",
  329. "['uct.ac.za']\n"
  330. ]
  331. }
  332. ],
  333. "source": [
  334. "import re\n",
  335. "data ='From stephen.marquard@uct.ac.za Sat Jan 5 09:14:16 2008'\n",
  336. "\n",
  337. "atpos = data.find('@')\n",
  338. "sppos = data.find(' ', atpos)\n",
  339. "host = data[atpos+1 : sppos]\n",
  340. "print (host)\n",
  341. "\n",
  342. "words = data.split()\n",
  343. "email = words[1]\n",
  344. "pieces = email.split('@')\n",
  345. "print (pieces[1])\n",
  346. "\n",
  347. "y = re.findall ('@([^ ]*)',data)\n",
  348. "print (y)"
  349. ]
  350. },
  351. {
  352. "cell_type": "code",
  353. "execution_count": 35,
  354. "metadata": {
  355. "collapsed": false
  356. },
  357. "outputs": [
  358. {
  359. "name": "stdout",
  360. "output_type": "stream",
  361. "text": [
  362. "Maximum: 0.9907\n"
  363. ]
  364. }
  365. ],
  366. "source": [
  367. "import re\n",
  368. "hand = open('mbox-short.txt')\n",
  369. "numlist = list()\n",
  370. "for line in hand:\n",
  371. "    line = line.rstrip()\n",
  372. "    stuff = re.findall('^X-DSPAM-Confidence: ([0-9.]+)',line)\n",
  373. "    if len(stuff) != 1 : continue\n",
  374. "    num = float(stuff[0])\n",
  375. "    numlist.append(num)\n",
  376. " \n",
  377. "print ('Maximum:', max(numlist))"
  378. ]
  379. },
  380. {
  381. "cell_type": "code",
  382. "execution_count": 37,
  383. "metadata": {
  384. "collapsed": false
  385. },
  386. "outputs": [
  387. {
  388. "name": "stdout",
  389. "output_type": "stream",
  390. "text": [
  391. "['$10.00']\n"
  392. ]
  393. }
  394. ],
  395. "source": [
  396. "import re\n",
  397. "x = 'We just received $10.00 for cookies'\n",
  398. "y = re.findall('\\$[0-9.]+',x)\n",
  399. "print (y)"
  400. ]
  401. }
  402. ],
  403. "metadata": {
  404. "kernelspec": {
  405. "display_name": "Python 3",
  406. "language": "python",
  407. "name": "python3"
  408. },
  409. "language_info": {
  410. "codemirror_mode": {
  411. "name": "ipython",
  412. "version": 3
  413. },
  414. "file_extension": ".py",
  415. "mimetype": "text/x-python",
  416. "name": "python",
  417. "nbconvert_exporter": "python",
  418. "pygments_lexer": "ipython3",
  419. "version": "3.6.0"
  420. }
  421. },
  422. "nbformat": 4,
  423. "nbformat_minor": 2
  424. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement