Advertisement
Guest User

Untitled

a guest
Nov 25th, 2015
64
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 0.73 KB | None | 0 0
  1.  
import io

import requests
from bs4 import BeautifulSoup
  4.  
  5. def get_text_from_html(source):
  6.     soup = BeautifulSoup(source)
  7.     # kill all script and style elements
  8.     for script in soup(["script", "style"]):
  9.         script.extract()    # rip it out
  10.  
  11.     # get text
  12.     return soup.select("#content")[0].text #get_text()
  13.  
  14. def getwiki(query):
  15.     r = requests.get("https://en.wikipedia.org/wiki/%s" % query, timeout = 20)
  16.     with open("./%s.txt" % query,"w") as fd:
  17.         fd.write(get_text_from_html(r.text).encode("utf-8", errors="ignore"))
  18.        
  19.        
  20. getwiki("Brick")
  21.  
  22. """
  23. a-z0-9
  24. .*?
  25. +
  26. [a-z]
  27. [^a-z]
  28. (ab)* # ababababab
  29.  
  30. (ac|b*)*
  31. [ac|b*]* # c||*a
  32.  
  33. \w
  34. \s
  35. \t
  36. \n
  37. \W
  38. [\w_]{5}
  39. [\w\W]{1,5}
  40.  
  41. ^l*$.*ppp
  42. ".*?"
  43. """
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement