Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import requests
- from bs4 import BeautifulSoup
def get_text_from_html(source):
    """Return the text of the ``#content`` element of an HTML document.

    ``<script>`` and ``<style>`` elements are removed before the text is
    read so that their source code does not leak into the result.

    Args:
        source: HTML markup to parse.

    Returns:
        The text of the first element matching the CSS selector
        ``#content``.

    Raises:
        IndexError: If the document contains no element matching
            ``#content``.
    """
    # Name the parser explicitly: without it BeautifulSoup picks whichever
    # parser happens to be installed (and warns), so output varies by machine.
    soup = BeautifulSoup(source, "html.parser")

    # Drop script and style elements; their contents are code, not prose.
    for tag in soup(["script", "style"]):
        tag.decompose()

    matches = soup.select("#content")
    if not matches:
        # Same exception type as indexing an empty result, but with a
        # message that says what was actually missing.
        raise IndexError("no element matching '#content' in the HTML source")
    return matches[0].text
def getwiki(query):
    """Download an English Wikipedia article and save its text to a file.

    The page ``https://en.wikipedia.org/wiki/<query>`` is fetched, reduced
    to the text of its ``#content`` element by ``get_text_from_html`` and
    written, UTF-8 encoded, to ``./<query>.txt``.

    Args:
        query: Article title as it appears in the page URL, e.g. ``"Brick"``.
            It is inserted verbatim into both the URL and the file name.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        IndexError: If the page has no ``#content`` element.
    """
    response = requests.get(
        "https://en.wikipedia.org/wiki/%s" % query, timeout=20
    )
    # Fail loudly instead of saving an error page as if it were the article.
    response.raise_for_status()

    # Extract and encode before touching the disk so that a parsing failure
    # does not leave an empty output file behind.
    payload = get_text_from_html(response.text).encode(
        "utf-8", errors="ignore"
    )

    # Binary mode: the payload is already encoded bytes, and writing bytes to
    # a text-mode file raises TypeError on Python 3.
    with open("./%s.txt" % query, "wb") as fd:
        fd.write(payload)
# Script entry: fetch the "Brick" article and save it to ./Brick.txt.
getwiki("Brick")

# Scratch notes: samples of regular-expression syntax. The string is never
# used by the code. It is a raw string so that sequences such as \w, \s and
# \W are kept literally instead of being parsed as (invalid) string escapes.
r"""
a-z0-9
.*?
+
[a-z]
[^a-z]
(ab)* # ababababab
(ac|b*)*
[ac|b*]* # c||*a
\w
\s
\t
\n
\W
[\w_]{5}
[\w\W]{1,5}
^l*$.*ppp
".*?"
"""
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement