Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
from urllib.parse import urljoin, urlparse
from urllib.request import urlopen

import colorama
import requests
from bs4 import BeautifulSoup
# Page whose visible text is dumped to stdout when run as a script.
url = "https://lite.ip2location.com/china-ip-address-ranges"


def fetch_html(page_url, timeout=30):
    """Download *page_url* and return the raw response body as bytes.

    Args:
        page_url: Address of the page to download.
        timeout: Seconds to wait on the network before giving up, so a
            stalled server cannot hang the script indefinitely.

    Raises:
        urllib.error.URLError: If the request fails or times out.
    """
    # The context manager closes the HTTP connection even if read() raises.
    with urlopen(page_url, timeout=timeout) as response:
        return response.read()


def normalize_text(text):
    """Collapse *text* into one non-empty, whitespace-trimmed phrase per line.

    Each input line is stripped, then split wherever two consecutive spaces
    occur (so side-by-side headlines end up on separate lines), and empty
    fragments are dropped.
    """
    # Remove leading and trailing space on each line.
    lines = (line.strip() for line in text.splitlines())
    # Break multi-headlines into a line each. The separator is a DOUBLE
    # space: splitting on a single space would put every word on its own line.
    chunks = (phrase.strip() for line in lines for phrase in line.split("  "))
    # Drop blank fragments.
    return "\n".join(chunk for chunk in chunks if chunk)


def html_to_text(html):
    """Return the human-visible text of the HTML document *html*.

    <script> and <style> elements are removed before extracting the text,
    and the result is tidied with normalize_text().
    """
    soup = BeautifulSoup(html, features="html.parser")
    # Kill all script and style elements: their contents are not page text.
    for element in soup(["script", "style"]):
        element.extract()
    return normalize_text(soup.get_text())


def main():
    """Fetch the page at ``url`` and print its visible text."""
    print(html_to_text(fetch_html(url)))


# Only hit the network when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement