Advertisement
Not a member of Pastebin yet? Sign up — it unlocks many cool features!
"""Scrape the sandbox listing at scrapethissite.com/pages/ and print, for each
sandbox, a dict with its title, absolute link, and description."""
import requests
from bs4 import BeautifulSoup

base_url = "https://www.scrapethissite.com"
url = "https://www.scrapethissite.com/pages/"

# Always pass a timeout: requests.get() with no timeout can block forever
# if the server stalls.
response = requests.get(url, timeout=30)
# Fail fast on HTTP 4xx/5xx instead of silently parsing an error page.
response.raise_for_status()

soup = BeautifulSoup(response.text, "html.parser")

extracted_data = []
# Each sandbox is listed inside a <div class="page"> element.
for page in soup.find_all("div", {"class": "page"}):
    title = page.find("h3").text.strip()
    # The listing's anchors carry site-relative hrefs (e.g. "/pages/simple/"),
    # so prepend the site root to produce a full URL.
    link = base_url + page.find("a").get("href")
    description = page.find("p").text.strip()
    extracted_data.append({"title": title, "link": link, "description": description})

# Print one dict per scraped sandbox page.
for data in extracted_data:
    print(data)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement