Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import csv
import os
from time import strftime, sleep
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
# Scrape a Prothom Alo search-results page, collect the article links it
# lists, then download every article and keep the elements of interest.
#
# Module-level results:
#   urlsPA -- list of absolute article URLs found on the search page
#   PAlist -- one entry per URL in urlsPA (same order): the list of matching
#             elements from that article, or [] if the article request failed

# Search-results page to scrape; the query value is a percent-encoded term.
SEARCH_URL = 'https://www.prothomalo.com/search?q=%E0%A6%A7%E0%A6%B0%E0%A7%8D%E0%A6%B7%E0%A6%A3'

# Seconds to wait for the server before giving up; without a timeout
# requests.get() can block indefinitely.
REQUEST_TIMEOUT = 30

# Name the parser explicitly so results do not depend on which optional
# parser libraries happen to be installed (and to avoid bs4's warning).
HTML_PARSER = "html.parser"

# Class of the <div> that wraps each search result.
# NOTE(review): looks like a build-generated class name -- confirm it still
# matches the live site.
STORY_CARD_CLASS = "customStoryCard9-m__story-data__2qgWb"

# Class-attribute strings selected from each article page.
# NOTE(review): judging by the class names these are body text, time/share
# metadata, headline and contributor name -- confirm against a live page.
ARTICLE_CLASSES = [
    "story-element story-element-text",
    "time-social-share-wrapper storyPageMetaData-m__time-social-share-wrapper__2-RAX",
    "headline headline-type-9 story-headline bn-story-headline headline-m__headline__3vaq9 headline-m__headline-type-9__3gT8S",
    "contributor-name contributor-m__contributor-name__1-593",
]


def _fetch(url):
    """Return the response of a GET request to *url*.

    Raises requests.RequestException on connection problems, timeouts or a
    4xx/5xx status, so that error pages are never parsed as content.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return response


# Download the search page and parse it into an HTML tree.
responsePA = _fetch(SEARCH_URL)
rawPagePA = responsePA.text
soupPA = BeautifulSoup(rawPagePA, HTML_PARSER)
# take a look
print(soupPA.prettify())

# Collect the link of every search result.
urlsPA = []
for item in soupPA.find_all("div", class_=STORY_CARD_CLASS):
    aTag = item.find("a")  # first <a> inside the card, or None
    href = aTag.get("href") if aTag is not None else None
    if not href:
        # Card without a usable link: nothing to download.
        continue
    # Resolve relative links against the search URL; absolute links pass
    # through unchanged.
    urlsPA.append(urljoin(SEARCH_URL, href))
print(urlsPA)

# Download each article and keep the elements selected by ARTICLE_CLASSES.
PAlist = []
for link in urlsPA:
    try:
        specificpagePA = _fetch(link)
    except requests.RequestException as err:
        # One unreachable article must not abort the whole run; append an
        # empty placeholder so PAlist stays index-aligned with urlsPA.
        print("Skipping {}: {}".format(link, err))
        PAlist.append([])
        continue
    rawAddPagePA = specificpagePA.text
    PASoup2 = BeautifulSoup(rawAddPagePA, HTML_PARSER)
    PAcontent = PASoup2.find_all(class_=ARTICLE_CLASSES)
    PAlist.append(PAcontent)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement