Advertisement
Guest User

BBC News Fetcher

a guest
Apr 6th, 2020
390
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.06 KB | None | 0 0
  1. from selenium import webdriver
  2. from bs4 import BeautifulSoup
  3. import pandas as pd
  4. import re
  5.  
  6. def cleanhtml(raw_html):
  7.   cleanr = re.compile('<.*?>|&([a-z0-9]+|#[0-9]{1,6}|#x[0-9a-f]{1,6});')
  8.   cleantext = re.sub(cleanr, '', raw_html)
  9.   return cleantext
  10.  
  11. def main():
  12.   driver = webdriver.Chrome(executable_path=r'E:\Program Files (x86)\Chromedriver\chromedriver.exe')
  13.   driver.get('https://www.bbc.co.uk/news')
  14.  
  15.   content = driver.page_source
  16.   soup = BeautifulSoup(content, "html.parser")
  17.  
  18.   news1=soup.find('h3', attrs={'class':'gs-c-promo-heading__title gel-paragon-bold nw-o-link-split__text'})
  19.  
  20.   news2=soup.find_all('h3', attrs={'class':'gs-c-promo-heading__title gel-pica-bold nw-o-link-split__text'})
  21.  
  22.   desc=soup.find_all('p', attrs={'class':'gs-c-promo-summary gel-long-primer gs-u-mt nw-c-promo-summary'})
  23.   times=soup.find_all('span', attrs={'class':'qa-status-date-output'})
  24.  
  25.  
  26.   image1 = soup.find('img', attrs={'class':'qa-srcset-image'})
  27.   image1 = image1['src']
  28.  
  29.   images=[]
  30.   image2 = soup.find_all('img', attrs={'class':'lazyloaded'})
  31.   for image in image2:
  32.       images.append(image['src'])
  33.      
  34.  
  35.   print((news1.contents)[0])
  36.   print(((desc)[1]).contents[0])
  37.   print(((times)[1]).contents[0])
  38.   print(image1)
  39.  
  40.   print("////////////////////////////////////////////////////////")
  41.   print(((news2)[0]).contents[0])
  42.   print(((desc)[2]).contents[0])
  43.   print(((times)[2]).contents[0])
  44.   print(images[0])
  45.  
  46.   print("////////////////////////////////////////////////////////")
  47.   print(((news2)[1]).contents[0])
  48.   print(((desc)[3]).contents[0])
  49.   print(((times)[3]).contents[0])
  50.   print(images[1])
  51.  
  52.   print("////////////////////////////////////////////////////////")
  53.   print(((news2)[2]).contents[0])
  54.   print(((desc)[4]).contents[0])
  55.   print(((times)[4]).contents[0])
  56.   print(images[2])
  57.  
  58.   print("////////////////////////////////////////////////////////")
  59.   print(((news2)[3]).contents[0])
  60.   print(((desc)[5]).contents[0])
  61.   print(((times)[5]).contents[0])
  62.   print(images[3])
  63.  
  64.   driver.quit()
  65.  
  66. if __name__ == '__main__':
  67.   main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement