Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
"""Scrape every page of a BusinessWorld directory listing into a CSV file.

The script opens the listing in Chrome, walks the pagination by clicking the
"Next" link until no such link is found, and writes every table row it finds
to a CSV file. Header cells (<th>) are written once; data cells (<td>) are
written for every page.
"""
import csv

from bs4 import BeautifulSoup as bs
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

START_URL = 'https://dir.businessworld.com.my/15/posts/16-Computers-The-Internet'
OUTPUT_PATH = "C:/Users/User/Desktop/test.csv"
TABLE_SELECTOR = "table.postlisting"
NEXT_LINK_XPATH = "//a[contains(.,'Next')]"
WAIT_SECONDS = 10


def _write_rows(soup, csv_writer, header_written):
    """Write every <tr> of the parsed page to ``csv_writer``.

    Args:
        soup: BeautifulSoup document for the page currently displayed.
        csv_writer: ``csv.writer`` receiving one row per table row.
        header_written: True when a header row was already written for an
            earlier page; further header rows are then skipped so the CSV
            holds a single header line.

    Returns:
        True if a header row has been written so far, otherwise the value of
        ``header_written`` that was passed in.
    """
    for tr in soup.find_all("tr"):
        # A row containing <th> cells is treated as a header row; its <td>
        # cells (if any) are ignored.
        headers = [th.text for th in tr.find_all("th")]
        if headers:
            if not header_written:
                print("Inserting headers : {}".format(','.join(headers)))
                csv_writer.writerow(headers)
                header_written = True
            continue

        # Prefer the text of the cell's first link when it has one, otherwise
        # fall back to the text of the cell itself.
        data = [
            (td.a if td.a is not None else td).text.strip()
            for td in tr.find_all("td")
        ]
        if data:
            print("Inserting data: {}".format(','.join(data)))
            csv_writer.writerow(data)
    return header_written


def main():
    """Run the scrape: open the browser, page through the listing, save CSV."""
    browser = webdriver.Chrome()
    try:
        browser.get(START_URL)
        # newline='' stops the csv module from emitting blank lines between
        # rows on Windows; UTF-8 avoids encode errors on non-Latin text.
        with open(OUTPUT_PATH, 'w', newline='', encoding='utf-8') as out_file:
            csv_writer = csv.writer(out_file)
            header_written = False
            while True:
                table = WebDriverWait(browser, WAIT_SECONDS).until(
                    EC.visibility_of_element_located(
                        (By.CSS_SELECTOR, TABLE_SELECTOR)
                    )
                )
                # Re-parse on every iteration: the page source changes after
                # each click on "Next", so a soup built once would keep
                # yielding the first page's rows.
                soup = bs(browser.page_source, "html.parser")
                header_written = _write_rows(soup, csv_writer, header_written)

                next_links = browser.find_elements(By.XPATH, NEXT_LINK_XPATH)
                if not next_links:
                    break
                next_links[0].click()
                try:
                    # Wait until the table that was just scraped is detached,
                    # so the next visibility wait cannot match the old page.
                    WebDriverWait(browser, WAIT_SECONDS).until(
                        EC.staleness_of(table)
                    )
                except TimeoutException:
                    # NOTE(review): the table element was not replaced within
                    # the timeout; presumably the page updates it in place.
                    # Carry on and re-read the current page source.
                    pass
    finally:
        # Always release the browser process, even when scraping fails.
        browser.quit()


if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement