Guest User

Untitled

a guest
Aug 31st, 2019
162
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.31 KB | None | 0 0
  1. # -*- coding: utf-8 -*-
  2. import sys
  if sys.version_info[0] < 3:
      # Python 2 only: reload(sys) makes sys.setdefaultencoding available
      # again so implicit str/unicode conversions use UTF-8. On Python 3
      # neither name exists and the default encoding is already UTF-8.
      reload(sys)  # noqa: F821 - builtin on Python 2
      sys.setdefaultencoding('utf8')
  5. from selenium import webdriver
  6. from selenium.webdriver.support.ui import WebDriverWait
  7. from selenium.webdriver.support import expected_conditions as EC
  8. from selenium.webdriver.common.by import By
  9. from selenium.common.exceptions import TimeoutException
  10. from selenium.webdriver.firefox.options import Options
  11. #from selenium.webdriver.chrome.options import Options
  12. from selenium.webdriver.support.ui import Select
  13. from selenium.webdriver.common import keys
  14. import csv
  15. import time
  16. import json
  17.  
  class Book(object):
      """A scraped book record: the searched title and the URL found for it.

      Iterating over an instance yields ``title`` and then ``url``, so
      ``list(book)`` produces a two-column row.
      """

      def __init__(self, title, url):
          self.title = title
          self.url = url

      def __iter__(self):
          return iter((self.title, self.url))

      def __repr__(self):
          return 'Book(title=%r, url=%r)' % (self.title, self.url)
  24.  
  # Goodreads landing page; init_selenium() loads it before the search page.
  url = 'https://www.goodreads.com/'
  26.  
  def create_csv_file(path='/home/l/Downloads/WebScraping/GoodReadsBooksNew.csv'):
      """Create (or truncate) the results CSV and write its header row.

      Args:
          path: Destination file. Defaults to the location the script has
              always used, so existing zero-argument calls are unaffected.
      """
      header = ['Title', 'URL']
      # The csv module needs binary mode on Python 2 and text mode with
      # newline='' on Python 3; other modes can double the line endings on
      # platforms that translate newlines.
      is_py2 = sys.version_info[0] < 3
      mode = 'wb' if is_py2 else 'w'
      extra = {} if is_py2 else {'newline': '', 'encoding': 'utf-8'}
      with open(path, mode, **extra) as csv_file:
          csv.writer(csv_file, delimiter=',').writerow(header)
  32.  
  def read_from_txt_file(path='/home/l/Downloads/WebScraping/BookTitles.txt'):
      """Return the lines of a text file as a list, one book title per line.

      Trailing line terminators are removed; blank lines are kept as empty
      strings.

      Args:
          path: File to read. Defaults to the location the script has always
              used, so existing zero-argument calls are unaffected.
      """
      # The context manager closes the handle; the original left it open.
      with open(path) as txt_file:
          # Strip '\r' too so CRLF files do not leave a stray carriage return.
          return [line.rstrip('\r\n') for line in txt_file]
  36.  
  def init_selenium():
      """Start Chrome and leave it on the Goodreads search page.

      Stores the browser in the module-level global ``driver``. Loads the
      landing page held in the global ``url``, pauses for 30 seconds, then
      opens the search page.
      """
      global driver
      # The original built a Firefox ``Options`` object with '--headless' here
      # but never passed it to the driver, so it had no effect; it is dropped.
      # NOTE(review): if headless mode is wanted, build Chrome options and pass
      # them to webdriver.Chrome - confirm first, since the pause below
      # suggests a visible browser may be needed.
      driver = webdriver.Chrome("/home/l/Downloads/WebScraping/chromedriver")
      driver.get(url)
      # NOTE(review): presumably leaves time for a manual step (login or
      # dismissing a dialog) - confirm before shortening.
      time.sleep(30)
      driver.get('https://www.goodreads.com/search?q=')
  45.  
  def search_for_title(title):
      """Type *title* into the search box and click the search button.

      Uses the module-level ``driver`` created by ``init_selenium()``.

      Args:
          title: Text to enter in the search field.
      """
      # find_element(By..., ...) works on Selenium 3 and 4; the
      # find_element_by_* helpers were removed in Selenium 4.
      search_field = driver.find_element(By.XPATH, '//*[@id="search_query_main"]')
      search_field.clear()
      search_field.send_keys(title)
      # NOTE(review): absolute XPath - breaks if the page layout shifts.
      search_button = driver.find_element(
          By.XPATH,
          '/html/body/div[2]/div[3]/div[1]/div[1]/div[2]/form/div[1]/input[3]')
      search_button.click()
  52.  
  def scrape_url():
      """Return the href of the first ``a.bookTitle`` link on the current page.

      Uses the module-level ``driver``. Returns the string "N/A" when the
      lookup fails with a Selenium error.
      """
      # Imported locally because the file's top-level imports do not include it.
      from selenium.common.exceptions import WebDriverException

      try:
          link = driver.find_element(By.CSS_SELECTOR, 'a.bookTitle')
          return link.get_attribute('href')
      except WebDriverException:
          # Still best-effort for any Selenium failure, but no longer swallows
          # KeyboardInterrupt/SystemExit as the bare ``except:`` did.
          return "N/A"
  60.  
  def write_into_csv_file(vendor, path='/home/l/Downloads/WebScraping/GoodReadsBooksNew.csv'):
      """Append one row to the results CSV.

      Args:
          vendor: Any iterable of cell values; it is converted with ``list()``
              and written as a single row.
          path: File to append to. Defaults to the location the script has
              always used, so existing one-argument calls are unaffected.
      """
      # The csv module needs binary mode on Python 2 and text mode with
      # newline='' on Python 3; other modes can double the line endings on
      # platforms that translate newlines.
      is_py2 = sys.version_info[0] < 3
      mode = 'ab' if is_py2 else 'a'
      extra = {} if is_py2 else {'newline': '', 'encoding': 'utf-8'}
      with open(path, mode, **extra) as csv_file:
          csv.writer(csv_file, delimiter=',').writerow(list(vendor))
  65.  
  # Script entry: write the CSV header, load the titles, start the browser,
  # then look up each title and append its result row.
  create_csv_file()
  titles = read_from_txt_file()
  init_selenium()

  try:
      for title in titles:
          search_for_title(title)
          # A distinct name keeps the module-level ``url`` from being rebound.
          book_url = scrape_url()
          write_into_csv_file(Book(title, book_url))
  finally:
      # Always shut the browser down, even if a lookup raises.
      driver.quit()
Add Comment
Please, Sign In to add comment