Advertisement
Guest User

facebook scraper

a guest
Nov 18th, 2018
916
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.86 KB | None | 0 0
  1. from selenium import webdriver
  2. from selenium.webdriver.support.ui import WebDriverWait
  3. from selenium.webdriver.support import expected_conditions as EC
  4. from selenium.webdriver.common.by import By
  5. from selenium.common.exceptions import TimeoutException
  6.  
  7. from bs4 import BeautifulSoup
  8. import urllib.request
  9. import math
  10.  
  11.  
  12. # defining variables within the facebook url
  13. class FacebookScraper(object):
  14. def __init__(self, browser):
  15. self.browser = browser
  16. self.url = f"https://wwww.facebook.com/marketplace/" + location + "/search?query=" + query + "&maxPrice=" + max_price
  17.  
  18.  
  19. def log_into_fb(self, usernameStr, passwordStr):
  20. self.browser.get("https://www.facebook.com")
  21. # fill in username
  22. username = browser.find_element_by_id("email")
  23. username.send_keys(usernameStr)
  24.  
  25. # fill in password
  26. password = browser.find_element_by_id("pass")
  27. password.send_keys(passwordStr)
  28.  
  29. # sign in to facebook
  30. signInButton = browser.find_element_by_id("loginbutton")
  31. signInButton.click()
  32.  
  33. # navigating to the facebook url
  34. def load_facebook_url(self, query, location, max_price):
  35.  
  36. try:
  37. # defining the facebook url as a whole
  38. #url = "https://wwww.facebook.com/marketplace/" + location + "/search?query=" + query + "&maxPrice=" + max_price
  39. self.browser.get(self.url)
  40. element = WebDriverWait(browser, 10).until(EC.presence_of_element_located((By.ID, "facebook")),
  41. print("Page is ready"))
  42. except TimeoutException:
  43. print("Loading took too much time")
  44.  
  45.  
  46. # extracting information from facebook
  47. def extract_post_titles(self):
  48.  
  49. all_posts = self.browser.find_elements_by_class_name("_1oem")
  50. post_title_list = []
  51. for post in all_posts:
  52. print(post.text)
  53. post_title_list.append(post.text)
  54. return post_title_list
  55.  
  56. def extract_post_urls(self):
  57. # url = "https://wwww.facebook.com/marketplace/" + location + "/search?query=" + query + "&maxPrice=" + max_price
  58. url_list = []
  59. html_page = urllib.request.urlopen(self.url)
  60. soup = BeautifulSoup(html_page, "html.parser")
  61. for link in soup.findAll("a", {"class": "_1oem"}):
  62. print(link)
  63.  
  64.  
  65. # user input variables
  66. location = "nyc"
  67. max_price = "0"
  68. # radius = "2"
  69. query = "books"
  70.  
  71. # Browser
  72. browser = webdriver.Firefox(
  73. executable_path=r'C:\Users\Alex\AppData\Roaming\Microsoft\Windows\Start Menu\Programs\Python 3.7\geckodriver.exe'
  74. )
  75. # defining log in and password variables
  76. usernameStr = "alexb2123@gmail.com"
  77. passwordStr = "2123Constantine"
  78. scraper = FacebookScraper(browser)
  79. scraper.log_into_fb(usernameStr, passwordStr)
  80. scraper.load_facebook_url(query, location, max_price)
  81. #scraper.extract_post_titles()
  82. scraper.extract_post_urls()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement