Advertisement
Guest User

facebook

a guest
Nov 12th, 2018
2,791
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.04 KB | None | 0 0
  1. from selenium import webdriver
  2. from selenium.webdriver.support.ui import WebDriverWait
  3. from selenium.webdriver.support import expected_conditions as EC
  4. from selenium.webdriver.common.by import By
  5. from selenium.common.exceptions import TimeoutException
  6.  
  7. from bs4 import BeautifulSoup
  8. import urllib.request
  9. import math
  10.  
  11. # defining log in and password variables
  12. usernameStr = "alexb2123@gmail.com"
  13. passwordStr = "2123Constantine"
  14.  
  15.  
  16. # Browser
  17.  
  18. browser = webdriver.Firefox(
  19. executable_path=r'C:\Users\Alex\AppData\Roaming\Microsoft\Windows\Start Menu\Programs\Python 3.7\geckodriver.exe'
  20. )
  21. browser.get("https://www.facebook.com")
  22.  
  23. # fill in username
  24. username = browser.find_element_by_id("email")
  25. username.send_keys(usernameStr)
  26.  
  27. # fill in password
  28. password = browser.find_element_by_id("pass")
  29. password.send_keys(passwordStr)
  30.  
  31. # sign in to facebook
  32. signInButton = browser.find_element_by_id("loginbutton")
  33. signInButton.click()
  34.  
  35.  
  36. #defining variables within the facebook url
  37. class FacebookScraper(object):
  38. def __init__(self, location, max_price):
  39. self.location = location
  40. self.max_price = max_price
  41. #self.radius = math.trunc((radius/.62137119)+1)
  42. self.query = query
  43.  
  44. #defining the facebook url as a whole
  45. self.url = f"https://wwww.facebook.com/marketplace/{location}/search?query={query}&maxPrice={max_price}"
  46.  
  47. # https://www.facebook.com/marketplace/nyc/search?query=books&maxPrice=0&radiusKM=4
  48. # https://wwww.facebook.com/marketplace/{location}/search?query={query}&maxPrice={max_price}&radiusKM={radius}
  49.  
  50. #getting the webdriver
  51. self.driver = webdriver.Firefox(
  52. executable_path=r'C:\Users\Alex\AppData\Roaming\Microsoft\Windows\Start Menu\Programs\Python 3.7\geckodriver.exe'
  53. )
  54. self.delay = 6
  55.  
  56. #navigating to the facebook url
  57. def load_facebook_url(self):
  58. self.driver.get(self.url)
  59. try:
  60. wait = WebDriverWait(self.driver, self.delay)
  61. wait.until(EC.presence_of_element_located((By.ID, "searchform")))
  62. print("page is ready")
  63. except TimeoutException:
  64. print("Loading took too much time")
  65.  
  66.  
  67. #extracting information from facebook
  68. #def extract_post_titles(self):
  69.  
  70. # all_posts = self.driver.find_elements_by_class_name("result-row")
  71. # post_title_list = []
  72. # for post in all_posts:
  73. # print(post.text)
  74. # post_title_list.append(post.text)
  75. # return post_title_list
  76.  
  77. #def extract_post_urls(self):
  78. # url_list = []
  79. # html_page = urllib.request.urlopen(self.url)
  80. # soup = BeautifulSoup(html_page, "html.parser")
  81. # for link in soup.findAll("a", {"class": "result-title hdrlnk"}):
  82. # print(link)
  83.  
  84.  
  85. #user input variables
  86. location = "nyc"
  87. max_price = "0"
  88. #radius = "2"
  89. query = "books"
  90.  
  91. #craigslist scraper functions
  92. scraper = FacebookScraper(location, max_price, )
  93. scraper.load_facebook_url()
  94. #scraper.extract_post_titles()
  95. #scraper.extract_post_urls()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement