Advertisement
skip420

ScrapeSocialMediaPage

Sep 10th, 2022
1,018
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.80 KB | None | 0 0
  1. # Scrape any web page from social media (Facebook, Twitter, etc.)
  2.  
  3.  
  4.  
  5.  
  6. from time import sleep
  7. from random import randint
  8. from selenium import webdriver
  9. from selenium.webdriver.chrome.options import Options
  10. from selenium.common.exceptions import NoSuchElementException
  11.  
  12. chrome_options = Options()
  13. chrome_options.add_argument("--start-maximized")
  14. chrome_options.add_argument("--disable-infobars")
  15. chrome_options.add_argument("--disable-extensions")
  16. chrome_options.add_argument("--disable-popup-blocking")
  17.  
  18. # disable the banner "Chrome is being controlled by automated test software"
  19. chrome_options.add_experimental_option("useAutomationExtension", False)
  20. chrome_options.add_experimental_option("excludeSwitches", ['enable-automation'])
  21.  
  22. # global driver
  23. driver = webdriver.Chrome('/usr/bin/chromedriver', options=chrome_options)
  24.  
  25. driver.get('https://www.facebook.com/allegianceflagsupply/posts/pfbid0dTkrvzyLdHUnGibKxwWDG1ytPK7HKShFJm5AXEK4XmhYGgGbbSbKAKNRYkNjdumRl?notif_id=1662404244196067&notif_t=comment_mention&ref=notif')
  26.  
  27. driver.implicitly_wait(20)
  28.  
  29. driver.find_element_by_id("email").send_keys("USRName")
  30. driver.find_element_by_id("pass").send_keys("PssWRD")
  31.  
  32. driver.implicitly_wait(10)
  33.  
  34. driver.find_element_by_xpath(("//button[text()='Log In']")).click()
  35.  
  36.  
  37. # this function checks for a standard username tag
  38. def user_element_exist():
  39.     try:
  40.         if driver.find_element_by_xpath("//h4[@id][@class][./span[./a]]/span/a"):
  41.             return True
  42.     except NoSuchElementException:
  43.         return False
  44.  
  45.  
  46. # this function looks for username linked to Facebook Groups at the top of your feed
  47. def group_element():
  48.     try:
  49.         if driver.find_element_by_xpath("//*[starts-with(@id, 'jsc_c_')]/span[1]/span/span/a/b"):
  50.             poster_name = driver.find_element_by_xpath("//*[starts-with(@id, 'jsc_c_')]/span[1]/span/span/a/b").text
  51.             return poster_name
  52.  
  53.         if driver.find_element_by_xpath("//*[starts-with(@id, 'jsc_c_')]/strong[1]/span/a/span/span"):
  54.             poster_name = driver.find_element_by_xpath("//*[starts-with(@id, 'jsc_c_')]/strong["
  55.                                                        "1]/span/a/span/span").text
  56.             return poster_name
  57.  
  58.     except NoSuchElementException:
  59.         return "No user information found"
  60.  
  61.  
  62. while True:
  63.     element_exists = user_element_exist()
  64.     if not element_exists:
  65.         user_name = group_element()
  66.         print(user_name)
  67.         driver.refresh()
  68.     elif element_exists:
  69.         user_name = driver.find_element_by_xpath("//h4[@id][@class][./span[./a]]/span/a").text
  70.         print(user_name)
  71.         driver.refresh()
  72.  
  73.     # set the sleep timer to fit your needs
  74.     sleep(300) # This sleeps for 300 seconds, which is 5 minutes.
  75.  
  76.     # I would likely use a random sleep function
  77.     # sleep(randint(180, 360))
  78.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement