Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # Scrape any webpage from social media (Facebook, Twitter, etc.)
- from time import sleep
- from random import randint
- from selenium import webdriver
- from selenium.webdriver.chrome.options import Options
- from selenium.common.exceptions import NoSuchElementException
# --- Browser configuration -------------------------------------------------
chrome_options = Options()
chrome_options.add_argument("--start-maximized")
chrome_options.add_argument("--disable-infobars")
chrome_options.add_argument("--disable-extensions")
chrome_options.add_argument("--disable-popup-blocking")
# disable the banner "Chrome is being controlled by automated test software"
chrome_options.add_experimental_option("useAutomationExtension", False)
chrome_options.add_experimental_option("excludeSwitches", ['enable-automation'])

# Module-level driver shared by the helper functions and the polling loop.
# NOTE(review): passing the chromedriver path positionally is deprecated in
# Selenium 4 and rejected by newer 4.x releases, which expect a Service
# object instead -- confirm against the installed Selenium version.
driver = webdriver.Chrome('/usr/bin/chromedriver', options=chrome_options)

# The query string previously contained "¬if_t": the "&not" in "&notif_t" had
# been decoded as an HTML entity, corrupting the parameter name.
driver.get('https://www.facebook.com/allegianceflagsupply/posts/pfbid0dTkrvzyLdHUnGibKxwWDG1ytPK7HKShFJm5AXEK4XmhYGgGbbSbKAKNRYkNjdumRl?notif_id=1662404244196067&notif_t=comment_mention&ref=notif')

# --- Login -----------------------------------------------------------------
# Let element lookups poll for up to 20 s while the login form loads.
driver.implicitly_wait(20)
# "USRName" / "PssWRD" are placeholders; substitute real credentials.
# find_element("id", ...) replaces find_element_by_id, which newer Selenium
# releases no longer provide; this form works on both Selenium 3 and 4.
driver.find_element("id", "email").send_keys("USRName")
driver.find_element("id", "pass").send_keys("PssWRD")
# From here on, element lookups poll for up to 10 s.
driver.implicitly_wait(10)
driver.find_element("xpath", "//button[text()='Log In']").click()
# this function checks for a standard username tag
def user_element_exist():
    """Return True if the standard poster-name link is on the current page.

    Looks up the anchor inside an ``<h4>`` header using the module-level
    ``driver``. Always returns a bool: ``True`` when the element is found,
    ``False`` when Selenium raises ``NoSuchElementException``.
    """
    try:
        # find_element("xpath", ...) replaces find_element_by_xpath, which
        # newer Selenium releases no longer provide.
        driver.find_element("xpath", "//h4[@id][@class][./span[./a]]/span/a")
    except NoSuchElementException:
        return False
    return True
# this function looks for username linked to Facebook Groups at the top of your feed
def group_element():
    """Return the poster name from a group-style post header.

    Tries each known header layout in turn using the module-level ``driver``
    and returns the text of the first element that is found.

    Returns:
        The poster name, or the string ``"No user information found"`` when
        none of the layouts matches.
    """
    candidate_xpaths = (
        "//*[starts-with(@id, 'jsc_c_')]/span[1]/span/span/a/b",
        "//*[starts-with(@id, 'jsc_c_')]/strong[1]/span/a/span/span",
    )
    for xpath in candidate_xpaths:
        # Each lookup gets its own try: previously a miss on the first XPath
        # raised straight into the shared except clause, so the second
        # layout was never attempted.
        try:
            return driver.find_element("xpath", xpath).text
        except NoSuchElementException:
            # If an implicit wait is configured on the driver, each miss
            # blocks for that long before moving on to the next layout.
            continue
    return "No user information found"
# Poll the page forever: print the current poster name, reload, wait, repeat.
try:
    while True:
        if user_element_exist():
            # Standard layout: read the name from the <h4> header link.
            # find_element("xpath", ...) replaces find_element_by_xpath,
            # which newer Selenium releases no longer provide.
            user_name = driver.find_element(
                "xpath", "//h4[@id][@class][./span[./a]]/span/a"
            ).text
        else:
            # Fall back to the group-post header layouts.
            user_name = group_element()
        print(user_name)
        driver.refresh()
        # set the sleep timer to fit your needs
        sleep(300)  # This sleeps for 300 seconds, which is 5 minutes.
        # I would likely use a random sleep function
        # sleep(randint(180, 360))
finally:
    # Close the browser and stop chromedriver when the loop is interrupted
    # (e.g. Ctrl-C) or fails, instead of leaving them running.
    driver.quit()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement