Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # -*- coding: utf-8 -*-
- from selenium import webdriver
- from selenium.webdriver.common.by import By
- from selenium.webdriver.common.keys import Keys
- from selenium.webdriver.support.ui import Select
- from selenium.webdriver.support.ui import WebDriverWait
- from selenium.common.exceptions import TimeoutException
- from selenium.webdriver.support import expected_conditions as EC
- from selenium.common.exceptions import NoSuchElementException
- from selenium.common.exceptions import NoAlertPresentException
- import sys
- import pdb
- import unittest, time, re, json
- from bs4 import BeautifulSoup
- from parametrizedTestCase import ParametrizedTestCase
class Sel(ParametrizedTestCase):
    """Selenium test case that logs in to Facebook and captures the friends page.

    Input and output both travel through the ``self.param`` dict (presumably
    attached by ``ParametrizedTestCase`` -- confirm against that class):

    * ``param["usr"]`` / ``param["pwd"]`` -- credentials, read in ``setUp``.
    * ``param["progress"]`` -- integer percentage, updated while scrolling.
    * ``param["data"]`` -- set to ``False`` when the login wait times out,
      otherwise to the UTF-8 encoded HTML of the fully scrolled friends page.
    """

    usr = ""
    pwd = ""

    def setUp(self):
        """Start a PhantomJS session and copy the credentials from ``self.param``."""
        # self.driver = webdriver.Firefox()
        self.driver = webdriver.PhantomJS()
        self.driver.set_window_size(1120, 800)
        self.driver.implicitly_wait(30)
        self.base_url = "https://facebook.com"
        self.verificationErrors = []
        self.accept_next_alert = True
        self.usr = self.param["usr"]
        self.pwd = self.param["pwd"]

    def tearDown(self):
        """Shut the browser down after the test."""
        self.driver.quit()
        print('closed PhantomJS')

    def test_sel(self):
        """Log in, open the friends page, scroll until it is fully loaded, store its HTML.

        Fails the test (after setting ``param["data"] = False``) when the URL
        still contains ``login_attempt=1`` five seconds after submitting the
        login form.
        """
        driver = self.driver
        delay = 2  # seconds to wait after each scroll before measuring again

        # log in
        print("GOT HERE 1")
        driver.get(self.base_url)
        driver.find_element_by_id("email").clear()
        driver.find_element_by_id("email").send_keys(self.usr)
        driver.find_element_by_id("pass").clear()
        driver.find_element_by_id("pass").send_keys(self.pwd)
        driver.get_screenshot_as_file('here.png')
        driver.find_element_by_id("u_0_o").click()
        driver.get_screenshot_as_file('here2.png')
        print("GOT HERE 2")

        # Wait object with a 5 second timeout and the default 0.5 second poll
        # frequency.
        wait = WebDriverWait(driver, 5)
        # Login is considered successful once the failed-attempt marker is
        # absent from the browser URL.
        try:
            wait.until(lambda drv: "login_attempt=1" not in drv.current_url)
        except TimeoutException:
            self.param["data"] = False  # login fail
            self.fail("Loading timeout expired")
        print("Login Complete")
        print(driver.page_source.encode('utf-8').strip())

        fbProfileBase = driver.find_element_by_xpath(
            '//*[@title="Profile"]').get_attribute("href")
        driver.get(fbProfileBase + "/friends")
        friendTag = driver.find_element_by_class_name("_3d0")
        numOfFriends = int(friendTag.text.replace(',', ''))
        print(self.param["usr"] + ' number of friends: ' + str(numOfFriends))

        # scroll until every friend is loaded or the position stops changing
        yPositionHistory = []
        while True:
            driver.execute_script(
                "window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(delay)
            scrollTop = driver.execute_script("return window.pageYOffset")
            yPositionHistory.append(scrollTop)
            currentNumOfFriends = driver.execute_script(
                "return document.body.getElementsByClassName('_698').length"
            ) or 0
            # ">=" rather than "==": if the page shows more entries than the
            # advertised count the loop must still stop, and it guarantees
            # numOfFriends > 0 for the division below.
            if currentNumOfFriends >= numOfFriends:
                break
            # if the page has not been scrolled for 5 time periods, break
            if len(yPositionHistory) >= 5 and yPositionHistory[-5] == scrollTop:
                break
            self.param["progress"] = int(
                100.0 * currentNumOfFriends / numOfFriends)

        self.param["progress"] = 100
        html_source = driver.page_source
        self.param["data"] = html_source.encode('utf-8')
# class handling the fb crawling
class Crawler():
    """Run the ``Sel`` Selenium test case and extract a friend list from its output."""

    username = ''
    password = ''
    # Class-level fallback kept for compatibility; __init__ gives every
    # instance its own dict so state is never shared between crawlers.
    params = dict()

    def __init__(self, usr, pwd):
        """Store the credentials that ``crawl`` hands to the Selenium test."""
        self.username = usr
        self.password = pwd
        self.params = {}

    def crawl(self):
        """Run the selenium script that crawls fb and extract the friends.

        The test case writes the friends-page source into
        ``self.params["data"]`` and its progress into
        ``self.params["progress"]``.

        Returns ``False`` when the login failed or no page source was
        captured; otherwise the list produced by ``extractFriends``.
        """
        suite = unittest.TestSuite()
        # the purpose of the 'data' property is to return values
        self.params = {
            "usr": self.username,
            "pwd": self.password,
            "data": "",
            "progress": 0,
        }
        suite.addTest(ParametrizedTestCase.parametrize(Sel, param=self.params))
        unittest.TextTestRunner(verbosity=1).run(suite)
        # login fail
        if self.params["data"] is False:
            return False
        return self.extractFriends(self.params["data"])

    def extractFriends(self, src):
        """Parse the friends-page HTML and return the friends found in it.

        ``src`` is the page source, either UTF-8 encoded bytes or already
        decoded text.  Returns ``False`` when ``src`` is empty, otherwise a
        list of ``{"name": ..., "id": ..., "img": ...}`` dicts, one per
        ``li._698`` element that has the expected structure.
        """
        # no data
        if not src:
            return False
        if isinstance(src, bytes):
            src = src.decode('utf-8')
        soup = BeautifulSoup(src)
        body = soup.body
        if body is None:
            # Nothing to select from when the document has no <body>.
            return []
        friendArray = []
        for friend in body.select("li._698"):
            try:
                img = friend.img["src"]
                a = friend.select("div.fsl.fcb.fwb")[0].a
                name = a.text
                data = json.loads(a["data-gt"])
                fbID = data["engagement"]["eng_tid"]
            except (AttributeError, IndexError, KeyError, TypeError, ValueError):
                # Best effort: skip entries lacking the expected markup or
                # carrying malformed JSON, without hiding unrelated errors
                # such as KeyboardInterrupt.
                continue
            friendArray.append({"name": name, "id": fbID, "img": img})
        return friendArray

    def getProgress(self):
        """Return the crawl progress as a percentage (0 before ``crawl`` has run)."""
        return self.params.get("progress", 0)
Add Comment
Please, Sign In to add comment