Guest User

Untitled

a guest
Aug 20th, 2016
91
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 5.39 KB | None | 0 0
  1. # -*- coding: utf-8 -*-
  2.  
  3. from selenium import webdriver
  4. from selenium.webdriver.common.by import By
  5. from selenium.webdriver.common.keys import Keys
  6. from selenium.webdriver.support.ui import Select
  7. from selenium.webdriver.support.ui import WebDriverWait
  8. from selenium.common.exceptions import TimeoutException
  9. from selenium.webdriver.support import expected_conditions as EC
  10. from selenium.common.exceptions import NoSuchElementException
  11. from selenium.common.exceptions import NoAlertPresentException
  12. import sys
  13.  
  14. import pdb
  15.  
  16. import unittest, time, re, json
  17.  
  18. from bs4 import BeautifulSoup
  19. from parametrizedTestCase import ParametrizedTestCase
  20.  
  21.  
  22.  
  23. class Sel(ParametrizedTestCase):
  24.     usr = ""
  25.     pwd = ""
  26.     def setUp(self):
  27.         # self.driver = webdriver.Firefox()
  28.  
  29.         self.driver = webdriver.PhantomJS()
  30.         self.driver.set_window_size(1120, 800)
  31.  
  32.         self.driver.implicitly_wait(30)
  33.         self.base_url = "https://facebook.com"
  34.         self.verificationErrors = []
  35.         self.accept_next_alert = True
  36.  
  37.         self.usr = self.param["usr"]
  38.         self.pwd = self.param["pwd"]
  39.  
  40.     def tearDown(self):
  41.         self.driver.quit()
  42.         print 'closed PhantomJS'
  43.  
  44.     def test_sel(self):
  45.         driver = self.driver
  46.         delay = 2
  47.  
  48.         #log in
  49.         print "GOT HERE 1"
  50.         driver.get(self.base_url)
  51.         driver.find_element_by_id("email").clear()
  52.         driver.find_element_by_id("email").send_keys(self.usr)
  53.         driver.find_element_by_id("pass").clear()
  54.         driver.find_element_by_id("pass").send_keys(self.pwd)
  55.         driver.get_screenshot_as_file('here.png')
  56.         driver.find_element_by_id("u_0_o").click()
  57.         driver.get_screenshot_as_file('here2.png')
  58.  
  59.         print "GOT HERE 2"
  60.  
  61.  
  62.         # Create wait obj with a 5 sec timeout, and default 0.5 poll frequency
  63.         wait = WebDriverWait(driver, 5)
  64.          
  65.         # Test that login was successful by checking if the URL in the browser changed
  66.         try:
  67.             page_loaded = wait.until(
  68.             lambda driver: "login_attempt=1" not in driver.current_url
  69.             )
  70.         except TimeoutException:
  71.             self.param["data"] = False #login fail
  72.             self.fail("Loading timeout expired")
  73.        
  74.         print "Login Complete"
  75.  
  76.         print driver.page_source.encode('utf-8').strip()
  77.  
  78.         fbProfileBase = driver.find_element_by_xpath('//*[@title="Profile"]').get_attribute("href")
  79.         driver.get(fbProfileBase + "/friends")
  80.         friendTag = driver.find_element_by_class_name("_3d0")
  81.         numOfFriends = int(friendTag.text.replace(',', ''))
  82.  
  83.         print self.param["usr"] + ' number of friends: ' + str(numOfFriends)
  84.  
  85.  
  86.         #scroll until position stops changing
  87.         yPositionHistory = []
  88.         while True:
  89.             self.driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
  90.             time.sleep(delay)
  91.             scrollTop = self.driver.execute_script("return window.pageYOffset")
  92.             yPositionHistory.append(scrollTop);
  93.  
  94.             currentNumOfFriends = self.driver.execute_script("return document.body.getElementsByClassName('_698').length");
  95.             if not currentNumOfFriends:
  96.                 currentNumOfFriends = 0
  97.  
  98.             if currentNumOfFriends == numOfFriends:
  99.                 break
  100.  
  101.             #if the page has not been scrolled for 5 time periods, break
  102.             if len(yPositionHistory) >= 5 and yPositionHistory[-5] == scrollTop:
  103.                 break
  104.  
  105.             self.param["progress"] = int(100.0 * currentNumOfFriends / numOfFriends)
  106.  
  107.  
  108.         self.param["progress"] = 100
  109.         html_source = driver.page_source
  110.         data = html_source.encode('utf-8')
  111.  
  112.         self.param["data"] = data
  113.  
  114.  
  115. #class handling the fb crawling
  116. class Crawler():
  117.  
  118.     username = ''
  119.     password = ''
  120.     params = dict()
  121.  
  122.     def __init__(self, usr, pwd):
  123.         self.username = usr
  124.         self.password = pwd
  125.  
  126.     def crawl(self):
  127.         """runs the selenium script that crawls fb,
  128.        Saves the long src of friend list to self.data and
  129.        extracts friends
  130.        """
  131.         suite = unittest.TestSuite()
  132.  
  133.         #the purpose of the 'data' property is to return values
  134.         self.params = {"usr": self.username, "pwd": self.password, "data": "", "progress": 0}
  135.  
  136.         suite.addTest(ParametrizedTestCase.parametrize(Sel, param=self.params))
  137.         unittest.TextTestRunner(verbosity=1).run(suite)
  138.  
  139.         #login fail
  140.         if self.params["data"] == False:
  141.             return False
  142.         else:
  143.             return self.extractFriends(self.params["data"])
  144.  
  145.  
  146.     def extractFriends(self, src):
  147.         #no data
  148.         if src == '':
  149.             return False
  150.  
  151.         soup = BeautifulSoup(src.decode('utf-8'))
  152.         body = soup.body
  153.         friends = body.select("li._698")
  154.  
  155.         friendArray = []
  156.         for friend in friends:
  157.             try:
  158.                 img = friend.img["src"]
  159.                 div = friend.select("div.fsl.fcb.fwb")[0]
  160.                 a = div.a
  161.                 name = a.text
  162.                 data = json.loads(a["data-gt"])
  163.                 fbID = data["engagement"]["eng_tid"]
  164.                 friendArray.append({"name": name, "id": fbID, "img": img})
  165.             except:
  166.                 pass
  167.          
  168.         return friendArray
  169.  
  170.     def getProgress(self):
  171.         return self.params["progress"]
Add Comment
Please, Sign In to add comment