Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- How do I loop through each result on this search page?
- https://www.facebook.com/public/zack-januik
- def getUsersByName():
- first = "zack"
- last = "januik"
- targetURL = "https://www.facebook.com/public/" + first + "-" + last
- debugPrint("getUsersByName() targetURL: " + str(targetURL), 3)
- browser = RoboBrowser(allow_redirects = kwargs.get("allow_redirects", getUsersByNameAllowRedirectsDefault),
- user_agent = kwargs.get("user_agent", getUsersByNameUserAgentDefault),
- history = kwargs.get("history", getUsersByNameHistoryDefault),
- parser = kwargs.get("parser", getUsersByNameParserDefault))
- browser.open(targetURL)
- #output all data pulled from targetURL
- debugPrint(browser.find(), 5)
- #clean up shit... ignore this
- # #facebook hides the pages data inside a comment <!--(.+)-->
- # to get around this we find the first HTML comment:
- allHTML = str(browser.find_all(recursive=False))
- match = re.search("<!--(.+)-->", allHTML)
- # clean it up and then use that as our html page
- foundDiv = match.group(1)
- soup = BeautifulSoup(foundDiv, 'html.parser')
- # now we find the BrowseResultsContainer div and all of its children are FB profile result cards
- # this matters: find all the <div>'s in the <div id="BrowseResultsContainer">
- resultContents = soup.find("div", id="BrowseResultsContainer")
- resultSet = ResultSet()
- if not resultContents:
- return resultSet # return empty resultSet if no results found
- else:
- allEntries = resultContents.find_all("div", recursive=False)
- #gets the results initially sent by facebook on page load
- #TODO get more results by simulating page scroll down
- # FOR EACH RESULT IN THE LIST
- for count, result in enumerate(allEntries):
- #debug
- output = "\n==================\nRESULT " + str(count + 1) + " of " + str(len(allEntries)) + "\n===============\n" + str(result)
- debugPrint(output, 4)
- #end debug
- thisFBid = False
- thisName = False
- thisHandle = False
- #everything on this search result page is obfuscated. so you need to do lots of sneaky tricks to get the data reliably.
- entry = BeautifulSoup(str(result), 'html.parser')
- # # get the FBid:
- databtTag = entry.find(has_data_bt)
- databtDict = json.loads(databtTag["data-bt"])
- thisFBid = databtDict['id']
- # the 'a' tage with the span inside of it is the one we want:
- allLinks = entry.find_all("a")
- for link in allLinks:
- possibleSpan = link.find("span")
- if possibleSpan:
- #look for the link with the span in it. if found, add that data to a FacebookProfile
- debugPrint("\n\n\nFOUND THE ONE\n\n", 5)
- debugPrint(link,5)
- thisName = link.text
- thisHandle = link['href'].split("/")[-1]
- # thisPic = "https://graph.facebook.com/" + thisFBid + "/picture"
- debugPrint("Found FacebookProfile: " + str(thisName) + " " + str(thisHandle) , 4)
- #new FacebookProfile, add it to the resultSet for output
- newFacebookProfile = FacebookProfile(thisFBid)
- newFacebookProfile.name = thisName
- newFacebookProfile.buildProfile()
- resultSet.append(newFacebookProfile)
- return resultSet
Advertisement
Add Comment
Please sign in to add a comment
Advertisement