Advertisement
Guest User

Untitled

a guest
May 22nd, 2019
78
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.12 KB | None | 0 0
  1. How do I loop through each result in this search page?
  2.  
  3. https://www.facebook.com/public/zack-januik
  4.  
  5.  
  6. def getUsersByName():
  7.  
  8. first = "zack"
  9. last = "januik"
  10.  
  11. targetURL = "https://www.facebook.com/public/" + first + "-" + last
  12. debugPrint("getUsersByName() targetURL: " + str(targetURL), 3)
  13.  
  14.  
  15. browser = RoboBrowser(allow_redirects = kwargs.get("allow_redirects", getUsersByNameAllowRedirectsDefault),
  16. user_agent = kwargs.get("user_agent", getUsersByNameUserAgentDefault),
  17. history = kwargs.get("history", getUsersByNameHistoryDefault),
  18. parser = kwargs.get("parser", getUsersByNameParserDefault))
  19.  
  20.  
  21. browser.open(targetURL)
  22. #output all data pulled from targetURL
  23. debugPrint(browser.find(), 5)
  24.  
  25.  
  26. #clean up shit... ignore this
  27. # #facebook hides the pages data inside a comment <!--(.+)-->
  28. # to get around this we find the first HTML comment:
  29. allHTML = str(browser.find_all(recursive=False))
  30. match = re.search("<!--(.+)-->", allHTML)
  31. # clean it up and then use that as our html page
  32. foundDiv = match.group(1)
  33.  
  34. soup = BeautifulSoup(foundDiv, 'html.parser')
  35. # now we find the BrowseResultsContainer div and all of its children are FB profile result cards
  36.  
  37. # this matters: find all the <div>'s in the <div id="BrowseResultsContainer">
  38. resultContents = soup.find("div", id="BrowseResultsContainer")
  39. resultSet = ResultSet()
  40. if not resultContents:
  41. return resultSet # return empty resultSet if no results found
  42. else:
  43. allEntries = resultContents.find_all("div", recursive=False)
  44.  
  45.  
  46.  
  47. #gets the results initially sent by facebook on page load
  48. #TODO get more results by simulating page scroll down
  49. # FOR EACH RESULT IN THE LIST
  50. for count, result in enumerate(allEntries):
  51. #debug
  52. output = "\n==================\nRESULT " + str(count + 1) + " of " + str(len(allEntries)) + "\n===============\n" + str(result)
  53. debugPrint(output, 4)
  54. #end debug
  55.  
  56. thisFBid = False
  57. thisName = False
  58. thisHandle = False
  59.  
  60.  
  61.  
  62. #everything on this search result page is obfuscated. so you need to do lots of sneaky tricks to get the data reliably.
  63. entry = BeautifulSoup(str(result), 'html.parser')
  64.  
  65. # # get the FBid:
  66. databtTag = entry.find(has_data_bt)
  67. databtDict = json.loads(databtTag["data-bt"])
  68. thisFBid = databtDict['id']
  69.  
  70. # the 'a' tage with the span inside of it is the one we want:
  71. allLinks = entry.find_all("a")
  72. for link in allLinks:
  73. possibleSpan = link.find("span")
  74. if possibleSpan:
  75. #look for the link with the span in it. if found, add that data to a FacebookProfile
  76.  
  77. debugPrint("\n\n\nFOUND THE ONE\n\n", 5)
  78. debugPrint(link,5)
  79.  
  80. thisName = link.text
  81. thisHandle = link['href'].split("/")[-1]
  82. # thisPic = "https://graph.facebook.com/" + thisFBid + "/picture"
  83. debugPrint("Found FacebookProfile: " + str(thisName) + " " + str(thisHandle) , 4)
  84.  
  85. #new FacebookProfile, add it to the resultSet for output
  86. newFacebookProfile = FacebookProfile(thisFBid)
  87. newFacebookProfile.name = thisName
  88. newFacebookProfile.buildProfile()
  89. resultSet.append(newFacebookProfile)
  90.  
  91.  
  92. return resultSet
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement