Advertisement
Guest User

Untitled

a guest
Oct 29th, 2014
188
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.47 KB | None | 0 0
  1. """
  2. Facebook Music Group YouTube Link Extractor
  3. Crawls a specified Facebook group and returns a list of posted YouTube
  4. links sorted by the amount of likes each has received to maybe find good tunes.
  5. """
  6.  
  7. import facebook
  8. import requests
  9. import operator
  10. from urlparse import urlparse
  11.  
# Get a temporary token from https://developers.facebook.com/tools/explorer/
# Under the "Get Token" button check 'USER_GROUPS' and, in the Extended tab,
# 'READ_STREAM'.
# Passed to facebook.GraphAPI() in main(); empty here, so it must be filled in.
ACCESS_TOKEN = ''

# Find the group ID by pasting the group URL into http://lookup-id.com/
# main() reads the 'feed' connection of this ID; empty here, so it must be
# filled in.
GROUP = ''
  18.  
  19.  
  20. def main():
  21.     """
  22.    Connect to Graph API and parse the group feed passing to link extractor
  23.    functions. Loops through all pagination links to get every post.
  24.    """
  25.     all_posts = []
  26.     graph = facebook.GraphAPI(ACCESS_TOKEN)
  27.     posts = graph.get_connections(GROUP, 'feed')
  28.  
  29.     while True:
  30.         try:
  31.             [fetch_links(post, graph, all_posts) for post in posts["data"]]
  32.             posts = requests.get(posts['paging']['next']).json()
  33.         except KeyError:
  34.             break
  35.      
  36.     youtube_links = only_tube(all_posts)
  37.     write_text(youtube_links)
  38.  
  39.  
  40. def fetch_links(post, graph, all_posts):
  41.     """
  42.    Take all group posts and get the total amount of likes for each one,
  43.    some posts are not available due to 'external app' privacy settings
  44.    by poster so will skip any where a link is not present in JSON
  45.    """
  46.     try:
  47.         if post["source"]:
  48.             post_link =  post["source"]
  49.             post_id = post["id"]
  50.             get_likes = graph.get_connections(post_id, 'likes', summary=1)
  51.             like_count = get_likes["summary"]["total_count"]
  52.             all_posts.append((post_link, like_count))
  53.     except KeyError:
  54.         pass  
  55.    
  56.     return all_posts
  57.  
  58.  
  59. def only_tube(all_posts):
  60.     """
  61.    Filter out any links that are not YouTube and sort by likes descending.
  62.    """
  63.     youtube_links = []
  64.     for row in all_posts:
  65.             parsed = (urlparse(row[0]))
  66.             video_id = parsed.path.split("/")[-1]
  67.             if parsed.netloc == 'www.youtube.com':
  68.                url = ('http://youtube.com/watch?v='+video_id)
  69.                youtube_links.append((url, row[1]))
  70.     links = sorted(youtube_links, key=operator.itemgetter(1), reverse=True)
  71.     return links
  72.  
  73.  
  74. def write_text(links):
  75.     txt_out = open('YouTubeLinks.txt', 'wb')
  76.     for row in links:
  77.         txt_out.write("%s\n" % (row[0]))
  78.     txt_out.close()
  79.  
  80.  
# Run the crawl only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement