Advertisement
Guest User

Text File URLS

a guest
Dec 7th, 2011
151
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 4.17 KB | None | 0 0
  1. #!/usr/bin/env python
  2. """Download video meta-info for the video URLs listed in an input file.
  3.  
  4. The input file is a plain-text list of video URLs, one per line (parsing of a Firefox bookmarks export file is currently commented out).
  5. """
  6. import csv
  7. import re
  8. import sys
  9. import urlparse
  10. from BeautifulSoup import BeautifulSoup
  11. from gdata.youtube.service import YouTubeService
  12.  
  13. # parse bookmarks.html
  14. #with open(sys.argv[1]) as bookmark_file:
  15. #    soup = BeautifulSoup(bookmark_file.read())
  16.    
  17. # extract youtube video urls
  18. #video_url_regex = re.compile('http://www.youtube.com/watch')
  19. #urls = [link['href'] for link in soup('a', href=video_url_regex)]
  20.  
  # parse text file: read every line of the input file named by the first
  # command-line argument; the `with` block closes the handle as soon as the
  # lines have been read instead of leaving it open for the rest of the run
  with open(sys.argv[1]) as url_file:
      urls = url_file.readlines()
  23.  
  24. ### video order doesn't change ###
  25.  
  # extract video ids from the urls (input order is preserved, duplicates kept)
  ids = []
  for video_url in urls:
      # Lines read from a file keep their trailing newline; strip it so it
      # cannot end up inside the `v` value when `v` is the last query parameter.
      url = urlparse.urlparse(video_url.strip())
      video_id = urlparse.parse_qs(url.query).get('v')
      if not video_id:
          continue  # no video_id in the url
      # parse_qs returns a list of values per key; keep the first `v` value
      ids.append(video_id[0])
  33.    
  34. ### video order doesn't change ###
  35.  
  36. # remove duplicates but changes the order of the list
  37. #ids = list(set(ids))
  38.  
  # print total number of video_ids
  # `counter` is reused further down as the "Videos Remaining" countdown,
  # where it is decremented once per processed video.
  counter = len(ids)
  print counter
  42.  
  # get some statistics for the videos
  # Service object used for every per-video entry and comment-feed lookup below.
  yt_service = YouTubeService()
  # NOTE(review): the developer key is hard-coded in the source; consider
  # loading it from configuration or the environment instead of committing it.
  yt_service.developer_key = 'AI39si4yOmI0GEhSTXH0nkiVDf6tQjCkqoys5BBYLKEr-PQxWJ0IlwnUJAcdxpocGLBBCapdYeMLIsB7KVC_OA8gYK0VKV726g'
  #NOTE: you don't need to authenticate for readonly requests
  yt_service.ssl = True #NOTE: it works for readonly requests
  #yt_service.debug = True # show requests
  49.  
  50. writer = csv.writer(open(sys.argv[2], 'wb'))
  51. writer.writerow(("video_id", "title", "view_count", "favorites", "comments", "average", "num_raters", "author", "published", "tags"))
  52. for video_id in ids:
  53.     try:
  54.         entry = yt_service.GetYouTubeVideoEntry(video_id=video_id)
  55.         dir(entry.rating)
  56.         ['FindExtensions', 'ToString', '_AddMembersToElementTree', '_BecomeChildElement', '_ConvertElementAttributeToMember', '_ConvertElementTreeToMember', '_HarvestElementTree', '_ToElementTree', '__class__', '__delattr__', '__dict__', '__doc__', '__format__', '__getattribute__', '__hash__', '__init__', '__module__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_attributes', '_children', '_namespace', '_tag', 'average', 'extension_attributes', 'extension_elements', 'max', 'min', 'num_raters', 'text']
  57.         comments = yt_service.GetYouTubeVideoCommentFeed(video_id=video_id)
  58.     except Exception, e:
  59.         print "############################"       
  60.         print "## Videos Remaining:", counter, "##"
  61.         print "############################"
  62.         counter -= 1
  63.         print "##"
  64.         print >>sys.stderr, "## Failed to retrieve entry"
  65.         error = sys.stderr, "Failed to retrieve entry video_id=%s: %s" %(video_id, e)
  66.         print "############################"
  67.         print ""
  68.         print ""
  69.         writer.writerow((video_id, e))
  70.     else:
  71.         print "############################"       
  72.         print "## Videos Remaining:", counter, "##"
  73.         print "############################"
  74.         counter -= 1
  75.         title = entry.media.title.text
  76.         print "## Title:", title
  77.         view_count = entry.statistics.view_count
  78.         print "## View count:", view_count
  79.         favorites = entry.statistics.favorite_count
  80.         print "## Favorite Count:", favorites
  81.         comments = comments.total_results.text
  82.         print "## Comment Count:", comments
  83.         if entry.rating is None: # skip it
  84.             average = 0
  85.         else:
  86.             average = entry.rating.average
  87.         print "## Average Rating:", average
  88.         if entry.rating is None: # skip it
  89.             num_raters = 0
  90.         else:
  91.             num_raters = entry.rating.num_raters
  92.         print "## Number of Raters:", num_raters
  93.         author = entry.author[0].name.text
  94.         print "## Autor:", author
  95.         published = entry.published.text
  96.         print "## Published on:", published
  97.         tags = entry.media.keywords.text
  98.         print "## Tags:", tags
  99.         print "############################"
  100.         print ""
  101.         print ""
  102.         writer.writerow((video_id, title, view_count, favorites, comments, average, num_raters, author, published, tags))
  103.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement