Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/usr/bin/env python
"""Download video meta-info for the YouTube video URLs listed in a file.

Usage: SCRIPT URLS_FILE OUTPUT_CSV

URLS_FILE is a plain text file with one video URL per line.  (Reading the
URLs from a Firefox bookmarks export instead is kept below as commented-out
code.)  For every URL that carries a ``v`` query parameter, one row is
written to OUTPUT_CSV, in the order the URLs appear in URLS_FILE.  Videos
whose meta-info cannot be fetched get a short ``(video_id, error)`` row.

The script targets Python 2 (``urlparse``, ``gdata``); the syntax used here
is deliberately limited to forms that also parse under Python 3.
"""
import csv
import re
import sys
import urlparse

# NOTE: `re` and `BeautifulSoup` are only needed by the commented-out
# bookmarks.html input mode below.
from BeautifulSoup import BeautifulSoup
from gdata.youtube.service import YouTubeService

BANNER = "############################"
CSV_HEADER = ("video_id", "title", "view_count", "favorites", "comments",
              "average", "num_raters", "author", "published", "tags")


def _optional_attr(parent, name, default):
    """Return ``getattr(parent, name)``, or *default* when *parent* is None.

    The original script already treated ``entry.rating`` as possibly None;
    this applies the same guard to the other sub-elements it reads.
    """
    if parent is None:
        return default
    return getattr(parent, name)


if len(sys.argv) < 3:
    sys.exit("usage: %s URLS_FILE OUTPUT_CSV" % sys.argv[0])

# parse bookmarks.html
#with open(sys.argv[1]) as bookmark_file:
#    soup = BeautifulSoup(bookmark_file.read())
# extract youtube video urls
#video_url_regex = re.compile('http://www.youtube.com/watch')
#urls = [link['href'] for link in soup('a', href=video_url_regex)]

# parse text file
# Strip each line: the trailing newline would otherwise become part of the
# last query parameter (and therefore of the video id).
with open(sys.argv[1]) as urls_file:
    urls = [line.strip() for line in urls_file]

### video order doesn't change ###
# extract video ids from the urls
ids = []
for video_url in urls:
    query = urlparse.urlparse(video_url).query
    video_id = urlparse.parse_qs(query).get('v')
    if not video_id:
        continue  # no video_id in the url
    ids.append(video_id[0])
### video order doesn't change ###

# remove duplicates but changes the order of the list
#ids = list(set(ids))

# print total number of video_ids
counter = len(ids)
print(counter)

# get some statistics for the videos
yt_service = YouTubeService()
# NOTE(review): this API credential is committed in source; consider moving
# it to configuration or the environment.
yt_service.developer_key = 'AI39si4yOmI0GEhSTXH0nkiVDf6tQjCkqoys5BBYLKEr-PQxWJ0IlwnUJAcdxpocGLBBCapdYeMLIsB7KVC_OA8gYK0VKV726g'
#NOTE: you don't need to authenticate for readonly requests
yt_service.ssl = True  #NOTE: it works for readonly requests
#yt_service.debug = True # show requests

# 'wb' is the mode the Python 2 csv module expects; the with-block makes
# sure the rows are flushed and the file is closed even if the loop aborts.
with open(sys.argv[2], 'wb') as csv_file:
    writer = csv.writer(csv_file)
    writer.writerow(CSV_HEADER)

    for video_id in ids:
        print(BANNER)
        print("## Videos Remaining: %s ##" % counter)
        print(BANNER)
        counter -= 1

        try:
            entry = yt_service.GetYouTubeVideoEntry(video_id=video_id)
            comment_feed = yt_service.GetYouTubeVideoCommentFeed(
                video_id=video_id)
        except Exception as e:
            # Best effort per video: report the failure, record it in the
            # CSV and carry on with the next id.
            print("##")
            sys.stderr.write("## Failed to retrieve entry\n")
            sys.stderr.write("Failed to retrieve entry video_id=%s: %s\n"
                             % (video_id, e))
            print(BANNER)
            print("")
            print("")
            writer.writerow((video_id, str(e)))
            continue

        title = entry.media.title.text
        print("## Title: %s" % title)

        # presumably statistics/keywords can be absent like rating is;
        # guard them the same way instead of aborting the whole run.
        view_count = _optional_attr(entry.statistics, 'view_count', 0)
        print("## View count: %s" % view_count)
        favorites = _optional_attr(entry.statistics, 'favorite_count', 0)
        print("## Favorite Count: %s" % favorites)

        comments = comment_feed.total_results.text
        print("## Comment Count: %s" % comments)

        # entry.rating is None for videos without ratings: report zeros.
        average = _optional_attr(entry.rating, 'average', 0)
        print("## Average Rating: %s" % average)
        num_raters = _optional_attr(entry.rating, 'num_raters', 0)
        print("## Number of Raters: %s" % num_raters)

        author = entry.author[0].name.text
        print("## Autor: %s" % author)
        published = entry.published.text
        print("## Published on: %s" % published)
        tags = _optional_attr(entry.media.keywords, 'text', None)
        print("## Tags: %s" % tags)

        print(BANNER)
        print("")
        print("")
        writer.writerow((video_id, title, view_count, favorites, comments,
                         average, num_raters, author, published, tags))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement