ak47suk1

utube

Jul 31st, 2011
210
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. # Author: Ricardo Garcia Gonzalez
  4. # Author: Danny Colligan
  5. # Author: Benjamin Johnson
  6. # Author: Vasyl' Vavrychuk
  7. # Author: Witold Baryluk
  8. # Author: Paweł Paprota
  9. # Author: Gergely Imreh
  10. # License: Public domain code
  11. import cookielib
  12. import ctypes
  13. import datetime
  14. import email.utils
  15. import gzip
  16. import htmlentitydefs
  17. import httplib
  18. import locale
  19. import math
  20. import netrc
  21. import os
  22. import os.path
  23. import re
  24. import socket
  25. import string
  26. import StringIO
  27. import subprocess
  28. import sys
  29. import time
  30. import urllib
  31. import urllib2
  32. import zlib
  33.  
  34. # parse_qs was moved from the cgi module to the urlparse module recently.
  35. try:
  36.     from urlparse import parse_qs
  37. except ImportError:
  38.     from cgi import parse_qs
  39.  
# Headers sent with every HTTP request (installed by YoutubeDLHandler);
# a real browser User-Agent avoids being served degraded content.
std_headers = {
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:2.0b11) Gecko/20100101 Firefox/4.0b11',
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}

# Characters considered safe for "simple" titles: ASCII letters and digits only.
simple_title_chars = string.ascii_letters.decode('ascii') + string.digits.decode('ascii')
  49.  
  50. def preferredencoding():
  51.     """Get preferred encoding.
  52.  
  53.     Returns the best encoding scheme for the system, based on
  54.     locale.getpreferredencoding() and some further tweaks.
  55.     """
  56.     def yield_preferredencoding():
  57.         try:
  58.             pref = locale.getpreferredencoding()
  59.             u'TEST'.encode(pref)
  60.         except:
  61.             pref = 'UTF-8'
  62.         while True:
  63.             yield pref
  64.     return yield_preferredencoding().next()
  65.  
  66. def htmlentity_transform(matchobj):
  67.     """Transforms an HTML entity to a Unicode character.
  68.  
  69.     This function receives a match object and is intended to be used with
  70.     the re.sub() function.
  71.     """
  72.     entity = matchobj.group(1)
  73.  
  74.     # Known non-numeric HTML entity
  75.     if entity in htmlentitydefs.name2codepoint:
  76.         return unichr(htmlentitydefs.name2codepoint[entity])
  77.  
  78.     # Unicode character
  79.     mobj = re.match(ur'(?u)#(x?\d+)', entity)
  80.     if mobj is not None:
  81.         numstr = mobj.group(1)
  82.         if numstr.startswith(u'x'):
  83.             base = 16
  84.             numstr = u'0%s' % numstr
  85.         else:
  86.             base = 10
  87.         return unichr(long(numstr, base))
  88.  
  89.     # Unknown entity in name, return its literal representation
  90.     return (u'&%s;' % entity)
  91.  
  92. def sanitize_title(utitle):
  93.     """Sanitizes a video title so it could be used as part of a filename."""
  94.     utitle = re.sub(ur'(?u)&(.+?);', htmlentity_transform, utitle)
  95.     return utitle.replace(unicode(os.sep), u'%')
  96.  
  97. def sanitize_open(filename, open_mode):
  98.     """Try to open the given filename, and slightly tweak it if this fails.
  99.  
  100.     Attempts to open the given filename. If this fails, it tries to change
  101.     the filename slightly, step by step, until it's either able to open it
  102.     or it fails and raises a final exception, like the standard open()
  103.     function.
  104.  
  105.     It returns the tuple (stream, definitive_file_name).
  106.     """
  107.     try:
  108.         if filename == u'-':
  109.             if sys.platform == 'win32':
  110.                 import msvcrt
  111.                 msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
  112.             return (sys.stdout, filename)
  113.         stream = open(filename, open_mode)
  114.         return (stream, filename)
  115.     except (IOError, OSError), err:
  116.         # In case of error, try to remove win32 forbidden chars
  117.         filename = re.sub(ur'[/<>:"\|\?\*]', u'#', filename)
  118.  
  119.         # An exception here should be caught in the caller
  120.         stream = open(filename, open_mode)
  121.         return (stream, filename)
  122.  
  123. def timeconvert(timestr):
  124.     """Convert RFC 2822 defined time string into system timestamp"""
  125.     timestamp = None
  126.     timetuple = email.utils.parsedate_tz(timestr)
  127.     if timetuple is not None:
  128.         timestamp = email.utils.mktime_tz(timetuple)
  129.     return timestamp
  130.  
  131. class DownloadError(Exception):
  132.     """Download Error exception.
  133.  
  134.     This exception may be thrown by FileDownloader objects if they are not
  135.     configured to continue on errors. They will contain the appropriate
  136.     error message.
  137.     """
  138.     pass
  139.  
  140. class SameFileError(Exception):
  141.     """Same File exception.
  142.  
  143.     This exception will be thrown by FileDownloader objects if they detect
  144.     multiple files would have to be downloaded to the same file on disk.
  145.     """
  146.     pass
  147.  
  148. class PostProcessingError(Exception):
  149.     """Post Processing exception.
  150.  
  151.     This exception may be raised by PostProcessor's .run() method to
  152.     indicate an error in the postprocessing task.
  153.     """
  154.     pass
  155.  
  156. class UnavailableVideoError(Exception):
  157.     """Unavailable Format exception.
  158.  
  159.     This exception will be thrown when a video is requested
  160.     in a format that is not available for that video.
  161.     """
  162.     pass
  163.  
  164. class ContentTooShortError(Exception):
  165.     """Content Too Short exception.
  166.  
  167.     This exception may be raised by FileDownloader objects when a file they
  168.     download is too small for what the server announced first, indicating
  169.     the connection was probably interrupted.
  170.     """
  171.     # Both in bytes
  172.     downloaded = None
  173.     expected = None
  174.  
  175.     def __init__(self, downloaded, expected):
  176.         self.downloaded = downloaded
  177.         self.expected = expected
  178.  
class YoutubeDLHandler(urllib2.HTTPHandler):
    """Handler for HTTP requests and responses.

    This class, when installed with an OpenerDirector, automatically adds
    the standard headers to every HTTP request and handles gzipped and
    deflated responses from web servers. If compression is to be avoided in
    a particular request, the original request in the program code only has
    to include the HTTP header "Youtubedl-No-Compression", which will be
    removed before making the real request.

    Part of this code was copied from:

      http://techknack.net/python-urllib2-handlers/

    Andrew Rowls, the author of that code, agreed to release it to the
    public domain.
    """

    @staticmethod
    def deflate(data):
        # Try a raw deflate stream first (negative wbits means no zlib
        # header); fall back to a zlib-wrapped stream if that fails.
        try:
            return zlib.decompress(data, -zlib.MAX_WBITS)
        except zlib.error:
            return zlib.decompress(data)

    @staticmethod
    def addinfourl_wrapper(stream, headers, url, code):
        # Build an addinfourl carrying the HTTP status code; older Python
        # versions lack the 4-argument form, so set .code manually there.
        if hasattr(urllib2.addinfourl, 'getcode'):
            return urllib2.addinfourl(stream, headers, url, code)
        ret = urllib2.addinfourl(stream, headers, url)
        ret.code = code
        return ret

    def http_request(self, req):
        # Delete any caller-supplied version of a standard header before
        # re-adding it, so the values in std_headers always win.
        for h in std_headers:
            if h in req.headers:
                del req.headers[h]
            req.add_header(h, std_headers[h])
        # NOTE(review): the mixed-case names below presumably match how
        # urllib2 capitalizes stored header keys - confirm before changing.
        if 'Youtubedl-no-compression' in req.headers:
            if 'Accept-encoding' in req.headers:
                del req.headers['Accept-encoding']
            del req.headers['Youtubedl-no-compression']
        return req

    def http_response(self, req, resp):
        old_resp = resp
        # gzip: wrap the raw body in a GzipFile and re-wrap as a response.
        if resp.headers.get('Content-encoding', '') == 'gzip':
            gz = gzip.GzipFile(fileobj=StringIO.StringIO(resp.read()), mode='r')
            resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
        # deflate: decompress eagerly and serve from an in-memory buffer.
        if resp.headers.get('Content-encoding', '') == 'deflate':
            gz = StringIO.StringIO(self.deflate(resp.read()))
            resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
        return resp
  236.  
  237. class FileDownloader(object):
  238.     """File Downloader class.
  239.  
  240.     File downloader objects are the ones responsible of downloading the
  241.     actual video file and writing it to disk if the user has requested
  242.     it, among some other tasks. In most cases there should be one per
  243.     program. As, given a video URL, the downloader doesn't know how to
  244.     extract all the needed information, task that InfoExtractors do, it
  245.     has to pass the URL to one of them.
  246.  
  247.     For this, file downloader objects have a method that allows
  248.     InfoExtractors to be registered in a given order. When it is passed
  249.     a URL, the file downloader handles it to the first InfoExtractor it
  250.     finds that reports being able to handle it. The InfoExtractor extracts
  251.     all the information about the video or videos the URL refers to, and
  252.     asks the FileDownloader to process the video information, possibly
  253.     downloading the video.
  254.  
  255.     File downloaders accept a lot of parameters. In order not to saturate
  256.     the object constructor with arguments, it receives a dictionary of
  257.     options instead. These options are available through the params
  258.     attribute for the InfoExtractors to use. The FileDownloader also
  259.     registers itself as the downloader in charge for the InfoExtractors
  260.     that are added to it, so this is a "mutual registration".
  261.  
  262.     Available options:
  263.  
  264.     username:         Username for authentication purposes.
  265.     password:         Password for authentication purposes.
  266.     usenetrc:         Use netrc for authentication instead.
  267.     quiet:            Do not print messages to stdout.
  268.     forceurl:         Force printing final URL.
  269.     forcetitle:       Force printing title.
  270.     forcethumbnail:   Force printing thumbnail URL.
  271.     forcedescription: Force printing description.
  272.     forcefilename:    Force printing final filename.
  273.     simulate:         Do not download the video files.
  274.     format:           Video format code.
  275.     format_limit:     Highest quality format to try.
  276.     outtmpl:          Template for output names.
  277.     ignoreerrors:     Do not stop on download errors.
  278.     ratelimit:        Download speed limit, in bytes/sec.
  279.     nooverwrites:     Prevent overwriting files.
  280.     retries:          Number of times to retry for HTTP error 5xx
  281.     continuedl:       Try to continue downloads if possible.
  282.     noprogress:       Do not print the progress bar.
  283.     playliststart:    Playlist item to start at.
  284.     playlistend:      Playlist item to end at.
  285.     logtostderr:      Log messages to stderr instead of stdout.
  286.     consoletitle:     Display progress in console window's titlebar.
  287.     nopart:           Do not use temporary .part files.
  288.     updatetime:       Use the Last-modified header to set output file timestamps.
  289.     """
  290.  
    # Class-level defaults; every one of these is shadowed by an instance
    # attribute assigned in __init__.
    params = None              # options dictionary (see class docstring)
    _ies = []                  # registered InfoExtractor objects
    _pps = []                  # registered PostProcessor objects
    _download_retcode = None   # process return code: 0 ok, 1 after an ignored error
    _num_downloads = None      # ordinal of the current download (for %(autonumber)s)
    _screen_file = None        # stream for screen output (stdout or stderr)
  297.  
  298.     def __init__(self, params):
  299.         """Create a FileDownloader object with the given options."""
  300.         self._ies = []
  301.         self._pps = []
  302.         self._download_retcode = 0
  303.         self._num_downloads = 0
  304.         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
  305.         self.params = params
  306.  
  307.     @staticmethod
  308.     def pmkdir(filename):
  309.         """Create directory components in filename. Similar to Unix "mkdir -p"."""
  310.         components = filename.split(os.sep)
  311.         aggregate = [os.sep.join(components[0:x]) for x in xrange(1, len(components))]
  312.         aggregate = ['%s%s' % (x, os.sep) for x in aggregate] # Finish names with separator
  313.         for dir in aggregate:
  314.             if not os.path.exists(dir):
  315.                 os.mkdir(dir)
  316.  
  317.     @staticmethod
  318.     def format_bytes(bytes):
  319.         if bytes is None:
  320.             return 'N/A'
  321.         if type(bytes) is str:
  322.             bytes = float(bytes)
  323.         if bytes == 0.0:
  324.             exponent = 0
  325.         else:
  326.             exponent = long(math.log(bytes, 1024.0))
  327.         suffix = 'bkMGTPEZY'[exponent]
  328.         converted = float(bytes) / float(1024**exponent)
  329.         return '%.2f%s' % (converted, suffix)
  330.  
  331.     @staticmethod
  332.     def calc_percent(byte_counter, data_len):
  333.         if data_len is None:
  334.             return '---.-%'
  335.         return '%6s' % ('%3.1f%%' % (float(byte_counter) / float(data_len) * 100.0))
  336.  
  337.     @staticmethod
  338.     def calc_eta(start, now, total, current):
  339.         if total is None:
  340.             return '--:--'
  341.         dif = now - start
  342.         if current == 0 or dif < 0.001: # One millisecond
  343.             return '--:--'
  344.         rate = float(current) / dif
  345.         eta = long((float(total) - float(current)) / rate)
  346.         (eta_mins, eta_secs) = divmod(eta, 60)
  347.         if eta_mins > 99:
  348.             return '--:--'
  349.         return '%02d:%02d' % (eta_mins, eta_secs)
  350.  
  351.     @staticmethod
  352.     def calc_speed(start, now, bytes):
  353.         dif = now - start
  354.         if bytes == 0 or dif < 0.001: # One millisecond
  355.             return '%10s' % '---b/s'
  356.         return '%10s' % ('%s/s' % FileDownloader.format_bytes(float(bytes) / dif))
  357.  
  358.     @staticmethod
  359.     def best_block_size(elapsed_time, bytes):
  360.         new_min = max(bytes / 2.0, 1.0)
  361.         new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
  362.         if elapsed_time < 0.001:
  363.             return long(new_max)
  364.         rate = bytes / elapsed_time
  365.         if rate > new_max:
  366.             return long(new_max)
  367.         if rate < new_min:
  368.             return long(new_min)
  369.         return long(rate)
  370.  
  371.     @staticmethod
  372.     def parse_bytes(bytestr):
  373.         """Parse a string indicating a byte quantity into a long integer."""
  374.         matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
  375.         if matchobj is None:
  376.             return None
  377.         number = float(matchobj.group(1))
  378.         multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
  379.         return long(round(number * multiplier))
  380.  
  381.     def add_info_extractor(self, ie):
  382.         """Add an InfoExtractor object to the end of the list."""
  383.         self._ies.append(ie)
  384.         ie.set_downloader(self)
  385.  
  386.     def add_post_processor(self, pp):
  387.         """Add a PostProcessor object to the end of the chain."""
  388.         self._pps.append(pp)
  389.         pp.set_downloader(self)
  390.  
    def to_screen(self, message, skip_eol=False, ignore_encoding_errors=False):
        """Print message to the screen file (stdout, or stderr when
        'logtostderr' is set) unless in quiet mode."""
        try:
            if not self.params.get('quiet', False):
                terminator = [u'\n', u''][skip_eol]
                # The trailing comma suppresses print's own newline; the
                # chosen terminator is appended to the message instead.
                print >>self._screen_file, (u'%s%s' % (message, terminator)).encode(preferredencoding()),
            # Flush even in quiet mode so earlier output is not left buffered.
            self._screen_file.flush()
        except (UnicodeEncodeError), err:
            if not ignore_encoding_errors:
                raise
  401.  
    def to_stderr(self, message):
        """Print message to stderr, encoded with the preferred encoding."""
        print >>sys.stderr, message.encode(preferredencoding())
  405.  
  406.     def to_cons_title(self, message):
  407.         """Set console/terminal window title to message."""
  408.         if not self.params.get('consoletitle', False):
  409.             return
  410.         if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
  411.             # c_wchar_p() might not be necessary if `message` is
  412.             # already of type unicode()
  413.             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
  414.         elif 'TERM' in os.environ:
  415.             sys.stderr.write('\033]0;%s\007' % message.encode(preferredencoding()))
  416.  
  417.     def fixed_template(self):
  418.         """Checks if the output template is fixed."""
  419.         return (re.search(ur'(?u)%\(.+?\)s', self.params['outtmpl']) is None)
  420.  
  421.     def trouble(self, message=None):
  422.         """Determine action to take when a download problem appears.
  423.  
  424.         Depending on if the downloader has been configured to ignore
  425.         download errors or not, this method may throw an exception or
  426.         not when errors are found, after printing the message.
  427.         """
  428.         if message is not None:
  429.             self.to_stderr(message)
  430.         if not self.params.get('ignoreerrors', False):
  431.             raise DownloadError(message)
  432.         self._download_retcode = 1
  433.  
  434.     def slow_down(self, start_time, byte_counter):
  435.         """Sleep if the download speed is over the rate limit."""
  436.         rate_limit = self.params.get('ratelimit', None)
  437.         if rate_limit is None or byte_counter == 0:
  438.             return
  439.         now = time.time()
  440.         elapsed = now - start_time
  441.         if elapsed <= 0.0:
  442.             return
  443.         speed = float(byte_counter) / elapsed
  444.         if speed > rate_limit:
  445.             time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)
  446.  
  447.     def temp_name(self, filename):
  448.         """Returns a temporary filename for the given filename."""
  449.         if self.params.get('nopart', False) or filename == u'-' or \
  450.                 (os.path.exists(filename) and not os.path.isfile(filename)):
  451.             return filename
  452.         return filename + u'.part'
  453.  
  454.     def undo_temp_name(self, filename):
  455.         if filename.endswith(u'.part'):
  456.             return filename[:-len(u'.part')]
  457.         return filename
  458.  
  459.     def try_rename(self, old_filename, new_filename):
  460.         try:
  461.             if old_filename == new_filename:
  462.                 return
  463.             os.rename(old_filename, new_filename)
  464.         except (IOError, OSError), err:
  465.             self.trouble(u'ERROR: unable to rename file')
  466.    
  467.     def try_utime(self, filename, last_modified_hdr):
  468.         """Try to set the last-modified time of the given file."""
  469.         if last_modified_hdr is None:
  470.             return
  471.         if not os.path.isfile(filename):
  472.             return
  473.         timestr = last_modified_hdr
  474.         if timestr is None:
  475.             return
  476.         filetime = timeconvert(timestr)
  477.         if filetime is None:
  478.             return
  479.         try:
  480.             os.utime(filename,(time.time(), filetime))
  481.         except:
  482.             pass
  483.  
    def report_destination(self, filename):
        """Report destination filename."""
        # Encoding errors are ignored: an unprintable filename should not
        # abort the download itself.
        self.to_screen(u'[download] Destination: %s' % filename, ignore_encoding_errors=True)
  487.  
  488.     def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
  489.         """Report download progress."""
  490.         if self.params.get('noprogress', False):
  491.             return
  492.         self.to_screen(u'\r[download] %s of %s at %s ETA %s' %
  493.                 (percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
  494.         self.to_cons_title(u'youtube-dl - %s of %s at %s ETA %s' %
  495.                 (percent_str.strip(), data_len_str.strip(), speed_str.strip(), eta_str.strip()))
  496.  
    def report_resuming_byte(self, resume_len):
        """Report attempt to resume the download at the given byte offset."""
        self.to_screen(u'[download] Resuming download at byte %s' % resume_len)
  500.  
    def report_retry(self, count, retries):
        """Report retry in case of HTTP error 5xx.

        count is the current attempt number, retries the configured maximum.
        """
        self.to_screen(u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % (count, retries))
  504.  
    def report_file_already_downloaded(self, file_name):
        """Report file has already been fully downloaded."""
        try:
            self.to_screen(u'[download] %s has already been downloaded' % file_name)
        except (UnicodeEncodeError), err:
            # The file name may not be representable in the console
            # encoding; fall back to a message without it.
            self.to_screen(u'[download] The file has already been downloaded')
  511.  
    def report_unable_to_resume(self):
        """Report that resuming was impossible and a full restart follows."""
        self.to_screen(u'[download] Unable to resume')
  515.  
  516.     def report_finish(self):
  517.         """Report download finished."""
  518.         if self.params.get('noprogress', False):
  519.             self.to_screen(u'[download] Download completed')
  520.         else:
  521.             self.to_screen(u'')
  522.  
  523.     def increment_downloads(self):
  524.         """Increment the ordinal that assigns a number to each file."""
  525.         self._num_downloads += 1
  526.  
  527.     def prepare_filename(self, info_dict):
  528.         """Generate the output filename."""
  529.         try:
  530.             template_dict = dict(info_dict)
  531.             template_dict['epoch'] = unicode(long(time.time()))
  532.             template_dict['autonumber'] = unicode('%05d' % self._num_downloads)
  533.             filename = self.params['outtmpl'] % template_dict
  534.             return filename
  535.         except (ValueError, KeyError), err:
  536.             self.trouble(u'ERROR: invalid system charset or erroneous output template')
  537.             return None
  538.  
    def process_info(self, info_dict):
        """Process a single dictionary returned by an InfoExtractor.

        In simulate mode only the requested fields are printed; otherwise
        the video is downloaded and the postprocessing chain is run on the
        resulting file. Raises UnavailableVideoError on local I/O errors
        during the download.
        """
        filename = self.prepare_filename(info_dict)
        # Do nothing else if in simulate mode
        if self.params.get('simulate', False):
            # Forced printings
            if self.params.get('forcetitle', False):
                print info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace')
            if self.params.get('forceurl', False):
                print info_dict['url'].encode(preferredencoding(), 'xmlcharrefreplace')
            if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
                print info_dict['thumbnail'].encode(preferredencoding(), 'xmlcharrefreplace')
            if self.params.get('forcedescription', False) and 'description' in info_dict:
                print info_dict['description'].encode(preferredencoding(), 'xmlcharrefreplace')
            if self.params.get('forcefilename', False) and filename is not None:
                print filename.encode(preferredencoding(), 'xmlcharrefreplace')

            return

        # prepare_filename() already reported its own error in this case.
        if filename is None:
            return
        if self.params.get('nooverwrites', False) and os.path.exists(filename):
            self.to_stderr(u'WARNING: file exists and will be skipped')
            return

        try:
            self.pmkdir(filename)
        except (OSError, IOError), err:
            self.trouble(u'ERROR: unable to create directories: %s' % str(err))
            return

        try:
            success = self._do_download(filename, info_dict['url'].encode('utf-8'), info_dict.get('player_url', None))
        except (OSError, IOError), err:
            # Local filesystem errors are surfaced to the caller as an
            # unavailable-format condition.
            raise UnavailableVideoError
        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
            self.trouble(u'ERROR: unable to download video data: %s' % str(err))
            return
        except (ContentTooShortError, ), err:
            self.trouble(u'ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
            return

        if success:
            try:
                self.post_process(filename, info_dict)
            except (PostProcessingError), err:
                self.trouble(u'ERROR: postprocessing: %s' % str(err))
                return
  587.  
  588.     def download(self, url_list):
  589.         """Download a given list of URLs."""
  590.         if len(url_list) > 1 and self.fixed_template():
  591.             raise SameFileError(self.params['outtmpl'])
  592.  
  593.         for url in url_list:
  594.             suitable_found = False
  595.             for ie in self._ies:
  596.                 # Go to next InfoExtractor if not suitable
  597.                 if not ie.suitable(url):
  598.                     continue
  599.  
  600.                 # Suitable InfoExtractor found
  601.                 suitable_found = True
  602.  
  603.                 # Extract information from URL and process it
  604.                 ie.extract(url)
  605.  
  606.                 # Suitable InfoExtractor had been found; go to next URL
  607.                 break
  608.  
  609.             if not suitable_found:
  610.                 self.trouble(u'ERROR: no suitable InfoExtractor: %s' % url)
  611.  
  612.         return self._download_retcode
  613.  
  614.     def post_process(self, filename, ie_info):
  615.         """Run the postprocessing chain on the given file."""
  616.         info = dict(ie_info)
  617.         info['filepath'] = filename
  618.         for pp in self._pps:
  619.             info = pp.run(info)
  620.             if info is None:
  621.                 break
  622.  
    def _download_with_rtmpdump(self, filename, url, player_url):
        """Download an rtmp:// URL by driving the external rtmpdump tool.

        Returns True on success, False otherwise (after reporting trouble).
        """
        self.report_destination(filename)
        tmpfilename = self.temp_name(filename)

        # Check for rtmpdump first
        try:
            subprocess.call(['rtmpdump', '-h'], stdout=(file(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
        except (OSError, IOError):
            self.trouble(u'ERROR: RTMP download detected but "rtmpdump" could not be run')
            return False

        # Download using rtmpdump. rtmpdump returns exit code 2 when
        # the connection was interrumpted and resuming appears to be
        # possible. This is part of rtmpdump's normal usage, AFAIK.
        # The [[], [...]][condition] idiom appends the optional arguments
        # only when the condition holds.
        basic_args = ['rtmpdump', '-q'] + [[], ['-W', player_url]][player_url is not None] + ['-r', url, '-o', tmpfilename]
        retval = subprocess.call(basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)])
        while retval == 2 or retval == 1:
            prevsize = os.path.getsize(tmpfilename)
            self.to_screen(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True)
            time.sleep(5.0) # This seems to be needed
            retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
            cursize = os.path.getsize(tmpfilename)
            if prevsize == cursize and retval == 1:
                # No progress was made and rtmpdump still reports failure:
                # give up instead of retrying forever.
                break
        if retval == 0:
            self.to_screen(u'\r[rtmpdump] %s bytes' % os.path.getsize(tmpfilename))
            self.try_rename(tmpfilename, filename)
            return True
        else:
            self.trouble(u'\nERROR: rtmpdump exited with code %d' % retval)
            return False
  654.  
  655.     def _do_download(self, filename, url, player_url):
  656.         # Check file already present
  657.         if self.params.get('continuedl', False) and os.path.isfile(filename) and not self.params.get('nopart', False):
  658.             self.report_file_already_downloaded(filename)
  659.             return True
  660.  
  661.         # Attempt to download using rtmpdump
  662.         if url.startswith('rtmp'):
  663.             return self._download_with_rtmpdump(filename, url, player_url)
  664.  
  665.         tmpfilename = self.temp_name(filename)
  666.         stream = None
  667.         open_mode = 'wb'
  668.  
  669.         # Do not include the Accept-Encoding header
  670.         headers = {'Youtubedl-no-compression': 'True'}
  671.         basic_request = urllib2.Request(url, None, headers)
  672.         request = urllib2.Request(url, None, headers)
  673.  
  674.         # Establish possible resume length
  675.         if os.path.isfile(tmpfilename):
  676.             resume_len = os.path.getsize(tmpfilename)
  677.         else:
  678.             resume_len = 0
  679.  
  680.         # Request parameters in case of being able to resume
  681.         if self.params.get('continuedl', False) and resume_len != 0:
  682.             self.report_resuming_byte(resume_len)
  683.             request.add_header('Range','bytes=%d-' % resume_len)
  684.             open_mode = 'ab'
  685.  
  686.         count = 0
  687.         retries = self.params.get('retries', 0)
  688.         while count <= retries:
  689.             # Establish connection
  690.             try:
  691.                 data = urllib2.urlopen(request)
  692.                 break
  693.             except (urllib2.HTTPError, ), err:
  694.                 if (err.code < 500 or err.code >= 600) and err.code != 416:
  695.                     # Unexpected HTTP error
  696.                     raise
  697.                 elif err.code == 416:
  698.                     # Unable to resume (requested range not satisfiable)
  699.                     try:
  700.                         # Open the connection again without the range header
  701.                         data = urllib2.urlopen(basic_request)
  702.                         content_length = data.info()['Content-Length']
  703.                     except (urllib2.HTTPError, ), err:
  704.                         if err.code < 500 or err.code >= 600:
  705.                             raise
  706.                     else:
  707.                         # Examine the reported length
  708.                         if (content_length is not None and
  709.                             (resume_len - 100 < long(content_length) < resume_len + 100)):
  710.                             # The file had already been fully downloaded.
  711.                             # Explanation to the above condition: in issue #175 it was revealed that
  712.                             # YouTube sometimes adds or removes a few bytes from the end of the file,
  713.                             # changing the file size slightly and causing problems for some users. So
  714.                             # I decided to implement a suggested change and consider the file
  715.                             # completely downloaded if the file size differs less than 100 bytes from
  716.                             # the one in the hard drive.
  717.                             self.report_file_already_downloaded(filename)
  718.                             self.try_rename(tmpfilename, filename)
  719.                             return True
  720.                         else:
  721.                             # The length does not match, we start the download over
  722.                             self.report_unable_to_resume()
  723.                             open_mode = 'wb'
  724.                             break
  725.             # Retry
  726.             count += 1
  727.             if count <= retries:
  728.                 self.report_retry(count, retries)
  729.  
  730.         if count > retries:
  731.             self.trouble(u'ERROR: giving up after %s retries' % retries)
  732.             return False
  733.  
  734.         data_len = data.info().get('Content-length', None)
  735.         if data_len is not None:
  736.             data_len = long(data_len) + resume_len
  737.         data_len_str = self.format_bytes(data_len)
  738.         byte_counter = 0 + resume_len
  739.         block_size = 1024
  740.         start = time.time()
  741.         while True:
  742.             # Download and write
  743.             before = time.time()
  744.             data_block = data.read(block_size)
  745.             after = time.time()
  746.             if len(data_block) == 0:
  747.                 break
  748.             byte_counter += len(data_block)
  749.  
  750.             # Open file just in time
  751.             if stream is None:
  752.                 try:
  753.                     (stream, tmpfilename) = sanitize_open(tmpfilename, open_mode)
  754.                     filename = self.undo_temp_name(tmpfilename)
  755.                     self.report_destination(filename)
  756.                 except (OSError, IOError), err:
  757.                     self.trouble(u'ERROR: unable to open for writing: %s' % str(err))
  758.                     return False
  759.             try:
  760.                 stream.write(data_block)
  761.             except (IOError, OSError), err:
  762.                 self.trouble(u'\nERROR: unable to write data: %s' % str(err))
  763.                 return False
  764.             block_size = self.best_block_size(after - before, len(data_block))
  765.  
  766.             # Progress message
  767.             percent_str = self.calc_percent(byte_counter, data_len)
  768.             eta_str = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
  769.             speed_str = self.calc_speed(start, time.time(), byte_counter - resume_len)
  770.             self.report_progress(percent_str, data_len_str, speed_str, eta_str)
  771.  
  772.             # Apply rate limit
  773.             self.slow_down(start, byte_counter - resume_len)
  774.  
  775.         stream.close()
  776.         self.report_finish()
  777.         if data_len is not None and byte_counter != data_len:
  778.             raise ContentTooShortError(byte_counter, long(data_len))
  779.         self.try_rename(tmpfilename, filename)
  780.  
  781.         # Update file modification time
  782.         if self.params.get('updatetime', True):
  783.             self.try_utime(filename, data.info().get('last-modified', None))
  784.  
  785.         return True
  786.  
  787. class InfoExtractor(object):
  788.     """Information Extractor class.
  789.  
  790.     Information extractors are the classes that, given a URL, extract
  791.     information from the video (or videos) the URL refers to. This
  792.     information includes the real video URL, the video title and simplified
  793.     title, author and others. The information is stored in a dictionary
  794.     which is then passed to the FileDownloader. The FileDownloader
  795.     processes this information possibly downloading the video to the file
  796.     system, among other possible outcomes. The dictionaries must include
  797.     the following fields:
  798.  
  799.     id:     Video identifier.
  800.     url:        Final video URL.
  801.     uploader:   Nickname of the video uploader.
  802.     title:      Literal title.
  803.     stitle:     Simplified title.
  804.     ext:        Video filename extension.
  805.     format:     Video format.
  806.     player_url: SWF Player URL (may be None).
  807.  
  808.     The following fields are optional. Their primary purpose is to allow
  809.     youtube-dl to serve as the backend for a video search function, such
  810.     as the one in youtube2mp3.  They are only used when their respective
  811.     forced printing functions are called:
  812.  
  813.     thumbnail:  Full URL to a video thumbnail image.
  814.     description:    One-line video description.
  815.  
  816.     Subclasses of this one should re-define the _real_initialize() and
  817.     _real_extract() methods, as well as the suitable() static method.
  818.     Probably, they should also be instantiated and added to the main
  819.     downloader.
  820.     """
  821.  
  822.     _ready = False
  823.     _downloader = None
  824.  
  825.     def __init__(self, downloader=None):
  826.         """Constructor. Receives an optional downloader."""
  827.         self._ready = False
  828.         self.set_downloader(downloader)
  829.  
  830.     @staticmethod
  831.     def suitable(url):
  832.         """Receives a URL and returns True if suitable for this IE."""
  833.         return False
  834.  
  835.     def initialize(self):
  836.         """Initializes an instance (authentication, etc)."""
  837.         if not self._ready:
  838.             self._real_initialize()
  839.             self._ready = True
  840.  
  841.     def extract(self, url):
  842.         """Extracts URL information and returns it in list of dicts."""
  843.         self.initialize()
  844.         return self._real_extract(url)
  845.  
  846.     def set_downloader(self, downloader):
  847.         """Sets the downloader for this IE."""
  848.         self._downloader = downloader
  849.  
  850.     def _real_initialize(self):
  851.         """Real initialization process. Redefine in subclasses."""
  852.         pass
  853.  
  854.     def _real_extract(self, url):
  855.         """Real extraction process. Redefine in subclasses."""
  856.         pass
  857.  
class YoutubeIE(InfoExtractor):
    """Information extractor for youtube.com."""

    # Group 1 matches the optional scheme/host/path prefix and group 2
    # captures the video id; the (?(1).+)? conditional only tolerates
    # trailing characters when a real URL prefix was matched.
    _VALID_URL = r'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/)(?:(?:(?:v|embed|e)/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=)))?([0-9A-Za-z_-]+)(?(1).+)?$'
    _LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
    _LOGIN_URL = 'https://www.youtube.com/signup?next=/&gl=US&hl=en'
    _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
    # Machine name used to look up credentials in the user's .netrc file.
    _NETRC_MACHINE = 'youtube'
    # Listed in order of quality
    _available_formats = ['38', '37', '22', '45', '35', '34', '43', '18', '6', '5', '17', '13']
    # Maps a format code to its file extension; codes missing here fall
    # back to 'flv' in _real_extract.
    _video_extensions = {
        '13': '3gp',
        '17': 'mp4',
        '18': 'mp4',
        '22': 'mp4',
        '37': 'mp4',
        '38': 'video', # You actually don't know if this will be MOV, AVI or whatever
        '43': 'webm',
        '45': 'webm',
    }

    @staticmethod
    def suitable(url):
        """Return True if this extractor can handle the given URL."""
        return (re.match(YoutubeIE._VALID_URL, url) is not None)

    def report_lang(self):
        """Report attempt to set language."""
        self._downloader.to_screen(u'[youtube] Setting language')

    def report_login(self):
        """Report attempt to log in."""
        self._downloader.to_screen(u'[youtube] Logging in')

    def report_age_confirmation(self):
        """Report attempt to confirm age."""
        self._downloader.to_screen(u'[youtube] Confirming age')

    def report_video_webpage_download(self, video_id):
        """Report attempt to download video webpage."""
        self._downloader.to_screen(u'[youtube] %s: Downloading video webpage' % video_id)

    def report_video_info_webpage_download(self, video_id):
        """Report attempt to download video info webpage."""
        self._downloader.to_screen(u'[youtube] %s: Downloading video info webpage' % video_id)

    def report_information_extraction(self, video_id):
        """Report attempt to extract video information."""
        self._downloader.to_screen(u'[youtube] %s: Extracting video information' % video_id)

    def report_unavailable_format(self, video_id, format):
        """Report that the requested format is not available."""
        self._downloader.to_screen(u'[youtube] %s: Format %s not available' % (video_id, format))

    def report_rtmp_download(self):
        """Indicate the download will use the RTMP protocol."""
        self._downloader.to_screen(u'[youtube] RTMP download detected')

    def _real_initialize(self):
        """Set language and, if credentials are available, log in and
        confirm age. All failures are reported as warnings/errors and
        abort initialization without raising."""
        if self._downloader is None:
            return

        username = None
        password = None
        downloader_params = self._downloader.params

        # Attempt to use provided username and password or .netrc data
        if downloader_params.get('username', None) is not None:
            username = downloader_params['username']
            password = downloader_params['password']
        elif downloader_params.get('usenetrc', False):
            try:
                info = netrc.netrc().authenticators(self._NETRC_MACHINE)
                if info is not None:
                    username = info[0]
                    password = info[2]
                else:
                    raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
            except (IOError, netrc.NetrcParseError), err:
                self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
                return

        # Set language
        request = urllib2.Request(self._LANG_URL)
        try:
            self.report_lang()
            urllib2.urlopen(request).read()
        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
            self._downloader.to_stderr(u'WARNING: unable to set language: %s' % str(err))
            return

        # No authentication to be performed
        if username is None:
            return

        # Log in
        login_form = {
                'current_form': 'loginForm',
                'next':     '/',
                'action_login': 'Log In',
                'username': username,
                'password': password,
                }
        request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form))
        try:
            self.report_login()
            login_results = urllib2.urlopen(request).read()
            # If the login form is still present in the response, the
            # credentials were rejected.
            if re.search(r'(?i)<form[^>]* name="loginForm"', login_results) is not None:
                self._downloader.to_stderr(u'WARNING: unable to log in: bad username or password')
                return
        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
            self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))
            return

        # Confirm age
        age_form = {
                'next_url':     '/',
                'action_confirm':   'Confirm',
                }
        request = urllib2.Request(self._AGE_URL, urllib.urlencode(age_form))
        try:
            self.report_age_confirmation()
            age_results = urllib2.urlopen(request).read()
        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
            self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
            return

    def _real_extract(self, url):
        """Download the watch page and get_video_info data for the URL,
        then hand one info dictionary per selected format to the
        downloader via process_info()."""
        # Extract video id from URL
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
            return
        video_id = mobj.group(2)

        # Get video webpage
        self.report_video_webpage_download(video_id)
        request = urllib2.Request('http://www.youtube.com/watch?v=%s&gl=US&hl=en&amp;has_verified=1' % video_id)
        try:
            video_webpage = urllib2.urlopen(request).read()
        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
            self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
            return

        # Attempt to extract SWF player URL
        # The URL in the page is JS-escaped ("http:\/\/..."); the re.sub
        # below strips those backslash escapes.
        mobj = re.search(r'swfConfig.*?"(http:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
        if mobj is not None:
            player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
        else:
            player_url = None

        # Get video info
        # Several 'el' variants are tried because not every video answers
        # every one; the first response containing a 'token' wins.
        self.report_video_info_webpage_download(video_id)
        for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
            video_info_url = ('http://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
                       % (video_id, el_type))
            request = urllib2.Request(video_info_url)
            try:
                video_info_webpage = urllib2.urlopen(request).read()
                video_info = parse_qs(video_info_webpage)
                if 'token' in video_info:
                    break
            except (urllib2.URLError, httplib.HTTPException, socket.error), err:
                self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err))
                return
        if 'token' not in video_info:
            if 'reason' in video_info:
                self._downloader.trouble(u'ERROR: YouTube said: %s' % video_info['reason'][0].decode('utf-8'))
            else:
                self._downloader.trouble(u'ERROR: "token" parameter not in video info for unknown reason')
            return

        # Start extracting information
        self.report_information_extraction(video_id)

        # uploader
        if 'author' not in video_info:
            self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
            return
        video_uploader = urllib.unquote_plus(video_info['author'][0])

        # title
        if 'title' not in video_info:
            self._downloader.trouble(u'ERROR: unable to extract video title')
            return
        video_title = urllib.unquote_plus(video_info['title'][0])
        video_title = video_title.decode('utf-8')
        video_title = sanitize_title(video_title)

        # simplified title
        # Collapse every run of non-alphanumeric characters to '_'.
        simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
        simple_title = simple_title.strip(ur'_')

        # thumbnail image
        if 'thumbnail_url' not in video_info:
            self._downloader.trouble(u'WARNING: unable to extract video thumbnail')
            video_thumbnail = ''
        else:   # don't panic if we can't find it
            video_thumbnail = urllib.unquote_plus(video_info['thumbnail_url'][0])

        # upload date
        upload_date = u'NA'
        mobj = re.search(r'id="eow-date.*?>(.*?)</span>', video_webpage, re.DOTALL)
        if mobj is not None:
            upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
            format_expressions = ['%d %B %Y', '%B %d %Y', '%b %d %Y']
            # Once one expression parses, the later ones fail on the
            # already-reformatted value and are silently ignored.
            for expression in format_expressions:
                try:
                    upload_date = datetime.datetime.strptime(upload_date, expression).strftime('%Y%m%d')
                except:
                    pass

        # description
        video_description = 'No description available.'
        if self._downloader.params.get('forcedescription', False):
            mobj = re.search(r'<meta name="description" content="(.*)"(?:\s*/)?>', video_webpage)
            if mobj is not None:
                video_description = mobj.group(1)

        # token
        video_token = urllib.unquote_plus(video_info['token'][0])

        # Decide which formats to download
        req_format = self._downloader.params.get('format', None)

        # fmt_url_map is a comma-separated list of 'format|url' pairs.
        if 'fmt_url_map' in video_info and len(video_info['fmt_url_map']) >= 1 and ',' in video_info['fmt_url_map'][0]:
            url_map = dict(tuple(pair.split('|')) for pair in video_info['fmt_url_map'][0].split(','))
            format_limit = self._downloader.params.get('format_limit', None)
            if format_limit is not None and format_limit in self._available_formats:
                format_list = self._available_formats[self._available_formats.index(format_limit):]
            else:
                format_list = self._available_formats
            existing_formats = [x for x in format_list if x in url_map]
            if len(existing_formats) == 0:
                self._downloader.trouble(u'ERROR: no known formats available for video')
                return
            if req_format is None:
                video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
            elif req_format == '-1':
                video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
            else:
                # Specific format
                if req_format not in url_map:
                    self._downloader.trouble(u'ERROR: requested format not available')
                    return
                video_url_list = [(req_format, url_map[req_format])] # Specific format

        elif 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
            self.report_rtmp_download()
            video_url_list = [(None, video_info['conn'][0])]

        else:
            self._downloader.trouble(u'ERROR: no fmt_url_map or conn information found in video info')
            return

        for format_param, video_real_url in video_url_list:
            # At this point we have a new video
            self._downloader.increment_downloads()

            # Extension
            video_extension = self._video_extensions.get(format_param, 'flv')

            # Find the video URL in fmt_url_map or conn paramters
            try:
                # Process video information
                self._downloader.process_info({
                    'id':       video_id.decode('utf-8'),
                    'url':      video_real_url.decode('utf-8'),
                    'uploader': video_uploader.decode('utf-8'),
                    'upload_date':  upload_date,
                    'title':    video_title,
                    'stitle':   simple_title,
                    'ext':      video_extension.decode('utf-8'),
                    'format':   (format_param is None and u'NA' or format_param.decode('utf-8')),
                    'thumbnail':    video_thumbnail.decode('utf-8'),
                    'description':  video_description.decode('utf-8'),
                    'player_url':   player_url,
                })
            except UnavailableVideoError, err:
                self._downloader.trouble(u'\nERROR: unable to download video')
  1137.  
  1138.  
class MetacafeIE(InfoExtractor):
    """Information Extractor for metacafe.com."""

    # Group 1 is the video id, group 2 the simplified title from the URL.
    _VALID_URL = r'(?:http://)?(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*'
    _DISCLAIMER = 'http://www.metacafe.com/family_filter/'
    _FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
    # YouTube extractor used for 'yt-' prefixed ids hosted on YouTube.
    _youtube_ie = None

    def __init__(self, youtube_ie, downloader=None):
        """Create the extractor; youtube_ie handles delegated YouTube ids."""
        InfoExtractor.__init__(self, downloader)
        self._youtube_ie = youtube_ie

    @staticmethod
    def suitable(url):
        """Return True if this extractor can handle the given URL."""
        return (re.match(MetacafeIE._VALID_URL, url) is not None)

    def report_disclaimer(self):
        """Report disclaimer retrieval."""
        self._downloader.to_screen(u'[metacafe] Retrieving disclaimer')

    def report_age_confirmation(self):
        """Report attempt to confirm age."""
        self._downloader.to_screen(u'[metacafe] Confirming age')

    def report_download_webpage(self, video_id):
        """Report webpage download."""
        self._downloader.to_screen(u'[metacafe] %s: Downloading webpage' % video_id)

    def report_extraction(self, video_id):
        """Report information extraction."""
        self._downloader.to_screen(u'[metacafe] %s: Extracting information' % video_id)

    def _real_initialize(self):
        """Fetch the disclaimer page and POST the family-filter form so
        that age-restricted videos become accessible. Failures are
        reported and abort initialization without raising."""
        # Retrieve disclaimer
        request = urllib2.Request(self._DISCLAIMER)
        try:
            self.report_disclaimer()
            disclaimer = urllib2.urlopen(request).read()
        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
            self._downloader.trouble(u'ERROR: unable to retrieve disclaimer: %s' % str(err))
            return

        # Confirm age
        disclaimer_form = {
            'filters': '0',
            'submit': "Continue - I'm over 18",
            }
        request = urllib2.Request(self._FILTER_POST, urllib.urlencode(disclaimer_form))
        try:
            self.report_age_confirmation()
            disclaimer = urllib2.urlopen(request).read()
        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
            self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
            return

    def _real_extract(self, url):
        """Scrape the watch page for the media URL, title and uploader,
        then hand one info dictionary to the downloader."""
        # Extract id and simplified title from URL
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
            return

        video_id = mobj.group(1)

        # Check if video comes from YouTube
        mobj2 = re.match(r'^yt-(.*)$', video_id)
        if mobj2 is not None:
            # Delegate YouTube-hosted videos to the YouTube extractor.
            self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % mobj2.group(1))
            return

        # At this point we have a new video
        self._downloader.increment_downloads()

        simple_title = mobj.group(2).decode('utf-8')

        # Retrieve video webpage to extract further information
        request = urllib2.Request('http://www.metacafe.com/watch/%s/' % video_id)
        try:
            self.report_download_webpage(video_id)
            webpage = urllib2.urlopen(request).read()
        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
            self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))
            return

        # Extract URL, uploader and title from webpage
        self.report_extraction(video_id)
        mobj = re.search(r'(?m)&mediaURL=([^&]+)', webpage)
        if mobj is not None:
            mediaURL = urllib.unquote(mobj.group(1))
            video_extension = mediaURL[-3:]

            # Extract gdaKey if available
            # NOTE(review): gdaKey appears to be an access token appended
            # as the __gda__ query parameter — confirm against the site.
            mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage)
            if mobj is None:
                video_url = mediaURL
            else:
                gdaKey = mobj.group(1)
                video_url = '%s?__gda__=%s' % (mediaURL, gdaKey)
        else:
            # Fall back to the flashvars blob, which carries the media
            # URL and key as a JSON-ish 'mediaData' value.
            mobj = re.search(r' name="flashvars" value="(.*?)"', webpage)
            if mobj is None:
                self._downloader.trouble(u'ERROR: unable to extract media URL')
                return
            vardict = parse_qs(mobj.group(1))
            if 'mediaData' not in vardict:
                self._downloader.trouble(u'ERROR: unable to extract media URL')
                return
            mobj = re.search(r'"mediaURL":"(http.*?)","key":"(.*?)"', vardict['mediaData'][0])
            if mobj is None:
                self._downloader.trouble(u'ERROR: unable to extract media URL')
                return
            mediaURL = mobj.group(1).replace('\\/', '/')
            video_extension = mediaURL[-3:]
            video_url = '%s?__gda__=%s' % (mediaURL, mobj.group(2))

        mobj = re.search(r'(?im)<title>(.*) - Video</title>', webpage)
        if mobj is None:
            self._downloader.trouble(u'ERROR: unable to extract title')
            return
        video_title = mobj.group(1).decode('utf-8')
        video_title = sanitize_title(video_title)

        mobj = re.search(r'(?ms)By:\s*<a .*?>(.+?)<', webpage)
        if mobj is None:
            self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
            return
        video_uploader = mobj.group(1)

        try:
            # Process video information
            self._downloader.process_info({
                'id':       video_id.decode('utf-8'),
                'url':      video_url.decode('utf-8'),
                'uploader': video_uploader.decode('utf-8'),
                'upload_date':  u'NA',
                'title':    video_title,
                'stitle':   simple_title,
                'ext':      video_extension.decode('utf-8'),
                'format':   u'NA',
                'player_url':   None,
            })
        except UnavailableVideoError:
            self._downloader.trouble(u'\nERROR: unable to download video')
  1282.  
  1283.  
class DailymotionIE(InfoExtractor):
    """Information Extractor for Dailymotion"""

    # Group 1 is the video id, group 2 the simplified title from the URL.
    _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^_/]+)_([^/]+)'

    def __init__(self, downloader=None):
        """Create the extractor, delegating setup to InfoExtractor."""
        InfoExtractor.__init__(self, downloader)

    @staticmethod
    def suitable(url):
        """Return True if this extractor can handle the given URL."""
        return (re.match(DailymotionIE._VALID_URL, url) is not None)

    def report_download_webpage(self, video_id):
        """Report webpage download."""
        self._downloader.to_screen(u'[dailymotion] %s: Downloading webpage' % video_id)

    def report_extraction(self, video_id):
        """Report information extraction."""
        self._downloader.to_screen(u'[dailymotion] %s: Extracting information' % video_id)

    def _real_initialize(self):
        # No initialization (no login or age confirmation) is needed.
        return

    def _real_extract(self, url):
        """Scrape the video page for the media URL, title and uploader,
        then hand one info dictionary to the downloader."""
        # Extract id and simplified title from URL
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
            return

        # At this point we have a new video
        self._downloader.increment_downloads()
        video_id = mobj.group(1)

        simple_title = mobj.group(2).decode('utf-8')
        video_extension = 'flv'

        # Retrieve video webpage to extract further information
        request = urllib2.Request(url)
        try:
            self.report_download_webpage(video_id)
            webpage = urllib2.urlopen(request).read()
        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
            self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))
            return

        # Extract URL, uploader and title from webpage
        self.report_extraction(video_id)
        mobj = re.search(r'(?i)addVariable\(\"video\"\s*,\s*\"([^\"]*)\"\)', webpage)
        if mobj is None:
            self._downloader.trouble(u'ERROR: unable to extract media URL')
            return
        mediaURL = urllib.unquote(mobj.group(1))

        # if needed add http://www.dailymotion.com/ if relative URL

        video_url = mediaURL

        # '<meta\s+name="title"\s+content="Dailymotion\s*[:\-]\s*(.*?)"\s*\/\s*>'
        mobj = re.search(r'(?im)<title>Dailymotion\s*[\-:]\s*(.+?)</title>', webpage)
        if mobj is None:
            self._downloader.trouble(u'ERROR: unable to extract title')
            return
        video_title = mobj.group(1).decode('utf-8')
        video_title = sanitize_title(video_title)

        mobj = re.search(r'(?im)<Attribute name="owner">(.+?)</Attribute>', webpage)
        if mobj is None:
            self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
            return
        video_uploader = mobj.group(1)

        try:
            # Process video information
            self._downloader.process_info({
                'id':       video_id.decode('utf-8'),
                'url':      video_url.decode('utf-8'),
                'uploader': video_uploader.decode('utf-8'),
                'upload_date':  u'NA',
                'title':    video_title,
                'stitle':   simple_title,
                'ext':      video_extension.decode('utf-8'),
                'format':   u'NA',
                'player_url':   None,
            })
        except UnavailableVideoError:
            self._downloader.trouble(u'\nERROR: unable to download video')
  1371.  
class GoogleIE(InfoExtractor):
    """Information extractor for video.google.com."""

    # Matches the many national Google Video domains; group 1 is the
    # (possibly negative) docid.
    _VALID_URL = r'(?:http://)?video\.google\.(?:com(?:\.au)?|co\.(?:uk|jp|kr|cr)|ca|de|es|fr|it|nl|pl)/videoplay\?docid=([^\&]+).*'

    def __init__(self, downloader=None):
        InfoExtractor.__init__(self, downloader)

    @staticmethod
    def suitable(url):
        """Return True if this extractor can handle the given URL."""
        return (re.match(GoogleIE._VALID_URL, url) is not None)

    def report_download_webpage(self, video_id):
        """Report webpage download."""
        self._downloader.to_screen(u'[video.google] %s: Downloading webpage' % video_id)

    def report_extraction(self, video_id):
        """Report information extraction."""
        self._downloader.to_screen(u'[video.google] %s: Extracting information' % video_id)

    def _real_initialize(self):
        # No authentication or session setup needed.
        return

    def _real_extract(self, url):
        """Scrape a Google Video page and hand the video info to the downloader."""
        # Extract id from URL
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
            return

        # At this point we have a new video
        self._downloader.increment_downloads()
        video_id = mobj.group(1)

        # Default container; switched to 'flv' below if only the escaped
        # Flash URL variant is found.
        video_extension = 'mp4'

        # Retrieve video webpage to extract further information
        request = urllib2.Request('http://video.google.com/videoplay?docid=%s&hl=en&oe=utf-8' % video_id)
        try:
            self.report_download_webpage(video_id)
            webpage = urllib2.urlopen(request).read()
        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
            self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
            return

        # Extract URL, uploader, and title from webpage
        self.report_extraction(video_id)
        # Preferred: direct download URL embedded in the page's JavaScript.
        mobj = re.search(r"download_url:'([^']+)'", webpage)
        if mobj is None:
            # Fallback: Flash player URL, where '=' and '&' appear as the
            # literal escapes \x3d and \x26.
            video_extension = 'flv'
            mobj = re.search(r"(?i)videoUrl\\x3d(.+?)\\x26", webpage)
        if mobj is None:
            self._downloader.trouble(u'ERROR: unable to extract media URL')
            return
        mediaURL = urllib.unquote(mobj.group(1))
        # Undo the JavaScript hex escaping: '\\x3d' -> '=', '\\x26' -> '&'.
        mediaURL = mediaURL.replace('\\x3d', '\x3d')
        mediaURL = mediaURL.replace('\\x26', '\x26')

        video_url = mediaURL

        mobj = re.search(r'<title>(.*)</title>', webpage)
        if mobj is None:
            self._downloader.trouble(u'ERROR: unable to extract title')
            return
        video_title = mobj.group(1).decode('utf-8')
        video_title = sanitize_title(video_title)
        # Replace every run of non-alphanumeric characters with '_'.
        simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)

        # Extract video description
        # NOTE(review): the description is required here (extraction aborts
        # without it) but is never included in the process_info dict below —
        # possibly intentional validation, possibly leftover code; verify.
        mobj = re.search(r'<span id=short-desc-content>([^<]*)</span>', webpage)
        if mobj is None:
            self._downloader.trouble(u'ERROR: unable to extract video description')
            return
        video_description = mobj.group(1).decode('utf-8')
        if not video_description:
            video_description = 'No description available.'

        # Extract video thumbnail
        if self._downloader.params.get('forcethumbnail', False):
            # The thumbnail is only shown on the search results page, so an
            # extra request is needed; abs() because docids can be negative.
            request = urllib2.Request('http://video.google.com/videosearch?q=%s+site:video.google.com&hl=en' % abs(int(video_id)))
            try:
                webpage = urllib2.urlopen(request).read()
            except (urllib2.URLError, httplib.HTTPException, socket.error), err:
                self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
                return
            mobj = re.search(r'<img class=thumbnail-img (?:.* )?src=(http.*)>', webpage)
            if mobj is None:
                self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
                return
            video_thumbnail = mobj.group(1)
        else:   # we need something to pass to process_info
            video_thumbnail = ''


        try:
            # Process video information
            self._downloader.process_info({
                'id':       video_id.decode('utf-8'),
                'url':      video_url.decode('utf-8'),
                'uploader': u'NA',
                'upload_date':  u'NA',
                'title':    video_title,
                'stitle':   simple_title,
                'ext':      video_extension.decode('utf-8'),
                'format':   u'NA',
                'player_url':   None,
            })
        except UnavailableVideoError:
            self._downloader.trouble(u'\nERROR: unable to download video')
  1481.  
  1482.  
  1483. class PhotobucketIE(InfoExtractor):
  1484.     """Information extractor for photobucket.com."""
  1485.  
  1486.     _VALID_URL = r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*[\?\&]current=(.*\.flv)'
  1487.  
  1488.     def __init__(self, downloader=None):
  1489.         InfoExtractor.__init__(self, downloader)
  1490.  
  1491.     @staticmethod
  1492.     def suitable(url):
  1493.         return (re.match(PhotobucketIE._VALID_URL, url) is not None)
  1494.  
  1495.     def report_download_webpage(self, video_id):
  1496.         """Report webpage download."""
  1497.         self._downloader.to_screen(u'[photobucket] %s: Downloading webpage' % video_id)
  1498.  
  1499.     def report_extraction(self, video_id):
  1500.         """Report information extraction."""
  1501.         self._downloader.to_screen(u'[photobucket] %s: Extracting information' % video_id)
  1502.  
  1503.     def _real_initialize(self):
  1504.         return
  1505.  
  1506.     def _real_extract(self, url):
  1507.         # Extract id from URL
  1508.         mobj = re.match(self._VALID_URL, url)
  1509.         if mobj is None:
  1510.             self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
  1511.             return
  1512.  
  1513.         # At this point we have a new video
  1514.         self._downloader.increment_downloads()
  1515.         video_id = mobj.group(1)
  1516.  
  1517.         video_extension = 'flv'
  1518.  
  1519.         # Retrieve video webpage to extract further information
  1520.         request = urllib2.Request(url)
  1521.         try:
  1522.             self.report_download_webpage(video_id)
  1523.             webpage = urllib2.urlopen(request).read()
  1524.         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
  1525.             self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
  1526.             return
  1527.  
  1528.         # Extract URL, uploader, and title from webpage
  1529.         self.report_extraction(video_id)
  1530.         mobj = re.search(r'<link rel="video_src" href=".*\?file=([^"]+)" />', webpage)
  1531.         if mobj is None:
  1532.             self._downloader.trouble(u'ERROR: unable to extract media URL')
  1533.             return
  1534.         mediaURL = urllib.unquote(mobj.group(1))
  1535.  
  1536.         video_url = mediaURL
  1537.  
  1538.         mobj = re.search(r'<title>(.*) video by (.*) - Photobucket</title>', webpage)
  1539.         if mobj is None:
  1540.             self._downloader.trouble(u'ERROR: unable to extract title')
  1541.             return
  1542.         video_title = mobj.group(1).decode('utf-8')
  1543.         video_title = sanitize_title(video_title)
  1544.         simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
  1545.  
  1546.         video_uploader = mobj.group(2).decode('utf-8')
  1547.  
  1548.         try:
  1549.             # Process video information
  1550.             self._downloader.process_info({
  1551.                 'id':       video_id.decode('utf-8'),
  1552.                 'url':      video_url.decode('utf-8'),
  1553.                 'uploader': video_uploader,
  1554.                 'upload_date':  u'NA',
  1555.                 'title':    video_title,
  1556.                 'stitle':   simple_title,
  1557.                 'ext':      video_extension.decode('utf-8'),
  1558.                 'format':   u'NA',
  1559.                 'player_url':   None,
  1560.             })
  1561.         except UnavailableVideoError:
  1562.             self._downloader.trouble(u'\nERROR: unable to download video')
  1563.  
  1564.  
  1565. class YahooIE(InfoExtractor):
  1566.     """Information extractor for video.yahoo.com."""
  1567.  
  1568.     # _VALID_URL matches all Yahoo! Video URLs
  1569.     # _VPAGE_URL matches only the extractable '/watch/' URLs
  1570.     _VALID_URL = r'(?:http://)?(?:[a-z]+\.)?video\.yahoo\.com/(?:watch|network)/([0-9]+)(?:/|\?v=)([0-9]+)(?:[#\?].*)?'
  1571.     _VPAGE_URL = r'(?:http://)?video\.yahoo\.com/watch/([0-9]+)/([0-9]+)(?:[#\?].*)?'
  1572.  
  1573.     def __init__(self, downloader=None):
  1574.         InfoExtractor.__init__(self, downloader)
  1575.  
  1576.     @staticmethod
  1577.     def suitable(url):
  1578.         return (re.match(YahooIE._VALID_URL, url) is not None)
  1579.  
  1580.     def report_download_webpage(self, video_id):
  1581.         """Report webpage download."""
  1582.         self._downloader.to_screen(u'[video.yahoo] %s: Downloading webpage' % video_id)
  1583.  
  1584.     def report_extraction(self, video_id):
  1585.         """Report information extraction."""
  1586.         self._downloader.to_screen(u'[video.yahoo] %s: Extracting information' % video_id)
  1587.  
  1588.     def _real_initialize(self):
  1589.         return
  1590.  
  1591.     def _real_extract(self, url, new_video=True):
  1592.         # Extract ID from URL
  1593.         mobj = re.match(self._VALID_URL, url)
  1594.         if mobj is None:
  1595.             self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
  1596.             return
  1597.  
  1598.         # At this point we have a new video
  1599.         self._downloader.increment_downloads()
  1600.         video_id = mobj.group(2)
  1601.         video_extension = 'flv'
  1602.  
  1603.         # Rewrite valid but non-extractable URLs as
  1604.         # extractable English language /watch/ URLs
  1605.         if re.match(self._VPAGE_URL, url) is None:
  1606.             request = urllib2.Request(url)
  1607.             try:
  1608.                 webpage = urllib2.urlopen(request).read()
  1609.             except (urllib2.URLError, httplib.HTTPException, socket.error), err:
  1610.                 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
  1611.                 return
  1612.  
  1613.             mobj = re.search(r'\("id", "([0-9]+)"\);', webpage)
  1614.             if mobj is None:
  1615.                 self._downloader.trouble(u'ERROR: Unable to extract id field')
  1616.                 return
  1617.             yahoo_id = mobj.group(1)
  1618.  
  1619.             mobj = re.search(r'\("vid", "([0-9]+)"\);', webpage)
  1620.             if mobj is None:
  1621.                 self._downloader.trouble(u'ERROR: Unable to extract vid field')
  1622.                 return
  1623.             yahoo_vid = mobj.group(1)
  1624.  
  1625.             url = 'http://video.yahoo.com/watch/%s/%s' % (yahoo_vid, yahoo_id)
  1626.             return self._real_extract(url, new_video=False)
  1627.  
  1628.         # Retrieve video webpage to extract further information
  1629.         request = urllib2.Request(url)
  1630.         try:
  1631.             self.report_download_webpage(video_id)
  1632.             webpage = urllib2.urlopen(request).read()
  1633.         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
  1634.             self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
  1635.             return
  1636.  
  1637.         # Extract uploader and title from webpage
  1638.         self.report_extraction(video_id)
  1639.         mobj = re.search(r'<meta name="title" content="(.*)" />', webpage)
  1640.         if mobj is None:
  1641.             self._downloader.trouble(u'ERROR: unable to extract video title')
  1642.             return
  1643.         video_title = mobj.group(1).decode('utf-8')
  1644.         simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
  1645.  
  1646.         mobj = re.search(r'<h2 class="ti-5"><a href="http://video\.yahoo\.com/(people|profile)/[0-9]+" beacon=".*">(.*)</a></h2>', webpage)
  1647.         if mobj is None:
  1648.             self._downloader.trouble(u'ERROR: unable to extract video uploader')
  1649.             return
  1650.         video_uploader = mobj.group(1).decode('utf-8')
  1651.  
  1652.         # Extract video thumbnail
  1653.         mobj = re.search(r'<link rel="image_src" href="(.*)" />', webpage)
  1654.         if mobj is None:
  1655.             self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
  1656.             return
  1657.         video_thumbnail = mobj.group(1).decode('utf-8')
  1658.  
  1659.         # Extract video description
  1660.         mobj = re.search(r'<meta name="description" content="(.*)" />', webpage)
  1661.         if mobj is None:
  1662.             self._downloader.trouble(u'ERROR: unable to extract video description')
  1663.             return
  1664.         video_description = mobj.group(1).decode('utf-8')
  1665.         if not video_description: video_description = 'No description available.'
  1666.  
  1667.         # Extract video height and width
  1668.         mobj = re.search(r'<meta name="video_height" content="([0-9]+)" />', webpage)
  1669.         if mobj is None:
  1670.             self._downloader.trouble(u'ERROR: unable to extract video height')
  1671.             return
  1672.         yv_video_height = mobj.group(1)
  1673.  
  1674.         mobj = re.search(r'<meta name="video_width" content="([0-9]+)" />', webpage)
  1675.         if mobj is None:
  1676.             self._downloader.trouble(u'ERROR: unable to extract video width')
  1677.             return
  1678.         yv_video_width = mobj.group(1)
  1679.  
  1680.         # Retrieve video playlist to extract media URL
  1681.         # I'm not completely sure what all these options are, but we
  1682.         # seem to need most of them, otherwise the server sends a 401.
  1683.         yv_lg = 'R0xx6idZnW2zlrKP8xxAIR'  # not sure what this represents
  1684.         yv_bitrate = '700'  # according to Wikipedia this is hard-coded
  1685.         request = urllib2.Request('http://cosmos.bcst.yahoo.com/up/yep/process/getPlaylistFOP.php?node_id=' + video_id +
  1686.                           '&tech=flash&mode=playlist&lg=' + yv_lg + '&bitrate=' + yv_bitrate + '&vidH=' + yv_video_height +
  1687.                       '&vidW=' + yv_video_width + '&swf=as3&rd=video.yahoo.com&tk=null&adsupported=v1,v2,&eventid=1301797')
  1688.         try:
  1689.             self.report_download_webpage(video_id)
  1690.             webpage = urllib2.urlopen(request).read()
  1691.         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
  1692.             self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
  1693.             return
  1694.  
  1695.         # Extract media URL from playlist XML
  1696.         mobj = re.search(r'<STREAM APP="(http://.*)" FULLPATH="/?(/.*\.flv\?[^"]*)"', webpage)
  1697.         if mobj is None:
  1698.             self._downloader.trouble(u'ERROR: Unable to extract media URL')
  1699.             return
  1700.         video_url = urllib.unquote(mobj.group(1) + mobj.group(2)).decode('utf-8')
  1701.         video_url = re.sub(r'(?u)&(.+?);', htmlentity_transform, video_url)
  1702.  
  1703.         try:
  1704.             # Process video information
  1705.             self._downloader.process_info({
  1706.                 'id':       video_id.decode('utf-8'),
  1707.                 'url':      video_url,
  1708.                 'uploader': video_uploader,
  1709.                 'upload_date':  u'NA',
  1710.                 'title':    video_title,
  1711.                 'stitle':   simple_title,
  1712.                 'ext':      video_extension.decode('utf-8'),
  1713.                 'thumbnail':    video_thumbnail.decode('utf-8'),
  1714.                 'description':  video_description,
  1715.                 'thumbnail':    video_thumbnail,
  1716.                 'description':  video_description,
  1717.                 'player_url':   None,
  1718.             })
  1719.         except UnavailableVideoError:
  1720.             self._downloader.trouble(u'\nERROR: unable to download video')
  1721.  
  1722.  
  1723. class GenericIE(InfoExtractor):
  1724.     """Generic last-resort information extractor."""
  1725.  
  1726.     def __init__(self, downloader=None):
  1727.         InfoExtractor.__init__(self, downloader)
  1728.  
  1729.     @staticmethod
  1730.     def suitable(url):
  1731.         return True
  1732.  
  1733.     def report_download_webpage(self, video_id):
  1734.         """Report webpage download."""
  1735.         self._downloader.to_screen(u'WARNING: Falling back on generic information extractor.')
  1736.         self._downloader.to_screen(u'[generic] %s: Downloading webpage' % video_id)
  1737.  
  1738.     def report_extraction(self, video_id):
  1739.         """Report information extraction."""
  1740.         self._downloader.to_screen(u'[generic] %s: Extracting information' % video_id)
  1741.  
  1742.     def _real_initialize(self):
  1743.         return
  1744.  
  1745.     def _real_extract(self, url):
  1746.         # At this point we have a new video
  1747.         self._downloader.increment_downloads()
  1748.  
  1749.         video_id = url.split('/')[-1]
  1750.         request = urllib2.Request(url)
  1751.         try:
  1752.             self.report_download_webpage(video_id)
  1753.             webpage = urllib2.urlopen(request).read()
  1754.         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
  1755.             self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
  1756.             return
  1757.         except ValueError, err:
  1758.             # since this is the last-resort InfoExtractor, if
  1759.             # this error is thrown, it'll be thrown here
  1760.             self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
  1761.             return
  1762.  
  1763.         self.report_extraction(video_id)
  1764.         # Start with something easy: JW Player in SWFObject
  1765.         mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
  1766.         if mobj is None:
  1767.             # Broaden the search a little bit
  1768.             mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)
  1769.         if mobj is None:
  1770.             self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
  1771.             return
  1772.  
  1773.         # It's possible that one of the regexes
  1774.         # matched, but returned an empty group:
  1775.         if mobj.group(1) is None:
  1776.             self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
  1777.             return
  1778.  
  1779.         video_url = urllib.unquote(mobj.group(1))
  1780.         video_id  = os.path.basename(video_url)
  1781.  
  1782.         # here's a fun little line of code for you:
  1783.         video_extension = os.path.splitext(video_id)[1][1:]
  1784.         video_id        = os.path.splitext(video_id)[0]
  1785.  
  1786.         # it's tempting to parse this further, but you would
  1787.         # have to take into account all the variations like
  1788.         #   Video Title - Site Name
  1789.         #   Site Name | Video Title
  1790.         #   Video Title - Tagline | Site Name
  1791.         # and so on and so forth; it's just not practical
  1792.         mobj = re.search(r'<title>(.*)</title>', webpage)
  1793.         if mobj is None:
  1794.             self._downloader.trouble(u'ERROR: unable to extract title')
  1795.             return
  1796.         video_title = mobj.group(1).decode('utf-8')
  1797.         video_title = sanitize_title(video_title)
  1798.         simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
  1799.  
  1800.         # video uploader is domain name
  1801.         mobj = re.match(r'(?:https?://)?([^/]*)/.*', url)
  1802.         if mobj is None:
  1803.             self._downloader.trouble(u'ERROR: unable to extract title')
  1804.             return
  1805.         video_uploader = mobj.group(1).decode('utf-8')
  1806.  
  1807.         try:
  1808.             # Process video information
  1809.             self._downloader.process_info({
  1810.                 'id':       video_id.decode('utf-8'),
  1811.                 'url':      video_url.decode('utf-8'),
  1812.                 'uploader': video_uploader,
  1813.                 'upload_date':  u'NA',
  1814.                 'title':    video_title,
  1815.                 'stitle':   simple_title,
  1816.                 'ext':      video_extension.decode('utf-8'),
  1817.                 'format':   u'NA',
  1818.                 'player_url':   None,
  1819.             })
  1820.         except UnavailableVideoError, err:
  1821.             self._downloader.trouble(u'\nERROR: unable to download video')
  1822.  
  1823.  
class YoutubeSearchIE(InfoExtractor):
    """Information Extractor for YouTube search queries."""
    # Query syntax: 'ytsearch:TERMS', 'ytsearchN:TERMS', 'ytsearchall:TERMS'.
    _VALID_QUERY = r'ytsearch(\d+|all)?:[\s\S]+'
    _TEMPLATE_URL = 'http://www.youtube.com/results?search_query=%s&page=%s&gl=US&hl=en'
    # Matches the whole href attribute of a result link; the id is sliced
    # out of the matched text in _download_n_results.
    _VIDEO_INDICATOR = r'href="/watch\?v=.+?"'
    _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
    # Delegate extractor for the individual videos found.
    _youtube_ie = None
    # Hard cap on how many results one query may fetch.
    _max_youtube_results = 1000

    def __init__(self, youtube_ie, downloader=None):
        InfoExtractor.__init__(self, downloader)
        self._youtube_ie = youtube_ie

    @staticmethod
    def suitable(url):
        """Return True if this extractor can handle the given query string."""
        return (re.match(YoutubeSearchIE._VALID_QUERY, url) is not None)

    def report_download_page(self, query, pagenum):
        """Report attempt to download playlist page with given number."""
        query = query.decode(preferredencoding())
        self._downloader.to_screen(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))

    def _real_initialize(self):
        self._youtube_ie.initialize()

    def _real_extract(self, query):
        """Parse the 'ytsearch[N|all]:terms' query and dispatch the download."""
        mobj = re.match(self._VALID_QUERY, query)
        if mobj is None:
            self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
            return

        # NOTE(review): a colon inside the search terms would make this
        # unpacking raise ValueError — presumably queries never contain one.
        prefix, query = query.split(':')
        # 'ytsearch' is 8 characters; what remains is '', 'all', or a count.
        prefix = prefix[8:]
        query  = query.encode('utf-8')
        if prefix == '':
            self._download_n_results(query, 1)
            return
        elif prefix == 'all':
            self._download_n_results(query, self._max_youtube_results)
            return
        else:
            try:
                n = long(prefix)
                if n <= 0:
                    self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
                    return
                elif n > self._max_youtube_results:
                    # Clamp oversized requests instead of failing.
                    self._downloader.to_stderr(u'WARNING: ytsearch returns max %i results (you requested %i)'  % (self._max_youtube_results, n))
                    n = self._max_youtube_results
                self._download_n_results(query, n)
                return
            except ValueError: # parsing prefix as integer fails
                self._download_n_results(query, 1)
                return

    def _download_n_results(self, query, n):
        """Downloads a specified number of results for a query"""

        video_ids = []
        already_seen = set()
        pagenum = 1

        while True:
            self.report_download_page(query, pagenum)
            result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
            request = urllib2.Request(result_url)
            try:
                page = urllib2.urlopen(request).read()
            except (urllib2.URLError, httplib.HTTPException, socket.error), err:
                self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
                return

            # Extract video identifiers
            for mobj in re.finditer(self._VIDEO_INDICATOR, page):
                # The match is the full 'href="/watch?v=ID"' text: split on
                # '=' gives ['href"', '/watch?v', 'ID"']; take the third
                # piece and drop its trailing quote to get the id.
                video_id = page[mobj.span()[0]:mobj.span()[1]].split('=')[2][:-1]
                if video_id not in already_seen:
                    video_ids.append(video_id)
                    already_seen.add(video_id)
                    if len(video_ids) == n:
                        # Specified n videos reached
                        for id in video_ids:
                            self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
                        return

            if re.search(self._MORE_PAGES_INDICATOR, page) is None:
                # No further pages: download whatever was collected.
                for id in video_ids:
                    self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
                return

            pagenum = pagenum + 1
  1914.  
  1915. class GoogleSearchIE(InfoExtractor):
  1916.     """Information Extractor for Google Video search queries."""
  1917.     _VALID_QUERY = r'gvsearch(\d+|all)?:[\s\S]+'
  1918.     _TEMPLATE_URL = 'http://video.google.com/videosearch?q=%s+site:video.google.com&start=%s&hl=en'
  1919.     _VIDEO_INDICATOR = r'videoplay\?docid=([^\&>]+)\&'
  1920.     _MORE_PAGES_INDICATOR = r'<span>Next</span>'
  1921.     _google_ie = None
  1922.     _max_google_results = 1000
  1923.  
  1924.     def __init__(self, google_ie, downloader=None):
  1925.         InfoExtractor.__init__(self, downloader)
  1926.         self._google_ie = google_ie
  1927.  
  1928.     @staticmethod
  1929.     def suitable(url):
  1930.         return (re.match(GoogleSearchIE._VALID_QUERY, url) is not None)
  1931.  
  1932.     def report_download_page(self, query, pagenum):
  1933.         """Report attempt to download playlist page with given number."""
  1934.         query = query.decode(preferredencoding())
  1935.         self._downloader.to_screen(u'[video.google] query "%s": Downloading page %s' % (query, pagenum))
  1936.  
  1937.     def _real_initialize(self):
  1938.         self._google_ie.initialize()
  1939.  
  1940.     def _real_extract(self, query):
  1941.         mobj = re.match(self._VALID_QUERY, query)
  1942.         if mobj is None:
  1943.             self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
  1944.             return
  1945.  
  1946.         prefix, query = query.split(':')
  1947.         prefix = prefix[8:]
  1948.         query  = query.encode('utf-8')
  1949.         if prefix == '':
  1950.             self._download_n_results(query, 1)
  1951.             return
  1952.         elif prefix == 'all':
  1953.             self._download_n_results(query, self._max_google_results)
  1954.             return
  1955.         else:
  1956.             try:
  1957.                 n = long(prefix)
  1958.                 if n <= 0:
  1959.                     self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
  1960.                     return
  1961.                 elif n > self._max_google_results:
  1962.                     self._downloader.to_stderr(u'WARNING: gvsearch returns max %i results (you requested %i)'  % (self._max_google_results, n))
  1963.                     n = self._max_google_results
  1964.                 self._download_n_results(query, n)
  1965.                 return
  1966.             except ValueError: # parsing prefix as integer fails
  1967.                 self._download_n_results(query, 1)
  1968.                 return
  1969.  
  1970.     def _download_n_results(self, query, n):
  1971.         """Downloads a specified number of results for a query"""
  1972.  
  1973.         video_ids = []
  1974.         already_seen = set()
  1975.         pagenum = 1
  1976.  
  1977.         while True:
  1978.             self.report_download_page(query, pagenum)
  1979.             result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
  1980.             request = urllib2.Request(result_url)
  1981.             try:
  1982.                 page = urllib2.urlopen(request).read()
  1983.             except (urllib2.URLError, httplib.HTTPException, socket.error), err:
  1984.                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
  1985.                 return
  1986.  
  1987.             # Extract video identifiers
  1988.             for mobj in re.finditer(self._VIDEO_INDICATOR, page):
  1989.                 video_id = mobj.group(1)
  1990.                 if video_id not in already_seen:
  1991.                     video_ids.append(video_id)
  1992.                     already_seen.add(video_id)
  1993.                     if len(video_ids) == n:
  1994.                         # Specified n videos reached
  1995.                         for id in video_ids:
  1996.                             self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id)
  1997.                         return
  1998.  
  1999.             if re.search(self._MORE_PAGES_INDICATOR, page) is None:
  2000.                 for id in video_ids:
  2001.                     self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id)
  2002.                 return
  2003.  
  2004.             pagenum = pagenum + 1
  2005.  
  2006. class YahooSearchIE(InfoExtractor):
  2007.     """Information Extractor for Yahoo! Video search queries."""
  2008.     _VALID_QUERY = r'yvsearch(\d+|all)?:[\s\S]+'
  2009.     _TEMPLATE_URL = 'http://video.yahoo.com/search/?p=%s&o=%s'
  2010.     _VIDEO_INDICATOR = r'href="http://video\.yahoo\.com/watch/([0-9]+/[0-9]+)"'
  2011.     _MORE_PAGES_INDICATOR = r'\s*Next'
  2012.     _yahoo_ie = None
  2013.     _max_yahoo_results = 1000
  2014.  
  2015.     def __init__(self, yahoo_ie, downloader=None):
  2016.         InfoExtractor.__init__(self, downloader)
  2017.         self._yahoo_ie = yahoo_ie
  2018.  
  2019.     @staticmethod
  2020.     def suitable(url):
  2021.         return (re.match(YahooSearchIE._VALID_QUERY, url) is not None)
  2022.  
  2023.     def report_download_page(self, query, pagenum):
  2024.         """Report attempt to download playlist page with given number."""
  2025.         query = query.decode(preferredencoding())
  2026.         self._downloader.to_screen(u'[video.yahoo] query "%s": Downloading page %s' % (query, pagenum))
  2027.  
  2028.     def _real_initialize(self):
  2029.         self._yahoo_ie.initialize()
  2030.  
  2031.     def _real_extract(self, query):
  2032.         mobj = re.match(self._VALID_QUERY, query)
  2033.         if mobj is None:
  2034.             self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
  2035.             return
  2036.  
  2037.         prefix, query = query.split(':')
  2038.         prefix = prefix[8:]
  2039.         query  = query.encode('utf-8')
  2040.         if prefix == '':
  2041.             self._download_n_results(query, 1)
  2042.             return
  2043.         elif prefix == 'all':
  2044.             self._download_n_results(query, self._max_yahoo_results)
  2045.             return
  2046.         else:
  2047.             try:
  2048.                 n = long(prefix)
  2049.                 if n <= 0:
  2050.                     self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
  2051.                     return
  2052.                 elif n > self._max_yahoo_results:
  2053.                     self._downloader.to_stderr(u'WARNING: yvsearch returns max %i results (you requested %i)'  % (self._max_yahoo_results, n))
  2054.                     n = self._max_yahoo_results
  2055.                 self._download_n_results(query, n)
  2056.                 return
  2057.             except ValueError: # parsing prefix as integer fails
  2058.                 self._download_n_results(query, 1)
  2059.                 return
  2060.  
  2061.     def _download_n_results(self, query, n):
  2062.         """Downloads a specified number of results for a query"""
  2063.  
  2064.         video_ids = []
  2065.         already_seen = set()
  2066.         pagenum = 1
  2067.  
  2068.         while True:
  2069.             self.report_download_page(query, pagenum)
  2070.             result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
  2071.             request = urllib2.Request(result_url)
  2072.             try:
  2073.                 page = urllib2.urlopen(request).read()
  2074.             except (urllib2.URLError, httplib.HTTPException, socket.error), err:
  2075.                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
  2076.                 return
  2077.  
  2078.             # Extract video identifiers
  2079.             for mobj in re.finditer(self._VIDEO_INDICATOR, page):
  2080.                 video_id = mobj.group(1)
  2081.                 if video_id not in already_seen:
  2082.                     video_ids.append(video_id)
  2083.                     already_seen.add(video_id)
  2084.                     if len(video_ids) == n:
  2085.                         # Specified n videos reached
  2086.                         for id in video_ids:
  2087.                             self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id)
  2088.                         return
  2089.  
  2090.             if re.search(self._MORE_PAGES_INDICATOR, page) is None:
  2091.                 for id in video_ids:
  2092.                     self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id)
  2093.                 return
  2094.  
  2095.             pagenum = pagenum + 1
  2096.  
  2097. class YoutubePlaylistIE(InfoExtractor):
  2098.     """Information Extractor for YouTube playlists."""
  2099.  
  2100.     _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:(?:view_play_list|my_playlists|artist)\?.*?(p|a)=|user/.*?/user/|p/|user/.*?#[pg]/c/)([0-9A-Za-z]+)(?:/.*?/([0-9A-Za-z_-]+))?.*'
  2101.     _TEMPLATE_URL = 'http://www.youtube.com/%s?%s=%s&page=%s&gl=US&hl=en'
  2102.     _VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
  2103.     _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
  2104.     _youtube_ie = None
  2105.  
  2106.     def __init__(self, youtube_ie, downloader=None):
  2107.         InfoExtractor.__init__(self, downloader)
  2108.         self._youtube_ie = youtube_ie
  2109.  
  2110.     @staticmethod
  2111.     def suitable(url):
  2112.         return (re.match(YoutubePlaylistIE._VALID_URL, url) is not None)
  2113.  
  2114.     def report_download_page(self, playlist_id, pagenum):
  2115.         """Report attempt to download playlist page with given number."""
  2116.         self._downloader.to_screen(u'[youtube] PL %s: Downloading page #%s' % (playlist_id, pagenum))
  2117.  
  2118.     def _real_initialize(self):
  2119.         self._youtube_ie.initialize()
  2120.  
  2121.     def _real_extract(self, url):
  2122.         # Extract playlist id
  2123.         mobj = re.match(self._VALID_URL, url)
  2124.         if mobj is None:
  2125.             self._downloader.trouble(u'ERROR: invalid url: %s' % url)
  2126.             return
  2127.  
  2128.         # Single video case
  2129.         if mobj.group(3) is not None:
  2130.             self._youtube_ie.extract(mobj.group(3))
  2131.             return
  2132.  
  2133.         # Download playlist pages
  2134.         # prefix is 'p' as default for playlists but there are other types that need extra care
  2135.         playlist_prefix = mobj.group(1)
  2136.         if playlist_prefix == 'a':
  2137.             playlist_access = 'artist'
  2138.         else:
  2139.             playlist_prefix = 'p'
  2140.             playlist_access = 'view_play_list'
  2141.         playlist_id = mobj.group(2)
  2142.         video_ids = []
  2143.         pagenum = 1
  2144.  
  2145.         while True:
  2146.             self.report_download_page(playlist_id, pagenum)
  2147.             request = urllib2.Request(self._TEMPLATE_URL % (playlist_access, playlist_prefix, playlist_id, pagenum))
  2148.             try:
  2149.                 page = urllib2.urlopen(request).read()
  2150.             except (urllib2.URLError, httplib.HTTPException, socket.error), err:
  2151.                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
  2152.                 return
  2153.  
  2154.             # Extract video identifiers
  2155.             ids_in_page = []
  2156.             for mobj in re.finditer(self._VIDEO_INDICATOR, page):
  2157.                 if mobj.group(1) not in ids_in_page:
  2158.                     ids_in_page.append(mobj.group(1))
  2159.             video_ids.extend(ids_in_page)
  2160.  
  2161.             if re.search(self._MORE_PAGES_INDICATOR, page) is None:
  2162.                 break
  2163.             pagenum = pagenum + 1
  2164.  
  2165.         playliststart = self._downloader.params.get('playliststart', 1) - 1
  2166.         playlistend = self._downloader.params.get('playlistend', -1)
  2167.         video_ids = video_ids[playliststart:playlistend]
  2168.  
  2169.         for id in video_ids:
  2170.             self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
  2171.         return
  2172.  
  2173. class YoutubeUserIE(InfoExtractor):
  2174.     """Information Extractor for YouTube users."""
  2175.  
  2176.     _VALID_URL = r'(?:(?:(?:http://)?(?:\w+\.)?youtube.com/user/)|ytuser:)([A-Za-z0-9_-]+)'
  2177.     _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
  2178.     _GDATA_PAGE_SIZE = 50
  2179.     _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d'
  2180.     _VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
  2181.     _youtube_ie = None
  2182.  
  2183.     def __init__(self, youtube_ie, downloader=None):
  2184.         InfoExtractor.__init__(self, downloader)
  2185.         self._youtube_ie = youtube_ie
  2186.  
  2187.     @staticmethod
  2188.     def suitable(url):
  2189.         return (re.match(YoutubeUserIE._VALID_URL, url) is not None)
  2190.  
  2191.     def report_download_page(self, username, start_index):
  2192.         """Report attempt to download user page."""
  2193.         self._downloader.to_screen(u'[youtube] user %s: Downloading video ids from %d to %d' %
  2194.                            (username, start_index, start_index + self._GDATA_PAGE_SIZE))
  2195.  
  2196.     def _real_initialize(self):
  2197.         self._youtube_ie.initialize()
  2198.  
  2199.     def _real_extract(self, url):
  2200.         # Extract username
  2201.         mobj = re.match(self._VALID_URL, url)
  2202.         if mobj is None:
  2203.             self._downloader.trouble(u'ERROR: invalid url: %s' % url)
  2204.             return
  2205.  
  2206.         username = mobj.group(1)
  2207.  
  2208.         # Download video ids using YouTube Data API. Result size per
  2209.         # query is limited (currently to 50 videos) so we need to query
  2210.         # page by page until there are no video ids - it means we got
  2211.         # all of them.
  2212.  
  2213.         video_ids = []
  2214.         pagenum = 0
  2215.  
  2216.         while True:
  2217.             start_index = pagenum * self._GDATA_PAGE_SIZE + 1
  2218.             self.report_download_page(username, start_index)
  2219.  
  2220.             request = urllib2.Request(self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index))
  2221.  
  2222.             try:
  2223.                 page = urllib2.urlopen(request).read()
  2224.             except (urllib2.URLError, httplib.HTTPException, socket.error), err:
  2225.                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
  2226.                 return
  2227.  
  2228.             # Extract video identifiers
  2229.             ids_in_page = []
  2230.  
  2231.             for mobj in re.finditer(self._VIDEO_INDICATOR, page):
  2232.                 if mobj.group(1) not in ids_in_page:
  2233.                     ids_in_page.append(mobj.group(1))
  2234.  
  2235.             video_ids.extend(ids_in_page)
  2236.  
  2237.             # A little optimization - if current page is not
  2238.             # "full", ie. does not contain PAGE_SIZE video ids then
  2239.             # we can assume that this page is the last one - there
  2240.             # are no more ids on further pages - no need to query
  2241.             # again.
  2242.  
  2243.             if len(ids_in_page) < self._GDATA_PAGE_SIZE:
  2244.                 break
  2245.  
  2246.             pagenum += 1
  2247.  
  2248.         all_ids_count = len(video_ids)
  2249.         playliststart = self._downloader.params.get('playliststart', 1) - 1
  2250.         playlistend = self._downloader.params.get('playlistend', -1)
  2251.  
  2252.         if playlistend == -1:
  2253.             video_ids = video_ids[playliststart:]
  2254.         else:
  2255.             video_ids = video_ids[playliststart:playlistend]
  2256.            
  2257.         self._downloader.to_screen("[youtube] user %s: Collected %d video ids (downloading %d of them)" %
  2258.                            (username, all_ids_count, len(video_ids)))
  2259.  
  2260.         for video_id in video_ids:
  2261.             self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % video_id)
  2262.  
  2263.  
  2264. class DepositFilesIE(InfoExtractor):
  2265.     """Information extractor for depositfiles.com"""
  2266.  
  2267.     _VALID_URL = r'(?:http://)?(?:\w+\.)?depositfiles.com/(?:../(?#locale))?files/(.+)'
  2268.  
  2269.     def __init__(self, downloader=None):
  2270.         InfoExtractor.__init__(self, downloader)
  2271.  
  2272.     @staticmethod
  2273.     def suitable(url):
  2274.         return (re.match(DepositFilesIE._VALID_URL, url) is not None)
  2275.  
  2276.     def report_download_webpage(self, file_id):
  2277.         """Report webpage download."""
  2278.         self._downloader.to_screen(u'[DepositFiles] %s: Downloading webpage' % file_id)
  2279.  
  2280.     def report_extraction(self, file_id):
  2281.         """Report information extraction."""
  2282.         self._downloader.to_screen(u'[DepositFiles] %s: Extracting information' % file_id)
  2283.  
  2284.     def _real_initialize(self):
  2285.         return
  2286.  
  2287.     def _real_extract(self, url):
  2288.         # At this point we have a new file
  2289.         self._downloader.increment_downloads()
  2290.  
  2291.         file_id = url.split('/')[-1]
  2292.         # Rebuild url in english locale
  2293.         url = 'http://depositfiles.com/en/files/' + file_id
  2294.  
  2295.         # Retrieve file webpage with 'Free download' button pressed
  2296.         free_download_indication = { 'gateway_result' : '1' }
  2297.         request = urllib2.Request(url, urllib.urlencode(free_download_indication))
  2298.         try:
  2299.             self.report_download_webpage(file_id)
  2300.             webpage = urllib2.urlopen(request).read()
  2301.         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
  2302.             self._downloader.trouble(u'ERROR: Unable to retrieve file webpage: %s' % str(err))
  2303.             return
  2304.  
  2305.         # Search for the real file URL
  2306.         mobj = re.search(r'<form action="(http://fileshare.+?)"', webpage)
  2307.         if (mobj is None) or (mobj.group(1) is None):
  2308.             # Try to figure out reason of the error.
  2309.             mobj = re.search(r'<strong>(Attention.*?)</strong>', webpage, re.DOTALL)
  2310.             if (mobj is not None) and (mobj.group(1) is not None):
  2311.                 restriction_message = re.sub('\s+', ' ', mobj.group(1)).strip()
  2312.                 self._downloader.trouble(u'ERROR: %s' % restriction_message)
  2313.             else:
  2314.                 self._downloader.trouble(u'ERROR: unable to extract download URL from: %s' % url)
  2315.             return
  2316.  
  2317.         file_url = mobj.group(1)
  2318.         file_extension = os.path.splitext(file_url)[1][1:]
  2319.  
  2320.         # Search for file title
  2321.         mobj = re.search(r'<b title="(.*?)">', webpage)
  2322.         if mobj is None:
  2323.             self._downloader.trouble(u'ERROR: unable to extract title')
  2324.             return
  2325.         file_title = mobj.group(1).decode('utf-8')
  2326.  
  2327.         try:
  2328.             # Process file information
  2329.             self._downloader.process_info({
  2330.                 'id':       file_id.decode('utf-8'),
  2331.                 'url':      file_url.decode('utf-8'),
  2332.                 'uploader': u'NA',
  2333.                 'upload_date':  u'NA',
  2334.                 'title':    file_title,
  2335.                 'stitle':   file_title,
  2336.                 'ext':      file_extension.decode('utf-8'),
  2337.                 'format':   u'NA',
  2338.                 'player_url':   None,
  2339.             })
  2340.         except UnavailableVideoError, err:
  2341.             self._downloader.trouble(u'ERROR: unable to download file')
  2342.  
  2343. class FacebookIE(InfoExtractor):
  2344.     """Information Extractor for Facebook"""
  2345.  
  2346.     _VALID_URL = r'^(?:https?://)?(?:\w+\.)?facebook.com/video/video.php\?(?:.*?)v=(?P<ID>\d+)(?:.*)'
  2347.     _LOGIN_URL = 'https://login.facebook.com/login.php?m&next=http%3A%2F%2Fm.facebook.com%2Fhome.php&'
  2348.     _NETRC_MACHINE = 'facebook'
  2349.     _available_formats = ['highqual', 'lowqual']
  2350.     _video_extensions = {
  2351.         'highqual': 'mp4',
  2352.         'lowqual': 'mp4',
  2353.     }
  2354.  
  2355.     def __init__(self, downloader=None):
  2356.         InfoExtractor.__init__(self, downloader)
  2357.  
  2358.     @staticmethod
  2359.     def suitable(url):
  2360.         return (re.match(FacebookIE._VALID_URL, url) is not None)
  2361.  
  2362.     def _reporter(self, message):
  2363.         """Add header and report message."""
  2364.         self._downloader.to_screen(u'[facebook] %s' % message)
  2365.  
  2366.     def report_login(self):
  2367.         """Report attempt to log in."""
  2368.         self._reporter(u'Logging in')
  2369.  
  2370.     def report_video_webpage_download(self, video_id):
  2371.         """Report attempt to download video webpage."""
  2372.         self._reporter(u'%s: Downloading video webpage' % video_id)
  2373.  
  2374.     def report_information_extraction(self, video_id):
  2375.         """Report attempt to extract video information."""
  2376.         self._reporter(u'%s: Extracting video information' % video_id)
  2377.  
  2378.     def _parse_page(self, video_webpage):
  2379.         """Extract video information from page"""
  2380.         # General data
  2381.         data = {'title': r'class="video_title datawrap">(.*?)</',
  2382.             'description': r'<div class="datawrap">(.*?)</div>',
  2383.             'owner': r'\("video_owner_name", "(.*?)"\)',
  2384.             'upload_date': r'data-date="(.*?)"',
  2385.             'thumbnail':  r'\("thumb_url", "(?P<THUMB>.*?)"\)',
  2386.             }
  2387.         video_info = {}
  2388.         for piece in data.keys():
  2389.             mobj = re.search(data[piece], video_webpage)
  2390.             if mobj is not None:
  2391.                 video_info[piece] = urllib.unquote_plus(mobj.group(1).decode("unicode_escape"))
  2392.  
  2393.         # Video urls
  2394.         video_urls = {}
  2395.         for fmt in self._available_formats:
  2396.             mobj = re.search(r'\("%s_src\", "(.+?)"\)' % fmt, video_webpage)
  2397.             if mobj is not None:
  2398.                 # URL is in a Javascript segment inside an escaped Unicode format within
  2399.                 # the generally utf-8 page
  2400.                 video_urls[fmt] = urllib.unquote_plus(mobj.group(1).decode("unicode_escape"))
  2401.         video_info['video_urls'] = video_urls
  2402.  
  2403.         return video_info
  2404.  
  2405.     def _real_initialize(self):
  2406.         if self._downloader is None:
  2407.             return
  2408.  
  2409.         useremail = None
  2410.         password = None
  2411.         downloader_params = self._downloader.params
  2412.  
  2413.         # Attempt to use provided username and password or .netrc data
  2414.         if downloader_params.get('username', None) is not None:
  2415.             useremail = downloader_params['username']
  2416.             password = downloader_params['password']
  2417.         elif downloader_params.get('usenetrc', False):
  2418.             try:
  2419.                 info = netrc.netrc().authenticators(self._NETRC_MACHINE)
  2420.                 if info is not None:
  2421.                     useremail = info[0]
  2422.                     password = info[2]
  2423.                 else:
  2424.                     raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
  2425.             except (IOError, netrc.NetrcParseError), err:
  2426.                 self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
  2427.                 return
  2428.  
  2429.         if useremail is None:
  2430.             return
  2431.  
  2432.         # Log in
  2433.         login_form = {
  2434.             'email': useremail,
  2435.             'pass': password,
  2436.             'login': 'Log+In'
  2437.             }
  2438.         request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form))
  2439.         try:
  2440.             self.report_login()
  2441.             login_results = urllib2.urlopen(request).read()
  2442.             if re.search(r'<form(.*)name="login"(.*)</form>', login_results) is not None:
  2443.                 self._downloader.to_stderr(u'WARNING: unable to log in: bad username/password, or exceded login rate limit (~3/min). Check credentials or wait.')
  2444.                 return
  2445.         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
  2446.             self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))
  2447.             return
  2448.  
  2449.     def _real_extract(self, url):
  2450.         mobj = re.match(self._VALID_URL, url)
  2451.         if mobj is None:
  2452.             self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
  2453.             return
  2454.         video_id = mobj.group('ID')
  2455.  
  2456.         # Get video webpage
  2457.         self.report_video_webpage_download(video_id)
  2458.         request = urllib2.Request('https://www.facebook.com/video/video.php?v=%s' % video_id)
  2459.         try:
  2460.             page = urllib2.urlopen(request)
  2461.             video_webpage = page.read()
  2462.         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
  2463.             self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
  2464.             return
  2465.  
  2466.         # Start extracting information
  2467.         self.report_information_extraction(video_id)
  2468.  
  2469.         # Extract information
  2470.         video_info = self._parse_page(video_webpage)
  2471.  
  2472.         # uploader
  2473.         if 'owner' not in video_info:
  2474.             self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
  2475.             return
  2476.         video_uploader = video_info['owner']
  2477.  
  2478.         # title
  2479.         if 'title' not in video_info:
  2480.             self._downloader.trouble(u'ERROR: unable to extract video title')
  2481.             return
  2482.         video_title = video_info['title']
  2483.         video_title = video_title.decode('utf-8')
  2484.         video_title = sanitize_title(video_title)
  2485.  
  2486.         # simplified title
  2487.         simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
  2488.         simple_title = simple_title.strip(ur'_')
  2489.  
  2490.         # thumbnail image
  2491.         if 'thumbnail' not in video_info:
  2492.             self._downloader.trouble(u'WARNING: unable to extract video thumbnail')
  2493.             video_thumbnail = ''
  2494.         else:
  2495.             video_thumbnail = video_info['thumbnail']
  2496.  
  2497.         # upload date
  2498.         upload_date = u'NA'
  2499.         if 'upload_date' in video_info:
  2500.             upload_time = video_info['upload_date']
  2501.             timetuple = email.utils.parsedate_tz(upload_time)
  2502.             if timetuple is not None:
  2503.                 try:
  2504.                     upload_date = time.strftime('%Y%m%d', timetuple[0:9])
  2505.                 except:
  2506.                     pass
  2507.  
  2508.         # description
  2509.         video_description = 'No description available.'
  2510.         if (self._downloader.params.get('forcedescription', False) and
  2511.             'description' in video_info):
  2512.             video_description = video_info['description']
  2513.  
  2514.         url_map = video_info['video_urls']
  2515.         if len(url_map.keys()) > 0:
  2516.             # Decide which formats to download
  2517.             req_format = self._downloader.params.get('format', None)
  2518.             format_limit = self._downloader.params.get('format_limit', None)
  2519.  
  2520.             if format_limit is not None and format_limit in self._available_formats:
  2521.                 format_list = self._available_formats[self._available_formats.index(format_limit):]
  2522.             else:
  2523.                 format_list = self._available_formats
  2524.             existing_formats = [x for x in format_list if x in url_map]
  2525.             if len(existing_formats) == 0:
  2526.                 self._downloader.trouble(u'ERROR: no known formats available for video')
  2527.                 return
  2528.             if req_format is None:
  2529.                 video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
  2530.             elif req_format == '-1':
  2531.                 video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
  2532.             else:
  2533.                 # Specific format
  2534.                 if req_format not in url_map:
  2535.                     self._downloader.trouble(u'ERROR: requested format not available')
  2536.                     return
  2537.                 video_url_list = [(req_format, url_map[req_format])] # Specific format
  2538.  
  2539.         for format_param, video_real_url in video_url_list:
  2540.  
  2541.             # At this point we have a new video
  2542.             self._downloader.increment_downloads()
  2543.  
  2544.             # Extension
  2545.             video_extension = self._video_extensions.get(format_param, 'mp4')
  2546.  
  2547.             # Find the video URL in fmt_url_map or conn paramters
  2548.             try:
  2549.                 # Process video information
  2550.                 self._downloader.process_info({
  2551.                     'id':       video_id.decode('utf-8'),
  2552.                     'url':      video_real_url.decode('utf-8'),
  2553.                     'uploader': video_uploader.decode('utf-8'),
  2554.                     'upload_date':  upload_date,
  2555.                     'title':    video_title,
  2556.                     'stitle':   simple_title,
  2557.                     'ext':      video_extension.decode('utf-8'),
  2558.                     'format':   (format_param is None and u'NA' or format_param.decode('utf-8')),
  2559.                     'thumbnail':    video_thumbnail.decode('utf-8'),
  2560.                     'description':  video_description.decode('utf-8'),
  2561.                     'player_url':   None,
  2562.                 })
  2563.             except UnavailableVideoError, err:
  2564.                 self._downloader.trouble(u'\nERROR: unable to download video')
  2565.  
  2566. class PostProcessor(object):
  2567.     """Post Processor class.
  2568.  
  2569.     PostProcessor objects can be added to downloaders with their
  2570.     add_post_processor() method. When the downloader has finished a
  2571.     successful download, it will take its internal chain of PostProcessors
  2572.     and start calling the run() method on each one of them, first with
  2573.     an initial argument and then with the returned value of the previous
  2574.     PostProcessor.
  2575.  
  2576.     The chain will be stopped if one of them ever returns None or the end
  2577.     of the chain is reached.
  2578.  
  2579.     PostProcessor objects follow a "mutual registration" process similar
  2580.     to InfoExtractor objects.
  2581.     """
  2582.  
  2583.     _downloader = None
  2584.  
  2585.     def __init__(self, downloader=None):
  2586.         self._downloader = downloader
  2587.  
  2588.     def set_downloader(self, downloader):
  2589.         """Sets the downloader for this PP."""
  2590.         self._downloader = downloader
  2591.  
  2592.     def run(self, information):
  2593.         """Run the PostProcessor.
  2594.  
  2595.         The "information" argument is a dictionary like the ones
  2596.         composed by InfoExtractors. The only difference is that this
  2597.         one has an extra field called "filepath" that points to the
  2598.         downloaded file.
  2599.  
  2600.         When this method returns None, the postprocessing chain is
  2601.         stopped. However, this method may return an information
  2602.         dictionary that will be passed to the next postprocessing
  2603.         object in the chain. It can be the one it received after
  2604.         changing some fields.
  2605.  
  2606.         In addition, this method may raise a PostProcessingError
  2607.         exception that will be taken into account by the downloader
  2608.         it was called from.
  2609.         """
  2610.         return information # by default, do nothing
  2611.  
  2612. class FFmpegExtractAudioPP(PostProcessor):
  2613.  
  2614.     def __init__(self, downloader=None, preferredcodec=None):
  2615.         PostProcessor.__init__(self, downloader)
  2616.         if preferredcodec is None:
  2617.             preferredcodec = 'best'
  2618.         self._preferredcodec = preferredcodec
  2619.  
  2620.     @staticmethod
  2621.     def get_audio_codec(path):
  2622.         try:
  2623.             cmd = ['ffprobe', '-show_streams', '--', path]
  2624.             handle = subprocess.Popen(cmd, stderr=file(os.path.devnull, 'w'), stdout=subprocess.PIPE)
  2625.             output = handle.communicate()[0]
  2626.             if handle.wait() != 0:
  2627.                 return None
  2628.         except (IOError, OSError):
  2629.             return None
  2630.         audio_codec = None
  2631.         for line in output.split('\n'):
  2632.             if line.startswith('codec_name='):
  2633.                 audio_codec = line.split('=')[1].strip()
  2634.             elif line.strip() == 'codec_type=audio' and audio_codec is not None:
  2635.                 return audio_codec
  2636.         return None
  2637.  
  2638.     @staticmethod
  2639.     def run_ffmpeg(path, out_path, codec, more_opts):
  2640.         try:
  2641.             cmd = ['ffmpeg', '-y', '-i', path, '-vn', '-acodec', codec] + more_opts + ['--', out_path]
  2642.             ret = subprocess.call(cmd, stdout=file(os.path.devnull, 'w'), stderr=subprocess.STDOUT)
  2643.             return (ret == 0)
  2644.         except (IOError, OSError):
  2645.             return False
  2646.  
  2647.     def run(self, information):
  2648.         path = information['filepath']
  2649.  
  2650.         filecodec = self.get_audio_codec(path)
  2651.         if filecodec is None:
  2652.             self._downloader.to_stderr(u'WARNING: unable to obtain file audio codec with ffprobe')
  2653.             return None
  2654.  
  2655.         more_opts = []
  2656.         if self._preferredcodec == 'best' or self._preferredcodec == filecodec:
  2657.             if filecodec == 'aac' or filecodec == 'mp3':
  2658.                 # Lossless if possible
  2659.                 acodec = 'copy'
  2660.                 extension = filecodec
  2661.                 if filecodec == 'aac':
  2662.                     more_opts = ['-f', 'adts']
  2663.             else:
  2664.                 # MP3 otherwise.
  2665.                 acodec = 'libmp3lame'
  2666.                 extension = 'mp3'
  2667.                 more_opts = ['-ab', '128k']
  2668.         else:
  2669.             # We convert the audio (lossy)
  2670.             acodec = {'mp3': 'libmp3lame', 'aac': 'aac'}[self._preferredcodec]
  2671.             extension = self._preferredcodec
  2672.             more_opts = ['-ab', '128k']
  2673.             if self._preferredcodec == 'aac':
  2674.                 more_opts += ['-f', 'adts']
  2675.  
  2676.         (prefix, ext) = os.path.splitext(path)
  2677.         new_path = prefix + '.' + extension
  2678.         self._downloader.to_screen(u'[ffmpeg] Destination: %s' % new_path)
  2679.         status = self.run_ffmpeg(path, new_path, acodec, more_opts)
  2680.  
  2681.         if not status:
  2682.             self._downloader.to_stderr(u'WARNING: error running ffmpeg')
  2683.             return None
  2684.  
  2685.         try:
  2686.             os.remove(path)
  2687.         except (IOError, OSError):
  2688.             self._downloader.to_stderr(u'WARNING: Unable to remove downloaded video file')
  2689.             return None
  2690.  
  2691.         information['filepath'] = new_path
  2692.         return information
  2693.  
### MAIN PROGRAM ###
if __name__ == '__main__':
    # The whole program body runs inside one try: whose except clauses
    # (DownloadError / SameFileError / KeyboardInterrupt) are at the very
    # end of the file.
    try:
        # Modules needed only when running the main program
        import getpass
        import optparse
  2701.         # Function to update the program file with the latest version from the repository.
  2702.         def update_self(downloader, filename):
  2703.             # Note: downloader only used for options
  2704.             if not os.access(filename, os.W_OK):
  2705.                 sys.exit('ERROR: no write permissions on %s' % filename)
  2706.  
  2707.             downloader.to_screen('Updating to latest stable version...')
  2708.             try:
  2709.                 latest_url = 'http://github.com/rg3/youtube-dl/raw/master/LATEST_VERSION'
  2710.                 latest_version = urllib.urlopen(latest_url).read().strip()
  2711.                 prog_url = 'http://github.com/rg3/youtube-dl/raw/%s/youtube-dl' % latest_version
  2712.                 newcontent = urllib.urlopen(prog_url).read()
  2713.             except (IOError, OSError), err:
  2714.                 sys.exit('ERROR: unable to download latest version')
  2715.             try:
  2716.                 stream = open(filename, 'w')
  2717.                 stream.write(newcontent)
  2718.                 stream.close()
  2719.             except (IOError, OSError), err:
  2720.                 sys.exit('ERROR: unable to overwrite current version')
  2721.             downloader.to_screen('Updated to version %s' % latest_version)
  2722.  
        # Parse command line
        # conflict_handler='resolve' lets the -h/-v options below replace
        # optparse's built-in help/version handlers.
        parser = optparse.OptionParser(
            usage='Usage: %prog [options] url...',
            version='2011.03.29',
            conflict_handler='resolve',
        )

        parser.add_option('-h', '--help',
                action='help', help='print this help text and exit')
        parser.add_option('-v', '--version',
                action='version', help='print program version and exit')
        parser.add_option('-U', '--update',
                action='store_true', dest='update_self', help='update this program to latest stable version')
        parser.add_option('-i', '--ignore-errors',
                action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
        parser.add_option('-r', '--rate-limit',
                dest='ratelimit', metavar='LIMIT', help='download rate limit (e.g. 50k or 44.6m)')
        parser.add_option('-R', '--retries',
                dest='retries', metavar='RETRIES', help='number of retries (default is 10)', default=10)
        parser.add_option('--playlist-start',
                dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is 1)', default=1)
        parser.add_option('--playlist-end',
                dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1)
        parser.add_option('--dump-user-agent',
                action='store_true', dest='dump_user_agent',
                help='display the current browser identification', default=False)

        authentication = optparse.OptionGroup(parser, 'Authentication Options')
        authentication.add_option('-u', '--username',
                dest='username', metavar='USERNAME', help='account username')
        authentication.add_option('-p', '--password',
                dest='password', metavar='PASSWORD', help='account password')
        authentication.add_option('-n', '--netrc',
                action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False)
        parser.add_option_group(authentication)

        video_format = optparse.OptionGroup(parser, 'Video Format Options')
        video_format.add_option('-f', '--format',
                action='store', dest='format', metavar='FORMAT', help='video format code')
        # --all-formats reuses dest='format' with the sentinel value '-1'.
        video_format.add_option('--all-formats',
                action='store_const', dest='format', help='download all available video formats', const='-1')
        video_format.add_option('--max-quality',
                action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download')
        parser.add_option_group(video_format)

        verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
        verbosity.add_option('-q', '--quiet',
                action='store_true', dest='quiet', help='activates quiet mode', default=False)
        verbosity.add_option('-s', '--simulate',
                action='store_true', dest='simulate', help='do not download video', default=False)
        verbosity.add_option('-g', '--get-url',
                action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False)
        verbosity.add_option('-e', '--get-title',
                action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False)
        verbosity.add_option('--get-thumbnail',
                action='store_true', dest='getthumbnail',
                help='simulate, quiet but print thumbnail URL', default=False)
        verbosity.add_option('--get-description',
                action='store_true', dest='getdescription',
                help='simulate, quiet but print video description', default=False)
        verbosity.add_option('--get-filename',
                action='store_true', dest='getfilename',
                help='simulate, quiet but print output filename', default=False)
        verbosity.add_option('--no-progress',
                action='store_true', dest='noprogress', help='do not print progress bar', default=False)
        verbosity.add_option('--console-title',
                action='store_true', dest='consoletitle',
                help='display progress in console titlebar', default=False)
        parser.add_option_group(verbosity)

        filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
        filesystem.add_option('-t', '--title',
                action='store_true', dest='usetitle', help='use title in file name', default=False)
        filesystem.add_option('-l', '--literal',
                action='store_true', dest='useliteral', help='use literal title in file name', default=False)
        filesystem.add_option('-A', '--auto-number',
                action='store_true', dest='autonumber',
                help='number downloaded files starting from 00000', default=False)
        filesystem.add_option('-o', '--output',
                dest='outtmpl', metavar='TEMPLATE', help='output filename template')
        filesystem.add_option('-a', '--batch-file',
                dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)')
        filesystem.add_option('-w', '--no-overwrites',
                action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
        filesystem.add_option('-c', '--continue',
                action='store_true', dest='continue_dl', help='resume partially downloaded files', default=False)
        filesystem.add_option('--cookies',
                dest='cookiefile', metavar='FILE', help='file to dump cookie jar to')
        filesystem.add_option('--no-part',
                action='store_true', dest='nopart', help='do not use .part files', default=False)
        filesystem.add_option('--no-mtime',
                action='store_false', dest='updatetime',
                help='do not use the Last-modified header to set the file modification time', default=True)
        parser.add_option_group(filesystem)

        postproc = optparse.OptionGroup(parser, 'Post-processing Options')
        postproc.add_option('--extract-audio', action='store_true', dest='extractaudio', default=False,
                help='convert video files to audio-only files (requires ffmpeg and ffprobe)')
        postproc.add_option('--audio-format', metavar='FORMAT', dest='audioformat', default='best',
                help='"best", "aac" or "mp3"; best by default')
        parser.add_option_group(postproc)

        (opts, args) = parser.parse_args()

        # Open appropriate CookieJar
        # MozillaCookieJar is used when --cookies is given so the file is in
        # a standard, reusable format; an existing file is loaded only if
        # it is present and readable.
        if opts.cookiefile is None:
            jar = cookielib.CookieJar()
        else:
            try:
                jar = cookielib.MozillaCookieJar(opts.cookiefile)
                if os.path.isfile(opts.cookiefile) and os.access(opts.cookiefile, os.R_OK):
                    jar.load()
            except (IOError, OSError), err:
                sys.exit(u'ERROR: unable to open cookie file')

        # Dump user agent
        if opts.dump_user_agent:
            print std_headers['User-Agent']
            sys.exit(0)

        # General configuration
        # Install a global opener chaining proxy support, the cookie jar
        # and YoutubeDLHandler (defined earlier in this file).
        cookie_processor = urllib2.HTTPCookieProcessor(jar)
        urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler(), cookie_processor, YoutubeDLHandler()))
        socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words)

        # Batch file verification
        # Lines starting with '#', '/' or ';' are treated as comments.
        batchurls = []
        if opts.batchfile is not None:
            try:
                if opts.batchfile == '-':
                    batchfd = sys.stdin
                else:
                    batchfd = open(opts.batchfile, 'r')
                batchurls = batchfd.readlines()
                batchurls = [x.strip() for x in batchurls]
                batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)]
            except IOError:
                sys.exit(u'ERROR: batch file could not be read')
        all_urls = batchurls + args

        # Conflicting, missing and erroneous options
        if opts.usenetrc and (opts.username is not None or opts.password is not None):
            parser.error(u'using .netrc conflicts with giving username/password')
        if opts.password is not None and opts.username is None:
            parser.error(u'account username missing')
        if opts.outtmpl is not None and (opts.useliteral or opts.usetitle or opts.autonumber):
            parser.error(u'using output template conflicts with using title, literal title or auto number')
        if opts.usetitle and opts.useliteral:
            parser.error(u'using title conflicts with using literal title')
        # Prompt interactively for a password when only a username is given.
        if opts.username is not None and opts.password is None:
            opts.password = getpass.getpass(u'Type account password and press return:')
        if opts.ratelimit is not None:
            numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
            if numeric_limit is None:
                parser.error(u'invalid rate limit specified')
            opts.ratelimit = numeric_limit
        if opts.retries is not None:
            try:
                opts.retries = long(opts.retries)
            except (TypeError, ValueError), err:
                parser.error(u'invalid retry count specified')
        try:
            opts.playliststart = long(opts.playliststart)
            if opts.playliststart <= 0:
                raise ValueError
        except (TypeError, ValueError), err:
            parser.error(u'invalid playlist start number specified')
        try:
            opts.playlistend = long(opts.playlistend)
            # -1 is the sentinel for "until the end of the playlist".
            if opts.playlistend != -1 and (opts.playlistend <= 0 or opts.playlistend < opts.playliststart):
                raise ValueError
        except (TypeError, ValueError), err:
            parser.error(u'invalid playlist end number specified')
        if opts.extractaudio:
            if opts.audioformat not in ['best', 'aac', 'mp3']:
                parser.error(u'invalid audio format specified')

        # Information extractors
        youtube_ie = YoutubeIE()
        metacafe_ie = MetacafeIE(youtube_ie)
        dailymotion_ie = DailymotionIE()
        youtube_pl_ie = YoutubePlaylistIE(youtube_ie)
        youtube_user_ie = YoutubeUserIE(youtube_ie)
        youtube_search_ie = YoutubeSearchIE(youtube_ie)
        google_ie = GoogleIE()
        google_search_ie = GoogleSearchIE(google_ie)
        photobucket_ie = PhotobucketIE()
        yahoo_ie = YahooIE()
        yahoo_search_ie = YahooSearchIE(yahoo_ie)
        deposit_files_ie = DepositFilesIE()
        facebook_ie = FacebookIE()
        generic_ie = GenericIE()

        # File downloader
        fd = FileDownloader({
            'usenetrc': opts.usenetrc,
            'username': opts.username,
            'password': opts.password,
            # Any of the --get-* options implies quiet + simulate.
            'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename),
            'forceurl': opts.geturl,
            'forcetitle': opts.gettitle,
            'forcethumbnail': opts.getthumbnail,
            'forcedescription': opts.getdescription,
            'forcefilename': opts.getfilename,
            'simulate': (opts.simulate or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename),
            'format': opts.format,
            'format_limit': opts.format_limit,
            # First truthy template wins: explicit -o, then the
            # format/title/autonumber combinations, then the id-only default.
            'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding()))
                or (opts.format == '-1' and opts.usetitle and u'%(stitle)s-%(id)s-%(format)s.%(ext)s')
                or (opts.format == '-1' and opts.useliteral and u'%(title)s-%(id)s-%(format)s.%(ext)s')
                or (opts.format == '-1' and u'%(id)s-%(format)s.%(ext)s')
                or (opts.usetitle and opts.autonumber and u'%(autonumber)s-%(stitle)s-%(id)s.%(ext)s')
                or (opts.useliteral and opts.autonumber and u'%(autonumber)s-%(title)s-%(id)s.%(ext)s')
                or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s')
                or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s')
                or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s')
                or u'%(id)s.%(ext)s'),
            'ignoreerrors': opts.ignoreerrors,
            'ratelimit': opts.ratelimit,
            'nooverwrites': opts.nooverwrites,
            'retries': opts.retries,
            'continuedl': opts.continue_dl,
            'noprogress': opts.noprogress,
            'playliststart': opts.playliststart,
            'playlistend': opts.playlistend,
            # '-' as the output template presumably sends the video to
            # stdout, so messages must go to stderr — TODO confirm in
            # FileDownloader.
            'logtostderr': opts.outtmpl == '-',
            'consoletitle': opts.consoletitle,
            'nopart': opts.nopart,
            'updatetime': opts.updatetime,
            })
        # Registration order matters: more specific extractors first so the
        # generic one only sees URLs nothing else claimed.
        fd.add_info_extractor(youtube_search_ie)
        fd.add_info_extractor(youtube_pl_ie)
        fd.add_info_extractor(youtube_user_ie)
        fd.add_info_extractor(metacafe_ie)
        fd.add_info_extractor(dailymotion_ie)
        fd.add_info_extractor(youtube_ie)
        fd.add_info_extractor(google_ie)
        fd.add_info_extractor(google_search_ie)
        fd.add_info_extractor(photobucket_ie)
        fd.add_info_extractor(yahoo_ie)
        fd.add_info_extractor(yahoo_search_ie)
        fd.add_info_extractor(deposit_files_ie)
        fd.add_info_extractor(facebook_ie)

        # This must come last since it's the
        # fallback if none of the others work
        fd.add_info_extractor(generic_ie)

        # PostProcessors
        if opts.extractaudio:
            fd.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat))

        # Update version
        if opts.update_self:
            update_self(fd, sys.argv[0])

        # Maybe do nothing
        if len(all_urls) < 1:
            if not opts.update_self:
                parser.error(u'you must provide at least one URL')
            else:
                sys.exit()
        retcode = fd.download(all_urls)

        # Dump cookie jar if requested
        if opts.cookiefile is not None:
            try:
                jar.save()
            except (IOError, OSError), err:
                sys.exit(u'ERROR: unable to save cookie jar')

        # Propagate the downloader's return code as the process exit status.
        sys.exit(retcode)

    except DownloadError:
        sys.exit(1)
    except SameFileError:
        sys.exit(u'ERROR: fixed output name but more than one file to download')
    except KeyboardInterrupt:
        sys.exit(u'\nERROR: Interrupted by user')
RAW Paste Data