Merge remote-tracking branch 'upstream/master'

author: Allan Zhou <allanzp@gmail.com> 2013-08-28 09:57:28 -0700
committer: Allan Zhou <allanzp@gmail.com> 2013-08-28 09:57:28 -0700
commit: 591078babff1d783bed872c5b441dc570d354448 (patch)
tree: 6cf813c993ea26eaec1d976b616d973c2bc47583
parent: 99859d436cdee9acc9c869254e734eba5b748260 (diff)
parent: 9868c781a1bb3f50385bc7d1e87d82080ffffbc6 (diff)
download: youtube-dl-591078babff1d783bed872c5b441dc570d354448.tar.gz
youtube-dl-591078babff1d783bed872c5b441dc570d354448.tar.xz
youtube-dl-591078babff1d783bed872c5b441dc570d354448.zip
26 files changed, 989 insertions, 78 deletions
diff --git a/devscripts/youtube_genalgo.py b/devscripts/youtube_genalgo.py
index 6f1d6ef99..917e8f79d 100644
--- a/devscripts/youtube_genalgo.py
+++ b/devscripts/youtube_genalgo.py
@@ -14,9 +14,9 @@ tests = [
     # 89 
     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'",
      "/?;:|}<[{=+-_)(*&^%$#@!MqBVCXZASDFGHJKLPOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuyt"),
-    # 88
+    # 88 - vflapUV9V 2013/08/28
     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<",
-     "J:|}][{=+-_)(*&;%$#@>MNBVCXZASDFGH^KLPOIUYTREWQ0987654321mnbvcxzasdfghrklpoiuytej"),
+     "ioplkjhgfdsazxcvbnm12<4567890QWERTYUIOZLKJHGFDSAeXCVBNM!@#$%^&*()_-+={[]}|:;?/>.3"),
     # 87
     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>.<",
      "uioplkjhgfdsazxcvbnm1t34567890QWE2TYUIOPLKJHGFDSAZXCVeNM!@#$^&*()_-+={[]}|:;?/>.<"),
diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py
index 217c4a52f..7c5ac4bc2 100644
--- a/youtube_dl/FileDownloader.py
+++ b/youtube_dl/FileDownloader.py
@@ -64,6 +64,17 @@ class FileDownloader(object):
         return '%.2f%s' % (converted, suffix)
 
     @staticmethod
+    def format_seconds(seconds):
+        (mins, secs) = divmod(seconds, 60)
+        (hours, eta_mins) = divmod(mins, 60)
+        if hours > 99:
+            return '--:--:--'
+        if hours == 0:
+            return '%02d:%02d' % (mins, secs)
+        else:
+            return '%02d:%02d:%02d' % (hours, mins, secs)
+
+    @staticmethod
     def calc_percent(byte_counter, data_len):
         if data_len is None:
             return '---.-%'
@@ -78,14 +89,7 @@ class FileDownloader(object):
             return '--:--'
         rate = float(current) / dif
         eta = int((float(total) - float(current)) / rate)
-        (eta_mins, eta_secs) = divmod(eta, 60)
-        (eta_hours, eta_mins) = divmod(eta_mins, 60)
-        if eta_hours > 99:
-            return '--:--:--'
-        if eta_hours == 0:
-            return '%02d:%02d' % (eta_mins, eta_secs)
-        else:
-            return '%02d:%02d:%02d' % (eta_hours, eta_mins, eta_secs)
+        return FileDownloader.format_seconds(eta)
 
     @staticmethod
     def calc_speed(start, now, bytes):
@@ -234,12 +238,14 @@ class FileDownloader(object):
         """Report it was impossible to resume download."""
         self.to_screen(u'[download] Unable to resume')
 
-    def report_finish(self):
+    def report_finish(self, data_len_str, tot_time):
         """Report download finished."""
         if self.params.get('noprogress', False):
             self.to_screen(u'[download] Download completed')
         else:
-            self.to_screen(u'')
+            clear_line = (u'\x1b[K' if sys.stderr.isatty() and os.name != 'nt' else u'')
+            self.to_screen(u'\r%s[download] 100%% of %s in %s' %
+                (clear_line, data_len_str, self.format_seconds(tot_time)))
 
     def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path, tc_url):
         self.report_destination(filename)
@@ -542,7 +548,7 @@ class FileDownloader(object):
             self.report_error(u'Did not get any data blocks')
             return False
         stream.close()
-        self.report_finish()
+        self.report_finish(data_len_str, (time.time() - start))
         if data_len is not None and byte_counter != data_len:
             raise ContentTooShortError(byte_counter, int(data_len))
         self.try_rename(tmpfilename, filename)
diff --git a/youtube_dl/PostProcessor.py b/youtube_dl/PostProcessor.py
index c02ed7148..ae56d2082 100644
--- a/youtube_dl/PostProcessor.py
+++ b/youtube_dl/PostProcessor.py
@@ -137,7 +137,7 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
         try:
             FFmpegPostProcessor.run_ffmpeg(self, path, out_path, opts)
         except FFmpegPostProcessorError as err:
-            raise AudioConversionError(err.message)
+            raise AudioConversionError(err.msg)
 
     def run(self, information):
         path = information['filepath']
@@ -207,7 +207,7 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
         except:
             etype,e,tb = sys.exc_info()
             if isinstance(e, AudioConversionError):
-                msg = u'audio conversion failed: ' + e.message
+                msg = u'audio conversion failed: ' + e.msg
             else:
                 msg = u'error running ' + (self._exes['avconv'] and 'avconv' or 'ffmpeg')
             raise PostProcessingError(msg)
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 3fc4ec378..b289bd9e2 100644
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -97,6 +97,7 @@ class YoutubeDL(object):
     def __init__(self, params):
         """Create a FileDownloader object with the given options."""
         self._ies = []
+        self._ies_instances = {}
         self._pps = []
         self._progress_hooks = []
         self._download_retcode = 0
@@ -111,8 +112,21 @@ class YoutubeDL(object):
     def add_info_extractor(self, ie):
         """Add an InfoExtractor object to the end of the list."""
         self._ies.append(ie)
+        self._ies_instances[ie.ie_key()] = ie
         ie.set_downloader(self)
 
+    def get_info_extractor(self, ie_key):
+        """
+        Get an instance of an IE with name ie_key, it will try to get one from
+        the _ies list, if there's no instance it will create a new one and add
+        it to the extractor list.
+        """
+        ie = self._ies_instances.get(ie_key)
+        if ie is None:
+            ie = get_info_extractor(ie_key)()
+            self.add_info_extractor(ie)
+        return ie
+
     def add_default_info_extractors(self):
         """
         Add the InfoExtractors returned by gen_extractors to the end of the list
@@ -294,9 +308,7 @@ class YoutubeDL(object):
          '''
         
         if ie_key:
-            ie = get_info_extractor(ie_key)()
-            ie.set_downloader(self)
-            ies = [ie]
+            ies = [self.get_info_extractor(ie_key)]
         else:
             ies = self._ies
 
@@ -448,7 +460,8 @@ class YoutubeDL(object):
         if self.params.get('forceid', False):
             compat_print(info_dict['id'])
         if self.params.get('forceurl', False):
-            compat_print(info_dict['url'])
+            # For RTMP URLs, also include the playpath
+            compat_print(info_dict['url'] + info_dict.get('play_path', u''))
         if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
             compat_print(info_dict['thumbnail'])
         if self.params.get('forcedescription', False) and 'description' in info_dict:
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
index 614429073..431460c57 100644
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -45,6 +45,7 @@ import sys
 import warnings
 import platform
 
+
 from .utils import *
 from .update import update_self
 from .version import __version__
@@ -99,6 +100,16 @@ def parseOpts(overrideArguments=None):
             pass
         return None
 
+    def _hide_login_info(opts):
+        opts = list(opts)
+        for private_opt in ['-p', '--password', '-u', '--username']:
+            try:
+                i = opts.index(private_opt)
+                opts[i+1] = '<PRIVATE>'
+            except ValueError:
+                pass
+        return opts
+
     max_width = 80
     max_help_position = 80
 
@@ -357,9 +368,9 @@ def parseOpts(overrideArguments=None):
         argv = systemConf + userConf + commandLineConf
         opts, args = parser.parse_args(argv)
         if opts.verbose:
-            sys.stderr.write(u'[debug] System config: ' + repr(systemConf) + '\n')
-            sys.stderr.write(u'[debug] User config: ' + repr(userConf) + '\n')
-            sys.stderr.write(u'[debug] Command-line args: ' + repr(commandLineConf) + '\n')
+            sys.stderr.write(u'[debug] System config: ' + repr(_hide_login_info(systemConf)) + '\n')
+            sys.stderr.write(u'[debug] User config: ' + repr(_hide_login_info(userConf)) + '\n')
+            sys.stderr.write(u'[debug] Command-line args: ' + repr(_hide_login_info(commandLineConf)) + '\n')
 
     return parser, opts, args
 
@@ -430,6 +441,10 @@ def _real_main(argv=None):
     proxy_handler = compat_urllib_request.ProxyHandler(proxies)
     https_handler = make_HTTPS_handler(opts)
     opener = compat_urllib_request.build_opener(https_handler, proxy_handler, cookie_processor, YoutubeDLHandler())
+    # Delete the default user-agent header, which would otherwise apply in
+    # cases where our custom HTTP handler doesn't come into play
+    # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
+    opener.addheaders =[]
     compat_urllib_request.install_opener(opener)
     socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words)
 
@@ -607,7 +622,7 @@ def _real_main(argv=None):
                 sys.exc_clear()
             except:
                 pass
-        sys.stderr.write(u'[debug] Python version %s - %s' %(platform.python_version(), platform.platform()) + u'\n')
+        sys.stderr.write(u'[debug] Python version %s - %s' %(platform.python_version(), platform_name()) + u'\n')
         sys.stderr.write(u'[debug] Proxy map: ' + str(proxy_handler.proxies) + u'\n')
 
     ydl.add_default_info_extractors()
diff --git a/youtube_dl/aes.py b/youtube_dl/aes.py
new file mode 100644
index 000000000..9a0c93fa6
--- /dev/null
+++ b/youtube_dl/aes.py
@@ -0,0 +1,202 @@
+__all__ = ['aes_encrypt', 'key_expansion', 'aes_ctr_decrypt', 'aes_decrypt_text']
+
+import base64
+from math import ceil
+
+from .utils import bytes_to_intlist, intlist_to_bytes
+
+BLOCK_SIZE_BYTES = 16
+
+def aes_ctr_decrypt(data, key, counter):
+    """
+    Decrypt with aes in counter mode
+    
+    @param {int[]} data        cipher
+    @param {int[]} key         16/24/32-Byte cipher key
+    @param {instance} counter  Instance whose next_value function (@returns {int[]}  16-Byte block)
+                               returns the next counter block
+    @returns {int[]}           decrypted data
+    """
+    expanded_key = key_expansion(key)
+    block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES))
+    
+    decrypted_data=[]
+    for i in range(block_count):
+        counter_block = counter.next_value()
+        block = data[i*BLOCK_SIZE_BYTES : (i+1)*BLOCK_SIZE_BYTES]
+        block += [0]*(BLOCK_SIZE_BYTES - len(block))
+        
+        cipher_counter_block = aes_encrypt(counter_block, expanded_key)
+        decrypted_data += xor(block, cipher_counter_block)
+    decrypted_data = decrypted_data[:len(data)]
+    
+    return decrypted_data
+
+def key_expansion(data):
+    """
+    Generate key schedule
+    
+    @param {int[]} data  16/24/32-Byte cipher key
+    @returns {int[]}     176/208/240-Byte expanded key 
+    """
+    data = data[:] # copy
+    rcon_iteration = 1
+    key_size_bytes = len(data)
+    expanded_key_size_bytes = (key_size_bytes // 4 + 7) * BLOCK_SIZE_BYTES
+    
+    while len(data) < expanded_key_size_bytes:
+        temp = data[-4:]
+        temp = key_schedule_core(temp, rcon_iteration)
+        rcon_iteration += 1
+        data += xor(temp, data[-key_size_bytes : 4-key_size_bytes])
+        
+        for _ in range(3):
+            temp = data[-4:]
+            data += xor(temp, data[-key_size_bytes : 4-key_size_bytes])
+        
+        if key_size_bytes == 32:
+            temp = data[-4:]
+            temp = sub_bytes(temp)
+            data += xor(temp, data[-key_size_bytes : 4-key_size_bytes])
+        
+        for _ in range(3 if key_size_bytes == 32  else 2 if key_size_bytes == 24 else 0):
+            temp = data[-4:]
+            data += xor(temp, data[-key_size_bytes : 4-key_size_bytes])
+    data = data[:expanded_key_size_bytes]
+    
+    return data
+
+def aes_encrypt(data, expanded_key):
+    """
+    Encrypt one block with aes
+    
+    @param {int[]} data          16-Byte state
+    @param {int[]} expanded_key  176/208/240-Byte expanded key 
+    @returns {int[]}             16-Byte cipher
+    """
+    rounds = len(expanded_key) // BLOCK_SIZE_BYTES - 1
+    
+    data = xor(data, expanded_key[:BLOCK_SIZE_BYTES])
+    for i in range(1, rounds+1):
+        data = sub_bytes(data)
+        data = shift_rows(data)
+        if i != rounds:
+            data = mix_columns(data)
+        data = xor(data, expanded_key[i*BLOCK_SIZE_BYTES : (i+1)*BLOCK_SIZE_BYTES])
+    
+    return data
+
+def aes_decrypt_text(data, password, key_size_bytes):
+    """
+    Decrypt text
+    - The first 8 Bytes of decoded 'data' are the 8 high Bytes of the counter
+    - The cipher key is retrieved by encrypting the first 16 Byte of 'password'
+      with the first 'key_size_bytes' Bytes from 'password' (if necessary filled with 0's)
+    - Mode of operation is 'counter'
+    
+    @param {str} data                    Base64 encoded string
+    @param {str,unicode} password        Password (will be encoded with utf-8)
+    @param {int} key_size_bytes          Possible values: 16 for 128-Bit, 24 for 192-Bit or 32 for 256-Bit
+    @returns {str}                       Decrypted data
+    """
+    NONCE_LENGTH_BYTES = 8
+    
+    data = bytes_to_intlist(base64.b64decode(data))
+    password = bytes_to_intlist(password.encode('utf-8'))
+    
+    key = password[:key_size_bytes] + [0]*(key_size_bytes - len(password))
+    key = aes_encrypt(key[:BLOCK_SIZE_BYTES], key_expansion(key)) * (key_size_bytes // BLOCK_SIZE_BYTES)
+    
+    nonce = data[:NONCE_LENGTH_BYTES]
+    cipher = data[NONCE_LENGTH_BYTES:]
+    
+    class Counter:
+        __value = nonce + [0]*(BLOCK_SIZE_BYTES - NONCE_LENGTH_BYTES)
+        def next_value(self):
+            temp = self.__value
+            self.__value = inc(self.__value)
+            return temp
+    
+    decrypted_data = aes_ctr_decrypt(cipher, key, Counter())
+    plaintext = intlist_to_bytes(decrypted_data)
+    
+    return plaintext
+
+RCON = (0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36)
+SBOX = (0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5, 0x30, 0x01, 0x67, 0x2B, 0xFE, 0xD7, 0xAB, 0x76,
+        0xCA, 0x82, 0xC9, 0x7D, 0xFA, 0x59, 0x47, 0xF0, 0xAD, 0xD4, 0xA2, 0xAF, 0x9C, 0xA4, 0x72, 0xC0,
+        0xB7, 0xFD, 0x93, 0x26, 0x36, 0x3F, 0xF7, 0xCC, 0x34, 0xA5, 0xE5, 0xF1, 0x71, 0xD8, 0x31, 0x15,
+        0x04, 0xC7, 0x23, 0xC3, 0x18, 0x96, 0x05, 0x9A, 0x07, 0x12, 0x80, 0xE2, 0xEB, 0x27, 0xB2, 0x75,
+        0x09, 0x83, 0x2C, 0x1A, 0x1B, 0x6E, 0x5A, 0xA0, 0x52, 0x3B, 0xD6, 0xB3, 0x29, 0xE3, 0x2F, 0x84,
+        0x53, 0xD1, 0x00, 0xED, 0x20, 0xFC, 0xB1, 0x5B, 0x6A, 0xCB, 0xBE, 0x39, 0x4A, 0x4C, 0x58, 0xCF,
+        0xD0, 0xEF, 0xAA, 0xFB, 0x43, 0x4D, 0x33, 0x85, 0x45, 0xF9, 0x02, 0x7F, 0x50, 0x3C, 0x9F, 0xA8,
+        0x51, 0xA3, 0x40, 0x8F, 0x92, 0x9D, 0x38, 0xF5, 0xBC, 0xB6, 0xDA, 0x21, 0x10, 0xFF, 0xF3, 0xD2,
+        0xCD, 0x0C, 0x13, 0xEC, 0x5F, 0x97, 0x44, 0x17, 0xC4, 0xA7, 0x7E, 0x3D, 0x64, 0x5D, 0x19, 0x73,
+        0x60, 0x81, 0x4F, 0xDC, 0x22, 0x2A, 0x90, 0x88, 0x46, 0xEE, 0xB8, 0x14, 0xDE, 0x5E, 0x0B, 0xDB,
+        0xE0, 0x32, 0x3A, 0x0A, 0x49, 0x06, 0x24, 0x5C, 0xC2, 0xD3, 0xAC, 0x62, 0x91, 0x95, 0xE4, 0x79,
+        0xE7, 0xC8, 0x37, 0x6D, 0x8D, 0xD5, 0x4E, 0xA9, 0x6C, 0x56, 0xF4, 0xEA, 0x65, 0x7A, 0xAE, 0x08,
+        0xBA, 0x78, 0x25, 0x2E, 0x1C, 0xA6, 0xB4, 0xC6, 0xE8, 0xDD, 0x74, 0x1F, 0x4B, 0xBD, 0x8B, 0x8A,
+        0x70, 0x3E, 0xB5, 0x66, 0x48, 0x03, 0xF6, 0x0E, 0x61, 0x35, 0x57, 0xB9, 0x86, 0xC1, 0x1D, 0x9E,
+        0xE1, 0xF8, 0x98, 0x11, 0x69, 0xD9, 0x8E, 0x94, 0x9B, 0x1E, 0x87, 0xE9, 0xCE, 0x55, 0x28, 0xDF,
+        0x8C, 0xA1, 0x89, 0x0D, 0xBF, 0xE6, 0x42, 0x68, 0x41, 0x99, 0x2D, 0x0F, 0xB0, 0x54, 0xBB, 0x16)
+MIX_COLUMN_MATRIX = ((2,3,1,1),
+                     (1,2,3,1),
+                     (1,1,2,3),
+                     (3,1,1,2))
+
+def sub_bytes(data):
+    return [SBOX[x] for x in data]
+
+def rotate(data):
+    return data[1:] + [data[0]]
+
+def key_schedule_core(data, rcon_iteration):
+    data = rotate(data)
+    data = sub_bytes(data)
+    data[0] = data[0] ^ RCON[rcon_iteration]
+    
+    return data
+
+def xor(data1, data2):
+    return [x^y for x, y in zip(data1, data2)]
+
+def mix_column(data):
+    data_mixed = []
+    for row in range(4):
+        mixed = 0
+        for column in range(4):
+            addend = data[column]
+            if MIX_COLUMN_MATRIX[row][column] in (2,3):
+                addend <<= 1
+                if addend > 0xff:
+                    addend &= 0xff
+                    addend ^= 0x1b
+                if MIX_COLUMN_MATRIX[row][column] == 3:
+                    addend ^= data[column]
+            mixed ^= addend & 0xff
+        data_mixed.append(mixed)
+    return data_mixed
+
+def mix_columns(data):
+    data_mixed = []
+    for i in range(4):
+        column = data[i*4 : (i+1)*4]
+        data_mixed += mix_column(column)
+    return data_mixed
+
+def shift_rows(data):
+    data_shifted = []
+    for column in range(4):
+        for row in range(4):
+            data_shifted.append( data[((column + row) & 0b11) * 4 + row] )
+    return data_shifted
+
+def inc(data):
+    data = data[:] # copy
+    for i in range(len(data)-1,-1,-1):
+        if data[i] == 255:
+            data[i] = 0
+        else:
+            data[i] = data[i] + 1
+            break
+    return data
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index f71ae2713..6b5037c8c 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -1,3 +1,5 @@
+from .appletrailers import AppleTrailersIE
+from .addanime import AddAnimeIE
 from .archiveorg import ArchiveOrgIE
 from .ard import ARDIE
 from .arte import ArteTvIE
@@ -6,7 +8,10 @@ from .bandcamp import BandcampIE
 from .bliptv import BlipTVIE, BlipTVUserIE
 from .breakcom import BreakIE
 from .brightcove import BrightcoveIE
+from .c56 import C56IE
 from .canalplus import CanalplusIE
+from .canalc2 import Canalc2IE
+from .cnn import CNNIE
 from .collegehumor import CollegeHumorIE
 from .comedycentral import ComedyCentralIE
 from .condenast import CondeNastIE
@@ -45,12 +50,14 @@ from .keek import KeekIE
 from .liveleak import LiveLeakIE
 from .livestream import LivestreamIE
 from .metacafe import MetacafeIE
+from .mit import TechTVMITIE, MITIE
 from .mixcloud import MixcloudIE
 from .mtv import MTVIE
 from .muzu import MuzuTVIE
 from .myspass import MySpassIE
 from .myvideo import MyVideoIE
 from .nba import NBAIE
+from .nbc import NBCNewsIE
 from .ooyala import OoyalaIE
 from .pbs import PBSIE
 from .photobucket import PhotobucketIE
@@ -63,6 +70,7 @@ from .roxwel import RoxwelIE
 from .rtlnow import RTLnowIE
 from .sina import SinaIE
 from .slashdot import SlashdotIE
+from .sohu import SohuIE
 from .soundcloud import SoundcloudIE, SoundcloudSetIE
 from .spiegel import SpiegelIE
 from .stanfordoc import StanfordOpenClassroomIE
@@ -73,18 +81,18 @@ from .ted import TEDIE
 from .tf1 import TF1IE
 from .thisav import ThisAVIE
 from .traileraddict import TrailerAddictIE
+from .trilulilu import TriluliluIE
 from .tudou import TudouIE
 from .tumblr import TumblrIE
 from .tutv import TutvIE
-from .ustream import UstreamIE
 from .unistra import UnistraIE
+from .ustream import UstreamIE
 from .vbox7 import Vbox7IE
 from .veoh import VeohIE
 from .vevo import VevoIE
 from .videofyme import VideofyMeIE
 from .vimeo import VimeoIE, VimeoChannelIE
 from .vine import VineIE
-from .c56 import C56IE
 from .wat import WatIE
 from .weibo import WeiboIE
 from .wimp import WimpIE
diff --git a/youtube_dl/extractor/addanime.py b/youtube_dl/extractor/addanime.py
new file mode 100644
index 000000000..82a785a19
--- /dev/null
+++ b/youtube_dl/extractor/addanime.py
@@ -0,0 +1,75 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_HTTPError,
+    compat_str,
+    compat_urllib_parse,
+    compat_urllib_parse_urlparse,
+
+    ExtractorError,
+)
+
+
+class AddAnimeIE(InfoExtractor):
+
+    _VALID_URL = r'^http://(?:\w+\.)?add-anime\.net/watch_video.php\?(?:.*?)v=(?P<video_id>[\w_]+)(?:.*)'
+    IE_NAME = u'AddAnime'
+    _TEST = {
+        u'url': u'http://www.add-anime.net/watch_video.php?v=24MR3YO5SAS9',
+        u'file': u'24MR3YO5SAS9.flv',
+        u'md5': u'1036a0e0cd307b95bd8a8c3a5c8cfaf1',
+        u'info_dict': {
+            u"description": u"One Piece 606",
+            u"title": u"One Piece 606"
+        }
+    }
+
+    def _real_extract(self, url):
+        try:
+            mobj = re.match(self._VALID_URL, url)
+            video_id = mobj.group('video_id')
+            webpage = self._download_webpage(url, video_id)
+        except ExtractorError as ee:
+            if not isinstance(ee.cause, compat_HTTPError):
+                raise
+
+            redir_webpage = ee.cause.read().decode('utf-8')
+            action = self._search_regex(
+                r'<form id="challenge-form" action="([^"]+)"',
+                redir_webpage, u'Redirect form')
+            vc = self._search_regex(
+                r'<input type="hidden" name="jschl_vc" value="([^"]+)"/>',
+                redir_webpage, u'redirect vc value')
+            av = re.search(
+                r'a\.value = ([0-9]+)[+]([0-9]+)[*]([0-9]+);',
+                redir_webpage)
+            if av is None:
+                raise ExtractorError(u'Cannot find redirect math task')
+            av_res = int(av.group(1)) + int(av.group(2)) * int(av.group(3))
+
+            parsed_url = compat_urllib_parse_urlparse(url)
+            av_val = av_res + len(parsed_url.netloc)
+            confirm_url = (
+                parsed_url.scheme + u'://' + parsed_url.netloc +
+                action + '?' +
+                compat_urllib_parse.urlencode({
+                    'jschl_vc': vc, 'jschl_answer': compat_str(av_val)}))
+            self._download_webpage(
+                confirm_url, video_id,
+                note=u'Confirming after redirect')
+            webpage = self._download_webpage(url, video_id)
+
+        video_url = self._search_regex(r"var normal_video_file = '(.*?)';",
+                                       webpage, u'video file URL')
+        video_title = self._og_search_title(webpage)
+        video_description = self._og_search_description(webpage)
+
+        return {
+            '_type': 'video',
+            'id':  video_id,
+            'url': video_url,
+            'ext': 'flv',
+            'title': video_title,
+            'description': video_description
+        }
diff --git a/youtube_dl/extractor/appletrailers.py b/youtube_dl/extractor/appletrailers.py
new file mode 100644
index 000000000..8b191c196
--- /dev/null
+++ b/youtube_dl/extractor/appletrailers.py
@@ -0,0 +1,166 @@
+import re
+import xml.etree.ElementTree
+
+from .common import InfoExtractor
+from ..utils import (
+    determine_ext,
+)
+
+
+class AppleTrailersIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?trailers.apple.com/trailers/(?P<company>[^/]+)/(?P<movie>[^/]+)'
+    _TEST = {
+        u"url": u"http://trailers.apple.com/trailers/wb/manofsteel/",
+        u"playlist": [
+            {
+                u"file": u"manofsteel-trailer4.mov",
+                u"md5": u"11874af099d480cc09e103b189805d5f",
+                u"info_dict": {
+                    u"duration": 111,
+                    u"thumbnail": u"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_11624.jpg",
+                    u"title": u"Trailer 4",
+                    u"upload_date": u"20130523",
+                    u"uploader_id": u"wb",
+                },
+            },
+            {
+                u"file": u"manofsteel-trailer3.mov",
+                u"md5": u"07a0a262aae5afe68120eed61137ab34",
+                u"info_dict": {
+                    u"duration": 182,
+                    u"thumbnail": u"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_10793.jpg",
+                    u"title": u"Trailer 3",
+                    u"upload_date": u"20130417",
+                    u"uploader_id": u"wb",
+                },
+            },
+            {
+                u"file": u"manofsteel-trailer.mov",
+                u"md5": u"e401fde0813008e3307e54b6f384cff1",
+                u"info_dict": {
+                    u"duration": 148,
+                    u"thumbnail": u"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_8703.jpg",
+                    u"title": u"Trailer",
+                    u"upload_date": u"20121212",
+                    u"uploader_id": u"wb",
+                },
+            },
+            {
+                u"file": u"manofsteel-teaser.mov",
+                u"md5": u"76b392f2ae9e7c98b22913c10a639c97",
+                u"info_dict": {
+                    u"duration": 93,
+                    u"thumbnail": u"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_6899.jpg",
+                    u"title": u"Teaser",
+                    u"upload_date": u"20120721",
+                    u"uploader_id": u"wb",
+                },
+            }
+        ]
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        movie = mobj.group('movie')
+        uploader_id = mobj.group('company')
+
+        playlist_url = url.partition(u'?')[0] + u'/includes/playlists/web.inc'
+        playlist_snippet = self._download_webpage(playlist_url, movie)
+        playlist_cleaned = re.sub(r'(?s)<script>.*?</script>', u'', playlist_snippet)
+        playlist_html = u'<html>' + playlist_cleaned + u'</html>'
+
+        size_cache = {}
+
+        doc = xml.etree.ElementTree.fromstring(playlist_html)
+        playlist = []
+        for li in doc.findall('./div/ul/li'):
+            title = li.find('.//h3').text
+            video_id = movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', title).lower()
+            thumbnail = li.find('.//img').attrib['src']
+
+            date_el = li.find('.//p')
+            upload_date = None
+            m = re.search(r':\s?(?P<month>[0-9]{2})/(?P<day>[0-9]{2})/(?P<year>[0-9]{2})', date_el.text)
+            if m:
+                upload_date = u'20' + m.group('year') + m.group('month') + m.group('day')
+            runtime_el = date_el.find('./br')
+            m = re.search(r':\s?(?P<minutes>[0-9]+):(?P<seconds>[0-9]{1,2})', runtime_el.tail)
+            duration = None
+            if m:
+                duration = 60 * int(m.group('minutes')) + int(m.group('seconds'))
+
+            formats = []
+            for formats_el in li.findall('.//a'):
+                if formats_el.attrib['class'] != 'OverlayPanel':
+                    continue
+                target = formats_el.attrib['target']
+
+                format_code = formats_el.text
+                if 'Automatic' in format_code:
+                    continue
+
+                size_q = formats_el.attrib['href']
+                size_id = size_q.rpartition('#videos-')[2]
+                if size_id not in size_cache:
+                    size_url = url + size_q
+                    sizepage_html = self._download_webpage(
+                        size_url, movie,
+                        note=u'Downloading size info %s' % size_id,
+                        errnote=u'Error while downloading size info %s' % size_id,
+                    )
+                    _doc = xml.etree.ElementTree.fromstring(sizepage_html)
+                    size_cache[size_id] = _doc
+
+                sizepage_doc = size_cache[size_id]
+                links = sizepage_doc.findall('.//{http://www.w3.org/1999/xhtml}ul/{http://www.w3.org/1999/xhtml}li/{http://www.w3.org/1999/xhtml}a')
+                for vid_a in links:
+                    href = vid_a.get('href')
+                    if not href.endswith(target):
+                        continue
+                    detail_q = href.partition('#')[0]
+                    detail_url = url + '/' + detail_q
+
+                    m = re.match(r'includes/(?P<detail_id>[^/]+)/', detail_q)
+                    detail_id = m.group('detail_id')
+
+                    detail_html = self._download_webpage(
+                        detail_url, movie,
+                        note=u'Downloading detail %s %s' % (detail_id, size_id),
+                        errnote=u'Error while downloading detail %s %s' % (detail_id, size_id)
+                    )
+                    detail_doc = xml.etree.ElementTree.fromstring(detail_html)
+                    movie_link_el = detail_doc.find('.//{http://www.w3.org/1999/xhtml}a')
+                    assert movie_link_el.get('class') == 'movieLink'
+                    movie_link = movie_link_el.get('href').partition('?')[0].replace('_', '_h')
+                    ext = determine_ext(movie_link)
+                    assert ext == 'mov'
+
+                    formats.append({
+                        'format': format_code,
+                        'ext': ext,
+                        'url': movie_link,
+                    })
+
+            info = {
+                '_type': 'video',
+                'id': video_id,
+                'title': title,
+                'formats': formats,
+                'title': title,
+                'duration': duration,
+                'thumbnail': thumbnail,
+                'upload_date': upload_date,
+                'uploader_id': uploader_id,
+                'user_agent': 'QuickTime compatible (youtube-dl)',
+            }
+            # TODO: Remove when #980 has been merged
+            info['url'] = formats[-1]['url']
+            info['ext'] = formats[-1]['ext']
+
+            playlist.append(info)
+
+        return {
+            '_type': 'playlist',
+            'id': movie,
+            'entries': playlist,
+        }
diff --git a/youtube_dl/extractor/canalc2.py b/youtube_dl/extractor/canalc2.py
new file mode 100644
index 000000000..50832217a
--- /dev/null
+++ b/youtube_dl/extractor/canalc2.py
@@ -0,0 +1,35 @@
+# coding: utf-8
+import re
+
+from .common import InfoExtractor
+
+
+class Canalc2IE(InfoExtractor):
+    _IE_NAME = 'canalc2.tv'
+    _VALID_URL = r'http://.*?\.canalc2\.tv/video\.asp\?idVideo=(\d+)&voir=oui'
+
+    _TEST = {
+        u'url': u'http://www.canalc2.tv/video.asp?idVideo=12163&voir=oui',
+        u'file': u'12163.mp4',
+        u'md5': u'060158428b650f896c542dfbb3d6487f',
+        u'info_dict': {
+            u'title': u'Terrasses du Numérique'
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = re.match(self._VALID_URL, url).group(1)
+        webpage = self._download_webpage(url, video_id)
+        file_name = self._search_regex(
+            r"so\.addVariable\('file','(.*?)'\);",
+            webpage, 'file name')
+        video_url = 'http://vod-flash.u-strasbg.fr:8080/' + file_name
+
+        title = self._html_search_regex(
+            r'class="evenement8">(.*?)</a>', webpage, u'title')
+        
+        return {'id': video_id,
+                'ext': 'mp4',
+                'url': video_url,
+                'title': title,
+                }
diff --git a/youtube_dl/extractor/canalplus.py b/youtube_dl/extractor/canalplus.py
index 3b1c88876..1f02519a0 100644
--- a/youtube_dl/extractor/canalplus.py
+++ b/youtube_dl/extractor/canalplus.py
@@ -5,7 +5,7 @@ from .common import InfoExtractor
 from ..utils import unified_strdate
 
 class CanalplusIE(InfoExtractor):
-    _VALID_URL = r'https?://www\.canalplus\.fr/.*?\?vid=(?P<id>\d+)'
+    _VALID_URL = r'https?://(www\.canalplus\.fr/.*?\?vid=|player\.canalplus\.fr/#/)(?P<id>\d+)'
     _VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/cplus/%s'
     IE_NAME = u'canalplus.fr'
 
diff --git a/youtube_dl/extractor/cnn.py b/youtube_dl/extractor/cnn.py
new file mode 100644
index 000000000..a79f881cd
--- /dev/null
+++ b/youtube_dl/extractor/cnn.py
@@ -0,0 +1,58 @@
+import re
+import xml.etree.ElementTree
+
+from .common import InfoExtractor
+from ..utils import determine_ext
+
+
+class CNNIE(InfoExtractor):
+    _VALID_URL = r'''(?x)https?://(edition\.)?cnn\.com/video/(data/.+?|\?)/
+        (?P<path>.+?/(?P<title>[^/]+?)(?:\.cnn|(?=&)))'''
+
+    _TESTS = [{
+        u'url': u'http://edition.cnn.com/video/?/video/sports/2013/06/09/nadal-1-on-1.cnn',
+        u'file': u'sports_2013_06_09_nadal-1-on-1.cnn.mp4',
+        u'md5': u'3e6121ea48df7e2259fe73a0628605c4',
+        u'info_dict': {
+            u'title': u'Nadal wins 8th French Open title',
+            u'description': u'World Sport\'s Amanda Davies chats with 2013 French Open champion Rafael Nadal.',
+        },
+    },
+    {
+        u"url": u"http://edition.cnn.com/video/?/video/us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+rss%2Fcnn_topstories+%28RSS%3A+Top+Stories%29",
+        u"file": u"us_2013_08_21_sot-student-gives-epic-speech.georgia-institute-of-technology.mp4",
+        u"md5": u"b5cc60c60a3477d185af8f19a2a26f4e",
+        u"info_dict": {
+            u"title": "Student's epic speech stuns new freshmen",
+            u"description": "A Georgia Tech student welcomes the incoming freshmen with an epic speech backed by music from \"2001: A Space Odyssey.\""
+        }
+    }]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        path = mobj.group('path')
+        page_title = mobj.group('title')
+        info_url = u'http://cnn.com/video/data/3.0/%s/index.xml' % path
+        info_xml = self._download_webpage(info_url, page_title)
+        info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))
+
+        formats = []
+        for f in info.findall('files/file'):
+            mf = re.match(r'(\d+)x(\d+)(?:_(.*)k)?',f.attrib['bitrate'])
+            if mf is not None:
+                formats.append((int(mf.group(1)), int(mf.group(2)), int(mf.group(3) or 0), f.text))
+        formats = sorted(formats)
+        (_,_,_, video_path) = formats[-1]
+        video_url = 'http://ht.cdn.turner.com/cnn/big%s' % video_path
+
+        thumbnails = sorted([((int(t.attrib['height']),int(t.attrib['width'])), t.text) for t in info.findall('images/image')])
+        thumbs_dict = [{'resolution': res, 'url': t_url} for (res, t_url) in thumbnails]
+
+        return {'id': info.attrib['id'],
+                'title': info.find('headline').text,
+                'url': video_url,
+                'ext': determine_ext(video_url),
+                'thumbnail': thumbnails[-1][1],
+                'thumbnails': thumbs_dict,
+                'description': info.find('description').text,
+                }
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 52c4483c9..a2986cebe 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -114,6 +114,11 @@ class InfoExtractor(object):
         """Real extraction process. Redefine in subclasses."""
         pass
 
+    @classmethod
+    def ie_key(cls):
+        """A string for getting the InfoExtractor with get_info_extractor"""
+        return cls.__name__[:-2]
+
     @property
     def IE_NAME(self):
         return type(self).__name__[:-2]
@@ -129,7 +134,7 @@ class InfoExtractor(object):
         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
             if errnote is None:
                 errnote = u'Unable to download webpage'
-            raise ExtractorError(u'%s: %s' % (errnote, compat_str(err)), sys.exc_info()[2])
+            raise ExtractorError(u'%s: %s' % (errnote, compat_str(err)), sys.exc_info()[2], cause=err)
 
     def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None):
         """ Returns a tuple (page content as string, URL handle) """
@@ -140,12 +145,17 @@ class InfoExtractor(object):
 
         urlh = self._request_webpage(url_or_request, video_id, note, errnote)
         content_type = urlh.headers.get('Content-Type', '')
+        webpage_bytes = urlh.read()
         m = re.match(r'[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+\s*;\s*charset=(.+)', content_type)
         if m:
             encoding = m.group(1)
         else:
-            encoding = 'utf-8'
-        webpage_bytes = urlh.read()
+            m = re.search(br'<meta[^>]+charset="?([^"]+)[ /">]',
+                          webpage_bytes[:1024])
+            if m:
+                encoding = m.group(1).decode('ascii')
+            else:
+                encoding = 'utf-8'
         if self._downloader.params.get('dump_intermediate_pages', False):
             try:
                 url = url_or_request.get_full_url()
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index d034a11bb..dc4dea4ad 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -7,8 +7,8 @@ from .common import InfoExtractor
 from ..utils import (
     compat_urllib_error,
     compat_urllib_parse,
-    compat_urllib_parse_urlparse,
     compat_urllib_request,
+    compat_urlparse,
 
     ExtractorError,
 )
@@ -163,10 +163,7 @@ class GenericIE(InfoExtractor):
             raise ExtractorError(u'Invalid URL: %s' % url)
 
         video_url = compat_urllib_parse.unquote(mobj.group(1))
-        if video_url.startswith('//'):
-            video_url = compat_urllib_parse_urlparse(url).scheme + ':' + video_url
-        if '://' not in video_url:
-            video_url = url + ('' if url.endswith('/') else '/') + video_url
+        video_url = compat_urlparse.urljoin(url, video_url)
         video_id = os.path.basename(video_url)
 
         # here's a fun little line of code for you:
diff --git a/youtube_dl/extractor/googleplus.py b/youtube_dl/extractor/googleplus.py
index 9f7fc19a4..f1cd88983 100644
--- a/youtube_dl/extractor/googleplus.py
+++ b/youtube_dl/extractor/googleplus.py
@@ -57,8 +57,8 @@ class GooglePlusIE(InfoExtractor):
             webpage, 'title', default=u'NA')
 
         # Step 2, Simulate clicking the image box to launch video
-        DOMAIN = 'https://plus.google.com'
-        video_page = self._search_regex(r'<a href="((?:%s)?/photos/.*?)"' % re.escape(DOMAIN),
+        DOMAIN = 'https://plus.google.com/'
+        video_page = self._search_regex(r'<a href="((?:%s)?photos/.*?)"' % re.escape(DOMAIN),
             webpage, u'video page URL')
         if not video_page.startswith(DOMAIN):
             video_page = DOMAIN + video_page
diff --git a/youtube_dl/extractor/hark.py b/youtube_dl/extractor/hark.py
index ab0a69697..5bdd08afa 100644
--- a/youtube_dl/extractor/hark.py
+++ b/youtube_dl/extractor/hark.py
@@ -1,6 +1,7 @@
 # -*- coding: utf-8 -*-
 
 import re
+import json
 
 from .common import InfoExtractor
 from ..utils import determine_ext
@@ -12,24 +13,25 @@ class HarkIE(InfoExtractor):
         u'file': u'mmbzyhkgny.mp3',
         u'md5': u'6783a58491b47b92c7c1af5a77d4cbee',
         u'info_dict': {
-            u"title": u"Obama: 'Beyond The Afghan Theater, We Only Target Al Qaeda' On May 23, 2013 ",
+            u'title': u"Obama: 'Beyond The Afghan Theater, We Only Target Al Qaeda' on May 23, 2013",
+            u'description': u'President Barack Obama addressed the nation live on May 23, 2013 in a speech aimed at addressing counter-terrorism policies including the use of drone strikes, detainees at Guantanamo Bay prison facility, and American citizens who are terrorists.',
+            u'duration': 11,
         }
     }
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group(1)
-        embed_url = "http://www.hark.com/clips/%s/homepage_embed" %(video_id)
-        webpage = self._download_webpage(embed_url, video_id)
-
-        final_url = self._search_regex(r'src="(.+?).mp3"',
-                                webpage, 'video url')+'.mp3'
-        title = self._html_search_regex(r'<title>(.+?)</title>',
-                                webpage, 'video title').replace(' Sound Clip and Quote - Hark','').replace(
-                                'Sound Clip , Quote, MP3, and Ringtone - Hark','')
+        json_url = "http://www.hark.com/clips/%s.json" %(video_id)
+        info_json = self._download_webpage(json_url, video_id)
+        info = json.loads(info_json)
+        final_url = info['url']
 
         return {'id': video_id,
                 'url' : final_url,
-                'title': title,
+                'title': info['name'],
                 'ext': determine_ext(final_url),
+                'description': info['description'],
+                'thumbnail': info['image_original'],
+                'duration': info['duration'],
                 }
diff --git a/youtube_dl/extractor/kankan.py b/youtube_dl/extractor/kankan.py
index 8537ba584..445d46501 100644
--- a/youtube_dl/extractor/kankan.py
+++ b/youtube_dl/extractor/kankan.py
@@ -21,8 +21,10 @@ class KankanIE(InfoExtractor):
         video_id = mobj.group('id')
         webpage = self._download_webpage(url, video_id)
 
-        title = self._search_regex(r'G_TITLE=[\'"](.+?)[\'"]', webpage, u'video title')
-        gcid = self._search_regex(r'lurl:[\'"]http://.+?/.+?/(.+?)/', webpage, u'gcid')
+        title = self._search_regex(r'(?:G_TITLE=|G_MOVIE_TITLE = )[\'"](.+?)[\'"]', webpage, u'video title')
+        surls = re.search(r'surls:\[\'.+?\'\]|lurl:\'.+?\.flv\'', webpage).group(0)
+        gcids = re.findall(r"http://.+?/.+?/(.+?)/", surls)
+        gcid = gcids[-1]
 
         video_info_page = self._download_webpage('http://p2s.cl.kankan.com/getCdnresource_flv?gcid=%s' % gcid,
                                                  video_id, u'Downloading video url info')
diff --git a/youtube_dl/extractor/mit.py b/youtube_dl/extractor/mit.py
new file mode 100644
index 000000000..d09d03e36
--- /dev/null
+++ b/youtube_dl/extractor/mit.py
@@ -0,0 +1,76 @@
+import re
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+    clean_html,
+    get_element_by_id,
+)
+
+
+class TechTVMITIE(InfoExtractor):
+    IE_NAME = u'techtv.mit.edu'
+    _VALID_URL = r'https?://techtv\.mit\.edu/(videos|embeds)/(?P<id>\d+)'
+
+    _TEST = {
+        u'url': u'http://techtv.mit.edu/videos/25418-mit-dna-learning-center-set',
+        u'file': u'25418.mp4',
+        u'md5': u'1f8cb3e170d41fd74add04d3c9330e5f',
+        u'info_dict': {
+            u'title': u'MIT DNA Learning Center Set',
+            u'description': u'md5:82313335e8a8a3f243351ba55bc1b474',
+        },
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        webpage = self._download_webpage(
+            'http://techtv.mit.edu/videos/%s' % video_id, video_id)
+        embed_page = self._download_webpage(
+            'http://techtv.mit.edu/embeds/%s/' % video_id, video_id,
+            note=u'Downloading embed page')
+
+        base_url = self._search_regex(r'ipadUrl: \'(.+?cloudfront.net/)',
+            embed_page, u'base url')
+        formats_json = self._search_regex(r'bitrates: (\[.+?\])', embed_page,
+            u'video formats')
+        formats = json.loads(formats_json)
+        formats = sorted(formats, key=lambda f: f['bitrate'])
+
+        title = get_element_by_id('edit-title', webpage)
+        description = clean_html(get_element_by_id('edit-description', webpage))
+        thumbnail = self._search_regex(r'playlist:.*?url: \'(.+?)\'',
+            embed_page, u'thumbnail', flags=re.DOTALL)
+
+        return {'id': video_id,
+                'title': title,
+                'url': base_url + formats[-1]['url'].replace('mp4:', ''),
+                'ext': 'mp4',
+                'description': description,
+                'thumbnail': thumbnail,
+                }
+
+
+class MITIE(TechTVMITIE):
+    IE_NAME = u'video.mit.edu'
+    _VALID_URL = r'https?://video\.mit\.edu/watch/(?P<title>[^/]+)'
+
+    _TEST = {
+        u'url': u'http://video.mit.edu/watch/the-government-is-profiling-you-13222/',
+        u'file': u'21783.mp4',
+        u'md5': u'7db01d5ccc1895fc5010e9c9e13648da',
+        u'info_dict': {
+            u'title': u'The Government is Profiling You',
+            u'description': u'md5:ad5795fe1e1623b73620dbfd47df9afd',
+        },
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        page_title = mobj.group('title')
+        webpage = self._download_webpage(url, page_title)
+        self.to_screen('%s: Extracting %s url' % (page_title, TechTVMITIE.IE_NAME))
+        embed_url = self._search_regex(r'<iframe .*?src="(.+?)"', webpage,
+            u'embed url')
+        return self.url_result(embed_url, ie='TechTVMIT')
diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py
new file mode 100644
index 000000000..3bc9dae6d
--- /dev/null
+++ b/youtube_dl/extractor/nbc.py
@@ -0,0 +1,33 @@
+import re
+import xml.etree.ElementTree
+
+from .common import InfoExtractor
+from ..utils import find_xpath_attr, compat_str
+
+
+class NBCNewsIE(InfoExtractor):
+    _VALID_URL = r'https?://www\.nbcnews\.com/video/.+?/(?P<id>\d+)'
+
+    _TEST = {
+        u'url': u'http://www.nbcnews.com/video/nbc-news/52753292',
+        u'file': u'52753292.flv',
+        u'md5': u'47abaac93c6eaf9ad37ee6c4463a5179',
+        u'info_dict': {
+            u'title': u'Crew emerges after four-month Mars food study',
+            u'description': u'md5:24e632ffac72b35f8b67a12d1b6ddfc1',
+        },
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        info_xml = self._download_webpage('http://www.nbcnews.com/id/%s/displaymode/1219' % video_id, video_id)
+        info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8')).find('video')
+
+        return {'id': video_id,
+                'title': info.find('headline').text,
+                'ext': 'flv',
+                'url': find_xpath_attr(info, 'media', 'type', 'flashVideo').text,
+                'description': compat_str(info.find('caption').text),
+                'thumbnail': find_xpath_attr(info, 'media', 'type', 'thumbnail').text,
+                }
diff --git a/youtube_dl/extractor/sohu.py b/youtube_dl/extractor/sohu.py
new file mode 100644
index 000000000..77bb0a8dc
--- /dev/null
+++ b/youtube_dl/extractor/sohu.py
@@ -0,0 +1,90 @@
+# encoding: utf-8
+
+import json
+import re
+
+from .common import InfoExtractor
+from ..utils import ExtractorError
+
+
+class SohuIE(InfoExtractor):
+    _VALID_URL = r'https?://tv\.sohu\.com/\d+?/n(?P<id>\d+)\.shtml.*?'
+
+    _TEST = {
+        u'url': u'http://tv.sohu.com/20130724/n382479172.shtml#super',
+        u'file': u'382479172.mp4',
+        u'md5': u'bde8d9a6ffd82c63a1eefaef4eeefec7',
+        u'info_dict': {
+            u'title': u'MV：Far East Movement《The Illest》',
+        },
+    }
+
+    def _real_extract(self, url):
+
+        def _fetch_data(vid_id):
+            base_data_url = u'http://hot.vrs.sohu.com/vrs_flash.action?vid='
+            data_url = base_data_url + str(vid_id)
+            data_json = self._download_webpage(
+                data_url, video_id,
+                note=u'Downloading JSON data for ' + str(vid_id))
+            return json.loads(data_json)
+
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, video_id)
+        raw_title = self._html_search_regex(r'(?s)<title>(.+?)</title>',
+                                            webpage, u'video title')
+        title = raw_title.partition('-')[0].strip()
+
+        vid = self._html_search_regex(r'var vid="(\d+)"', webpage,
+                                      u'video path')
+        data = _fetch_data(vid)
+
+        QUALITIES = ('ori', 'super', 'high', 'nor')
+        vid_ids = [data['data'][q + 'Vid']
+                   for q in QUALITIES
+                   if data['data'][q + 'Vid'] != 0]
+        if not vid_ids:
+            raise ExtractorError(u'No formats available for this video')
+
+        # For now, we just pick the highest available quality
+        vid_id = vid_ids[-1]
+
+        format_data = data if vid == vid_id else _fetch_data(vid_id)
+        part_count = format_data['data']['totalBlocks']
+        allot = format_data['allot']
+        prot = format_data['prot']
+        clipsURL = format_data['data']['clipsURL']
+        su = format_data['data']['su']
+
+        playlist = []
+        for i in range(part_count):
+            part_url = ('http://%s/?prot=%s&file=%s&new=%s' %
+                        (allot, prot, clipsURL[i], su[i]))
+            part_str = self._download_webpage(
+                part_url, video_id,
+                note=u'Downloading part %d of %d' % (i+1, part_count))
+
+            part_info = part_str.split('|')
+            video_url = '%s%s?key=%s' % (part_info[0], su[i], part_info[3])
+
+            video_info = {
+                'id': '%s_part%02d' % (video_id, i + 1),
+                'title': title,
+                'url': video_url,
+                'ext': 'mp4',
+            }
+            playlist.append(video_info)
+
+        if len(playlist) == 1:
+            info = playlist[0]
+            info['id'] = video_id
+        else:
+            info = {
+                '_type': 'playlist',
+                'entries': playlist,
+                'id': video_id,
+            }
+
+        return info
diff --git a/youtube_dl/extractor/trilulilu.py b/youtube_dl/extractor/trilulilu.py
new file mode 100644
index 000000000..f278951ba
--- /dev/null
+++ b/youtube_dl/extractor/trilulilu.py
@@ -0,0 +1,73 @@
+import json
+import re
+import xml.etree.ElementTree
+
+from .common import InfoExtractor
+
+
+class TriluliluIE(InfoExtractor):
+    _VALID_URL = r'(?x)(?:https?://)?(?:www\.)?trilulilu\.ro/video-(?P<category>[^/]+)/(?P<video_id>[^/]+)'
+    _TEST = {
+        u"url": u"http://www.trilulilu.ro/video-animatie/big-buck-bunny-1",
+        u'file': u"big-buck-bunny-1.mp4",
+        u'info_dict': {
+            u"title": u"Big Buck Bunny",
+            u"description": u":) pentru copilul din noi",
+        },
+        # Server ignores Range headers (--test)
+        u"params": {
+            u"skip_download": True
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('video_id')
+
+        webpage = self._download_webpage(url, video_id)
+
+        title = self._og_search_title(webpage)
+        thumbnail = self._og_search_thumbnail(webpage)
+        description = self._og_search_description(webpage)
+
+        log_str = self._search_regex(
+            r'block_flash_vars[ ]=[ ]({[^}]+})', webpage, u'log info')
+        log = json.loads(log_str)
+
+        format_url = (u'http://fs%(server)s.trilulilu.ro/%(hash)s/'
+                      u'video-formats2' % log)
+        format_str = self._download_webpage(
+            format_url, video_id,
+            note=u'Downloading formats',
+            errnote=u'Error while downloading formats')
+
+        format_doc = xml.etree.ElementTree.fromstring(format_str)
+ 
+        video_url_template = (
+            u'http://fs%(server)s.trilulilu.ro/stream.php?type=video'
+            u'&source=site&hash=%(hash)s&username=%(userid)s&'
+            u'key=ministhebest&format=%%s&sig=&exp=' %
+            log)
+        formats = [
+            {
+                'format': fnode.text,
+                'url': video_url_template % fnode.text,
+            }
+
+            for fnode in format_doc.findall('./formats/format')
+        ]
+
+        info = {
+            '_type': 'video',
+            'id': video_id,
+            'formats': formats,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+        }
+
+        # TODO: Remove when #980 has been merged
+        info['url'] = formats[-1]['url']
+        info['ext'] = formats[-1]['format'].partition('-')[0]
+
+        return info
diff --git a/youtube_dl/extractor/wat.py b/youtube_dl/extractor/wat.py
index 7d228edac..29c25f0e3 100644
--- a/youtube_dl/extractor/wat.py
+++ b/youtube_dl/extractor/wat.py
@@ -6,7 +6,6 @@ import re
 from .common import InfoExtractor
 
 from ..utils import (
-    compat_urllib_parse,
     unified_strdate,
 )
 
diff --git a/youtube_dl/extractor/youporn.py b/youtube_dl/extractor/youporn.py
index d1156bf42..c85fd4b5a 100644
--- a/youtube_dl/extractor/youporn.py
+++ b/youtube_dl/extractor/youporn.py
@@ -12,14 +12,16 @@ from ..utils import (
     unescapeHTML,
     unified_strdate,
 )
-
+from ..aes import (
+    aes_decrypt_text
+)
 
 class YouPornIE(InfoExtractor):
     _VALID_URL = r'^(?:https?://)?(?:\w+\.)?youporn\.com/watch/(?P<videoid>[0-9]+)/(?P<title>[^/]+)'
     _TEST = {
         u'url': u'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/',
         u'file': u'505835.mp4',
-        u'md5': u'c37ddbaaa39058c76a7e86c6813423c1',
+        u'md5': u'71ec5fcfddacf80f495efa8b6a8d9a89',
         u'info_dict': {
             u"upload_date": u"20101221", 
             u"description": u"Love & Sex Answers: http://bit.ly/DanAndJenn -- Is It Unhealthy To Masturbate Daily?", 
@@ -75,7 +77,15 @@ class YouPornIE(InfoExtractor):
         # Get all of the links from the page
         LINK_RE = r'(?s)<a href="(?P<url>[^"]+)">'
         links = re.findall(LINK_RE, download_list_html)
-        if(len(links) == 0):
+        
+        # Get link of hd video if available
+        mobj = re.search(r'var encryptedQuality720URL = \'(?P<encrypted_video_url>[a-zA-Z0-9+/]+={0,2})\';', webpage)
+        if mobj != None:
+            encrypted_video_url = mobj.group(u'encrypted_video_url')
+            video_url = aes_decrypt_text(encrypted_video_url, video_title, 32).decode('utf-8')
+            links = [video_url] + links
+        
+        if not links:
             raise ExtractorError(u'ERROR: no known formats available for video')
 
         self.to_screen(u'Links found: %d' % len(links))
@@ -112,7 +122,7 @@ class YouPornIE(InfoExtractor):
             self._print_formats(formats)
             return
 
-        req_format = self._downloader.params.get('format', None)
+        req_format = self._downloader.params.get('format', 'best')
         self.to_screen(u'Format: %s' % req_format)
 
         if req_format is None or req_format == 'best':
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index af01c9da0..8e486afd0 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -419,7 +419,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         elif len(s) == 89:
             return s[84:78:-1] + s[87] + s[77:60:-1] + s[0] + s[59:3:-1]
         elif len(s) == 88:
-            return s[48] + s[81:67:-1] + s[82] + s[66:62:-1] + s[85] + s[61:48:-1] + s[67] + s[47:12:-1] + s[3] + s[11:3:-1] + s[2] + s[12]
+            return s[7:28] + s[87] + s[29:45] + s[55] + s[46:55] + s[2] + s[56:87] + s[28]
         elif len(s) == 87:
             return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:]
         elif len(s) == 86:
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index ab1049cc0..b3d0f64ea 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -1,19 +1,20 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 
+import datetime
+import email.utils
 import errno
 import gzip
 import io
 import json
 import locale
 import os
+import platform
 import re
+import socket
 import sys
 import traceback
 import zlib
-import email.utils
-import socket
-import datetime
 
 try:
     import urllib.request as compat_urllib_request
@@ -61,6 +62,11 @@ except ImportError: # Python 2
     import httplib as compat_http_client
 
 try:
+    from urllib.error import HTTPError as compat_HTTPError
+except ImportError:  # Python 2
+    from urllib2 import HTTPError as compat_HTTPError
+
+try:
     from subprocess import DEVNULL
     compat_subprocess_get_DEVNULL = lambda: DEVNULL
 except ImportError:
@@ -476,7 +482,7 @@ def formatSeconds(secs):
 def make_HTTPS_handler(opts):
     if sys.version_info < (3,2):
         # Python's 2.x handler is very simplistic
-        return YoutubeDLHandlerHTTPS()
+        return compat_urllib_request.HTTPSHandler()
     else:
         import ssl
         context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
@@ -485,11 +491,11 @@ def make_HTTPS_handler(opts):
         context.verify_mode = (ssl.CERT_NONE
                                if opts.no_check_certificate
                                else ssl.CERT_REQUIRED)
-        return YoutubeDLHandlerHTTPS(context=context)
+        return compat_urllib_request.HTTPSHandler(context=context)
 
 class ExtractorError(Exception):
     """Error during info extraction."""
-    def __init__(self, msg, tb=None, expected=False):
+    def __init__(self, msg, tb=None, expected=False, cause=None):
         """ tb, if given, is the original traceback (so that it can be printed out).
         If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
         """
@@ -502,6 +508,7 @@ class ExtractorError(Exception):
 
         self.traceback = tb
         self.exc_info = sys.exc_info()  # preserve original exception
+        self.cause = cause
 
     def format_traceback(self):
         if self.traceback is None:
@@ -569,8 +576,7 @@ class ContentTooShortError(Exception):
         self.downloaded = downloaded
         self.expected = expected
 
-
-class YoutubeDLHandler_Template:  # Old-style class, like HTTPHandler
+class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
     """Handler for HTTP requests and responses.
 
     This class, when installed with an OpenerDirector, automatically adds
@@ -603,8 +609,8 @@ class YoutubeDLHandler_Template:  # Old-style class, like HTTPHandler
         ret.code = code
         return ret
 
-    def _http_request(self, req):
-        for h, v in std_headers.items():
+    def http_request(self, req):
+        for h,v in std_headers.items():
             if h in req.headers:
                 del req.headers[h]
             req.add_header(h, v)
@@ -619,12 +625,27 @@ class YoutubeDLHandler_Template:  # Old-style class, like HTTPHandler
             del req.headers['Youtubedl-user-agent']
         return req
 
-    def _http_response(self, req, resp):
+    def http_response(self, req, resp):
         old_resp = resp
         # gzip
         if resp.headers.get('Content-encoding', '') == 'gzip':
-            gz = gzip.GzipFile(fileobj=io.BytesIO(resp.read()), mode='r')
-            resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
+            content = resp.read()
+            gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
+            try:
+                uncompressed = io.BytesIO(gz.read())
+            except IOError as original_ioerror:
+                # There may be junk add the end of the file
+                # See http://stackoverflow.com/q/4928560/35070 for details
+                for i in range(1, 1024):
+                    try:
+                        gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
+                        uncompressed = io.BytesIO(gz.read())
+                    except IOError:
+                        continue
+                    break
+                else:
+                    raise original_ioerror
+            resp = self.addinfourl_wrapper(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
             resp.msg = old_resp.msg
         # deflate
         if resp.headers.get('Content-encoding', '') == 'deflate':
@@ -633,16 +654,8 @@ class YoutubeDLHandler_Template:  # Old-style class, like HTTPHandler
             resp.msg = old_resp.msg
         return resp
 
-
-class YoutubeDLHandler(YoutubeDLHandler_Template, compat_urllib_request.HTTPHandler):
-    http_request = YoutubeDLHandler_Template._http_request
-    http_response = YoutubeDLHandler_Template._http_response
-
-
-class YoutubeDLHandlerHTTPS(YoutubeDLHandler_Template, compat_urllib_request.HTTPSHandler):
-    https_request = YoutubeDLHandler_Template._http_request
-    https_response = YoutubeDLHandler_Template._http_response
-
+    https_request = http_request
+    https_response = http_response
 
 def unified_strdate(date_str):
     """Return a string with the date in the format YYYYMMDD"""
@@ -720,3 +733,31 @@ class DateRange(object):
         return self.start <= date <= self.end
     def __str__(self):
         return '%s - %s' % ( self.start.isoformat(), self.end.isoformat())
+
+
+def platform_name():
+    """ Returns the platform name as a compat_str """
+    res = platform.platform()
+    if isinstance(res, bytes):
+        res = res.decode(preferredencoding())
+
+    assert isinstance(res, compat_str)
+    return res
+
+
+def bytes_to_intlist(bs):
+    if not bs:
+        return []
+    if isinstance(bs[0], int):  # Python 3
+        return list(bs)
+    else:
+        return [ord(c) for c in bs]
+
+
+def intlist_to_bytes(xs):
+    if not xs:
+        return b''
+    if isinstance(chr(0), bytes):  # Python 2
+        return ''.join([chr(x) for x in xs])
+    else:
+        return bytes(xs)
diff --git a/youtube_dl/version.py b/youtube_dl/version.py
index c10ebd4e8..0b56e48dc 100644
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,2 +1,2 @@
 
-__version__ = '2013.08.23'
+__version__ = '2013.08.28'
author	Allan Zhou <allanzp@gmail.com>	2013-08-28 09:57:28 -0700
committer	Allan Zhou <allanzp@gmail.com>	2013-08-28 09:57:28 -0700
commit	591078babff1d783bed872c5b441dc570d354448 (patch)
tree	6cf813c993ea26eaec1d976b616d973c2bc47583
parent	99859d436cdee9acc9c869254e734eba5b748260 (diff)
parent	9868c781a1bb3f50385bc7d1e87d82080ffffbc6 (diff)
download	youtube-dl-591078babff1d783bed872c5b441dc570d354448.tar.gz youtube-dl-591078babff1d783bed872c5b441dc570d354448.tar.xz youtube-dl-591078babff1d783bed872c5b441dc570d354448.zip