summary refs log tree commit diff
diff options
context:
space:
mode:
author Philipp Hagemeister <phihag@phihag.de> 2013-09-22 00:35:03 +0200
committer Philipp Hagemeister <phihag@phihag.de> 2013-09-22 00:35:03 +0200
commit c4417ddb611e14b81fe56b6b32964c5802faf554 (patch)
tree b48e51a91bd0c01e641bda652e6bcfa0ae39c238
parent 2f2ffea9cad7d30165a0171bf6e662bef2182ab4 (diff)
download youtube-dl-c4417ddb611e14b81fe56b6b32964c5802faf554.tar.gz
youtube-dl-c4417ddb611e14b81fe56b6b32964c5802faf554.tar.xz
youtube-dl-c4417ddb611e14b81fe56b6b32964c5802faf554.zip
[youtube] Add filesystem signature cache
-rw-r--r-- youtube_dl/FileDownloader.py 2
-rw-r--r-- youtube_dl/extractor/youtube.py 35
2 files changed, 30 insertions, 7 deletions
diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py
index 0b5a5d77d..1eb71a80e 100644
--- a/youtube_dl/FileDownloader.py
+++ b/youtube_dl/FileDownloader.py
@@ -39,6 +39,8 @@ class FileDownloader(object):
     test:              Download only first bytes to test the downloader.
     min_filesize:      Skip files smaller than this size
     max_filesize:      Skip files larger than this size
+    cachedir:          Location of the cache files in the filesystem.
+                       False to disable filesystem cache.
     """
 
     params = None
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 5c0ea2e43..63f59ae8f 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -4,8 +4,10 @@ import collections
 import itertools
 import io
 import json
-import netrc
+import operator
+import os.path
 import re
+import shutil
 import socket
 import string
 import struct
@@ -422,13 +424,28 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
         """Indicate the download will use the RTMP protocol."""
         self.to_screen(u'RTMP download detected')
 
-    def _extract_signature_function(self, video_id, player_url):
-        id_m = re.match(r'.*-(?P<id>[a-zA-Z0-9]+)\.(?P<ext>[a-z]+)$',
+    def _extract_signature_function(self, video_id, player_url, slen):
+        id_m = re.match(r'.*-(?P<id>[a-zA-Z0-9_-]+)\.(?P<ext>[a-z]+)$',
                         player_url)
         player_type = id_m.group('ext')
         player_id = id_m.group('id')
 
-        # TODO read from filesystem cache
+        # Read from filesystem cache
+        func_id = '%s_%s_%d' % (player_type, player_id, slen)
+        assert os.path.basename(func_id) == func_id
+        cache_dir = self.downloader.params.get('cachedir',
+                                               u'~/.youtube-dl/cache')
+
+        if cache_dir is not False:
+            cache_fn = os.path.join(os.path.expanduser(cache_dir),
+                                    u'youtube-sigfuncs',
+                                    func_id + '.json')
+            try:
+                with io.open(cache_fn, '', encoding='utf-8') as cachef:
+                    cache_spec = json.load(cachef)
+                return lambda s: u''.join(s[i] for i in cache_spec)
+            except OSError:
+                pass  # No cache available
 
         if player_type == 'js':
             code = self._download_webpage(
@@ -436,7 +453,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                 note=u'Downloading %s player %s' % (player_type, player_id),
                 errnote=u'Download of %s failed' % player_url)
             res = self._parse_sig_js(code)
-        elif player_tpye == 'swf':
+        elif player_type == 'swf':
             urlh = self._request_webpage(
                 player_url, video_id,
                 note=u'Downloading %s player %s' % (player_type, player_id),
@@ -446,7 +463,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
         else:
             assert False, 'Invalid player type %r' % player_type
 
-        # TODO write cache
+        if cache_dir is not False:
+            cache_res = res(map(compat_chr, range(slen)))
+            cache_spec = [ord(c) for c in cache_res]
+            shutil.makedirs(os.path.dirname(cache_fn))
+            write_json_file(cache_spec, cache_fn)
 
         return res
 
@@ -983,7 +1004,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
             try:
                 if player_url not in self._player_cache:
                     func = self._extract_signature_function(
-                        video_id, player_url
+                        video_id, player_url, len(s)
                     )
                     self._player_cache[player_url] = func
                 return self._player_cache[player_url](s)