summary refs log tree commit diff
diff options
context:
space:
mode:
author Yen Chi Hsuan <yan12125@gmail.com> 2017-05-26 21:58:18 +0800
committer Yen Chi Hsuan <yan12125@gmail.com> 2017-05-26 21:58:18 +0800
commit 5552c9eb0fece567f7dda13810939fca32d7d65a (patch)
tree f3bde11319d42fbe7013dc1326c83ad799c39892
parent 59ed87cbd9ea08c889514a05b646141004f432a1 (diff)
download youtube-dl-5552c9eb0fece567f7dda13810939fca32d7d65a.tar.gz
youtube-dl-5552c9eb0fece567f7dda13810939fca32d7d65a.tar.xz
youtube-dl-5552c9eb0fece567f7dda13810939fca32d7d65a.zip
[utils] Recognize more patterns in strip_jsonp()
Used in Youku Show pages
-rw-r--r-- ChangeLog 1
-rw-r--r-- test/test_utils.py 8
-rw-r--r-- youtube_dl/utils.py 7
3 files changed, 15 insertions, 1 deletions
diff --git a/ChangeLog b/ChangeLog
index 6a05657ab..d6e980c5a 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,6 +1,7 @@
 version <unreleased>
 
 Core
++ [utils] strip_jsonp() can recognize more patterns
 * [postprocessor/ffmpeg] Fix metadata filename handling on Python 2 (#13182)
 
 Extractors
diff --git a/test/test_utils.py b/test/test_utils.py
index f31559e71..d7e05817c 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -678,6 +678,14 @@ class TestUtil(unittest.TestCase):
         d = json.loads(stripped)
         self.assertEqual(d, {'status': 'success'})
 
+        stripped = strip_jsonp('window.cb && window.cb({"status": "success"});')
+        d = json.loads(stripped)
+        self.assertEqual(d, {'status': 'success'})
+
+        stripped = strip_jsonp('window.cb && cb({"status": "success"});')
+        d = json.loads(stripped)
+        self.assertEqual(d, {'status': 'success'})
+
     def test_uppercase_escape(self):
         self.assertEqual(uppercase_escape('aä'), 'aä')
         self.assertEqual(uppercase_escape('\\U0001d550'), '𝕐')
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 4293a77f5..6c84bfe0f 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -2211,7 +2211,12 @@ def parse_age_limit(s):
 
 def strip_jsonp(code):
     return re.sub(
-        r'(?s)^[a-zA-Z0-9_.$]+\s*\(\s*(.*)\);?\s*?(?://[^\n]*)*$', r'\1', code)
+        r'''(?sx)^
+            (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]+)
+            (?:\s*&&\s*(?P=func_name))?
+            \s*\(\s*(?P<callback_data>.*)\);?
+            \s*?(?://[^\n]*)*$''',
+        r'\g<callback_data>', code)
 
 
 def js_to_json(code):