about summary refs log tree commit diff
diff options
context:
space:
mode:
author dirkf <fieldhouse@gmx.net> 2022-08-31 23:22:48 +0100
committer dirkf <fieldhouse@gmx.net> 2022-09-01 10:57:12 +0100
commit 55c823634db890a328ffc23588fcd6f35d9b3ddf (patch)
tree 22d6c4e803eff07cb82a23f9b4b519b10b504b5b
parent 4050e10a4c3445c5399239567eb074acb2f65c18 (diff)
download youtube-dl-55c823634db890a328ffc23588fcd6f35d9b3ddf.tar.gz
youtube-dl-55c823634db890a328ffc23588fcd6f35d9b3ddf.tar.xz
youtube-dl-55c823634db890a328ffc23588fcd6f35d9b3ddf.zip
[jsinterp] Handle new YT players 113ca41c, c57c113c
* add NaN
* allow any white-space character for `after_op`
* align with yt-dlp f26af78a8ac11d9d617ed31ea5282cfaa5bcbcfa (charCodeAt and bitwise overflow)
* allow escaping in regex, fixing player c57c113c
-rw-r--r-- test/test_jsinterp.py 21
-rw-r--r-- test/test_youtube_signature.py 16
-rw-r--r-- youtube_dl/jsinterp.py 46
3 files changed, 65 insertions, 18 deletions
diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py
index fb4882d00..5121c8cf8 100644
--- a/test/test_jsinterp.py
+++ b/test/test_jsinterp.py
@@ -136,6 +136,11 @@ class TestJSInterpreter(unittest.TestCase):
 
     def test_builtins(self):
         jsi = JSInterpreter('''
+        function x() { return NaN }
+        ''')
+        self.assertTrue(math.isnan(jsi.call_function('x')))
+
+        jsi = JSInterpreter('''
         function x() { return new Date('Wednesday 31 December 1969 18:01:26 MDT') - 0; }
         ''')
         self.assertEqual(jsi.call_function('x'), 86000)
@@ -385,6 +390,22 @@ class TestJSInterpreter(unittest.TestCase):
         ''')
         self.assertEqual(jsi.call_function('x').flags & ~re.U, re.I)
 
+    def test_char_code_at(self):
+        jsi = JSInterpreter('function x(i){return "test".charCodeAt(i)}')
+        self.assertEqual(jsi.call_function('x', 0), 116)
+        self.assertEqual(jsi.call_function('x', 1), 101)
+        self.assertEqual(jsi.call_function('x', 2), 115)
+        self.assertEqual(jsi.call_function('x', 3), 116)
+        self.assertEqual(jsi.call_function('x', 4), None)
+        self.assertEqual(jsi.call_function('x', 'not_a_number'), 116)
+
+    def test_bitwise_operators_overflow(self):
+        jsi = JSInterpreter('function x(){return -524999584 << 5}')
+        self.assertEqual(jsi.call_function('x'), 379882496)
+
+        jsi = JSInterpreter('function x(){return 1236566549 << 5}')
+        self.assertEqual(jsi.call_function('x'), 915423904)
+
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py
index 4bb0a30b0..ec914a871 100644
--- a/test/test_youtube_signature.py
+++ b/test/test_youtube_signature.py
@@ -112,9 +112,25 @@ _NSIG_TESTS = [
         'batNX7sYqIJdkJ', 'IhOkL_zxbkOZBw',
     ),
     (
+        'https://www.youtube.com/s/player/009f1d77/player_ias.vflset/en_US/base.js',
+        '5dwFHw8aFWQUQtffRq', 'audescmLUzI3jw',
+    ),
+    (
         'https://www.youtube.com/s/player/dc0c6770/player_ias.vflset/en_US/base.js',
         '5EHDMgYLV6HPGk_Mu-kk', 'n9lUJLHbxUI0GQ',
     ),
+    (
+        'https://www.youtube.com/s/player/c2199353/player_ias.vflset/en_US/base.js',
+        '5EHDMgYLV6HPGk_Mu-kk', 'AD5rgS85EkrE7',
+    ),
+    (
+        'https://www.youtube.com/s/player/113ca41c/player_ias.vflset/en_US/base.js',
+        'cgYl-tlYkhjT7A', 'hI7BBr2zUgcmMg',
+    ),
+    (
+        'https://www.youtube.com/s/player/c57c113c/player_ias.vflset/en_US/base.js',
+        '-Txvy6bT5R6LqgnQNx', 'dcklJCnRUHbgSg',
+    ),
 ]
 
 
diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py
index 08726e478..d13329396 100644
--- a/youtube_dl/jsinterp.py
+++ b/youtube_dl/jsinterp.py
@@ -23,10 +23,11 @@ from .compat import (
 
 def _js_bit_op(op):
 
+    def zeroise(x):
+        return 0 if x in (None, JS_Undefined) else x
+
     def wrapped(a, b):
-        def zeroise(x):
-            return 0 if x in (None, JS_Undefined) else x
-        return op(zeroise(a), zeroise(b))
+        return op(zeroise(a), zeroise(b)) & 0xffffffff
 
     return wrapped
 
@@ -44,7 +45,7 @@ def _js_arith_op(op):
 def _js_div(a, b):
     if JS_Undefined in (a, b) or not (a and b):
         return float('nan')
-    return float('inf') if not b else operator.truediv(a or 0, b)
+    return operator.truediv(a or 0, b) if b else float('inf')
 
 
 def _js_mod(a, b):
@@ -260,13 +261,14 @@ class JSInterpreter(object):
                     counters[_MATCHING_PARENS[char]] += 1
                 elif char in counters:
                     counters[char] -= 1
-            if not escaping and char in _QUOTES and in_quote in (char, None):
-                if in_quote or after_op or char != '/':
-                    in_quote = None if in_quote and not in_regex_char_group else char
-            elif in_quote == '/' and char in '[]':
-                in_regex_char_group = char == '['
+            if not escaping:
+                if char in _QUOTES and in_quote in (char, None):
+                    if in_quote or after_op or char != '/':
+                        in_quote = None if in_quote and not in_regex_char_group else char
+                elif in_quote == '/' and char in '[]':
+                    in_regex_char_group = char == '['
             escaping = not escaping and in_quote and char == '\\'
-            after_op = not in_quote and char in cls.OP_CHARS or (char == ' ' and after_op)
+            after_op = not in_quote and (char in cls.OP_CHARS or (char.isspace() and after_op))
 
             if char != delim[pos] or any(counters.values()) or in_quote:
                 pos = skipping = 0
@@ -590,6 +592,8 @@ class JSInterpreter(object):
 
         elif expr == 'undefined':
             return JS_Undefined, should_return
+        elif expr == 'NaN':
+            return float('NaN'), should_return
 
         elif md.get('return'):
             return local_vars[m.group('name')], should_return
@@ -635,7 +639,8 @@ class JSInterpreter(object):
             def assertion(cndn, msg):
                 """ assert, but without risk of getting optimized out """
                 if not cndn:
-                    raise ExtractorError('{member} {msg}'.format(**locals()), expr=expr)
+                    memb = member
+                    raise self.Exception('{member} {msg}'.format(**locals()), expr=expr)
 
             def eval_method():
                 if (variable, member) == ('console', 'debug'):
@@ -737,6 +742,13 @@ class JSInterpreter(object):
                         return obj.index(idx, start)
                     except ValueError:
                         return -1
+                elif member == 'charCodeAt':
+                    assertion(isinstance(obj, compat_str), 'must be applied on a string')
+                    # assertion(len(argvals) == 1, 'takes exactly one argument') # but not enforced
+                    idx = argvals[0] if isinstance(argvals[0], int) else 0
+                    if idx >= len(obj):
+                        return None
+                    return ord(obj[idx])
 
                 idx = int(member) if isinstance(obj, list) else member
                 return obj[idx](argvals, allow_recursion=allow_recursion)
@@ -820,12 +832,10 @@ class JSInterpreter(object):
             if mobj is None:
                 break
             start, body_start = mobj.span()
-            body, remaining = self._separate_at_paren(code[body_start - 1:])
-            name = self._named_object(
-                local_vars,
-                self.extract_function_from_code(
-                    self.build_arglist(mobj.group('args')),
-                    body, local_vars, *global_stack))
+            body, remaining = self._separate_at_paren(code[body_start - 1:], '}')
+            name = self._named_object(local_vars, self.extract_function_from_code(
+                [x.strip() for x in mobj.group('args').split(',')],
+                body, local_vars, *global_stack))
             code = code[:start] + name + remaining
         return self.build_function(argnames, code, local_vars, *global_stack)
 
@@ -854,7 +864,7 @@ class JSInterpreter(object):
                 zip_longest(argnames, args, fillvalue=None))
             global_stack[0].update(kwargs)
             var_stack = LocalNameSpace(*global_stack)
-            ret, should_abort = self.interpret_statement(code.replace('\n', ''), var_stack, allow_recursion - 1)
+            ret, should_abort = self.interpret_statement(code.replace('\n', ' '), var_stack, allow_recursion - 1)
             if should_abort:
                 return ret
         return resf