[InfoExtractor] Add search methods for Next/Nuxt.js from yt-dlp

* add _search_nextjs_data(), from https://github.com/yt-dlp/yt-dlp/pull/1386, thanks selfisekai
* add _search_nuxt_data(), from https://github.com/yt-dlp/yt-dlp/pull/1921, thanks Lesmiscore, pukkandan
* add tests for the above
* also fix HTML5 type recognition and tests, from https://github.com/yt-dlp/yt-dlp/commit/222a230871fe4fe63f35c49590379c9a77116819, thanks Lesmiscore
* update extractors in PR using above, fix tests.
author: dirkf <fieldhouse@gmx.net> 2023-05-05 19:25:42 +0100
committer: dirkf <fieldhouse@gmx.net> 2023-07-19 22:14:50 +0100
commit: b2741f2654e6ddfebc1771b5d5fadb5fd6fe3863 (patch)
tree: caf46c5f7dd2af308ba0a69797097c8cd8ce77ac /youtube_dl/extractor/whyp.py
parent: 846522204104e3078c597fa1872465024a684ad6 (diff)
download: youtube-dl-b2741f2654e6ddfebc1771b5d5fadb5fd6fe3863.tar.gz
youtube-dl-b2741f2654e6ddfebc1771b5d5fadb5fd6fe3863.tar.xz
youtube-dl-b2741f2654e6ddfebc1771b5d5fadb5fd6fe3863.zip
1 file changed, 1 insertion, 24 deletions
diff --git a/youtube_dl/extractor/whyp.py b/youtube_dl/extractor/whyp.py
index 16f9154ad..644eb4617 100644
--- a/youtube_dl/extractor/whyp.py
+++ b/youtube_dl/extractor/whyp.py
@@ -21,7 +21,7 @@ class WhypIE(InfoExtractor):
             'url': 'https://cdn.whyp.it/50eb17cc-e9ff-4e18-b89b-dc9206a95cb1.mp3',
             'id': '18337',
             'title': 'Home Page Example Track',
-            'description': 'md5:bd758000fb93f3159339c852b5b9133c',
+            'description': r're:(?s).+\bexample track\b',
             'ext': 'mp3',
             'duration': 52.82,
             'uploader': 'Brad',
@@ -33,29 +33,6 @@ class WhypIE(InfoExtractor):
         'only_matching': True,
     }]
 
-    def _search_nuxt_data(self, webpage, video_id, context_name='__NUXT__', fatal=True, traverse=('data', 0)):
-        """Parses Nuxt.js metadata. This works as long as the function __NUXT__ invokes is a pure function"""
-
-        import functools
-        import json
-        import re
-        from ..utils import (js_to_json, NO_DEFAULT)
-
-        re_ctx = re.escape(context_name)
-        FUNCTION_RE = r'\(function\((?P<arg_keys>.*?)\){return\s+(?P<js>{.*?})\s*;?\s*}\((?P<arg_vals>.*?)\)'
-        js, arg_keys, arg_vals = self._search_regex(
-            (p.format(re_ctx, FUNCTION_RE) for p in (r'<script>\s*window\.{0}={1}\s*\)\s*;?\s*</script>', r'{0}\(.*?{1}')),
-            webpage, context_name, group=('js', 'arg_keys', 'arg_vals'),
-            default=NO_DEFAULT if fatal else (None, None, None))
-        if js is None:
-            return {}
-
-        args = dict(zip(arg_keys.split(','), map(json.dumps, self._parse_json(
-            '[{0}]'.format(arg_vals), video_id, transform_source=js_to_json, fatal=fatal) or ())))
-
-        ret = self._parse_json(js, video_id, transform_source=functools.partial(js_to_json, vars=args), fatal=fatal)
-        return traverse_obj(ret, traverse) or {}
-
     def _real_extract(self, url):
         unique_id = self._match_id(url)
         webpage = self._download_webpage(url, unique_id)
author	dirkf <fieldhouse@gmx.net>	2023-05-05 19:25:42 +0100
committer	dirkf <fieldhouse@gmx.net>	2023-07-19 22:14:50 +0100
commit	b2741f2654e6ddfebc1771b5d5fadb5fd6fe3863 (patch)
tree	caf46c5f7dd2af308ba0a69797097c8cd8ce77ac /youtube_dl/extractor/whyp.py
parent	846522204104e3078c597fa1872465024a684ad6 (diff)
download	youtube-dl-b2741f2654e6ddfebc1771b5d5fadb5fd6fe3863.tar.gz youtube-dl-b2741f2654e6ddfebc1771b5d5fadb5fd6fe3863.tar.xz youtube-dl-b2741f2654e6ddfebc1771b5d5fadb5fd6fe3863.zip