about summary refs log tree commit diff
path: root/youtube_dl/extractor/ndr.py
diff options
context:
space:
mode:
author dirkf <fieldhouse@gmx.net> 2022-01-17 03:22:32 +0000
committer dirkf <fieldhouse@gmx.net> 2022-02-05 02:12:44 +0000
commit f0a05a55c2ee512880546c056cfbec5ad3399798 (patch)
tree 5b2b278259cb9aa9427b71255cf7677ccb6212c8 /youtube_dl/extractor/ndr.py
parent 4186e817772d49d6f66b07c5ac8c248f026a6446 (diff)
download youtube-dl-f0a05a55c2ee512880546c056cfbec5ad3399798.tar.gz
youtube-dl-f0a05a55c2ee512880546c056cfbec5ad3399798.tar.xz
youtube-dl-f0a05a55c2ee512880546c056cfbec5ad3399798.zip
NJoy: improve extraction of NDR id, description, etc with current page formats
Diffstat (limited to 'youtube_dl/extractor/ndr.py')
-rw-r--r-- youtube_dl/extractor/ndr.py 15
1 files changed, 11 insertions, 4 deletions
diff --git a/youtube_dl/extractor/ndr.py b/youtube_dl/extractor/ndr.py
index a0d553f00..0a723e3b0 100644
--- a/youtube_dl/extractor/ndr.py
+++ b/youtube_dl/extractor/ndr.py
@@ -196,18 +196,25 @@ class NJoyIE(NDRBaseIE):
         'only_matching': True,
     }]
 
-    def _extract_embed(self, webpage, display_id):
+    def _extract_embed(self, webpage, display_id, url=None):
+        # find tell-tale URL with the actual ID, or ...
         video_id = self._search_regex(
-            r'<iframe[^>]+id="pp_([\da-z]+)"', webpage, 'embed id')
-        description = self._search_regex(
+            (r'''\bsrc\s*=\s*(?:"|')?(?:/\w+)+/([a-z]+\d+)(?!\.)\b''',
+             r'<iframe[^>]+id="pp_([\da-z]+)"', ),
+            webpage, 'NDR id', default=None)
+
+        description = (
+            self._html_search_meta('description', webpage)
+            or self._search_regex(
                 r'<div[^>]+class="subline"[^>]*>[^<]+</div>\s*<p>([^<]+)</p>',
-            webpage, 'description', fatal=False)
+                webpage, 'description', fatal=False))
         return {
             '_type': 'url_transparent',
             'ie_key': 'NDREmbedBase',
             'url': 'ndr:%s' % video_id,
             'display_id': display_id,
             'description': description,
+            'title': display_id.replace('-', ' ').strip(),
         }