about summary refs log tree commit diff
diff options
context:
space:
mode:
author Ben Rog-Wilhelm <zorba-github@pavlovian.net> 2021-05-04 14:14:35 -0500
committer GitHub <noreply@github.com> 2021-05-05 02:14:35 +0700
commit fe05191b8c59538a48b6cbc95f4fe54fc7e6a0ac (patch)
tree 9de69c412b0836b723ba9965d98a4b1c01ee1f88
parent 0204838163bd4068fe23b40414573d1307d817ab (diff)
download youtube-dl-fe05191b8c59538a48b6cbc95f4fe54fc7e6a0ac.tar.gz
youtube-dl-fe05191b8c59538a48b6cbc95f4fe54fc7e6a0ac.tar.xz
youtube-dl-fe05191b8c59538a48b6cbc95f4fe54fc7e6a0ac.zip
[kaltura] Improve iframe extraction (#28969)
Co-authored-by: Sergey M. <dstftw@gmail.com>
-rw-r--r-- youtube_dl/extractor/gdcvault.py 15
-rw-r--r-- youtube_dl/extractor/kaltura.py 2
2 files changed, 16 insertions, 1 deletion
diff --git a/youtube_dl/extractor/gdcvault.py b/youtube_dl/extractor/gdcvault.py
index 2f555c1d4..5ad40ee23 100644
--- a/youtube_dl/extractor/gdcvault.py
+++ b/youtube_dl/extractor/gdcvault.py
@@ -102,6 +102,21 @@ class GDCVaultIE(InfoExtractor):
                 'format': 'mp4-408',
             },
         },
+        {
+            # Kaltura embed, whitespace between quote and embedded URL in iframe's src
+            'url': 'https://www.gdcvault.com/play/1025699',
+            'info_dict': {
+                'id': '0_zagynv0a',
+                'ext': 'mp4',
+                'title': 'Tech Toolbox',
+                'upload_date': '20190408',
+                'uploader_id': 'joe@blazestreaming.com',
+                'timestamp': 1554764629,
+            },
+            'params': {
+                'skip_download': True,
+            },
+        },
     ]
 
     def _login(self, webpage_url, display_id):
diff --git a/youtube_dl/extractor/kaltura.py b/youtube_dl/extractor/kaltura.py
index 5d0ff0418..c731612c4 100644
--- a/youtube_dl/extractor/kaltura.py
+++ b/youtube_dl/extractor/kaltura.py
@@ -145,7 +145,7 @@ class KalturaIE(InfoExtractor):
                 ''', webpage))
             or list(re.finditer(
                 r'''(?xs)
-                    <(?:iframe[^>]+src|meta[^>]+\bcontent)=(?P<q1>["'])
+                    <(?:iframe[^>]+src|meta[^>]+\bcontent)=(?P<q1>["'])\s*
                       (?:https?:)?//(?:(?:www|cdnapi(?:sec)?)\.)?kaltura\.com/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)
                       (?:(?!(?P=q1)).)*
                       [?&;]entry_id=(?P<id>(?:(?!(?P=q1))[^&])+)