[extractor/common] Fix inline HTML5 media tags processing and add test (closes #27345)

author: Sergey M․ <dstftw@gmail.com> 2020-12-09 00:05:21 +0700
committer: Sergey M․ <dstftw@gmail.com> 2020-12-09 00:05:21 +0700
commit: 5a1fbbf8b7215aab0e6382e93eaa1561093352cf (patch)
tree: cf405ecd230d2259465ed2d65be86190b1d6ef3f
parent: e2bdf8bf4f3de7698d1d2844687e3acc760b34e7 (diff)
download: youtube-dl-5a1fbbf8b7215aab0e6382e93eaa1561093352cf.tar.gz
youtube-dl-5a1fbbf8b7215aab0e6382e93eaa1561093352cf.tar.xz
youtube-dl-5a1fbbf8b7215aab0e6382e93eaa1561093352cf.zip
2 files changed, 15 insertions, 3 deletions
diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py
index 71f6608fe..644b3759c 100644
--- a/test/test_InfoExtractor.py
+++ b/test/test_InfoExtractor.py
@@ -108,6 +108,18 @@ class TestInfoExtractor(unittest.TestCase):
         self.assertEqual(self.ie._download_json(uri, None, fatal=False), None)
 
     def test_parse_html5_media_entries(self):
+        # inline video tag
+        expect_dict(
+            self,
+            self.ie._parse_html5_media_entries(
+                'https://127.0.0.1/video.html',
+                r'<html><video src="/vid.mp4" /></html>', None)[0],
+            {
+                'formats': [{
+                    'url': 'https://127.0.0.1/vid.mp4',
+                }],
+            })
+
         # from https://www.r18.com/
         # with kpbs in label
         expect_dict(
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index dd07a1cae..74e40fabb 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -2515,9 +2515,9 @@ class InfoExtractor(object):
         # https://www.ampproject.org/docs/reference/components/amp-video)
         # For dl8-* tags see https://delight-vr.com/documentation/dl8-video/
         _MEDIA_TAG_NAME_RE = r'(?:(?:amp|dl8(?:-live)?)-)?(video|audio)'
-        media_tags = [(media_tag, media_type, '')
-                      for media_tag, media_type
-                      in re.findall(r'(?s)(<%s[^>]*/>)' % _MEDIA_TAG_NAME_RE, webpage)]
+        media_tags = [(media_tag, media_tag_name, media_type, '')
+                      for media_tag, media_tag_name, media_type
+                      in re.findall(r'(?s)(<(%s)[^>]*/>)' % _MEDIA_TAG_NAME_RE, webpage)]
         media_tags.extend(re.findall(
             # We only allow video|audio followed by a whitespace or '>'.
             # Allowing more characters may end up in significant slow down (see
author	Sergey M․ <dstftw@gmail.com>	2020-12-09 00:05:21 +0700
committer	Sergey M․ <dstftw@gmail.com>	2020-12-09 00:05:21 +0700
commit	5a1fbbf8b7215aab0e6382e93eaa1561093352cf (patch)
tree	cf405ecd230d2259465ed2d65be86190b1d6ef3f
parent	e2bdf8bf4f3de7698d1d2844687e3acc760b34e7 (diff)
download	youtube-dl-5a1fbbf8b7215aab0e6382e93eaa1561093352cf.tar.gz youtube-dl-5a1fbbf8b7215aab0e6382e93eaa1561093352cf.tar.xz youtube-dl-5a1fbbf8b7215aab0e6382e93eaa1561093352cf.zip