about summary refs log tree commit diff
diff options
context:
space:
mode:
author dirkf <fieldhouse@gmx.net> 2023-02-14 02:47:09 +0000
committer dirkf <fieldhouse@gmx.net> 2023-02-14 02:53:16 +0000
commit 42b098dd79e91295376ca98f394876555481a3eb (patch)
tree 640d8a7fc761fd384fccd6c6a65ec22b8ab0d57e
parent 6f8c2635a573c84ef66c02f73b4aeff1cc36ae4e (diff)
download youtube-dl-42b098dd79e91295376ca98f394876555481a3eb.tar.gz
youtube-dl-42b098dd79e91295376ca98f394876555481a3eb.tar.xz
youtube-dl-42b098dd79e91295376ca98f394876555481a3eb.zip
[InfoExtractor] Handle unquoted values in OpenGraph searches
-rw-r--r-- test/test_InfoExtractor.py 2
-rw-r--r-- youtube_dl/extractor/common.py 2
2 files changed, 3 insertions, 1 deletion
diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py
index dd69a681b..4db5c93f1 100644
--- a/test/test_InfoExtractor.py
+++ b/test/test_InfoExtractor.py
@@ -62,6 +62,7 @@ class TestInfoExtractor(unittest.TestCase):
             <meta name="og:test1" content='foo > < bar'/>
             <meta name="og:test2" content="foo >//< bar"/>
             <meta property=og-test3 content='Ill-formatted opengraph'/>
+            <meta property=og:test4 content=unquoted-value/>
             '''
         self.assertEqual(ie._og_search_title(html), 'Foo')
         self.assertEqual(ie._og_search_description(html), 'Some video\'s description ')
@@ -74,6 +75,7 @@ class TestInfoExtractor(unittest.TestCase):
         self.assertEqual(ie._og_search_property(('test0', 'test1'), html), 'foo > < bar')
         self.assertRaises(RegexNotFoundError, ie._og_search_property, 'test0', html, None, fatal=True)
         self.assertRaises(RegexNotFoundError, ie._og_search_property, ('test0', 'test00'), html, None, fatal=True)
+        self.assertEqual(ie._og_search_property('test4', html), 'unquoted-value')
 
     def test_html_search_meta(self):
         ie = self.ie
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index a0a796d7b..7244e5df6 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -1087,7 +1087,7 @@ class InfoExtractor(object):
     # Helper functions for extracting OpenGraph info
     @staticmethod
     def _og_regexes(prop):
-        content_re = r'content=(?:"([^"]+?)"|\'([^\']+?)\'|\s*([^\s"\'=<>`]+?))'
+        content_re = r'content=(?:"([^"]+?)"|\'([^\']+?)\'|\s*([^\s"\'=<>`]+?)(?=\s|/?>))'
         property_re = (r'(?:name|property)=(?:\'og[:-]%(prop)s\'|"og[:-]%(prop)s"|\s*og[:-]%(prop)s\b)'
                        % {'prop': re.escape(prop)})
         template = r'<meta[^>]+?%s[^>]+?%s'