summary refs log tree commit diff
diff options
context:
space:
mode:
author Yen Chi Hsuan <yan12125@gmail.com> 2017-08-19 21:40:53 +0800
committer Yen Chi Hsuan <yan12125@gmail.com> 2017-08-19 21:40:53 +0800
commit 95f3f7c20a05e7ac490e768b8470b20538ef8581 (patch)
tree fb04d34cdd3f55d0421b7e568201e5cba6dde4ba
parent f5469da9e6e259c1690c7ef54f1da1c19f65036f (diff)
download youtube-dl-95f3f7c20a05e7ac490e768b8470b20538ef8581.tar.gz
youtube-dl-95f3f7c20a05e7ac490e768b8470b20538ef8581.tar.xz
youtube-dl-95f3f7c20a05e7ac490e768b8470b20538ef8581.zip
[utils] Fix unescapeHTML for misformed string like "&a&quot;" (#13935)
-rw-r--r-- ChangeLog 6
-rw-r--r-- test/test_utils.py 1
-rw-r--r-- youtube_dl/utils.py 2
3 files changed, 8 insertions, 1 deletion
diff --git a/ChangeLog b/ChangeLog
index 9a0fad673..9eab4d1e7 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+version <unreleased>
+
+Core
+* [utils] Fix unescapeHTML for misformed string like "&a&quot;" (#13935)
+
+
 version 2017.08.18
 
 Core
diff --git a/test/test_utils.py b/test/test_utils.py
index 2aab16b97..e50f3764e 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -279,6 +279,7 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(unescapeHTML('&#47;'), '/')
         self.assertEqual(unescapeHTML('&eacute;'), 'é')
         self.assertEqual(unescapeHTML('&#2013266066;'), '&#2013266066;')
+        self.assertEqual(unescapeHTML('&a&quot;'), '&a"')
         # HTML5 entities
         self.assertEqual(unescapeHTML('&period;&apos;'), '.\'')
 
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index c9cbd5842..2554a2abd 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -596,7 +596,7 @@ def unescapeHTML(s):
     assert type(s) == compat_str
 
     return re.sub(
-        r'&([^;]+;)', lambda m: _htmlentity_transform(m.group(1)), s)
+        r'&([^&;]+;)', lambda m: _htmlentity_transform(m.group(1)), s)
 
 
 def get_subprocess_encoding():