about summary refs log tree commit diff
diff options
context:
space:
mode:
author df <fieldhouse@gmx.net> 2021-08-29 05:34:20 +0100
committer df <fieldhouse@gmx.net> 2021-08-29 06:27:54 +0100
commit 1e222005ba6bb9c288dd3a2b777ca5664b220c81 (patch)
tree 1a21f442e7c9cbb84474b7f731726aca9732e1e5
parent 197215782bb5df5d1632deb7f275ab03c1468a60 (diff)
download youtube-dl-1e222005ba6bb9c288dd3a2b777ca5664b220c81.tar.gz
youtube-dl-1e222005ba6bb9c288dd3a2b777ca5664b220c81.tar.xz
youtube-dl-1e222005ba6bb9c288dd3a2b777ca5664b220c81.zip
Fix urlhandle_detect_ext() non-ASCII error in Py2, with test
-rw-r--r-- test/test_utils.py 25
-rw-r--r-- youtube_dl/utils.py 8
2 files changed, 31 insertions, 2 deletions
diff --git a/test/test_utils.py b/test/test_utils.py
index 6f8945792..44a4f6ff7 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -105,6 +105,7 @@ from youtube_dl.utils import (
     cli_valueless_option,
     cli_bool_option,
     parse_codecs,
+    urlhandle_detect_ext,
 )
 from youtube_dl.compat import (
     compat_chr,
@@ -1475,6 +1476,30 @@ Line 1
         self.assertEqual(clean_podcast_url('https://www.podtrac.com/pts/redirect.mp3/chtbl.com/track/5899E/traffic.megaphone.fm/HSW7835899191.mp3'), 'https://traffic.megaphone.fm/HSW7835899191.mp3')
         self.assertEqual(clean_podcast_url('https://play.podtrac.com/npr-344098539/edge1.pod.npr.org/anon.npr-podcasts/podcast/npr/waitwait/2020/10/20201003_waitwait_wwdtmpodcast201003-015621a5-f035-4eca-a9a1-7c118d90bc3c.mp3'), 'https://edge1.pod.npr.org/anon.npr-podcasts/podcast/npr/waitwait/2020/10/20201003_waitwait_wwdtmpodcast201003-015621a5-f035-4eca-a9a1-7c118d90bc3c.mp3')
 
+    def test_urlhandle_detect_ext(self):
+
+        class UrlHandle(object):
+            _info = {}
+
+            def __init__(self, info):
+                self._info = info
+
+            @property
+            def headers(self):
+                return self._info
+
+        # header with non-ASCII character and contradictory Content-Type
+        urlh = UrlHandle({
+            'Content-Disposition': b'attachment; filename="Epis\xf3dio contains non-ASCI ISO 8859-1 character.mp3"',
+            'Content-Type': b'audio/aac',
+        })
+        self.assertEqual(urlhandle_detect_ext(urlh), 'mp3')
+        # header with no Content-Disposition
+        urlh = UrlHandle({
+            'Content-Type': b'audio/mp3',
+        })
+        self.assertEqual(urlhandle_detect_ext(urlh), 'mp3')
+
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index eaf86bb44..5dde9768d 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -4288,7 +4288,10 @@ def parse_codecs(codecs_str):
 def urlhandle_detect_ext(url_handle):
     getheader = url_handle.headers.get
 
-    cd = getheader('Content-Disposition')
+    def encode_compat_str_or_none(x, encoding='iso-8859-1', errors='ignore'):
+        return encode_compat_str(x, encoding=encoding, errors=errors) if x else None
+
+    cd = encode_compat_str_or_none(getheader('Content-Disposition'))
     if cd:
         m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
         if m:
@@ -4296,7 +4299,8 @@ def urlhandle_detect_ext(url_handle):
             if e:
                 return e
 
-    return mimetype2ext(getheader('Content-Type'))
+    ct = encode_compat_str_or_none(getheader('Content-Type'))
+    return mimetype2ext(ct)
 
 
 def encode_data_uri(data, mime_type):