diff options
author | df <fieldhouse@gmx.net> | 2021-08-29 05:34:20 +0100 |
---|---|---|
committer | df <fieldhouse@gmx.net> | 2021-08-29 06:27:54 +0100 |
commit | 1e222005ba6bb9c288dd3a2b777ca5664b220c81 (patch) | |
tree | 1a21f442e7c9cbb84474b7f731726aca9732e1e5 | |
parent | 197215782bb5df5d1632deb7f275ab03c1468a60 (diff) | |
download | youtube-dl-1e222005ba6bb9c288dd3a2b777ca5664b220c81.tar.gz youtube-dl-1e222005ba6bb9c288dd3a2b777ca5664b220c81.tar.xz youtube-dl-1e222005ba6bb9c288dd3a2b777ca5664b220c81.zip |
Fix urlhandle_detect_ext() non-ASCII error in Py2, with test
-rw-r--r-- | test/test_utils.py | 25 | ||||
-rw-r--r-- | youtube_dl/utils.py | 8 |
2 files changed, 31 insertions, 2 deletions
```diff
diff --git a/test/test_utils.py b/test/test_utils.py
index 6f8945792..44a4f6ff7 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -105,6 +105,7 @@ from youtube_dl.utils import (
     cli_valueless_option,
     cli_bool_option,
     parse_codecs,
+    urlhandle_detect_ext,
 )
 from youtube_dl.compat import (
     compat_chr,
@@ -1475,6 +1476,30 @@ Line 1
         self.assertEqual(clean_podcast_url('https://www.podtrac.com/pts/redirect.mp3/chtbl.com/track/5899E/traffic.megaphone.fm/HSW7835899191.mp3'), 'https://traffic.megaphone.fm/HSW7835899191.mp3')
         self.assertEqual(clean_podcast_url('https://play.podtrac.com/npr-344098539/edge1.pod.npr.org/anon.npr-podcasts/podcast/npr/waitwait/2020/10/20201003_waitwait_wwdtmpodcast201003-015621a5-f035-4eca-a9a1-7c118d90bc3c.mp3'), 'https://edge1.pod.npr.org/anon.npr-podcasts/podcast/npr/waitwait/2020/10/20201003_waitwait_wwdtmpodcast201003-015621a5-f035-4eca-a9a1-7c118d90bc3c.mp3')
 
+    def test_urlhandle_detect_ext(self):
+
+        class UrlHandle(object):
+            _info = {}
+
+            def __init__(self, info):
+                self._info = info
+
+            @property
+            def headers(self):
+                return self._info
+
+        # header with non-ASCII character and contradictory Content-Type
+        urlh = UrlHandle({
+            'Content-Disposition': b'attachment; filename="Epis\xf3dio contains non-ASCI ISO 8859-1 character.mp3"',
+            'Content-Type': b'audio/aac',
+        })
+        self.assertEqual(urlhandle_detect_ext(urlh), 'mp3')
+        # header with no Content-Disposition
+        urlh = UrlHandle({
+            'Content-Type': b'audio/mp3',
+        })
+        self.assertEqual(urlhandle_detect_ext(urlh), 'mp3')
+
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index eaf86bb44..5dde9768d 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -4288,7 +4288,10 @@ def parse_codecs(codecs_str):
 def urlhandle_detect_ext(url_handle):
     getheader = url_handle.headers.get
 
-    cd = getheader('Content-Disposition')
+    def encode_compat_str_or_none(x, encoding='iso-8859-1', errors='ignore'):
+        return encode_compat_str(x, encoding=encoding, errors=errors) if x else None
+
+    cd = encode_compat_str_or_none(getheader('Content-Disposition'))
     if cd:
         m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
         if m:
@@ -4296,7 +4299,8 @@ def urlhandle_detect_ext(url_handle):
             if e:
                 return e
 
-    return mimetype2ext(getheader('Content-Type'))
+    ct = encode_compat_str_or_none(getheader('Content-Type'))
+    return mimetype2ext(ct)
 
 
 def encode_data_uri(data, mime_type):
```