about summary refs log tree commit diff
diff options
context:
space:
mode:
author Hubert Hirtz <hubert@hirtz.pm> 2024-03-04 01:27:55 +0000
committer GitHub <noreply@github.com> 2024-03-04 01:27:55 +0000
commit f0812d784836d18fd25ea32f9b5a0c9c6e92425b (patch)
tree 173402901f9b0ea2c025d67287451f916cec447c
parent 40bd5c18153afe765caa6726302ee1dd8a9a2ce6 (diff)
download youtube-dl-f0812d784836d18fd25ea32f9b5a0c9c6e92425b.tar.gz
youtube-dl-f0812d784836d18fd25ea32f9b5a0c9c6e92425b.tar.xz
youtube-dl-f0812d784836d18fd25ea32f9b5a0c9c6e92425b.zip
[utils] Handle user:pass in URLs (#28801)
* Handle user:pass in URLs

Fixes "nonnumeric port" errors when youtube-dl is given URLs with
usernames and passwords such as:

    http://username:password@example.com/myvideo.mp4

Refs:
- https://en.wikipedia.org/wiki/Basic_access_authentication
- https://tools.ietf.org/html/rfc1738#section-3.1
- https://docs.python.org/3.8/library/urllib.parse.html#urllib.parse.urlsplit

Fixes #18276 (point 4)
Fixes #20258
Fixes #26211 (see comment)

* Align code with yt-dlp

---------

Co-authored-by: dirkf <fieldhouse@gmx.net>
-rw-r--r-- test/test_utils.py 13
-rw-r--r-- youtube_dl/utils.py 22
2 files changed, 34 insertions, 1 deletion
diff --git a/test/test_utils.py b/test/test_utils.py
index 102420fcb..90d64b581 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -81,6 +81,7 @@ from youtube_dl.utils import (
     sanitize_filename,
     sanitize_path,
     sanitize_url,
+    sanitized_Request,
     shell_quote,
     smuggle_url,
     str_or_none,
@@ -255,6 +256,18 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(sanitize_url('https://foo.bar'), 'https://foo.bar')
         self.assertEqual(sanitize_url('foo bar'), 'foo bar')
 
+    def test_sanitized_Request(self):
+        self.assertFalse(sanitized_Request('http://foo.bar').has_header('Authorization'))
+        self.assertFalse(sanitized_Request('http://:foo.bar').has_header('Authorization'))
+        self.assertEqual(sanitized_Request('http://@foo.bar').get_header('Authorization'),
+                         'Basic Og==')
+        self.assertEqual(sanitized_Request('http://:pass@foo.bar').get_header('Authorization'),
+                         'Basic OnBhc3M=')
+        self.assertEqual(sanitized_Request('http://user:@foo.bar').get_header('Authorization'),
+                         'Basic dXNlcjo=')
+        self.assertEqual(sanitized_Request('http://user:pass@foo.bar').get_header('Authorization'),
+                         'Basic dXNlcjpwYXNz')
+
     def test_expand_path(self):
         def env(var):
             return '%{0}%'.format(var) if sys.platform == 'win32' else '${0}'.format(var)
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 61b94d84c..c249e7168 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -2182,8 +2182,28 @@ def sanitize_url(url):
     return url
 
 
+def extract_basic_auth(url):
+    parts = compat_urllib_parse.urlsplit(url)
+    if parts.username is None:
+        return url, None
+    url = compat_urllib_parse.urlunsplit(parts._replace(netloc=(
+        parts.hostname if parts.port is None
+        else '%s:%d' % (parts.hostname, parts.port))))
+    auth_payload = base64.b64encode(
+        ('%s:%s' % (parts.username, parts.password or '')).encode('utf-8'))
+    return url, 'Basic {0}'.format(auth_payload.decode('ascii'))
+
+
 def sanitized_Request(url, *args, **kwargs):
-    return compat_urllib_request.Request(escape_url(sanitize_url(url)), *args, **kwargs)
+    url, auth_header = extract_basic_auth(escape_url(sanitize_url(url)))
+    if auth_header is not None:
+        headers = args[1] if len(args) > 1 else kwargs.get('headers')
+        headers = headers or {}
+        headers['Authorization'] = auth_header
+        if len(args) <= 1 and kwargs.get('headers') is None:
+            kwargs['headers'] = headers
+            kwargs = compat_kwargs(kwargs)
+    return compat_urllib_request.Request(url, *args, **kwargs)
 
 
 def expand_path(s):