about summary refs log tree commit diff
path: root/youtube_dl/YoutubeDL.py
diff options
context:
space:
mode:
authordirkf <fieldhouse@gmx.net>2023-03-13 19:45:54 +0000
committerdirkf <fieldhouse@gmx.net>2023-03-14 16:23:20 +0000
commite8de54bce50f6f77a4d7e8e80675f7003d5bf630 (patch)
tree44fb3fc30acb05ff99073ee3cfeed37457d541b5 /youtube_dl/YoutubeDL.py
parentbaa6c5e95cb307e7d716645780ff8aef22de6aca (diff)
downloadyoutube-dl-e8de54bce50f6f77a4d7e8e80675f7003d5bf630.tar.gz
youtube-dl-e8de54bce50f6f77a4d7e8e80675f7003d5bf630.tar.xz
youtube-dl-e8de54bce50f6f77a4d7e8e80675f7003d5bf630.zip
[core] Handle `/../` sequences in HTTP URLs
* use Python's RFC-compliant `urljoin` to resolve embedded `/../` sequences
* hack: strip unbalanced leading `/../` from the path, as e.g. Firefox does

See https://github.com/yt-dlp/yt-dlp/issues/3355
Diffstat (limited to 'youtube_dl/YoutubeDL.py')
-rwxr-xr-xyoutube_dl/YoutubeDL.py24
1 files changed, 24 insertions, 0 deletions
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 8e8546596..bcf781744 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -39,6 +39,7 @@ from .compat import (
     compat_str,
     compat_tokenize_tokenize,
     compat_urllib_error,
+    compat_urllib_parse,
     compat_urllib_request,
     compat_urllib_request_DataHandler,
 )
@@ -60,6 +61,7 @@ from .utils import (
     format_bytes,
     formatSeconds,
     GeoRestrictedError,
+    HEADRequest,
     int_or_none,
     ISO3166Utils,
     locked_file,
@@ -74,6 +76,7 @@ from .utils import (
     preferredencoding,
     prepend_extension,
     process_communicate_or_kill,
+    PUTRequest,
     register_socks_protocols,
     render_table,
     replace_extension,
@@ -2297,6 +2300,27 @@ class YoutubeDL(object):
         """ Start an HTTP download """
         if isinstance(req, compat_basestring):
             req = sanitized_Request(req)
+        # an embedded /../ sequence is not automatically handled by urllib2
+        # see https://github.com/yt-dlp/yt-dlp/issues/3355
+        url = req.get_full_url()
+        parts = url.partition('/../')
+        if parts[1]:
+            url = compat_urllib_parse.urljoin(parts[0] + parts[1][:1], parts[1][1:] + parts[2])
+        if url:
+            # worse, URL path may have initial /../ against RFCs: work-around
+            # by stripping such prefixes, like eg Firefox
+            parts = compat_urllib_parse.urlsplit(url)
+            path = parts.path
+            while path.startswith('/../'):
+                path = path[3:]
+            url = parts._replace(path=path).geturl()
+            # get a new Request with the munged URL
+            if url != req.get_full_url():
+                req_type = {'HEAD': HEADRequest, 'PUT': PUTRequest}.get(
+                    req.get_method(), compat_urllib_request.Request)
+                req = req_type(
+                    url, data=req.data, headers=dict(req.header_items()),
+                    origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
         return self._opener.open(req, timeout=self._socket_timeout)
 
     def print_debug_header(self):