about summary refs log tree commit diff
diff options
context:
space:
mode:
author Sergey M․ <dstftw@gmail.com> 2021-04-17 03:22:13 +0700
committer Sergey M․ <dstftw@gmail.com> 2021-04-17 03:22:13 +0700
commit a00a7e0cad3308d999599bf17df5d3e6aba502d8 (patch)
tree 241860a081f7b782bb916117f69e1dc82f09eaf7
parent 54558e0baa4d62a94af105cd1d7f8abcbd16b468 (diff)
download youtube-dl-a00a7e0cad3308d999599bf17df5d3e6aba502d8.tar.gz
youtube-dl-a00a7e0cad3308d999599bf17df5d3e6aba502d8.tar.xz
youtube-dl-a00a7e0cad3308d999599bf17df5d3e6aba502d8.zip
[utils] Add support for experimental HTTP response status code 308 Permanent Redirect (refs #27877, refs #28768)
-rw-r--r-- youtube_dl/utils.py 62
1 file changed, 56 insertions, 6 deletions
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 8e4d144c9..538cc2b63 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -39,6 +39,7 @@ import zlib
 from .compat import (
     compat_HTMLParseError,
     compat_HTMLParser,
+    compat_HTTPError,
     compat_basestring,
     compat_chr,
     compat_cookiejar,
@@ -2879,12 +2880,61 @@ class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
 
 
 class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
-    if sys.version_info[0] < 3:
-        def redirect_request(self, req, fp, code, msg, headers, newurl):
-            # On python 2 urlh.geturl() may sometimes return redirect URL
-            # as byte string instead of unicode. This workaround allows
-            # to force it always return unicode.
-            return compat_urllib_request.HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, compat_str(newurl))
+    """YoutubeDL redirect handler
+
+    The code is based on HTTPRedirectHandler implementation from CPython [1].
+
+    This redirect handler solves two issues:
+     - ensures redirect URL is always unicode under python 2
+     - introduces support for experimental HTTP response status code
+       308 Permanent Redirect [2] used by some sites [3]
+
+    1. https://github.com/python/cpython/blob/master/Lib/urllib/request.py
+    2. https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/308
+    3. https://github.com/ytdl-org/youtube-dl/issues/28768
+    """
+
+    http_error_301 = http_error_303 = http_error_307 = http_error_308 = compat_urllib_request.HTTPRedirectHandler.http_error_302
+
+    def redirect_request(self, req, fp, code, msg, headers, newurl):
+        """Return a Request or None in response to a redirect.
+
+        This is called by the http_error_30x methods when a
+        redirection response is received.  If a redirection should
+        take place, return a new Request to allow http_error_30x to
+        perform the redirect.  Otherwise, raise HTTPError if no-one
+        else should try to handle this url.  Return None if you can't
+        but another Handler might.
+        """
+        m = req.get_method()
+        if (not (code in (301, 302, 303, 307, 308) and m in ("GET", "HEAD")
+            or code in (301, 302, 303) and m == "POST")):
+            raise compat_HTTPError(req.full_url, code, msg, headers, fp)
+        # Strictly (according to RFC 2616), 301 or 302 in response to
+        # a POST MUST NOT cause a redirection without confirmation
+        # from the user (of urllib.request, in this case).  In practice,
+        # essentially all clients do redirect in this case, so we do
+        # the same.
+
+        # On python 2 urlh.geturl() may sometimes return redirect URL
+        # as byte string instead of unicode. This workaround allows
+        # to force it always return unicode.
+        if sys.version_info[0] < 3:
+            newurl = compat_str(newurl)
+
+        # Be conciliant with URIs containing a space.  This is mainly
+        # redundant with the more complete encoding done in http_error_302(),
+        # but it is kept for compatibility with other callers.
+        newurl = newurl.replace(' ', '%20')
+
+        CONTENT_HEADERS = ("content-length", "content-type")
+        # NB: don't use dict comprehension for python 2.6 compatibility
+        newheaders = dict((k, v) for k, v in req.headers.items()
+                           if k.lower() not in CONTENT_HEADERS)
+        return compat_urllib_request.Request(newurl,
+                       headers=newheaders,
+                       origin_req_host=req.origin_req_host,
+                       unverifiable=True)
 
 
 def extract_timezone(date_str):