summary refs log tree commit diff
diff options
context:
space:
mode:
author Sergey M․ <dstftw@gmail.com> 2018-02-19 22:50:23 +0700
committer Sergey M․ <dstftw@gmail.com> 2018-02-19 22:50:23 +0700
commit befa4708fd2165b85d04002c3845adf191d34302 (patch)
tree be0eaae66b9f4aa11100d12c6c0c6a1e0d18851b
parent 90830004c893e2d5f0643c05af064cfc7a3b579e (diff)
download youtube-dl-befa4708fd2165b85d04002c3845adf191d34302.tar.gz
youtube-dl-befa4708fd2165b85d04002c3845adf191d34302.tar.xz
youtube-dl-befa4708fd2165b85d04002c3845adf191d34302.zip
[utils] Fixup some common URL's typos in sanitize_url (closes #15649)
-rw-r--r-- test/test_utils.py 7
-rw-r--r-- youtube_dl/utils.py 18
2 files changed, 22 insertions, 3 deletions
diff --git a/test/test_utils.py b/test/test_utils.py
index fdf6031f7..d8d257d1d 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -57,6 +57,7 @@ from youtube_dl.utils import (
     read_batch_urls,
     sanitize_filename,
     sanitize_path,
+    sanitize_url,
     expand_path,
     prepend_extension,
     replace_extension,
@@ -219,6 +220,12 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(sanitize_path('./abc'), 'abc')
         self.assertEqual(sanitize_path('./../abc'), '..\\abc')
 
+    def test_sanitize_url(self):
+        self.assertEqual(sanitize_url('//foo.bar'), 'http://foo.bar')
+        self.assertEqual(sanitize_url('httpss://foo.bar'), 'https://foo.bar')
+        self.assertEqual(sanitize_url('rmtps://foo.bar'), 'rtmps://foo.bar')
+        self.assertEqual(sanitize_url('https://foo.bar'), 'https://foo.bar')
+
     def test_expand_path(self):
         def env(var):
             return '%{0}%'.format(var) if sys.platform == 'win32' else '${0}'.format(var)
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 7f24cbb04..af639a124 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -538,10 +538,22 @@ def sanitize_path(s):
     return os.path.join(*sanitized_path)
 
 
-# Prepend protocol-less URLs with `http:` scheme in order to mitigate the number of
-# unwanted failures due to missing protocol
 def sanitize_url(url):
-    return 'http:%s' % url if url.startswith('//') else url
+    # Prepend protocol-less URLs with `http:` scheme in order to mitigate
+    # the number of unwanted failures due to missing protocol
+    if url.startswith('//'):
+        return 'http:%s' % url
+    # Fix some common typos seen so far
+    COMMON_TYPOS = (
+        # https://github.com/rg3/youtube-dl/issues/15649
+        (r'^httpss://', r'https://'),
+        # https://bx1.be/lives/direct-tv/
+        (r'^rmtp([es]?)://', r'rtmp\1://'),
+    )
+    for mistake, fixup in COMMON_TYPOS:
+        if re.match(mistake, url):
+            return re.sub(mistake, fixup, url)
+    return url
 
 
 def sanitized_Request(url, *args, **kwargs):