summary refs log tree commit diff
diff options
context:
space:
mode:
author Yen Chi Hsuan <yan12125@gmail.com> 2017-02-05 21:41:08 +0800
committer Yen Chi Hsuan <yan12125@gmail.com> 2017-02-05 21:41:08 +0800
commit e4e50f60b1040a4b6aa8ecb9139f7d5de195f407 (patch)
tree dafe6feff97f24a78904bb6d004a41fd65228ee5
parent 6ef3e65a7b244d5e432e764772177c7d48cab237 (diff)
download youtube-dl-e4e50f60b1040a4b6aa8ecb9139f7d5de195f407.tar.gz
youtube-dl-e4e50f60b1040a4b6aa8ecb9139f7d5de195f407.tar.xz
youtube-dl-e4e50f60b1040a4b6aa8ecb9139f7d5de195f407.zip
[googledrive] Fix extraction on Python 3.6
Since Python 3.6, invalid escape sequences are deprecated. It's likely
that there are invalid escape sequences somewhere on the webpage, so
instead of unescaping the whole webpage, just unescape the URL.

See https://bugs.python.org/issue27364. That change was aimed at string
literals, but it also affects the 'unicode_escape' codec.
The code path is:

str.decode('unicode_escape')
    codecs.unicode_escape_decode()
        PyUnicode_DecodeUnicodeEscape()
-rw-r--r-- ChangeLog 6
-rw-r--r-- youtube_dl/extractor/googledrive.py 9
2 files changed, 11 insertions, 4 deletions
diff --git a/ChangeLog b/ChangeLog
index 23a729559..a0025ab91 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+version <unreleased>
+
+Extractors
+* [googledrive] Fix extraction on Python 3.6
+
+
 version 2017.02.04.1
 
 Extractors
diff --git a/youtube_dl/extractor/googledrive.py b/youtube_dl/extractor/googledrive.py
index 766fc26d0..fec36cbbb 100644
--- a/youtube_dl/extractor/googledrive.py
+++ b/youtube_dl/extractor/googledrive.py
@@ -6,6 +6,7 @@ from .common import InfoExtractor
 from ..utils import (
     ExtractorError,
     int_or_none,
+    lowercase_escape,
 )
 
 
@@ -13,12 +14,12 @@ class GoogleDriveIE(InfoExtractor):
     _VALID_URL = r'https?://(?:(?:docs|drive)\.google\.com/(?:uc\?.*?id=|file/d/)|video\.google\.com/get_player\?.*?docid=)(?P<id>[a-zA-Z0-9_-]{28,})'
     _TESTS = [{
         'url': 'https://drive.google.com/file/d/0ByeS4oOUV-49Zzh4R1J6R09zazQ/edit?pli=1',
-        'md5': '881f7700aec4f538571fa1e0eed4a7b6',
+        'md5': 'd109872761f7e7ecf353fa108c0dbe1e',
         'info_dict': {
             'id': '0ByeS4oOUV-49Zzh4R1J6R09zazQ',
             'ext': 'mp4',
             'title': 'Big Buck Bunny.mp4',
-            'duration': 46,
+            'duration': 45,
         }
     }, {
         # video id is longer than 28 characters
@@ -55,7 +56,7 @@ class GoogleDriveIE(InfoExtractor):
     def _real_extract(self, url):
         video_id = self._match_id(url)
         webpage = self._download_webpage(
-            'http://docs.google.com/file/d/%s' % video_id, video_id, encoding='unicode_escape')
+            'http://docs.google.com/file/d/%s' % video_id, video_id)
 
         reason = self._search_regex(r'"reason"\s*,\s*"([^"]+)', webpage, 'reason', default=None)
         if reason:
@@ -74,7 +75,7 @@ class GoogleDriveIE(InfoExtractor):
             resolution = fmt.split('/')[1]
             width, height = resolution.split('x')
             formats.append({
-                'url': fmt_url,
+                'url': lowercase_escape(fmt_url),
                 'format_id': fmt_id,
                 'resolution': resolution,
                 'width': int_or_none(width),