summary refs log tree commit diff
diff options
context:
space:
mode:
author Sergey M․ <dstftw@gmail.com> 2015-11-23 21:14:03 +0600
committer Sergey M․ <dstftw@gmail.com> 2015-11-23 21:14:03 +0600
commit 526b3b071632bc3c840ae4dd3579e015f41df6f5 (patch)
tree dafc978c3bb59c49f5f3fd174e643c9d05b4c2bd
parent 61f92af1cfacb9a5a6e368d0093fb71dbac0af6b (diff)
download youtube-dl-526b3b071632bc3c840ae4dd3579e015f41df6f5.tar.gz
youtube-dl-526b3b071632bc3c840ae4dd3579e015f41df6f5.tar.xz
youtube-dl-526b3b071632bc3c840ae4dd3579e015f41df6f5.zip
[youtube] Clarify ytplayer.config extraction rationale
-rw-r--r-- youtube_dl/extractor/youtube.py 7
1 files changed, 7 insertions, 0 deletions
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 824335d0a..5482aac3b 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -898,6 +898,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
 
     def _get_ytplayer_config(self, video_id, webpage):
         patterns = (
+            # User data may contain arbitrary character sequences that can break
+            # regex-based JSON extraction, e.g. when it contains '};' the second
+            # regex won't capture the whole JSON. For now, work around this by
+            # trying the more specific regex first; proper quoted-string handling
+            # should be implemented in the future and replace this workaround (see
+            # https://github.com/rg3/youtube-dl/issues/7468,
+            # https://github.com/rg3/youtube-dl/pull/7599)
             r';ytplayer\.config\s*=\s*({.+?});ytplayer',
             r';ytplayer\.config\s*=\s*({.+?});',
         )