From 06e9363b7a21acf6a592780a706b0fdd6b5a2d4e Mon Sep 17 00:00:00 2001 From: Vijay Singh Date: Sun, 8 Jan 2017 22:27:28 +0530 Subject: [openload] Fix extraction (closes #10408) Just a minor fix for openload --- youtube_dl/extractor/openload.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'youtube_dl/extractor/openload.py') diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index 2ce9f3826..3d4ad7dca 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -64,16 +64,17 @@ class OpenloadIE(InfoExtractor): raise ExtractorError('File not found', expected=True) ol_id = self._search_regex( - ']+id="[a-zA-Z0-9]+x"[^>]*>([0-9]+)', + ']+id="[^"]+"[^>]*>([0-9]+)', webpage, 'openload ID') - first_two_chars = int(float(ol_id[0:][:2])) + first_three_chars = int(float(ol_id[0:][:3])) + fifth_char = int(float(ol_id[3:5])) urlcode = '' - num = 2 + num = 5 while num < len(ol_id): - urlcode += compat_chr(int(float(ol_id[num:][:3])) - - first_two_chars * int(float(ol_id[num + 3:][:2]))) + urlcode += compat_chr(int(float(ol_id[num:][:3])) + + first_three_chars - fifth_char * int(float(ol_id[num + 3:][:2]))) num += 5 video_url = 'https://openload.co/stream/' + urlcode -- cgit 1.4.1 From 17f8deeb481a7aa3079d7e11da2c255f893b9e8c Mon Sep 17 00:00:00 2001 From: Sergey M․ Date: Wed, 25 Jan 2017 23:27:22 +0700 Subject: [extractor/generic] Add support for openload embeds (closes #11536, closes #11812) --- youtube_dl/extractor/generic.py | 7 +++++++ youtube_dl/extractor/openload.py | 8 ++++++++ 2 files changed, 15 insertions(+) (limited to 'youtube_dl/extractor/openload.py') diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 40201f311..a23486620 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -80,6 +80,7 @@ from .piksel import PikselIE from .videa import VideaIE from .twentymin import TwentyMinutenIE from .ustream import UstreamIE +from .openload import OpenloadIE class GenericIE(InfoExtractor): @@ -2431,6 +2432,12 @@ class GenericIE(InfoExtractor): return _playlist_from_matches( twentymin_urls, ie=TwentyMinutenIE.ie_key()) + # Look for Openload embeds + openload_urls = OpenloadIE._extract_urls(webpage) + if openload_urls: + return _playlist_from_matches( + openload_urls, ie=OpenloadIE.ie_key()) + # Looking for http://schema.org/VideoObject json_ld = self._search_json_ld( webpage, video_id, default={}, expected_type='VideoObject') diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index 3d4ad7dca..4893ade5d 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -1,6 +1,8 @@ # coding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor from ..compat import compat_chr from ..utils import ( @@ -56,6 +58,12 @@ class OpenloadIE(InfoExtractor): 'only_matching': True, }] + @staticmethod + def _extract_urls(webpage): + return re.findall( + r']+src=["\']((?:https?://)?(?:openload\.(?:co|io)|oload\.tv)/embed/[a-zA-Z0-9-_]+)', + webpage) + def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage('https://openload.co/embed/%s/' % video_id, video_id) -- cgit 1.4.1 From c1fa3f46727ccbbb75389ce82753f2e63449ece6 Mon Sep 17 00:00:00 2001 From: Sergey M․ Date: Wed, 25 Jan 2017 23:28:45 +0700 Subject: [openload] Fallback video extension to mp4 --- youtube_dl/extractor/openload.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'youtube_dl/extractor/openload.py') diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index 4893ade5d..32289d897 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -101,7 +101,7 @@ class OpenloadIE(InfoExtractor): 'thumbnail': self._og_search_thumbnail(webpage, default=None), 'url': video_url, # Seems all videos have extensions in their titles - 'ext': determine_ext(title), + 'ext': determine_ext(title, 'mp4'), 'subtitles': subtitles, } return info_dict -- cgit 1.4.1 From d94badc755228ee3159b9b499aa718d27fa472ed Mon Sep 17 00:00:00 2001 From: Vijay Singh Date: Tue, 7 Feb 2017 10:32:45 +0530 Subject: [openload] Semifix extraction (closes #10408) just updated the code. i don't do much python still i tried to convert my code. lemme know if there is any prob with it --- youtube_dl/extractor/openload.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'youtube_dl/extractor/openload.py') diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index 32289d897..bd1120fd8 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -75,17 +75,20 @@ class OpenloadIE(InfoExtractor): ']+id="[^"]+"[^>]*>([0-9]+)', webpage, 'openload ID') - first_three_chars = int(float(ol_id[0:][:3])) - fifth_char = int(float(ol_id[3:5])) - urlcode = '' - num = 5 + first_two_chars = int(float(ol_id[0:][:2])) + urlcode = {} + num = 2 while num < len(ol_id): - urlcode += compat_chr(int(float(ol_id[num:][:3])) + - first_three_chars - fifth_char * int(float(ol_id[num + 3:][:2]))) + key = int(float(ol_id[num + 3:][:2])) + urlcode[key] = compat_chr(int(float(ol_id[num:][:3])) - first_two_chars) num += 5 + + sorted(urlcode, key=lambda key: urlcode[key]) - video_url = 'https://openload.co/stream/' + urlcode + urllink = ''.join(['%s' % (value) for (key, value) in urlcode.items()]) + + video_url = 'https://openload.co/stream/' + urllink title = self._og_search_title(webpage, default=None) or self._search_regex( r']+class=["\']title["\'][^>]*>([^<]+)', webpage, -- cgit 1.4.1 From 90fad0e74cd8079246c5f3d8150650b5f65f998b Mon Sep 17 00:00:00 2001 From: Sergey M․ Date: Fri, 17 Feb 2017 22:31:16 +0700 Subject: [openload] Fix extraction (closes #12002) --- youtube_dl/extractor/openload.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'youtube_dl/extractor/openload.py') diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index bd1120fd8..10896c442 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -76,19 +76,16 @@ class OpenloadIE(InfoExtractor): webpage, 'openload ID') first_two_chars = int(float(ol_id[0:][:2])) - urlcode = {} + urlcode = [] num = 2 while num < len(ol_id): key = int(float(ol_id[num + 3:][:2])) - urlcode[key] = compat_chr(int(float(ol_id[num:][:3])) - first_two_chars) + urlcode.append((key, compat_chr(int(float(ol_id[num:][:3])) - first_two_chars))) num += 5 - - sorted(urlcode, key=lambda key: urlcode[key]) - urllink = ''.join(['%s' % (value) for (key, value) in urlcode.items()]) - - video_url = 'https://openload.co/stream/' + urllink + video_url = 'https://openload.co/stream/' + ''.join( + [value for _, value in sorted(urlcode, key=lambda x: x[0])]) title = self._og_search_title(webpage, default=None) or self._search_regex( r']+class=["\']title["\'][^>]*>([^<]+)', webpage, -- cgit 1.4.1 From 39e7277ed16c1647d636c766d57870121f5f2d68 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Fri, 24 Feb 2017 11:21:13 +0100 Subject: [openload] fix extraction(closes #10408) --- youtube_dl/extractor/openload.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'youtube_dl/extractor/openload.py') diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index 10896c442..fc7ff43a6 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -72,16 +72,21 @@ class OpenloadIE(InfoExtractor): raise ExtractorError('File not found', expected=True) ol_id = self._search_regex( - ']+id="[^"]+"[^>]*>([0-9]+)', + ']+id="[^"]+"[^>]*>([0-9A-Za-z]+)', webpage, 'openload ID') - first_two_chars = int(float(ol_id[0:][:2])) + first_char = int(ol_id[0]) urlcode = [] - num = 2 + num = 1 while num < len(ol_id): - key = int(float(ol_id[num + 3:][:2])) - urlcode.append((key, compat_chr(int(float(ol_id[num:][:3])) - first_two_chars))) + i = ord(ol_id[num]) + key = 0 + if i <= 90: + key = i - 65 + elif i >= 97: + key = 25 + i - 97 + urlcode.append((key, compat_chr(int(ol_id[num + 2:num + 5]) // int(ol_id[num + 1]) - first_char))) num += 5 video_url = 'https://openload.co/stream/' + ''.join( -- cgit 1.4.1 From da92da4b886a0e44fe28591ddf5b746fba1c9ade Mon Sep 17 00:00:00 2001 From: denneboomyo Date: Mon, 6 Mar 2017 11:00:17 +0100 Subject: Openload fix extraction (#12357) * Fix extraction --- youtube_dl/extractor/openload.py | 47 ++++++++++++++++++++++++++-------------- 1 file changed, 31 insertions(+), 16 deletions(-) (limited to 'youtube_dl/extractor/openload.py') diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index fc7ff43a6..25f6a9aca 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -75,22 +75,37 @@ class OpenloadIE(InfoExtractor): ']+id="[^"]+"[^>]*>([0-9A-Za-z]+)', webpage, 'openload ID') - first_char = int(ol_id[0]) - urlcode = [] - num = 1 - - while num < len(ol_id): - i = ord(ol_id[num]) - key = 0 - if i <= 90: - key = i - 65 - elif i >= 97: - key = 25 + i - 97 - urlcode.append((key, compat_chr(int(ol_id[num + 2:num + 5]) // int(ol_id[num + 1]) - first_char))) - num += 5 - - video_url = 'https://openload.co/stream/' + ''.join( - [value for _, value in sorted(urlcode, key=lambda x: x[0])]) + video_url_chars = [] + + first_char = ord(ol_id[0]) + key = first_char - 55 + maxKey = max(2, key) + key = min(maxKey, len(ol_id) - 14) + t = ol_id[key:key + 12] + + hashMap = {} + v = ol_id.replace(t, "") + h = 0 + + while h < len(t): + f = t[h:h + 2] + i = int(f, 16) + hashMap[h / 2] = i + h += 2 + + h = 0 + + while h < len(v): + B = v[h:h + 2] + i = int(B, 16) + index = (h / 2) % 6 + A = hashMap[index] + i = i ^ A + video_url_chars.append(compat_chr(i)) + h += 2 + + video_url = 'https://openload.co/stream/%s?mime=true' + video_url = video_url % (''.join(video_url_chars)) title = self._og_search_title(webpage, default=None) or self._search_regex( r']+class=["\']title["\'][^>]*>([^<]+)', webpage, -- cgit 1.4.1 From b08cc749d6e1a3078d807580c424437bca143b73 Mon Sep 17 00:00:00 2001 From: denneboomyo Date: Tue, 7 Mar 2017 23:01:27 +0100 Subject: [openload] Fix extraction --- youtube_dl/extractor/openload.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'youtube_dl/extractor/openload.py') diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index 25f6a9aca..5a5607357 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -78,10 +78,10 @@ class OpenloadIE(InfoExtractor): video_url_chars = [] first_char = ord(ol_id[0]) - key = first_char - 55 + key = first_char - 50 maxKey = max(2, key) - key = min(maxKey, len(ol_id) - 14) - t = ol_id[key:key + 12] + key = min(maxKey, len(ol_id) - 22) + t = ol_id[key:key + 20] hashMap = {} v = ol_id.replace(t, "") @@ -98,8 +98,9 @@ class OpenloadIE(InfoExtractor): while h < len(v): B = v[h:h + 2] i = int(B, 16) - index = (h / 2) % 6 + index = (h / 2) % 10 A = hashMap[index] + i = i ^ 137 i = i ^ A video_url_chars.append(compat_chr(i)) h += 2 -- cgit 1.4.1 From ff9d509d200577a0be962ee47894cd257c7ef818 Mon Sep 17 00:00:00 2001 From: Vijay Singh Date: Mon, 13 Mar 2017 01:52:35 +0530 Subject: [openload] Fix extraction Just a minor fix for openload --- youtube_dl/extractor/openload.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'youtube_dl/extractor/openload.py') diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index 5a5607357..9a42ab895 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -100,7 +100,7 @@ class OpenloadIE(InfoExtractor): i = int(B, 16) index = (h / 2) % 10 A = hashMap[index] - i = i ^ 137 + i = i ^ 96 i = i ^ A video_url_chars.append(compat_chr(i)) h += 2 -- cgit 1.4.1 From 398887b4c09b3691379720314f4918bc094d1b7b Mon Sep 17 00:00:00 2001 From: Vijay Singh Date: Tue, 14 Mar 2017 05:19:18 +0530 Subject: [Openload] Fixed Extraction They did changed it again. --- youtube_dl/extractor/openload.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'youtube_dl/extractor/openload.py') diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index 9a42ab895..5ea749f35 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -96,14 +96,16 @@ class OpenloadIE(InfoExtractor): h = 0 while h < len(v): - B = v[h:h + 2] + B = v[h:h + 3] i = int(B, 16) - index = (h / 2) % 10 + if (h / 3) % 3 == 0: + i = int(B, 8) + index = (h / 3) % 10 A = hashMap[index] - i = i ^ 96 + i = i ^ 47 i = i ^ A video_url_chars.append(compat_chr(i)) - h += 2 + h += 3 video_url = 'https://openload.co/stream/%s?mime=true' video_url = video_url % (''.join(video_url_chars)) -- cgit 1.4.1 From ea883a687c054692fcfe3cea15a22269044b64bb Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Fri, 17 Mar 2017 15:20:12 +0800 Subject: [openload] Fix extraction (closes #10408) Thanks to @makgun02 Ref: http://pastebin.com/raw/JX9gHFUz --- ChangeLog | 6 ++++++ youtube_dl/extractor/openload.py | 43 +++++++++++++++++++++++++--------------- 2 files changed, 33 insertions(+), 16 deletions(-) (limited to 'youtube_dl/extractor/openload.py') diff --git a/ChangeLog b/ChangeLog index 75a8bd7a6..eeb5813c5 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version + +Extractors +* [openload] Fix extraction (#10408) + + version 2017.03.16 Core diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index 5ea749f35..fa876b127 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -78,34 +78,45 @@ class OpenloadIE(InfoExtractor): video_url_chars = [] first_char = ord(ol_id[0]) - key = first_char - 50 + key = first_char - 55 maxKey = max(2, key) - key = min(maxKey, len(ol_id) - 22) - t = ol_id[key:key + 20] + key = min(maxKey, len(ol_id) - 26) + t = ol_id[key:key + 24] hashMap = {} - v = ol_id.replace(t, "") + v = ol_id.replace(t, '') h = 0 while h < len(t): - f = t[h:h + 2] - i = int(f, 16) - hashMap[h / 2] = i - h += 2 + f = t[h:h + 3] + i = int(f, 8) + hashMap[h / 3] = i + h += 3 h = 0 - + H = 0 while h < len(v): - B = v[h:h + 3] + B = '' + C = '' + if len(v) >= h + 2: + B = v[h:h + 2] + if len(v) >= h + 3: + C = v[h:h + 3] i = int(B, 16) - if (h / 3) % 3 == 0: - i = int(B, 8) - index = (h / 3) % 10 + h += 2 + if H % 3 == 0: + i = int(C, 8) + h += 1 + elif H % 2 == 0 and H != 0 and ord(v[H - 1]) < 60: + i = int(C, 10) + h += 1 + index = H % 8 + A = hashMap[index] - i = i ^ 47 - i = i ^ A + i ^= 213 + i ^= A video_url_chars.append(compat_chr(i)) - h += 3 + H += 1 video_url = 'https://openload.co/stream/%s?mime=true' video_url = video_url % (''.join(video_url_chars)) -- cgit 1.4.1 From 5f0daab1ca60803f4f49b344ddb3757c418a2d8e Mon Sep 17 00:00:00 2001 From: Vijay Singh Date: Sat, 18 Mar 2017 04:32:55 +0530 Subject: [openload] Fix extraction --- youtube_dl/extractor/openload.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'youtube_dl/extractor/openload.py') diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index fa876b127..435aec28e 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -80,8 +80,8 @@ class OpenloadIE(InfoExtractor): first_char = ord(ol_id[0]) key = first_char - 55 maxKey = max(2, key) - key = min(maxKey, len(ol_id) - 26) - t = ol_id[key:key + 24] + key = min(maxKey, len(ol_id) - 38) + t = ol_id[key:key + 36] hashMap = {} v = ol_id.replace(t, '') @@ -110,7 +110,7 @@ class OpenloadIE(InfoExtractor): elif H % 2 == 0 and H != 0 and ord(v[H - 1]) < 60: i = int(C, 10) h += 1 - index = H % 8 + index = H % 12 A = hashMap[index] i ^= 213 -- cgit 1.4.1 From 957f453429d584615ac4d2277caeb0d75d0fe1d9 Mon Sep 17 00:00:00 2001 From: Vijay Singh Date: Mon, 20 Mar 2017 09:22:32 +0530 Subject: [Openload.co] Fixed Extraction They did it again. just a minor change though. here's quick fix --- youtube_dl/extractor/openload.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'youtube_dl/extractor/openload.py') diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index 435aec28e..58ffde541 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -110,7 +110,7 @@ class OpenloadIE(InfoExtractor): elif H % 2 == 0 and H != 0 and ord(v[H - 1]) < 60: i = int(C, 10) h += 1 - index = H % 12 + index = H % 7 A = hashMap[index] i ^= 213 -- cgit 1.4.1 From aea1dccbd07b073ef36b325a9a21eb3f642322d9 Mon Sep 17 00:00:00 2001 From: Tithen-Firion Date: Tue, 28 Mar 2017 15:42:03 +0200 Subject: [openload] fix extractor --- youtube_dl/extractor/openload.py | 73 +++++++++++++++++----------------------- 1 file changed, 30 insertions(+), 43 deletions(-) (limited to 'youtube_dl/extractor/openload.py') diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index 58ffde541..d8036b54a 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -75,51 +75,38 @@ class OpenloadIE(InfoExtractor): ']+id="[^"]+"[^>]*>([0-9A-Za-z]+)', webpage, 'openload ID') - video_url_chars = [] - - first_char = ord(ol_id[0]) - key = first_char - 55 - maxKey = max(2, key) - key = min(maxKey, len(ol_id) - 38) - t = ol_id[key:key + 36] - - hashMap = {} - v = ol_id.replace(t, '') - h = 0 - - while h < len(t): - f = t[h:h + 3] - i = int(f, 8) - hashMap[h / 3] = i - h += 3 - - h = 0 - H = 0 - while h < len(v): - B = '' - C = '' - if len(v) >= h + 2: - B = v[h:h + 2] - if len(v) >= h + 3: - C = v[h:h + 3] - i = int(B, 16) - h += 2 - if H % 3 == 0: - i = int(C, 8) - h += 1 - elif H % 2 == 0 and H != 0 and ord(v[H - 1]) < 60: - i = int(C, 10) - h += 1 - index = H % 7 - - A = hashMap[index] - i ^= 213 - i ^= A - video_url_chars.append(compat_chr(i)) - H += 1 + decoded = '' + a = ol_id[0:24] + b = [] + for i in range(0, len(a), 8): + b.append(int(a[i:i + 8] or '0', 16)) + ol_id = ol_id[24:] + j = 0 + k = 0 + while j < len(ol_id): + c = 128 + d = 0 + e = 0 + f = 0 + _more = True + while _more: + if j + 1 >= len(ol_id): + c = 143 + f = int(ol_id[j:j + 2] or '0', 16) + j += 2 + d += (f & 127) << e + e += 7 + _more = f >= c + g = d ^ b[k % 3] + for i in range(4): + char_dec = (g >> 8 * i) & (c + 127) + char = compat_chr(char_dec) + if char != '#': + decoded += char + k += 1 video_url = 'https://openload.co/stream/%s?mime=true' - video_url = video_url % (''.join(video_url_chars)) + video_url = video_url % decoded title = self._og_search_title(webpage, default=None) or self._search_regex( r']+class=["\']title["\'][^>]*>([^<]+)', webpage, -- cgit 1.4.1 From 47e0cef46e9a76e589a2b1cde1b2dfa8bcf01d8e Mon Sep 17 00:00:00 2001 From: Tithen-Firion Date: Sun, 16 Apr 2017 00:34:34 +0200 Subject: [openload] rewrite extractor --- youtube_dl/extractor/openload.py | 118 +++++++++++++++++++++++---------------- 1 file changed, 71 insertions(+), 47 deletions(-) (limited to 'youtube_dl/extractor/openload.py') diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index d8036b54a..789bf997e 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -1,12 +1,16 @@ # coding: utf-8 from __future__ import unicode_literals +import os import re +import subprocess +import tempfile from .common import InfoExtractor -from ..compat import compat_chr from ..utils import ( + check_executable, determine_ext, + encodeArgument, ExtractorError, ) @@ -58,6 +62,39 @@ class OpenloadIE(InfoExtractor): 'only_matching': True, }] + _PHANTOMJS_SCRIPT = r''' + phantom.onError = function(msg, trace) { + var msgStack = ['PHANTOM ERROR: ' + msg]; + if(trace && trace.length) { + msgStack.push('TRACE:'); + trace.forEach(function(t) { + msgStack.push(' -> ' + (t.file || t.sourceURL) + ': ' + t.line + + (t.function ? ' (in function ' + t.function +')' : '')); + }); + } + console.error(msgStack.join('\n')); + phantom.exit(1); + }; + var page = require('webpage').create(); + page.settings.resourceTimeout = 10000; + page.onInitialized = function() { + page.evaluate(function() { + delete window._phantom; + delete window.callPhantom; + }); + }; + page.open('https://openload.co/embed/%s/', function(status) { + var info = page.evaluate(function() { + return { + decoded_id: document.getElementById('streamurl').innerHTML, + title: document.querySelector('meta[name="og:title"],' + + 'meta[name=description]').content + }; + }); + console.log(info.decoded_id + ' ' + info.title); + phantom.exit(); + });''' + @staticmethod def _extract_urls(webpage): return re.findall( @@ -65,61 +102,48 @@ class OpenloadIE(InfoExtractor): webpage) def _real_extract(self, url): + exe = check_executable('phantomjs', ['-v']) + if not exe: + raise ExtractorError('PhantomJS executable not found in PATH, ' + 'download it from http://phantomjs.org', + expected=True) + video_id = self._match_id(url) - webpage = self._download_webpage('https://openload.co/embed/%s/' % video_id, video_id) + url = 'https://openload.co/embed/%s/' % video_id + webpage = self._download_webpage(url, video_id) if 'File not found' in webpage or 'deleted by the owner' in webpage: - raise ExtractorError('File not found', expected=True) - - ol_id = self._search_regex( - ']+id="[^"]+"[^>]*>([0-9A-Za-z]+)', - webpage, 'openload ID') - - decoded = '' - a = ol_id[0:24] - b = [] - for i in range(0, len(a), 8): - b.append(int(a[i:i + 8] or '0', 16)) - ol_id = ol_id[24:] - j = 0 - k = 0 - while j < len(ol_id): - c = 128 - d = 0 - e = 0 - f = 0 - _more = True - while _more: - if j + 1 >= len(ol_id): - c = 143 - f = int(ol_id[j:j + 2] or '0', 16) - j += 2 - d += (f & 127) << e - e += 7 - _more = f >= c - g = d ^ b[k % 3] - for i in range(4): - char_dec = (g >> 8 * i) & (c + 127) - char = compat_chr(char_dec) - if char != '#': - decoded += char - k += 1 - - video_url = 'https://openload.co/stream/%s?mime=true' - video_url = video_url % decoded - - title = self._og_search_title(webpage, default=None) or self._search_regex( - r']+class=["\']title["\'][^>]*>([^<]+)', webpage, - 'title', default=None) or self._html_search_meta( - 'description', webpage, 'title', fatal=True) + raise ExtractorError('File not found', expected=True, video_id=video_id) + + script_file = tempfile.NamedTemporaryFile(mode='w', delete=False) + + # write JS script to file and close it + with script_file: + script_file.write(self._PHANTOMJS_SCRIPT % video_id) + + self.to_screen('%s: Decoding video ID with PhantomJS' % video_id) + + p = subprocess.Popen([exe, '--ssl-protocol=any', script_file.name], + stdout=subprocess.PIPE, stderr=subprocess.PIPE) + output, err = p.communicate() + if p.returncode != 0: + raise ExtractorError('Decoding failed\n:' + + encodeArgument(err)) + else: + decoded_id, title = encodeArgument(output).strip().split(' ', 1) + + os.remove(script_file.name) + + video_url = 'https://openload.co/stream/%s?mime=true' % decoded_id entries = self._parse_html5_media_entries(url, webpage, video_id) - subtitles = entries[0]['subtitles'] if entries else None + entry = entries[0] if entries else {} + subtitles = entry.get('subtitles') info_dict = { 'id': video_id, 'title': title, - 'thumbnail': self._og_search_thumbnail(webpage, default=None), + 'thumbnail': entry.get('thumbnail') or self._og_search_thumbnail(webpage, default=None), 'url': video_url, # Seems all videos have extensions in their titles 'ext': determine_ext(title, 'mp4'), -- cgit 1.4.1 From da57ebaf84225240b356530cdf02d12596f0dce8 Mon Sep 17 00:00:00 2001 From: Tithen-Firion Date: Tue, 25 Apr 2017 01:06:14 +0200 Subject: [openload] separate PhantomJS code from extractor --- youtube_dl/extractor/openload.py | 78 +++++----------------- youtube_dl/utils.py | 141 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 157 insertions(+), 62 deletions(-) (limited to 'youtube_dl/extractor/openload.py') diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index 789bf997e..ac5e0bb08 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -1,17 +1,14 @@ # coding: utf-8 from __future__ import unicode_literals -import os import re -import subprocess -import tempfile from .common import InfoExtractor from ..utils import ( - check_executable, determine_ext, - encodeArgument, ExtractorError, + get_element_by_id, + PhantomJSwrapper, ) @@ -62,38 +59,7 @@ class OpenloadIE(InfoExtractor): 'only_matching': True, }] - _PHANTOMJS_SCRIPT = r''' - phantom.onError = function(msg, trace) { - var msgStack = ['PHANTOM ERROR: ' + msg]; - if(trace && trace.length) { - msgStack.push('TRACE:'); - trace.forEach(function(t) { - msgStack.push(' -> ' + (t.file || t.sourceURL) + ': ' + t.line - + (t.function ? ' (in function ' + t.function +')' : '')); - }); - } - console.error(msgStack.join('\n')); - phantom.exit(1); - }; - var page = require('webpage').create(); - page.settings.resourceTimeout = 10000; - page.onInitialized = function() { - page.evaluate(function() { - delete window._phantom; - delete window.callPhantom; - }); - }; - page.open('https://openload.co/embed/%s/', function(status) { - var info = page.evaluate(function() { - return { - decoded_id: document.getElementById('streamurl').innerHTML, - title: document.querySelector('meta[name="og:title"],' - + 'meta[name=description]').content - }; - }); - console.log(info.decoded_id + ' ' + info.title); - phantom.exit(); - });''' + _USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36' @staticmethod def _extract_urls(webpage): @@ -102,40 +68,27 @@ class OpenloadIE(InfoExtractor): webpage) def _real_extract(self, url): - exe = check_executable('phantomjs', ['-v']) - if not exe: - raise ExtractorError('PhantomJS executable not found in PATH, ' - 'download it from http://phantomjs.org', - expected=True) - video_id = self._match_id(url) url = 'https://openload.co/embed/%s/' % video_id - webpage = self._download_webpage(url, video_id) + headers = { + 'User-Agent': self._USER_AGENT, + } + + phantom = PhantomJSwrapper(self) + webpage, _ = phantom.get(url, video_id=video_id, headers=headers) if 'File not found' in webpage or 'deleted by the owner' in webpage: raise ExtractorError('File not found', expected=True, video_id=video_id) - script_file = tempfile.NamedTemporaryFile(mode='w', delete=False) - - # write JS script to file and close it - with script_file: - script_file.write(self._PHANTOMJS_SCRIPT % video_id) - - self.to_screen('%s: Decoding video ID with PhantomJS' % video_id) - - p = subprocess.Popen([exe, '--ssl-protocol=any', script_file.name], - stdout=subprocess.PIPE, stderr=subprocess.PIPE) - output, err = p.communicate() - if p.returncode != 0: - raise ExtractorError('Decoding failed\n:' - + encodeArgument(err)) - else: - decoded_id, title = encodeArgument(output).strip().split(' ', 1) - - os.remove(script_file.name) + decoded_id = get_element_by_id('streamurl', webpage) video_url = 'https://openload.co/stream/%s?mime=true' % decoded_id + title = self._og_search_title(webpage, default=None) or self._search_regex( + r']+class=["\']title["\'][^>]*>([^<]+)', webpage, + 'title', default=None) or self._html_search_meta( + 'description', webpage, 'title', fatal=True) + entries = self._parse_html5_media_entries(url, webpage, video_id) entry = entries[0] if entries else {} subtitles = entry.get('subtitles') @@ -148,5 +101,6 @@ class OpenloadIE(InfoExtractor): # Seems all videos have extensions in their titles 'ext': determine_ext(title, 'mp4'), 'subtitles': subtitles, + 'http_headers': headers, } return info_dict diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 2340bc306..94e1b07a6 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -3652,3 +3652,144 @@ def write_xattr(path, key, value): "Couldn't find a tool to set the xattrs. " "Install either the python 'xattr' module, " "or the 'xattr' binary.") + + +class PhantomJSwrapper(object): + """PhantomJS wrapper class""" + + _TEMPLATE = r''' + phantom.onError = function(msg, trace) {{ + var msgStack = ['PHANTOM ERROR: ' + msg]; + if(trace && trace.length) {{ + msgStack.push('TRACE:'); + trace.forEach(function(t) {{ + msgStack.push(' -> ' + (t.file || t.sourceURL) + ': ' + t.line + + (t.function ? ' (in function ' + t.function +')' : '')); + }}); + }} + console.error(msgStack.join('\n')); + phantom.exit(1); + }}; + var page = require('webpage').create(); + var fs = require('fs'); + var read = {{ mode: 'r', charset: 'utf-8' }}; + var write = {{ mode: 'w', charset: 'utf-8' }}; + page.settings.resourceTimeout = {timeout}; + page.settings.userAgent = "{ua}"; + page.onLoadStarted = function() {{ + page.evaluate(function() {{ + delete window._phantom; + delete window.callPhantom; + }}); + }}; + var saveAndExit = function() {{ + fs.write("{html}", page.content, write); + phantom.exit(); + }}; + page.onLoadFinished = function(status) {{ + if(page.url === "") {{ + page.setContent(fs.read("{html}", read), "{url}"); + }} + else {{ + {jscode} + }} + }}; + page.open(""); + ''' + + _TMP_FILE_NAMES = ['script', 'html'] + + def __init__(self, extractor, timeout=10000): + self.exe = check_executable('phantomjs', ['-v']) + if not self.exe: + raise ExtractorError('PhantomJS executable not found in PATH, ' + 'download it from http://phantomjs.org', + expected=True) + self.extractor = extractor + self.options = { + 'timeout': timeout, + } + self._TMP_FILES = {} + for name in self._TMP_FILE_NAMES: + tmp = tempfile.NamedTemporaryFile(delete=False) + tmp.close() + self._TMP_FILES[name] = tmp + + def __del__(self): + for name in self._TMP_FILE_NAMES: + try: + os.remove(self._TMP_FILES[name].name) + except: + pass + + def get(self, url, html=None, video_id=None, note=None, note2='Executing JS on webpage', headers={}, jscode='saveAndExit();'): + """ + Downloads webpage (if needed) and executes JS + + Params: + url: website url + html: optional, html code of website + video_id: video id + note: optional, displayed when downloading webpage + note2: optional, displayed when executing JS + headers: custom http headers + jscode: code to be executed when page is loaded + + Returns tuple with: + * downloaded website (after JS execution) + * anything you print with `console.log` (but not inside `page.execute`!) + + In most cases you don't need to add any `jscode`. + It is executed in `page.onLoadFinished`. + `saveAndExit();` is mandatory, use it instead of `phantom.exit()` + It is possible to wait for some element on the webpage, for example: + var check = function() { + var elementFound = page.evaluate(function() { + return document.querySelector('#b.done') !== null; + }); + if(elementFound) + saveAndExit(); + else + window.setTimeout(check, 500); + } + + page.evaluate(function(){ + document.querySelector('#a').click(); + }); + check(); + """ + if 'saveAndExit();' not in jscode: + raise ExtractorError('`saveAndExit();` not found in `jscode`') + if not html: + html = self.extractor._download_webpage(url, video_id, note=note, headers=headers) + with open(self._TMP_FILES['html'].name, 'wb') as f: + f.write(html.encode('utf-8')) + + replaces = self.options + replaces['url'] = url + user_agent = headers.get('User-Agent') or std_headers['User-Agent'] + replaces['ua'] = user_agent.replace('"', '\\"') + replaces['jscode'] = jscode + + for x in self._TMP_FILE_NAMES: + replaces[x] = self._TMP_FILES[x].name.replace('\\', '\\\\').replace('"', '\\"') + + with open(self._TMP_FILES['script'].name, 'wb') as f: + f.write(self._TEMPLATE.format(**replaces).encode('utf-8')) + + if video_id is None: + self.extractor.to_screen('%s' % (note2,)) + else: + self.extractor.to_screen('%s: %s' % (video_id, note2)) + + p = subprocess.Popen([self.exe, '--ssl-protocol=any', + self._TMP_FILES['script'].name], stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + out, err = p.communicate() + if p.returncode != 0: + raise ExtractorError('Executing JS failed\n:' + + encodeArgument(err)) + with open(self._TMP_FILES['html'].name, 'rb') as f: + html = f.read().decode('utf-8') + return (html, encodeArgument(out)) + -- cgit 1.4.1 From fcace2d1adac5d1f306b22219fde3a4542bcd719 Mon Sep 17 00:00:00 2001 From: Tithen-Firion Date: Sat, 29 Apr 2017 10:30:45 +0200 Subject: [openload] raise `not found` before executing js --- youtube_dl/extractor/openload.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'youtube_dl/extractor/openload.py') diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index ac5e0bb08..0adf17765 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -74,12 +74,14 @@ class OpenloadIE(InfoExtractor): 'User-Agent': self._USER_AGENT, } - phantom = PhantomJSwrapper(self) - webpage, _ = phantom.get(url, video_id=video_id, headers=headers) + webpage = self._download_webpage(url, video_id, headers=headers) if 'File not found' in webpage or 'deleted by the owner' in webpage: raise ExtractorError('File not found', expected=True, video_id=video_id) + phantom = PhantomJSwrapper(self) + webpage, _ = phantom.get(url, html=webpage, video_id=video_id, headers=headers) + decoded_id = get_element_by_id('streamurl', webpage) video_url = 'https://openload.co/stream/%s?mime=true' % decoded_id -- cgit 1.4.1 From 7552f96352f35cd877e52fd0770b77ba1856fc62 Mon Sep 17 00:00:00 2001 From: Tithen-Firion Date: Sat, 29 Apr 2017 12:41:57 +0200 Subject: [openload] Add required version --- youtube_dl/extractor/openload.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'youtube_dl/extractor/openload.py') diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index 0adf17765..292476ef8 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -79,7 +79,7 @@ class OpenloadIE(InfoExtractor): if 'File not found' in webpage or 'deleted by the owner' in webpage: raise ExtractorError('File not found', expected=True, video_id=video_id) - phantom = PhantomJSwrapper(self) + phantom = PhantomJSwrapper(self, required_version='2.0') webpage, _ = phantom.get(url, html=webpage, video_id=video_id, headers=headers) decoded_id = get_element_by_id('streamurl', webpage) -- cgit 1.4.1 From 4c54b89e0310abd6be643604dc0f1ec3a608b68d Mon Sep 17 00:00:00 2001 From: Sergey M․ Date: Sun, 24 Sep 2017 00:08:27 +0700 Subject: Hide experimental phantomjs wrapper --- youtube_dl/YoutubeDL.py | 2 +- youtube_dl/extractor/openload.py | 228 ++++++++++++++++++++++++++++++++++++++- youtube_dl/utils.py | 214 ------------------------------------ 3 files changed, 228 insertions(+), 216 deletions(-) (limited to 'youtube_dl/extractor/openload.py') diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index bfb4ff225..0a7f36c98 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -89,10 +89,10 @@ from .utils import ( write_string, YoutubeDLCookieProcessor, YoutubeDLHandler, - PhantomJSwrapper, ) from .cache import Cache from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER +from .extractor.openload import PhantomJSwrapper from .downloader import get_suitable_downloader from .downloader.rtmp import rtmpdump_version from .postprocessor import ( diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index 292476ef8..da1ef02e5 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -1,17 +1,243 @@ # coding: utf-8 from __future__ import unicode_literals +import json +import os import re +import subprocess +import tempfile from .common import InfoExtractor +from ..compat import compat_urlparse from ..utils import ( + check_executable, determine_ext, + encodeArgument, ExtractorError, get_element_by_id, - PhantomJSwrapper, + get_exe_version, + is_outdated_version, + std_headers, ) +def cookie_to_dict(cookie): + cookie_dict = { + 'name': cookie.name, + 'value': cookie.value, + } + if cookie.port_specified: + cookie_dict['port'] = cookie.port + if cookie.domain_specified: + cookie_dict['domain'] = cookie.domain + if cookie.path_specified: + cookie_dict['path'] = cookie.path + if cookie.expires is not None: + cookie_dict['expires'] = cookie.expires + if cookie.secure is not None: + cookie_dict['secure'] = cookie.secure + if cookie.discard is not None: + cookie_dict['discard'] = cookie.discard + try: + if (cookie.has_nonstandard_attr('httpOnly') or + cookie.has_nonstandard_attr('httponly') or + cookie.has_nonstandard_attr('HttpOnly')): + cookie_dict['httponly'] = True + except TypeError: + pass + return cookie_dict + + +def cookie_jar_to_list(cookie_jar): + return [cookie_to_dict(cookie) for cookie in cookie_jar] + + +class PhantomJSwrapper(object): + """PhantomJS wrapper class + + This class is experimental. + """ + + _TEMPLATE = r''' + phantom.onError = function(msg, trace) {{ + var msgStack = ['PHANTOM ERROR: ' + msg]; + if(trace && trace.length) {{ + msgStack.push('TRACE:'); + trace.forEach(function(t) {{ + msgStack.push(' -> ' + (t.file || t.sourceURL) + ': ' + t.line + + (t.function ? ' (in function ' + t.function +')' : '')); + }}); + }} + console.error(msgStack.join('\n')); + phantom.exit(1); + }}; + var page = require('webpage').create(); + var fs = require('fs'); + var read = {{ mode: 'r', charset: 'utf-8' }}; + var write = {{ mode: 'w', charset: 'utf-8' }}; + JSON.parse(fs.read("{cookies}", read)).forEach(function(x) {{ + phantom.addCookie(x); + }}); + page.settings.resourceTimeout = {timeout}; + page.settings.userAgent = "{ua}"; + page.onLoadStarted = function() {{ + page.evaluate(function() {{ + delete window._phantom; + delete window.callPhantom; + }}); + }}; + var saveAndExit = function() {{ + fs.write("{html}", page.content, write); + fs.write("{cookies}", JSON.stringify(phantom.cookies), write); + phantom.exit(); + }}; + page.onLoadFinished = function(status) {{ + if(page.url === "") {{ + page.setContent(fs.read("{html}", read), "{url}"); + }} + else {{ + {jscode} + }} + }}; + page.open(""); + ''' + + _TMP_FILE_NAMES = ['script', 'html', 'cookies'] + + @staticmethod + def _version(): + return get_exe_version('phantomjs', version_re=r'([0-9.]+)') + + def __init__(self, extractor, required_version=None, timeout=10000): + self.exe = check_executable('phantomjs', ['-v']) + if not self.exe: + raise ExtractorError('PhantomJS executable not found in PATH, ' + 'download it from http://phantomjs.org', + expected=True) + + self.extractor = extractor + + if required_version: + version = self._version() + if is_outdated_version(version, required_version): + self.extractor._downloader.report_warning( + 'Your copy of PhantomJS is outdated, update it to version ' + '%s or newer if you encounter any errors.' % required_version) + + self.options = { + 'timeout': timeout, + } + self._TMP_FILES = {} + for name in self._TMP_FILE_NAMES: + tmp = tempfile.NamedTemporaryFile(delete=False) + tmp.close() + self._TMP_FILES[name] = tmp + + def __del__(self): + for name in self._TMP_FILE_NAMES: + try: + os.remove(self._TMP_FILES[name].name) + except: + pass + + def _save_cookies(self, url): + cookies = cookie_jar_to_list(self.extractor._downloader.cookiejar) + for cookie in cookies: + if 'path' not in cookie: + cookie['path'] = '/' + if 'domain' not in cookie: + cookie['domain'] = compat_urlparse.urlparse(url).netloc + with open(self._TMP_FILES['cookies'].name, 'wb') as f: + f.write(json.dumps(cookies).encode('utf-8')) + + def _load_cookies(self): + with open(self._TMP_FILES['cookies'].name, 'rb') as f: + cookies = json.loads(f.read().decode('utf-8')) + for cookie in cookies: + if cookie['httponly'] is True: + cookie['rest'] = {'httpOnly': None} + if 'expiry' in cookie: + cookie['expire_time'] = cookie['expiry'] + self.extractor._set_cookie(**cookie) + + def get(self, url, html=None, video_id=None, note=None, note2='Executing JS on webpage', headers={}, jscode='saveAndExit();'): + """ + Downloads webpage (if needed) and executes JS + + Params: + url: website url + html: optional, html code of website + video_id: video id + note: optional, displayed when downloading webpage + note2: optional, displayed when executing JS + headers: custom http headers + jscode: code to be executed when page is loaded + + Returns tuple with: + * downloaded website (after JS execution) + * anything you print with `console.log` (but not inside `page.execute`!) + + In most cases you don't need to add any `jscode`. + It is executed in `page.onLoadFinished`. + `saveAndExit();` is mandatory, use it instead of `phantom.exit()` + It is possible to wait for some element on the webpage, for example: + var check = function() { + var elementFound = page.evaluate(function() { + return document.querySelector('#b.done') !== null; + }); + if(elementFound) + saveAndExit(); + else + window.setTimeout(check, 500); + } + + page.evaluate(function(){ + document.querySelector('#a').click(); + }); + check(); + """ + if 'saveAndExit();' not in jscode: + raise ExtractorError('`saveAndExit();` not found in `jscode`') + if not html: + html = self.extractor._download_webpage(url, video_id, note=note, headers=headers) + with open(self._TMP_FILES['html'].name, 'wb') as f: + f.write(html.encode('utf-8')) + + self._save_cookies(url) + + replaces = self.options + replaces['url'] = url + user_agent = headers.get('User-Agent') or std_headers['User-Agent'] + replaces['ua'] = user_agent.replace('"', '\\"') + replaces['jscode'] = jscode + + for x in self._TMP_FILE_NAMES: + replaces[x] = self._TMP_FILES[x].name.replace('\\', '\\\\').replace('"', '\\"') + + with open(self._TMP_FILES['script'].name, 'wb') as f: + f.write(self._TEMPLATE.format(**replaces).encode('utf-8')) + + if video_id is None: + self.extractor.to_screen('%s' % (note2,)) + else: + self.extractor.to_screen('%s: %s' % (video_id, note2)) + + p = subprocess.Popen([ + self.exe, '--ssl-protocol=any', + self._TMP_FILES['script'].name + ], stdout=subprocess.PIPE, stderr=subprocess.PIPE) + out, err = p.communicate() + if p.returncode != 0: + raise ExtractorError( + 'Executing JS failed\n:' + encodeArgument(err)) + with open(self._TMP_FILES['html'].name, 'rb') as f: + html = f.read().decode('utf-8') + + self._load_cookies() + + return (html, encodeArgument(out)) + + class OpenloadIE(InfoExtractor): _VALID_URL = r'https?://(?:openload\.(?:co|io)|oload\.tv)/(?:f|embed)/(?P[a-zA-Z0-9-_]+)' diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index acc4f987b..92b22e639 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -3826,220 +3826,6 @@ def write_xattr(path, key, value): "or the 'xattr' binary.") -def cookie_to_dict(cookie): - cookie_dict = { - 'name': cookie.name, - 'value': cookie.value, - } - if cookie.port_specified: - cookie_dict['port'] = cookie.port - if cookie.domain_specified: - cookie_dict['domain'] = cookie.domain - if cookie.path_specified: - cookie_dict['path'] = cookie.path - if cookie.expires is not None: - cookie_dict['expires'] = cookie.expires - if cookie.secure is not None: - cookie_dict['secure'] = cookie.secure - if cookie.discard is not None: - cookie_dict['discard'] = cookie.discard - try: - if (cookie.has_nonstandard_attr('httpOnly') or - cookie.has_nonstandard_attr('httponly') or - cookie.has_nonstandard_attr('HttpOnly')): - cookie_dict['httponly'] = True - except TypeError: - pass - return cookie_dict - - -def cookie_jar_to_list(cookie_jar): - return [cookie_to_dict(cookie) for cookie in cookie_jar] - - -class PhantomJSwrapper(object): - """PhantomJS wrapper class""" - - _TEMPLATE = r''' - phantom.onError = function(msg, trace) {{ - var msgStack = ['PHANTOM ERROR: ' + msg]; - if(trace && trace.length) {{ - msgStack.push('TRACE:'); - trace.forEach(function(t) {{ - msgStack.push(' -> ' + (t.file || t.sourceURL) + ': ' + t.line - + (t.function ? ' (in function ' + t.function +')' : '')); - }}); - }} - console.error(msgStack.join('\n')); - phantom.exit(1); - }}; - var page = require('webpage').create(); - var fs = require('fs'); - var read = {{ mode: 'r', charset: 'utf-8' }}; - var write = {{ mode: 'w', charset: 'utf-8' }}; - JSON.parse(fs.read("{cookies}", read)).forEach(function(x) {{ - phantom.addCookie(x); - }}); - page.settings.resourceTimeout = {timeout}; - page.settings.userAgent = "{ua}"; - page.onLoadStarted = function() {{ - page.evaluate(function() {{ - delete window._phantom; - delete window.callPhantom; - }}); - }}; - var saveAndExit = function() {{ - fs.write("{html}", page.content, write); - fs.write("{cookies}", JSON.stringify(phantom.cookies), write); - phantom.exit(); - }}; - page.onLoadFinished = function(status) {{ - if(page.url === "") {{ - page.setContent(fs.read("{html}", read), "{url}"); - }} - else {{ - {jscode} - }} - }}; - page.open(""); - ''' - - _TMP_FILE_NAMES = ['script', 'html', 'cookies'] - - @staticmethod - def _version(): - return get_exe_version('phantomjs', version_re=r'([0-9.]+)') - - def __init__(self, extractor, required_version=None, timeout=10000): - self.exe = check_executable('phantomjs', ['-v']) - if not self.exe: - raise ExtractorError('PhantomJS executable not found in PATH, ' - 'download it from http://phantomjs.org', - expected=True) - - self.extractor = extractor - - if required_version: - version = self._version() - if is_outdated_version(version, required_version): - self.extractor._downloader.report_warning( - 'Your copy of PhantomJS is outdated, update it to version ' - '%s or newer if you encounter any errors.' % required_version) - - self.options = { - 'timeout': timeout, - } - self._TMP_FILES = {} - for name in self._TMP_FILE_NAMES: - tmp = tempfile.NamedTemporaryFile(delete=False) - tmp.close() - self._TMP_FILES[name] = tmp - - def __del__(self): - for name in self._TMP_FILE_NAMES: - try: - os.remove(self._TMP_FILES[name].name) - except: - pass - - def _save_cookies(self, url): - cookies = cookie_jar_to_list(self.extractor._downloader.cookiejar) - for cookie in cookies: - if 'path' not in cookie: - cookie['path'] = '/' - if 'domain' not in cookie: - cookie['domain'] = compat_urlparse.urlparse(url).netloc - with open(self._TMP_FILES['cookies'].name, 'wb') as f: - f.write(json.dumps(cookies).encode('utf-8')) - - def _load_cookies(self): - with open(self._TMP_FILES['cookies'].name, 'rb') as f: - cookies = json.loads(f.read().decode('utf-8')) - for cookie in cookies: - if cookie['httponly'] is True: - cookie['rest'] = {'httpOnly': None} - if 'expiry' in cookie: - cookie['expire_time'] = cookie['expiry'] - self.extractor._set_cookie(**cookie) - - def get(self, url, html=None, video_id=None, note=None, note2='Executing JS on webpage', headers={}, jscode='saveAndExit();'): - """ - Downloads webpage (if needed) and executes JS - - Params: - url: website url - html: optional, html code of website - video_id: video id - note: optional, displayed when downloading webpage - note2: optional, displayed when executing JS - headers: custom http headers - jscode: code to be executed when page is loaded - - Returns tuple with: - * downloaded website (after JS execution) - * anything you print with `console.log` (but not inside `page.execute`!) - - In most cases you don't need to add any `jscode`. - It is executed in `page.onLoadFinished`. - `saveAndExit();` is mandatory, use it instead of `phantom.exit()` - It is possible to wait for some element on the webpage, for example: - var check = function() { - var elementFound = page.evaluate(function() { - return document.querySelector('#b.done') !== null; - }); - if(elementFound) - saveAndExit(); - else - window.setTimeout(check, 500); - } - - page.evaluate(function(){ - document.querySelector('#a').click(); - }); - check(); - """ - if 'saveAndExit();' not in jscode: - raise ExtractorError('`saveAndExit();` not found in `jscode`') - if not html: - html = self.extractor._download_webpage(url, video_id, note=note, headers=headers) - with open(self._TMP_FILES['html'].name, 'wb') as f: - f.write(html.encode('utf-8')) - - self._save_cookies(url) - - replaces = self.options - replaces['url'] = url - user_agent = headers.get('User-Agent') or std_headers['User-Agent'] - replaces['ua'] = user_agent.replace('"', '\\"') - replaces['jscode'] = jscode - - for x in self._TMP_FILE_NAMES: - replaces[x] = self._TMP_FILES[x].name.replace('\\', '\\\\').replace('"', '\\"') - - with open(self._TMP_FILES['script'].name, 'wb') as f: - f.write(self._TEMPLATE.format(**replaces).encode('utf-8')) - - if video_id is None: - self.extractor.to_screen('%s' % (note2,)) - else: - self.extractor.to_screen('%s: %s' % (video_id, note2)) - - p = subprocess.Popen([ - self.exe, '--ssl-protocol=any', - self._TMP_FILES['script'].name - ], stdout=subprocess.PIPE, stderr=subprocess.PIPE) - out, err = p.communicate() - if p.returncode != 0: - raise ExtractorError( - 'Executing JS failed\n:' + encodeArgument(err)) - with open(self._TMP_FILES['html'].name, 'rb') as f: - html = f.read().decode('utf-8') - - self._load_cookies() - - return (html, encodeArgument(out)) - - def random_birthday(year_field, month_field, day_field): return { year_field: str(random.randint(1950, 1995)), -- cgit 1.4.1 From 011da618bdb907cbe02aacefba421dad4cf49a4d Mon Sep 17 00:00:00 2001 From: Sergey M․ Date: Sun, 24 Sep 2017 00:12:40 +0700 Subject: [openload] Fix _load_cookies for python 2.6 --- youtube_dl/extractor/openload.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'youtube_dl/extractor/openload.py') diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index da1ef02e5..b50d6c77b 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -8,7 +8,10 @@ import subprocess import tempfile from .common import InfoExtractor -from ..compat import compat_urlparse +from ..compat import ( + compat_urlparse, + compat_kwargs, +) from ..utils import ( check_executable, determine_ext, @@ -158,7 +161,7 @@ class PhantomJSwrapper(object): cookie['rest'] = {'httpOnly': None} if 'expiry' in cookie: cookie['expire_time'] = cookie['expiry'] - self.extractor._set_cookie(**cookie) + self.extractor._set_cookie(**compat_kwargs(cookie)) def get(self, url, html=None, video_id=None, note=None, note2='Executing JS on webpage', headers={}, jscode='saveAndExit();'): """ -- cgit 1.4.1 From 9ef909f2b2bf81e16c4fd2e428a740a523ad61cf Mon Sep 17 00:00:00 2001 From: jahudka Date: Sat, 25 Nov 2017 18:04:13 +0100 Subject: [openload] Add support for openload.link --- youtube_dl/extractor/openload.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'youtube_dl/extractor/openload.py') diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index b50d6c77b..efa5d4601 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -242,7 +242,7 @@ class PhantomJSwrapper(object): class OpenloadIE(InfoExtractor): - _VALID_URL = r'https?://(?:openload\.(?:co|io)|oload\.tv)/(?:f|embed)/(?P[a-zA-Z0-9-_]+)' + _VALID_URL = r'https?://(?:www\.)?(?:openload\.(?:co|io|link)|oload\.tv)/(?:f|embed)/(?P[a-zA-Z0-9-_]+)' _TESTS = [{ 'url': 'https://openload.co/f/kUEfGclsU9o', @@ -286,6 +286,9 @@ class OpenloadIE(InfoExtractor): }, { 'url': 'https://oload.tv/embed/KnG-kKZdcfY/', 'only_matching': True, + }, { + 'url': 'http://www.openload.link/f/KnG-kKZdcfY', + 'only_matching': True, }] _USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36' -- cgit 1.4.1 From 6ff27b8d5a11f0960f53b93a1ea2423d3384af21 Mon Sep 17 00:00:00 2001 From: Sergey M․ Date: Sun, 26 Nov 2017 00:05:28 +0700 Subject: [openload] Don't use bare except when removing temp files --- youtube_dl/extractor/openload.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'youtube_dl/extractor/openload.py') diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index efa5d4601..a99af12a4 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -140,7 +140,7 @@ class PhantomJSwrapper(object): for name in self._TMP_FILE_NAMES: try: os.remove(self._TMP_FILES[name].name) - except: + except (IOError, OSError): pass def _save_cookies(self, url): -- cgit 1.4.1 From 273c23d960cbd2da18fadaef002473db41b5f56b Mon Sep 17 00:00:00 2001 From: Sergey M․ Date: Sun, 24 Dec 2017 13:53:27 +0700 Subject: [openload] Add support for oload.stream (closes #15070) --- youtube_dl/extractor/openload.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'youtube_dl/extractor/openload.py') diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index a99af12a4..aed579f36 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -242,7 +242,7 @@ class PhantomJSwrapper(object): class OpenloadIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?(?:openload\.(?:co|io|link)|oload\.tv)/(?:f|embed)/(?P[a-zA-Z0-9-_]+)' + _VALID_URL = r'https?://(?:www\.)?(?:openload\.(?:co|io|link)|oload\.(?:tv|stream))/(?:f|embed)/(?P[a-zA-Z0-9-_]+)' _TESTS = [{ 'url': 'https://openload.co/f/kUEfGclsU9o', @@ -289,6 +289,9 @@ class OpenloadIE(InfoExtractor): }, { 'url': 'http://www.openload.link/f/KnG-kKZdcfY', 'only_matching': True, + }, { + 'url': 'https://oload.stream/f/KnG-kKZdcfY', + 'only_matching': True, }] _USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36' -- cgit 1.4.1 From a75419586bb900df711de49adf5047afa9f083ef Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 24 Dec 2017 20:47:42 +0800 Subject: [openload] Remove a confusing exception If phantomjs is not installed, there's an error besides the missing phantomjs exception: Exception ignored in: > Traceback (most recent call last): File "/home/yen/Projects/youtube-dl/youtube_dl/extractor/openload.py", line 142, in __del__ os.remove(self._TMP_FILES[name].name) AttributeError: 'PhantomJSwrapper' object has no attribute '_TMP_FILES' --- youtube_dl/extractor/openload.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'youtube_dl/extractor/openload.py') diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index aed579f36..d1eb3be25 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -112,6 +112,8 @@ class PhantomJSwrapper(object): return get_exe_version('phantomjs', version_re=r'([0-9.]+)') def __init__(self, extractor, required_version=None, timeout=10000): + self._TMP_FILES = {} + self.exe = check_executable('phantomjs', ['-v']) if not self.exe: raise ExtractorError('PhantomJS executable not found in PATH, ' @@ -130,7 +132,6 @@ class PhantomJSwrapper(object): self.options = { 'timeout': timeout, } - self._TMP_FILES = {} for name in self._TMP_FILE_NAMES: tmp = tempfile.NamedTemporaryFile(delete=False) tmp.close() @@ -140,7 +141,7 @@ class PhantomJSwrapper(object): for name in self._TMP_FILE_NAMES: try: os.remove(self._TMP_FILES[name].name) - except (IOError, OSError): + except (IOError, OSError, KeyError): pass def _save_cookies(self, url): -- cgit 1.4.1 From d2c5b5a951868ae974bc3af6659ab39b8abd2157 Mon Sep 17 00:00:00 2001 From: Sergey M․ Date: Sat, 30 Dec 2017 05:52:35 +0700 Subject: [openload] Fallback on f-page extraction (closes #14665, closes #14879) --- youtube_dl/extractor/openload.py | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) (limited to 'youtube_dl/extractor/openload.py') diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index d1eb3be25..81c1317b6 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -284,6 +284,11 @@ class OpenloadIE(InfoExtractor): # for title and ext 'url': 'https://openload.co/embed/Sxz5sADo82g/', 'only_matching': True, + }, { + # unavailable via https://openload.co/embed/e-Ixz9ZR5L0/ but available + # via https://openload.co/f/e-Ixz9ZR5L0/ + 'url': 'https://openload.co/f/e-Ixz9ZR5L0/', + 'only_matching': True, }, { 'url': 'https://oload.tv/embed/KnG-kKZdcfY/', 'only_matching': True, @@ -305,18 +310,27 @@ class OpenloadIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - url = 'https://openload.co/embed/%s/' % video_id + url_pattern = 'https://openload.co/%%s/%s/' % video_id headers = { 'User-Agent': self._USER_AGENT, } - webpage = self._download_webpage(url, video_id, headers=headers) - - if 'File not found' in webpage or 'deleted by the owner' in webpage: - raise ExtractorError('File not found', expected=True, video_id=video_id) + for path in ('embed', 'f'): + page_url = url_pattern % path + last = path == 'f' + webpage = self._download_webpage( + page_url, video_id, 'Downloading %s webpage' % path, + headers=headers, fatal=last) + if not webpage: + continue + if 'File not found' in webpage or 'deleted by the owner' in webpage: + if not last: + continue + raise ExtractorError('File not found', expected=True, video_id=video_id) + break phantom = PhantomJSwrapper(self, required_version='2.0') - webpage, _ = phantom.get(url, html=webpage, video_id=video_id, headers=headers) + webpage, _ = phantom.get(page_url, html=webpage, video_id=video_id, headers=headers) decoded_id = get_element_by_id('streamurl', webpage) @@ -327,7 +341,7 @@ class OpenloadIE(InfoExtractor): 'title', default=None) or self._html_search_meta( 'description', webpage, 'title', fatal=True) - entries = self._parse_html5_media_entries(url, webpage, video_id) + entries = self._parse_html5_media_entries(page_url, webpage, video_id) entry = entries[0] if entries else {} subtitles = entry.get('subtitles') -- cgit 1.4.1 From 620ee8712e264c71b444748c86a51b258818b4a1 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 30 Dec 2017 15:03:13 +0800 Subject: [openload] Fix extraction (closes #15118) --- ChangeLog | 6 ++++++ youtube_dl/extractor/openload.py | 3 ++- 2 files changed, 8 insertions(+), 1 deletion(-) (limited to 'youtube_dl/extractor/openload.py') diff --git a/ChangeLog b/ChangeLog index 4323cfc5c..08aed1ba7 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version + +Extractors +* [openload] Fix extraction (#15118) + + version 2017.12.28 Extractors diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index 81c1317b6..b282bcfd9 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -332,7 +332,8 @@ class OpenloadIE(InfoExtractor): phantom = PhantomJSwrapper(self, required_version='2.0') webpage, _ = phantom.get(page_url, html=webpage, video_id=video_id, headers=headers) - decoded_id = get_element_by_id('streamurl', webpage) + decoded_id = (get_element_by_id('streamurl', webpage) or + get_element_by_id('streamuri', webpage)) video_url = 'https://openload.co/stream/%s?mime=true' % decoded_id -- cgit 1.4.1 From de329f64abd920e148701436d6a20bfc5b2d3ef3 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Thu, 4 Jan 2018 13:26:08 +0800 Subject: [openload] Fix extraction (closes #15166) --- ChangeLog | 1 + youtube_dl/extractor/openload.py | 6 +++++- 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'youtube_dl/extractor/openload.py') diff --git a/ChangeLog b/ChangeLog index 3e6afca92..94b27b6a1 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version Extractors +* [openload] Fix extraction (#15166) * [rtve.es:alacarta] Fix extraction of some new URLs diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index b282bcfd9..eaaaf8a08 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -333,7 +333,11 @@ class OpenloadIE(InfoExtractor): webpage, _ = phantom.get(page_url, html=webpage, video_id=video_id, headers=headers) decoded_id = (get_element_by_id('streamurl', webpage) or - get_element_by_id('streamuri', webpage)) + get_element_by_id('streamuri', webpage) or + get_element_by_id('streamurj', webpage)) + + if not decoded_id: + raise ExtractorError('Can\'t find stream URL', video_id=video_id) video_url = 'https://openload.co/stream/%s?mime=true' % decoded_id -- cgit 1.4.1