From fdf9b959bc600c9377d6567d29081baa4f23f0da Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 8 Feb 2017 16:23:42 +0100 Subject: [nbc] add support adobe pass auth(closes #12006) --- youtube_dl/extractor/nbc.py | 69 +++++++++++++++++++++++++++++++++++---------- 1 file changed, 54 insertions(+), 15 deletions(-) (limited to 'youtube_dl/extractor/nbc.py') diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index 434a94de4..d2a44d05d 100644 --- a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -4,23 +4,26 @@ import re from .common import InfoExtractor from .theplatform import ThePlatformIE +from .adobepass import AdobePassIE +from ..compat import compat_urllib_parse_urlparse from ..utils import ( find_xpath_attr, lowercase_escape, smuggle_url, unescapeHTML, update_url_query, + int_or_none, ) -class NBCIE(InfoExtractor): +class NBCIE(AdobePassIE): _VALID_URL = r'https?://(?:www\.)?nbc\.com/(?:[^/]+/)+(?Pn?\d+)' _TESTS = [ { - 'url': 'http://www.nbc.com/the-tonight-show/segments/112966', + 'url': 'http://www.nbc.com/the-tonight-show/video/jimmy-fallon-surprises-fans-at-ben-jerrys/2848237', 'info_dict': { - 'id': '112966', + 'id': '2848237', 'ext': 'mp4', 'title': 'Jimmy Fallon Surprises Fans at Ben & Jerry\'s', 'description': 'Jimmy gives out free scoops of his new "Tonight Dough" ice cream flavor by surprising customers at the Ben & Jerry\'s scoop shop.', @@ -69,7 +72,7 @@ class NBCIE(InfoExtractor): # HLS streams requires the 'hdnea3' cookie 'url': 'http://www.nbc.com/Kings/video/goliath/n1806', 'info_dict': { - 'id': 'n1806', + 'id': '101528f5a9e8127b107e98c5e6ce4638', 'ext': 'mp4', 'title': 'Goliath', 'description': 'When an unknown soldier saves the life of the King\'s son in battle, he\'s thrust into the limelight and politics of the kingdom.', @@ -87,21 +90,57 @@ class NBCIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - theplatform_url = unescapeHTML(lowercase_escape(self._html_search_regex( - [ - r'(?:class="video-player video-player-full" data-mpx-url|class="player" src)="(.*?)"', - r']+src="((?:https?:)?//player\.theplatform\.com/[^"]+)"', - r'"embedURL"\s*:\s*"([^"]+)"' - ], - webpage, 'theplatform url').replace('_no_endcard', '').replace('\\/', '/'))) - if theplatform_url.startswith('//'): - theplatform_url = 'http:' + theplatform_url - return { + info = { '_type': 'url_transparent', 'ie_key': 'ThePlatform', - 'url': smuggle_url(theplatform_url, {'source_url': url}), 'id': video_id, } + video_data = None + preload = self._search_regex( + r'PRELOAD\s*=\s*({.+})', webpage, 'preload data', default=None) + if preload: + preload_data = self._parse_json(preload, video_id) + path = compat_urllib_parse_urlparse(url).path.rstrip('/') + entity_id = preload_data.get('xref', {}).get(path) + video_data = preload_data.get('entities', {}).get(entity_id) + if video_data: + query = { + 'mbr': 'true', + 'manifest': 'm3u', + } + video_id = video_data['guid'] + title = video_data['title'] + if video_data.get('entitlement') == 'auth': + resource = self._get_mvpd_resource( + 'nbcentertainment', title, video_id, + video_data.get('vChipRating')) + query['auth'] = self._extract_mvpd_auth( + url, video_id, 'nbcentertainment', resource) + theplatform_url = smuggle_url(update_url_query( + 'http://link.theplatform.com/s/NnzsPC/media/guid/2410887629/' + video_id, + query), {'force_smil_url': True}) + info.update({ + 'id': video_id, + 'title': title, + 'url': theplatform_url, + 'description': video_data.get('description'), + 'keywords': video_data.get('keywords'), + 'season_number': int_or_none(video_data.get('seasonNumber')), + 'episode_number': int_or_none(video_data.get('episodeNumber')), + 'series': video_data.get('showName'), + }) + else: + theplatform_url = unescapeHTML(lowercase_escape(self._html_search_regex( + [ + r'(?:class="video-player video-player-full" data-mpx-url|class="player" src)="(.*?)"', + r']+src="((?:https?:)?//player\.theplatform\.com/[^"]+)"', + r'"embedURL"\s*:\s*"([^"]+)"' + ], + webpage, 'theplatform url').replace('_no_endcard', '').replace('\\/', '/'))) + if theplatform_url.startswith('//'): + theplatform_url = 'http:' + theplatform_url + info['url'] = smuggle_url(theplatform_url, {'source_url': url}) + return info class NBCSportsVPlayerIE(InfoExtractor): -- cgit 1.4.1 From 2eeb588efe9a7df4b2dcd90de9e461e8ff4a40fa Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sun, 7 May 2017 08:58:34 +0100 Subject: [nbc] improve extraction(closes #12364) --- youtube_dl/extractor/nbc.py | 94 ++++++++++++++------------------------------- 1 file changed, 29 insertions(+), 65 deletions(-) (limited to 'youtube_dl/extractor/nbc.py') diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index d2a44d05d..3b31ca3ef 100644 --- a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -17,7 +17,7 @@ from ..utils import ( class NBCIE(AdobePassIE): - _VALID_URL = r'https?://(?:www\.)?nbc\.com/(?:[^/]+/)+(?Pn?\d+)' + _VALID_URL = r'https?://(?:www\.)?nbc\.com/[^/]+/video/[^/]+/(?Pn?\d+)' _TESTS = [ { @@ -36,16 +36,6 @@ class NBCIE(AdobePassIE): 'skip_download': True, }, }, - { - 'url': 'http://www.nbc.com/the-tonight-show/episodes/176', - 'info_dict': { - 'id': '176', - 'ext': 'flv', - 'title': 'Ricky Gervais, Steven Van Zandt, ILoveMakonnen', - 'description': 'A brand new episode of The Tonight Show welcomes Ricky Gervais, Steven Van Zandt and ILoveMakonnen.', - }, - 'skip': '404 Not Found', - }, { 'url': 'http://www.nbc.com/saturday-night-live/video/star-wars-teaser/2832821', 'info_dict': { @@ -63,11 +53,6 @@ class NBCIE(AdobePassIE): }, 'skip': 'Only works from US', }, - { - # This video has expired but with an escaped embedURL - 'url': 'http://www.nbc.com/parenthood/episode-guide/season-5/just-like-at-home/515', - 'only_matching': True, - }, { # HLS streams requires the 'hdnea3' cookie 'url': 'http://www.nbc.com/Kings/video/goliath/n1806', @@ -89,58 +74,37 @@ class NBCIE(AdobePassIE): def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - info = { + video_data = self._download_json( + 'https://api.nbc.com/v3/videos', video_id, query={ + 'filter[permalink]': url, + })['data'][0]['attributes'] + query = { + 'mbr': 'true', + 'manifest': 'm3u', + } + video_id = video_data['guid'] + title = video_data['title'] + if video_data.get('entitlement') == 'auth': + resource = self._get_mvpd_resource( + 'nbcentertainment', title, video_id, + video_data.get('vChipRating')) + query['auth'] = self._extract_mvpd_auth( + url, video_id, 'nbcentertainment', resource) + theplatform_url = smuggle_url(update_url_query( + 'http://link.theplatform.com/s/NnzsPC/media/guid/2410887629/' + video_id, + query), {'force_smil_url': True}) + return { '_type': 'url_transparent', - 'ie_key': 'ThePlatform', 'id': video_id, + 'title': title, + 'url': theplatform_url, + 'description': video_data.get('description'), + 'keywords': video_data.get('keywords'), + 'season_number': int_or_none(video_data.get('seasonNumber')), + 'episode_number': int_or_none(video_data.get('episodeNumber')), + 'series': video_data.get('showName'), + 'ie_key': 'ThePlatform', } - video_data = None - preload = self._search_regex( - r'PRELOAD\s*=\s*({.+})', webpage, 'preload data', default=None) - if preload: - preload_data = self._parse_json(preload, video_id) - path = compat_urllib_parse_urlparse(url).path.rstrip('/') - entity_id = preload_data.get('xref', {}).get(path) - video_data = preload_data.get('entities', {}).get(entity_id) - if video_data: - query = { - 'mbr': 'true', - 'manifest': 'm3u', - } - video_id = video_data['guid'] - title = video_data['title'] - if video_data.get('entitlement') == 'auth': - resource = self._get_mvpd_resource( - 'nbcentertainment', title, video_id, - video_data.get('vChipRating')) - query['auth'] = self._extract_mvpd_auth( - url, video_id, 'nbcentertainment', resource) - theplatform_url = smuggle_url(update_url_query( - 'http://link.theplatform.com/s/NnzsPC/media/guid/2410887629/' + video_id, - query), {'force_smil_url': True}) - info.update({ - 'id': video_id, - 'title': title, - 'url': theplatform_url, - 'description': video_data.get('description'), - 'keywords': video_data.get('keywords'), - 'season_number': int_or_none(video_data.get('seasonNumber')), - 'episode_number': int_or_none(video_data.get('episodeNumber')), - 'series': video_data.get('showName'), - }) - else: - theplatform_url = unescapeHTML(lowercase_escape(self._html_search_regex( - [ - r'(?:class="video-player video-player-full" data-mpx-url|class="player" src)="(.*?)"', - r']+src="((?:https?:)?//player\.theplatform\.com/[^"]+)"', - r'"embedURL"\s*:\s*"([^"]+)"' - ], - webpage, 'theplatform url').replace('_no_endcard', '').replace('\\/', '/'))) - if theplatform_url.startswith('//'): - theplatform_url = 'http:' + theplatform_url - info['url'] = smuggle_url(theplatform_url, {'source_url': url}) - return info class NBCSportsVPlayerIE(InfoExtractor): -- cgit 1.4.1 From 52294cdda761ad08785e7118ae8e121ceef257ec Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sun, 7 May 2017 09:31:14 +0100 Subject: [nbc] remove unused imports and extract permalink from modified urls --- youtube_dl/extractor/nbc.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'youtube_dl/extractor/nbc.py') diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index 3b31ca3ef..62db70b43 100644 --- a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -5,10 +5,8 @@ import re from .common import InfoExtractor from .theplatform import ThePlatformIE from .adobepass import AdobePassIE -from ..compat import compat_urllib_parse_urlparse from ..utils import ( find_xpath_attr, - lowercase_escape, smuggle_url, unescapeHTML, update_url_query, @@ -17,7 +15,7 @@ from ..utils import ( class NBCIE(AdobePassIE): - _VALID_URL = r'https?://(?:www\.)?nbc\.com/[^/]+/video/[^/]+/(?Pn?\d+)' + _VALID_URL = r'(?Phttps?://(?:www\.)?nbc\.com/[^/]+/video/[^/]+/(?Pn?\d+))' _TESTS = [ { @@ -73,10 +71,10 @@ class NBCIE(AdobePassIE): ] def _real_extract(self, url): - video_id = self._match_id(url) + permalink, video_id = re.match(self._VALID_URL, url).groups() video_data = self._download_json( 'https://api.nbc.com/v3/videos', video_id, query={ - 'filter[permalink]': url, + 'filter[permalink]': permalink, })['data'][0]['attributes'] query = { 'mbr': 'true', -- cgit 1.4.1 From 12ea5c79fb0bfa878d62d130cf67057fc230dfa7 Mon Sep 17 00:00:00 2001 From: Parmjit Virk Date: Wed, 20 Sep 2017 14:53:06 -0500 Subject: [nbcsports:vplayer] Correct theplatform URL (closes #13873) --- youtube_dl/extractor/nbc.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'youtube_dl/extractor/nbc.py') diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index 62db70b43..836a41f06 100644 --- a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -109,10 +109,10 @@ class NBCSportsVPlayerIE(InfoExtractor): _VALID_URL = r'https?://vplayer\.nbcsports\.com/(?:[^/]+/)+(?P[0-9a-zA-Z_]+)' _TESTS = [{ - 'url': 'https://vplayer.nbcsports.com/p/BxmELC/nbcsports_share/select/9CsDKds0kvHI', + 'url': 'https://vplayer.nbcsports.com/p/BxmELC/nbcsports_embed/select/9CsDKds0kvHI', 'info_dict': { 'id': '9CsDKds0kvHI', - 'ext': 'flv', + 'ext': 'mp4', 'description': 'md5:df390f70a9ba7c95ff1daace988f0d8d', 'title': 'Tyler Kalinoski hits buzzer-beater to lift Davidson', 'timestamp': 1426270238, @@ -120,7 +120,7 @@ class NBCSportsVPlayerIE(InfoExtractor): 'uploader': 'NBCU-SPORTS', } }, { - 'url': 'http://vplayer.nbcsports.com/p/BxmELC/nbc_embedshare/select/_hqLjQ95yx8Z', + 'url': 'https://vplayer.nbcsports.com/p/BxmELC/nbcsports_embed/select/media/_hqLjQ95yx8Z', 'only_matching': True, }] @@ -134,7 +134,8 @@ class NBCSportsVPlayerIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - theplatform_url = self._og_search_video_url(webpage) + theplatform_url = self._og_search_video_url(webpage).replace( + 'vplayer.nbcsports.com', 'player.theplatform.com') return self.url_result(theplatform_url, 'ThePlatform') -- cgit 1.4.1 From b7e14f06a4a4fbaafc593c2f118e4b0f5d8d7937 Mon Sep 17 00:00:00 2001 From: Matthias Küch Date: Tue, 3 Oct 2017 15:17:28 +0200 Subject: Fix for JSON meta data download Added fixes according to #13651 and user @remitamine --- youtube_dl/extractor/nbc.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'youtube_dl/extractor/nbc.py') diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index 836a41f06..35151f527 100644 --- a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -15,7 +15,7 @@ from ..utils import ( class NBCIE(AdobePassIE): - _VALID_URL = r'(?Phttps?://(?:www\.)?nbc\.com/[^/]+/video/[^/]+/(?Pn?\d+))' + _VALID_URL = r'https?(?P://(?:www\.)?nbc\.com/[^/]+/video/[^/]+/(?Pn?\d+))' _TESTS = [ { @@ -72,6 +72,7 @@ class NBCIE(AdobePassIE): def _real_extract(self, url): permalink, video_id = re.match(self._VALID_URL, url).groups() + permalink = 'http' + permalink video_data = self._download_json( 'https://api.nbc.com/v3/videos', video_id, query={ 'filter[permalink]': permalink, -- cgit 1.4.1 From d673ab65628e1c83e31d3396f5090dad26218232 Mon Sep 17 00:00:00 2001 From: Logan B Date: Thu, 26 Oct 2017 05:23:27 +1300 Subject: [nbc] Add support for classic-tv videos --- youtube_dl/extractor/nbc.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'youtube_dl/extractor/nbc.py') diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index 35151f527..554dec36e 100644 --- a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -15,7 +15,7 @@ from ..utils import ( class NBCIE(AdobePassIE): - _VALID_URL = r'https?(?P://(?:www\.)?nbc\.com/[^/]+/video/[^/]+/(?Pn?\d+))' + _VALID_URL = r'https?(?P://(?:www\.)?nbc\.com/(?:classic-tv/)?[^/]+/video/[^/]+/(?Pn?\d+))' _TESTS = [ { @@ -67,7 +67,11 @@ class NBCIE(AdobePassIE): 'skip_download': True, }, 'skip': 'Only works from US', - } + }, + { + 'url': 'https://www.nbc.com/classic-tv/charles-in-charge/video/charles-in-charge-pilot/n3310', + 'only_matching': True, + }, ] def _real_extract(self, url): -- cgit 1.4.1