From 56fc078da84a7f26d8290b2b425cc2da66a5975a Mon Sep 17 00:00:00 2001 From: Andre Walker Date: Sat, 28 Jan 2017 16:19:38 +0100 Subject: [npo] Update subtitles url NPO websites changed the domain they used for subtitles, from e.omroep.nl to tt888.omroep.nl. --- youtube_dl/extractor/npo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'youtube_dl/extractor/npo.py') diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py index c91f58461..962437145 100644 --- a/youtube_dl/extractor/npo.py +++ b/youtube_dl/extractor/npo.py @@ -241,7 +241,7 @@ class NPOIE(NPOBaseIE): if metadata.get('tt888') == 'ja': subtitles['nl'] = [{ 'ext': 'vtt', - 'url': 'http://e.omroep.nl/tt888/%s' % video_id, + 'url': 'http://tt888.omroep.nl/tt888/%s' % video_id, }] return { -- cgit 1.4.1 From 0dc5a86a329314f551f86c2ef3202342b7506667 Mon Sep 17 00:00:00 2001 From: Sergey M․ Date: Mon, 27 Feb 2017 22:43:19 +0700 Subject: [npo] Add support for hetklokhuis.nl (closes #12293) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/npo.py | 44 ++++++++++++++++++++++++++++---------- 2 files changed, 34 insertions(+), 11 deletions(-) (limited to 'youtube_dl/extractor/npo.py') diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index d09104096..0910b7b05 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -669,6 +669,7 @@ from .npo import ( NPORadioIE, NPORadioFragmentIE, SchoolTVIE, + HetKlokhuisIE, VPROIE, WNLIE, ) diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py index 962437145..09e8d9987 100644 --- a/youtube_dl/extractor/npo.py +++ b/youtube_dl/extractor/npo.py @@ -416,7 +416,21 @@ class NPORadioFragmentIE(InfoExtractor): } -class SchoolTVIE(InfoExtractor): +class NPODataMidEmbedIE(InfoExtractor): + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + video_id = self._search_regex( + r'data-mid=(["\'])(?P(?:(?!\1).)+)\1', webpage, 'video_id', group='id') + return { + '_type': 'url_transparent', + 'ie_key': 'NPO', + 'url': 'npo:%s' % video_id, + 'display_id': display_id + } + + +class SchoolTVIE(NPODataMidEmbedIE): IE_NAME = 'schooltv' _VALID_URL = r'https?://(?:www\.)?schooltv\.nl/video/(?P[^/?#&]+)' @@ -435,17 +449,25 @@ class SchoolTVIE(InfoExtractor): } } - def _real_extract(self, url): - display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) - video_id = self._search_regex( - r'data-mid=(["\'])(?P(?:(?!\1).)+)\1', webpage, 'video_id', group='id') - return { - '_type': 'url_transparent', - 'ie_key': 'NPO', - 'url': 'npo:%s' % video_id, - 'display_id': display_id + +class HetKlokhuisIE(NPODataMidEmbedIE): + IE_NAME = 'schooltv' + _VALID_URL = r'https?://(?:www\.)?hetklokhuis.nl/[^/]+/\d+/(?P[^/?#&]+)' + + _TEST = { + 'url': 'http://hetklokhuis.nl/tv-uitzending/3471/Zwaartekrachtsgolven', + 'info_dict': { + 'id': 'VPWON_1260528', + 'display_id': 'Zwaartekrachtsgolven', + 'ext': 'm4v', + 'title': 'Het Klokhuis: Zwaartekrachtsgolven', + 'description': 'md5:c94f31fb930d76c2efa4a4a71651dd48', + 'upload_date': '20170223', + }, + 'params': { + 'skip_download': True } + } class NPOPlaylistBaseIE(NPOIE): -- cgit 1.4.1 From f264c62334fdd31a7620b4fdefb822e1bae6bd77 Mon Sep 17 00:00:00 2001 From: Sergey M․ Date: Mon, 27 Feb 2017 23:10:00 +0700 Subject: [npo] Add support for zapp.nl --- youtube_dl/extractor/npo.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'youtube_dl/extractor/npo.py') diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py index 09e8d9987..7c2c93f27 100644 --- a/youtube_dl/extractor/npo.py +++ b/youtube_dl/extractor/npo.py @@ -51,7 +51,8 @@ class NPOIE(NPOBaseIE): (?: npo\.nl/(?!live|radio)(?:[^/]+/){2}| ntr\.nl/(?:[^/]+/){2,}| - omroepwnl\.nl/video/fragment/[^/]+__ + omroepwnl\.nl/video/fragment/[^/]+__| + zapp\.nl/[^/]+/(?:gemist|filmpjes)/ ) ) (?P[^/?#]+) @@ -140,6 +141,14 @@ class NPOIE(NPOBaseIE): 'upload_date': '20150508', 'duration': 462, }, + }, + { + 'url': 'http://www.zapp.nl/de-bzt-show/gemist/KN_1687547', + 'only_matching': True, + }, + { + 'url': 'http://www.zapp.nl/de-bzt-show/filmpjes/POMS_KN_7315118', + 'only_matching': True, } ] -- cgit 1.4.1 From dbc01fdb6f4b4c58469ffb75d00a179f5af5cdcb Mon Sep 17 00:00:00 2001 From: Sergey M․ Date: Mon, 27 Feb 2017 23:10:29 +0700 Subject: [hetklokhuis] Fix IE_NAME --- youtube_dl/extractor/npo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'youtube_dl/extractor/npo.py') diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py index 7c2c93f27..b53c29993 100644 --- a/youtube_dl/extractor/npo.py +++ b/youtube_dl/extractor/npo.py @@ -460,7 +460,7 @@ class SchoolTVIE(NPODataMidEmbedIE): class HetKlokhuisIE(NPODataMidEmbedIE): - IE_NAME = 'schooltv' + IE_NAME = 'hetklokhuis' _VALID_URL = r'https?://(?:www\.)?hetklokhuis.nl/[^/]+/\d+/(?P[^/?#&]+)' _TEST = { -- cgit 1.4.1 From 18abb743762ce5b9b2ffd4d9d5e01b62621cc62e Mon Sep 17 00:00:00 2001 From: Sergey M․ Date: Mon, 27 Feb 2017 23:13:51 +0700 Subject: [npo] Relax _VALID_URL for zapp.nl --- youtube_dl/extractor/npo.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'youtube_dl/extractor/npo.py') diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py index b53c29993..50473d777 100644 --- a/youtube_dl/extractor/npo.py +++ b/youtube_dl/extractor/npo.py @@ -52,7 +52,7 @@ class NPOIE(NPOBaseIE): npo\.nl/(?!live|radio)(?:[^/]+/){2}| ntr\.nl/(?:[^/]+/){2,}| omroepwnl\.nl/video/fragment/[^/]+__| - zapp\.nl/[^/]+/(?:gemist|filmpjes)/ + zapp\.nl/[^/]+/[^/]+/ ) ) (?P[^/?#]+) @@ -149,6 +149,10 @@ class NPOIE(NPOBaseIE): { 'url': 'http://www.zapp.nl/de-bzt-show/filmpjes/POMS_KN_7315118', 'only_matching': True, + }, + { + 'url': 'http://www.zapp.nl/beste-vrienden-quiz/extra-video-s/WO_NTR_1067990', + 'only_matching': True, } ] -- cgit 1.4.1 From aa9cc2ecbfea6c82944b4e07f3e93c904f1ff421 Mon Sep 17 00:00:00 2001 From: Sergey M․ Date: Wed, 1 Mar 2017 05:03:14 +0700 Subject: [npo] Adapt to app.php API (closes #12311) --- youtube_dl/extractor/npo.py | 79 ++++++++++++++++++++------------------------- 1 file changed, 35 insertions(+), 44 deletions(-) (limited to 'youtube_dl/extractor/npo.py') diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py index 50473d777..89082c189 100644 --- a/youtube_dl/extractor/npo.py +++ b/youtube_dl/extractor/npo.py @@ -17,27 +17,9 @@ from ..utils import ( class NPOBaseIE(InfoExtractor): def _get_token(self, video_id): - token_page = self._download_webpage( - 'http://ida.omroep.nl/npoplayer/i.js', - video_id, note='Downloading token') - token = self._search_regex( - r'npoplayer\.token = "(.+?)"', token_page, 'token') - # Decryption algorithm extracted from http://npoplayer.omroep.nl/csjs/npoplayer-min.js - token_l = list(token) - first = second = None - for i in range(5, len(token_l) - 4): - if token_l[i].isdigit(): - if first is None: - first = i - elif second is None: - second = i - if first is None or second is None: - first = 12 - second = 13 - - token_l[first], token_l[second] = token_l[second], token_l[first] - - return ''.join(token_l) + return self._download_json( + 'http://ida.omroep.nl/app.php/auth', video_id, + note='Downloading token')['token'] class NPOIE(NPOBaseIE): @@ -187,32 +169,41 @@ class NPOIE(NPOBaseIE): pubopties = metadata.get('pubopties') if pubopties: quality = qualities(['adaptive', 'wmv_sb', 'h264_sb', 'wmv_bb', 'h264_bb', 'wvc1_std', 'h264_std']) - for format_id in pubopties: - format_info = self._download_json( - 'http://ida.omroep.nl/odi/?prid=%s&puboptions=%s&adaptive=yes&token=%s' - % (video_id, format_id, token), - video_id, 'Downloading %s JSON' % format_id) - if format_info.get('error_code', 0) or format_info.get('errorcode', 0): + items = self._download_json( + 'http://ida.omroep.nl/app.php/%s' % video_id, + 'Downloading formats JSON', query={ + 'adaptive': 'yes', + 'token': token, + })['items'][0] + for num, item in enumerate(items): + item_url = item.get('url') + if not item_url: continue - streams = format_info.get('streams') - if streams: - try: - video_info = self._download_json( - streams[0] + '&type=json', - video_id, 'Downloading %s stream JSON' % format_id) - except ExtractorError as ee: - if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404: - error = (self._parse_json(ee.cause.read().decode(), video_id, fatal=False) or {}).get('errorstring') - if error: - raise ExtractorError(error, expected=True) - raise - else: - video_info = format_info - video_url = video_info.get('url') + format_id = self._search_regex( + r'video/ida/([^/]+)', item_url, 'format id', + default=None) + try: + stream_info = self._download_json( + item_url + '&type=json', video_id, + 'Downloading %s stream JSON' % item.get('label') or format_id or num) + except ExtractorError as ee: + if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404: + error = (self._parse_json( + ee.cause.read().decode(), video_id, + fatal=False) or {}).get('errorstring') + if error: + raise ExtractorError(error, expected=True) + raise + if stream_info.get('error_code', 0) or stream_info.get('errorcode', 0): + continue + video_url = stream_info.get('url') if not video_url: continue - if format_id == 'adaptive': - formats.extend(self._extract_m3u8_formats(video_url, video_id, 'mp4')) + if stream_info.get('family') == 'adaptive': + formats.extend(self._extract_m3u8_formats( + video_url, video_id, ext='mp4', + entry_protocol='m3u8_native', m3u8_id='hls', + fatal=False)) else: formats.append({ 'url': video_url, -- cgit 1.4.1 From 83e8fce628e810e2a5639ef9a21be839526512fb Mon Sep 17 00:00:00 2001 From: Sergey M․ Date: Wed, 1 Mar 2017 22:14:46 +0700 Subject: [npo] Improve extraction and update tests --- youtube_dl/extractor/npo.py | 402 ++++++++++++++++++++++---------------------- 1 file changed, 197 insertions(+), 205 deletions(-) (limited to 'youtube_dl/extractor/npo.py') diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py index 89082c189..be10fc486 100644 --- a/youtube_dl/extractor/npo.py +++ b/youtube_dl/extractor/npo.py @@ -3,15 +3,19 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import compat_HTTPError +from ..compat import ( + compat_HTTPError, + compat_str, +) from ..utils import ( + determine_ext, + ExtractorError, fix_xml_ampersands, orderedSet, parse_duration, qualities, strip_jsonp, unified_strdate, - ExtractorError, ) @@ -40,103 +44,101 @@ class NPOIE(NPOBaseIE): (?P[^/?#]+) ''' - _TESTS = [ - { - 'url': 'http://www.npo.nl/nieuwsuur/22-06-2014/VPWON_1220719', - 'md5': '4b3f9c429157ec4775f2c9cb7b911016', - 'info_dict': { - 'id': 'VPWON_1220719', - 'ext': 'm4v', - 'title': 'Nieuwsuur', - 'description': 'Dagelijks tussen tien en elf: nieuws, sport en achtergronden.', - 'upload_date': '20140622', - }, + _TESTS = [{ + 'url': 'http://www.npo.nl/nieuwsuur/22-06-2014/VPWON_1220719', + 'md5': '4b3f9c429157ec4775f2c9cb7b911016', + 'info_dict': { + 'id': 'VPWON_1220719', + 'ext': 'm4v', + 'title': 'Nieuwsuur', + 'description': 'Dagelijks tussen tien en elf: nieuws, sport en achtergronden.', + 'upload_date': '20140622', }, - { - 'url': 'http://www.npo.nl/de-mega-mike-mega-thomas-show/27-02-2009/VARA_101191800', - 'md5': 'da50a5787dbfc1603c4ad80f31c5120b', - 'info_dict': { - 'id': 'VARA_101191800', - 'ext': 'm4v', - 'title': 'De Mega Mike & Mega Thomas show: The best of.', - 'description': 'md5:3b74c97fc9d6901d5a665aac0e5400f4', - 'upload_date': '20090227', - 'duration': 2400, - }, + }, { + 'url': 'http://www.npo.nl/de-mega-mike-mega-thomas-show/27-02-2009/VARA_101191800', + 'md5': 'da50a5787dbfc1603c4ad80f31c5120b', + 'info_dict': { + 'id': 'VARA_101191800', + 'ext': 'm4v', + 'title': 'De Mega Mike & Mega Thomas show: The best of.', + 'description': 'md5:3b74c97fc9d6901d5a665aac0e5400f4', + 'upload_date': '20090227', + 'duration': 2400, }, - { - 'url': 'http://www.npo.nl/tegenlicht/25-02-2013/VPWON_1169289', - 'md5': 'f8065e4e5a7824068ed3c7e783178f2c', - 'info_dict': { - 'id': 'VPWON_1169289', - 'ext': 'm4v', - 'title': 'Tegenlicht: De toekomst komt uit Afrika', - 'description': 'md5:52cf4eefbc96fffcbdc06d024147abea', - 'upload_date': '20130225', - 'duration': 3000, - }, + }, { + 'url': 'http://www.npo.nl/tegenlicht/25-02-2013/VPWON_1169289', + 'md5': 'f8065e4e5a7824068ed3c7e783178f2c', + 'info_dict': { + 'id': 'VPWON_1169289', + 'ext': 'm4v', + 'title': 'Tegenlicht: Zwart geld. De toekomst komt uit Afrika', + 'description': 'md5:52cf4eefbc96fffcbdc06d024147abea', + 'upload_date': '20130225', + 'duration': 3000, }, - { - 'url': 'http://www.npo.nl/de-nieuwe-mens-deel-1/21-07-2010/WO_VPRO_043706', - 'info_dict': { - 'id': 'WO_VPRO_043706', - 'ext': 'wmv', - 'title': 'De nieuwe mens - Deel 1', - 'description': 'md5:518ae51ba1293ffb80d8d8ce90b74e4b', - 'duration': 4680, - }, - 'params': { - # mplayer mms download - 'skip_download': True, - } + }, { + 'url': 'http://www.npo.nl/de-nieuwe-mens-deel-1/21-07-2010/WO_VPRO_043706', + 'info_dict': { + 'id': 'WO_VPRO_043706', + 'ext': 'm4v', + 'title': 'De nieuwe mens - Deel 1', + 'description': 'md5:518ae51ba1293ffb80d8d8ce90b74e4b', + 'duration': 4680, }, + 'params': { + 'skip_download': True, + } + }, { # non asf in streams - { - 'url': 'http://www.npo.nl/hoe-gaat-europa-verder-na-parijs/10-01-2015/WO_NOS_762771', - 'md5': 'b3da13de374cbe2d5332a7e910bef97f', - 'info_dict': { - 'id': 'WO_NOS_762771', - 'ext': 'mp4', - 'title': 'Hoe gaat Europa verder na Parijs?', - }, - }, - { - 'url': 'http://www.ntr.nl/Aap-Poot-Pies/27/detail/Aap-poot-pies/VPWON_1233944#content', - 'md5': '01c6a2841675995da1f0cf776f03a9c3', - 'info_dict': { - 'id': 'VPWON_1233944', - 'ext': 'm4v', - 'title': 'Aap, poot, pies', - 'description': 'md5:c9c8005d1869ae65b858e82c01a91fde', - 'upload_date': '20150508', - 'duration': 599, - }, - }, - { - 'url': 'http://www.omroepwnl.nl/video/fragment/vandaag-de-dag-verkiezingen__POMS_WNL_853698', - 'md5': 'd30cd8417b8b9bca1fdff27428860d08', - 'info_dict': { - 'id': 'POW_00996502', - 'ext': 'm4v', - 'title': '''"Dit is wel een 'landslide'..."''', - 'description': 'md5:f8d66d537dfb641380226e31ca57b8e8', - 'upload_date': '20150508', - 'duration': 462, - }, + 'url': 'http://www.npo.nl/hoe-gaat-europa-verder-na-parijs/10-01-2015/WO_NOS_762771', + 'info_dict': { + 'id': 'WO_NOS_762771', + 'ext': 'mp4', + 'title': 'Hoe gaat Europa verder na Parijs?', }, - { - 'url': 'http://www.zapp.nl/de-bzt-show/gemist/KN_1687547', - 'only_matching': True, + 'params': { + 'skip_download': True, + } + }, { + 'url': 'http://www.ntr.nl/Aap-Poot-Pies/27/detail/Aap-poot-pies/VPWON_1233944#content', + 'info_dict': { + 'id': 'VPWON_1233944', + 'ext': 'm4v', + 'title': 'Aap, poot, pies', + 'description': 'md5:c9c8005d1869ae65b858e82c01a91fde', + 'upload_date': '20150508', + 'duration': 599, }, - { - 'url': 'http://www.zapp.nl/de-bzt-show/filmpjes/POMS_KN_7315118', - 'only_matching': True, + 'params': { + 'skip_download': True, + } + }, { + 'url': 'http://www.omroepwnl.nl/video/fragment/vandaag-de-dag-verkiezingen__POMS_WNL_853698', + 'info_dict': { + 'id': 'POW_00996502', + 'ext': 'm4v', + 'title': '''"Dit is wel een 'landslide'..."''', + 'description': 'md5:f8d66d537dfb641380226e31ca57b8e8', + 'upload_date': '20150508', + 'duration': 462, }, - { - 'url': 'http://www.zapp.nl/beste-vrienden-quiz/extra-video-s/WO_NTR_1067990', - 'only_matching': True, + 'params': { + 'skip_download': True, } - ] + }, { + 'url': 'http://www.zapp.nl/de-bzt-show/gemist/KN_1687547', + 'only_matching': True, + }, { + 'url': 'http://www.zapp.nl/de-bzt-show/filmpjes/POMS_KN_7315118', + 'only_matching': True, + }, { + 'url': 'http://www.zapp.nl/beste-vrienden-quiz/extra-video-s/WO_NTR_1067990', + 'only_matching': True, + }, { + # live stream + 'url': 'npo:LI_NL1_4188102', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) @@ -165,79 +167,115 @@ class NPOIE(NPOBaseIE): token = self._get_token(video_id) formats = [] + urls = set() + + quality = qualities(['adaptive', 'wmv_sb', 'h264_sb', 'wmv_bb', 'h264_bb', 'wvc1_std', 'h264_std']) + items = self._download_json( + 'http://ida.omroep.nl/app.php/%s' % video_id, video_id, + 'Downloading formats JSON', query={ + 'adaptive': 'yes', + 'token': token, + })['items'][0] + for num, item in enumerate(items): + item_url = item.get('url') + if not item_url or item_url in urls: + continue + urls.add(item_url) + format_id = self._search_regex( + r'video/ida/([^/]+)', item_url, 'format id', + default=None) + + def add_format_url(format_url): + formats.append({ + 'url': format_url, + 'format_id': format_id, + 'quality': quality(format_id), + }) - pubopties = metadata.get('pubopties') - if pubopties: - quality = qualities(['adaptive', 'wmv_sb', 'h264_sb', 'wmv_bb', 'h264_bb', 'wvc1_std', 'h264_std']) - items = self._download_json( - 'http://ida.omroep.nl/app.php/%s' % video_id, - 'Downloading formats JSON', query={ - 'adaptive': 'yes', - 'token': token, - })['items'][0] - for num, item in enumerate(items): - item_url = item.get('url') - if not item_url: - continue - format_id = self._search_regex( - r'video/ida/([^/]+)', item_url, 'format id', - default=None) - try: - stream_info = self._download_json( - item_url + '&type=json', video_id, - 'Downloading %s stream JSON' % item.get('label') or format_id or num) - except ExtractorError as ee: - if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404: - error = (self._parse_json( - ee.cause.read().decode(), video_id, - fatal=False) or {}).get('errorstring') - if error: - raise ExtractorError(error, expected=True) - raise - if stream_info.get('error_code', 0) or stream_info.get('errorcode', 0): + # Example: http://www.npo.nl/de-nieuwe-mens-deel-1/21-07-2010/WO_VPRO_043706 + if item.get('contentType') == 'url': + add_format_url(item_url) + continue + + try: + stream_info = self._download_json( + item_url + '&type=json', video_id, + 'Downloading %s stream JSON' + % item.get('label') or format_id or num) + except ExtractorError as ee: + if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404: + error = (self._parse_json( + ee.cause.read().decode(), video_id, + fatal=False) or {}).get('errorstring') + if error: + raise ExtractorError(error, expected=True) + raise + # Stream URL instead of JSON, example: npo:LI_NL1_4188102 + if isinstance(stream_info, compat_str): + if not stream_info.startswith('http'): continue + video_url = stream_info + # JSON + else: video_url = stream_info.get('url') - if not video_url: + if not video_url or video_url in urls: + continue + urls.add(item_url) + if determine_ext(video_url) == 'm3u8': + formats.extend(self._extract_m3u8_formats( + video_url, video_id, ext='mp4', + entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)) + else: + add_format_url(video_url) + + is_live = metadata.get('medium') == 'live' + + if not is_live: + for num, stream in enumerate(metadata.get('streams', [])): + stream_url = stream.get('url') + if not stream_url or stream_url in urls: + continue + urls.add(stream_url) + # smooth streaming is not supported + stream_type = stream.get('type', '').lower() + if stream_type in ['ss', 'ms']: continue - if stream_info.get('family') == 'adaptive': + if stream_type == 'hds': + f4m_formats = self._extract_f4m_formats( + stream_url, video_id, fatal=False) + # f4m downloader downloads only piece of live stream + for f4m_format in f4m_formats: + f4m_format['preference'] = -1 + formats.extend(f4m_formats) + elif stream_type == 'hls': formats.extend(self._extract_m3u8_formats( - video_url, video_id, ext='mp4', - entry_protocol='m3u8_native', m3u8_id='hls', - fatal=False)) - else: + stream_url, video_id, ext='mp4', fatal=False)) + # Example: http://www.npo.nl/de-nieuwe-mens-deel-1/21-07-2010/WO_VPRO_043706 + elif '.asf' in stream_url: + asx = self._download_xml( + stream_url, video_id, + 'Downloading stream %d ASX playlist' % num, + transform_source=fix_xml_ampersands, fatal=False) + if not asx: + continue + ref = asx.find('./ENTRY/Ref') + if ref is None: + continue + video_url = ref.get('href') + if not video_url or video_url in urls: + continue + urls.add(video_url) formats.append({ 'url': video_url, - 'format_id': format_id, - 'quality': quality(format_id), + 'ext': stream.get('formaat', 'asf'), + 'quality': stream.get('kwaliteit'), + 'preference': -10, }) - - streams = metadata.get('streams') - if streams: - for i, stream in enumerate(streams): - stream_url = stream.get('url') - if not stream_url: - continue - if '.asf' not in stream_url: + else: formats.append({ 'url': stream_url, 'quality': stream.get('kwaliteit'), }) - continue - asx = self._download_xml( - stream_url, video_id, - 'Downloading stream %d ASX playlist' % i, - transform_source=fix_xml_ampersands) - ref = asx.find('./ENTRY/Ref') - if ref is None: - continue - video_url = ref.get('href') - if not video_url: - continue - formats.append({ - 'url': video_url, - 'ext': stream.get('formaat', 'asf'), - 'quality': stream.get('kwaliteit'), - }) self._sort_formats(formats) @@ -250,28 +288,28 @@ class NPOIE(NPOBaseIE): return { 'id': video_id, - 'title': title, + 'title': self._live_title(title) if is_live else title, 'description': metadata.get('info'), 'thumbnail': metadata.get('images', [{'url': None}])[-1]['url'], 'upload_date': unified_strdate(metadata.get('gidsdatum')), 'duration': parse_duration(metadata.get('tijdsduur')), 'formats': formats, 'subtitles': subtitles, + 'is_live': is_live, } class NPOLiveIE(NPOBaseIE): IE_NAME = 'npo.nl:live' - _VALID_URL = r'https?://(?:www\.)?npo\.nl/live/(?P.+)' + _VALID_URL = r'https?://(?:www\.)?npo\.nl/live/(?P[^/?#&]+)' _TEST = { 'url': 'http://www.npo.nl/live/npo-1', 'info_dict': { - 'id': 'LI_NEDERLAND1_136692', + 'id': 'LI_NL1_4188102', 'display_id': 'npo-1', 'ext': 'mp4', - 'title': 're:^Nederland 1 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', - 'description': 'Livestream', + 'title': 're:^NPO 1 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', 'is_live': True, }, 'params': { @@ -287,58 +325,12 @@ class NPOLiveIE(NPOBaseIE): live_id = self._search_regex( r'data-prid="([^"]+)"', webpage, 'live id') - metadata = self._download_json( - 'http://e.omroep.nl/metadata/%s' % live_id, - display_id, transform_source=strip_jsonp) - - token = self._get_token(display_id) - - formats = [] - - streams = metadata.get('streams') - if streams: - for stream in streams: - stream_type = stream.get('type').lower() - # smooth streaming is not supported - if stream_type in ['ss', 'ms']: - continue - stream_info = self._download_json( - 'http://ida.omroep.nl/aapi/?stream=%s&token=%s&type=jsonp' - % (stream.get('url'), token), - display_id, 'Downloading %s JSON' % stream_type) - if stream_info.get('error_code', 0) or stream_info.get('errorcode', 0): - continue - stream_url = self._download_json( - stream_info['stream'], display_id, - 'Downloading %s URL' % stream_type, - 'Unable to download %s URL' % stream_type, - transform_source=strip_jsonp, fatal=False) - if not stream_url: - continue - if stream_type == 'hds': - f4m_formats = self._extract_f4m_formats(stream_url, display_id) - # f4m downloader downloads only piece of live stream - for f4m_format in f4m_formats: - f4m_format['preference'] = -1 - formats.extend(f4m_formats) - elif stream_type == 'hls': - formats.extend(self._extract_m3u8_formats(stream_url, display_id, 'mp4')) - else: - formats.append({ - 'url': stream_url, - 'preference': -10, - }) - - self._sort_formats(formats) - return { + '_type': 'url_transparent', + 'url': 'npo:%s' % live_id, + 'ie_key': NPOIE.ie_key(), 'id': live_id, 'display_id': display_id, - 'title': self._live_title(metadata['titel']), - 'description': metadata['info'], - 'thumbnail': metadata.get('images', [{'url': None}])[-1]['url'], - 'formats': formats, - 'is_live': True, } -- cgit 1.4.1 From 4b8a984c67cdc1b2bfde77398d74096406db9644 Mon Sep 17 00:00:00 2001 From: Sergey M․ Date: Wed, 1 Mar 2017 22:21:13 +0700 Subject: [npo] Add support for audio --- youtube_dl/extractor/npo.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'youtube_dl/extractor/npo.py') diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py index be10fc486..38fefe492 100644 --- a/youtube_dl/extractor/npo.py +++ b/youtube_dl/extractor/npo.py @@ -125,6 +125,18 @@ class NPOIE(NPOBaseIE): 'params': { 'skip_download': True, } + }, { + # audio + 'url': 'http://www.npo.nl/jouw-stad-rotterdam/29-01-2017/RBX_FUNX_6683215/RBX_FUNX_7601437', + 'info_dict': { + 'id': 'RBX_FUNX_6683215', + 'ext': 'mp3', + 'title': 'Jouw Stad Rotterdam', + 'description': 'md5:db251505244f097717ec59fabc372d9f', + }, + 'params': { + 'skip_download': True, + } }, { 'url': 'http://www.zapp.nl/de-bzt-show/gemist/KN_1687547', 'only_matching': True, @@ -193,7 +205,7 @@ class NPOIE(NPOBaseIE): }) # Example: http://www.npo.nl/de-nieuwe-mens-deel-1/21-07-2010/WO_VPRO_043706 - if item.get('contentType') == 'url': + if item.get('contentType') in ('url', 'audio'): add_format_url(item_url) continue @@ -201,7 +213,7 @@ class NPOIE(NPOBaseIE): stream_info = self._download_json( item_url + '&type=json', video_id, 'Downloading %s stream JSON' - % item.get('label') or format_id or num) + % item.get('label') or item.get('format') or format_id or num) except ExtractorError as ee: if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404: error = (self._parse_json( -- cgit 1.4.1 From 1591ba258a49b682124e6e545c13a5da3bc660e0 Mon Sep 17 00:00:00 2001 From: Aldo Gunsing Date: Sat, 25 Mar 2017 16:51:36 +0100 Subject: [npo:live] Add support for default url --- youtube_dl/extractor/npo.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) (limited to 'youtube_dl/extractor/npo.py') diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py index 38fefe492..d1c9bc20e 100644 --- a/youtube_dl/extractor/npo.py +++ b/youtube_dl/extractor/npo.py @@ -313,9 +313,9 @@ class NPOIE(NPOBaseIE): class NPOLiveIE(NPOBaseIE): IE_NAME = 'npo.nl:live' - _VALID_URL = r'https?://(?:www\.)?npo\.nl/live/(?P[^/?#&]+)' + _VALID_URL = r'https?://(?:www\.)?npo\.nl/live(/(?P[^/?#&]+))?' - _TEST = { + _TESTS = [{ 'url': 'http://www.npo.nl/live/npo-1', 'info_dict': { 'id': 'LI_NL1_4188102', @@ -327,10 +327,22 @@ class NPOLiveIE(NPOBaseIE): 'params': { 'skip_download': True, } - } + }, { + 'url': 'http://www.npo.nl/live', + 'info_dict': { + 'id': 'LI_NL1_4188102', + 'display_id': 'npo-1', + 'ext': 'mp4', + 'title': 're:^NPO 1 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', + 'is_live': True, + }, + 'params': { + 'skip_download': True, + } + }] def _real_extract(self, url): - display_id = self._match_id(url) + display_id = self._match_id(url) or 'npo-1' webpage = self._download_webpage(url, display_id) -- cgit 1.4.1 From 04e431cf97e8b73190e8db77de0e3d2b5655c195 Mon Sep 17 00:00:00 2001 From: Sergey M․ Date: Sat, 8 Apr 2017 21:31:22 +0700 Subject: [npo:live] Improve (closes #12555) --- youtube_dl/extractor/npo.py | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) (limited to 'youtube_dl/extractor/npo.py') diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py index d1c9bc20e..79296f0ef 100644 --- a/youtube_dl/extractor/npo.py +++ b/youtube_dl/extractor/npo.py @@ -313,7 +313,7 @@ class NPOIE(NPOBaseIE): class NPOLiveIE(NPOBaseIE): IE_NAME = 'npo.nl:live' - _VALID_URL = r'https?://(?:www\.)?npo\.nl/live(/(?P[^/?#&]+))?' + _VALID_URL = r'https?://(?:www\.)?npo\.nl/live(?:/(?P[^/?#&]+))?' _TESTS = [{ 'url': 'http://www.npo.nl/live/npo-1', @@ -329,16 +329,7 @@ class NPOLiveIE(NPOBaseIE): } }, { 'url': 'http://www.npo.nl/live', - 'info_dict': { - 'id': 'LI_NL1_4188102', - 'display_id': 'npo-1', - 'ext': 'mp4', - 'title': 're:^NPO 1 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', - 'is_live': True, - }, - 'params': { - 'skip_download': True, - } + 'only_matching': True, }] def _real_extract(self, url): -- cgit 1.4.1 From 7dd5415cd0e824b00e6abf9a18d55701d52babec Mon Sep 17 00:00:00 2001 From: Sergey M․ Date: Wed, 14 Jun 2017 21:33:40 +0700 Subject: [npo] Improve _VALID_URL (closes #13376) --- youtube_dl/extractor/npo.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'youtube_dl/extractor/npo.py') diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py index 79296f0ef..5f8b6def1 100644 --- a/youtube_dl/extractor/npo.py +++ b/youtube_dl/extractor/npo.py @@ -35,7 +35,7 @@ class NPOIE(NPOBaseIE): https?:// (?:www\.)? (?: - npo\.nl/(?!live|radio)(?:[^/]+/){2}| + npo\.nl/(?!(?:live|radio)/)(?:[^/]+/){2}| ntr\.nl/(?:[^/]+/){2,}| omroepwnl\.nl/video/fragment/[^/]+__| zapp\.nl/[^/]+/[^/]+/ @@ -150,6 +150,9 @@ class NPOIE(NPOBaseIE): # live stream 'url': 'npo:LI_NL1_4188102', 'only_matching': True, + }, { + 'url': 'http://www.npo.nl/radio-gaga/13-06-2017/BNN_101383373', + 'only_matching': True, }] def _real_extract(self, url): -- cgit 1.4.1 From 7a5773090789bec38a3f58dfb09039155919a540 Mon Sep 17 00:00:00 2001 From: rrooij Date: Sun, 9 Jul 2017 09:21:40 +0200 Subject: [npo:live] Fix live stream id extraction (closes #13568) --- youtube_dl/extractor/npo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'youtube_dl/extractor/npo.py') diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py index 5f8b6def1..516b1e941 100644 --- a/youtube_dl/extractor/npo.py +++ b/youtube_dl/extractor/npo.py @@ -341,7 +341,7 @@ class NPOLiveIE(NPOBaseIE): webpage = self._download_webpage(url, display_id) live_id = self._search_regex( - r'data-prid="([^"]+)"', webpage, 'live id') + [r'media-id="([^"]+)"', r'data-prid="([^"]+)"'], webpage, 'live id') return { '_type': 'url_transparent', -- cgit 1.4.1 From 359aa2fdd145d11a29a04f620fed95acbf142f66 Mon Sep 17 00:00:00 2001 From: dubber0 Date: Sat, 22 Jul 2017 14:15:55 +0200 Subject: [npo] Add support for npo3.nl URLs --- youtube_dl/extractor/npo.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'youtube_dl/extractor/npo.py') diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py index 516b1e941..fa4ef20c5 100644 --- a/youtube_dl/extractor/npo.py +++ b/youtube_dl/extractor/npo.py @@ -28,7 +28,7 @@ class NPOBaseIE(InfoExtractor): class NPOIE(NPOBaseIE): IE_NAME = 'npo' - IE_DESC = 'npo.nl and ntr.nl' + IE_DESC = 'npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl' _VALID_URL = r'''(?x) (?: npo:| @@ -38,7 +38,7 @@ class NPOIE(NPOBaseIE): npo\.nl/(?!(?:live|radio)/)(?:[^/]+/){2}| ntr\.nl/(?:[^/]+/){2,}| omroepwnl\.nl/video/fragment/[^/]+__| - zapp\.nl/[^/]+/[^/]+/ + (?:zapp|npo3)\.nl/(?:[^/]+/){2} ) ) (?P[^/?#]+) @@ -146,6 +146,9 @@ class NPOIE(NPOBaseIE): }, { 'url': 'http://www.zapp.nl/beste-vrienden-quiz/extra-video-s/WO_NTR_1067990', 'only_matching': True, + }, { + 'url': 'https://www.npo3.nl/3onderzoekt/16-09-2015/VPWON_1239870', + 'only_matching': True, }, { # live stream 'url': 'npo:LI_NL1_4188102', -- cgit 1.4.1 From 197224b7a4e37a6581bf1a0da18d0f67ea61a476 Mon Sep 17 00:00:00 2001 From: Sergey M․ Date: Mon, 9 Oct 2017 23:50:53 +0700 Subject: Fix some regexes --- youtube_dl/extractor/aenetworks.py | 2 +- youtube_dl/extractor/appletrailers.py | 4 ++-- youtube_dl/extractor/ard.py | 2 +- youtube_dl/extractor/bbc.py | 2 +- youtube_dl/extractor/dailymotion.py | 2 +- youtube_dl/extractor/deezer.py | 2 +- youtube_dl/extractor/freespeech.py | 2 +- youtube_dl/extractor/generic.py | 2 +- youtube_dl/extractor/googleplus.py | 2 +- youtube_dl/extractor/hrti.py | 2 +- youtube_dl/extractor/ign.py | 2 +- youtube_dl/extractor/infoq.py | 6 +++--- youtube_dl/extractor/jeuxvideo.py | 2 +- youtube_dl/extractor/livestream.py | 2 +- youtube_dl/extractor/makertv.py | 2 +- youtube_dl/extractor/mangomolo.py | 2 +- youtube_dl/extractor/meipai.py | 2 +- youtube_dl/extractor/mtv.py | 2 +- youtube_dl/extractor/myvideo.py | 2 +- youtube_dl/extractor/nationalgeographic.py | 2 +- youtube_dl/extractor/naver.py | 2 +- youtube_dl/extractor/npo.py | 2 +- youtube_dl/extractor/ruhd.py | 2 +- youtube_dl/extractor/stanfordoc.py | 4 ++-- youtube_dl/extractor/theplatform.py | 2 +- youtube_dl/extractor/thisav.py | 4 ++-- youtube_dl/extractor/twitter.py | 2 +- youtube_dl/extractor/vice.py | 2 +- youtube_dl/extractor/videopremium.py | 2 +- youtube_dl/extractor/youtube.py | 2 +- 30 files changed, 35 insertions(+), 35 deletions(-) (limited to 'youtube_dl/extractor/npo.py') diff --git a/youtube_dl/extractor/aenetworks.py b/youtube_dl/extractor/aenetworks.py index 2dcdba9d2..da1b566c2 100644 --- a/youtube_dl/extractor/aenetworks.py +++ b/youtube_dl/extractor/aenetworks.py @@ -131,7 +131,7 @@ class AENetworksIE(AENetworksBaseIE): r'data-media-url=(["\'])(?P(?:(?!\1).)+?)\1'], webpage, 'video url', group='url') theplatform_metadata = self._download_theplatform_metadata(self._search_regex( - r'https?://link.theplatform.com/s/([^?]+)', media_url, 'theplatform_path'), video_id) + r'https?://link\.theplatform\.com/s/([^?]+)', media_url, 'theplatform_path'), video_id) info = self._parse_theplatform_metadata(theplatform_metadata) if theplatform_metadata.get('AETN$isBehindWall'): requestor_id = self._DOMAIN_TO_REQUESTOR_ID[domain] diff --git a/youtube_dl/extractor/appletrailers.py b/youtube_dl/extractor/appletrailers.py index b45b431e1..a9ef733e0 100644 --- a/youtube_dl/extractor/appletrailers.py +++ b/youtube_dl/extractor/appletrailers.py @@ -117,7 +117,7 @@ class AppleTrailersIE(InfoExtractor): continue formats.append({ 'format_id': '%s-%s' % (version, size), - 'url': re.sub(r'_(\d+p.mov)', r'_h\1', src), + 'url': re.sub(r'_(\d+p\.mov)', r'_h\1', src), 'width': int_or_none(size_data.get('width')), 'height': int_or_none(size_data.get('height')), 'language': version[:2], @@ -179,7 +179,7 @@ class AppleTrailersIE(InfoExtractor): formats = [] for format in settings['metadata']['sizes']: # The src is a file pointing to the real video file - format_url = re.sub(r'_(\d*p.mov)', r'_h\1', format['src']) + format_url = re.sub(r'_(\d*p\.mov)', r'_h\1', format['src']) formats.append({ 'url': format_url, 'format': format['type'], diff --git a/youtube_dl/extractor/ard.py b/youtube_dl/extractor/ard.py index 3f248b147..915f8862e 100644 --- a/youtube_dl/extractor/ard.py +++ b/youtube_dl/extractor/ard.py @@ -195,7 +195,7 @@ class ARDMediathekIE(InfoExtractor): title = self._html_search_regex( [r'(.*?)', - r'', + r'', r'

(.*?)

'], webpage, 'title') description = self._html_search_meta( diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py index 8b20c03d6..5525f7c9b 100644 --- a/youtube_dl/extractor/bbc.py +++ b/youtube_dl/extractor/bbc.py @@ -386,7 +386,7 @@ class BBCCoUkIE(InfoExtractor): m3u8_id=format_id, fatal=False)) if re.search(self._USP_RE, href): usp_formats = self._extract_m3u8_formats( - re.sub(self._USP_RE, r'/\1.ism/\1.m3u8', href), + re.sub(self._USP_RE, r'/\1\.ism/\1\.m3u8', href), programme_id, ext='mp4', entry_protocol='m3u8_native', m3u8_id=format_id, fatal=False) for f in usp_formats: diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index e9d0dd19c..21a2d0239 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -235,7 +235,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor): # vevo embed vevo_id = self._search_regex( - r'[\w]*)', + r'[\w]*)', webpage, 'vevo embed', default=None) if vevo_id: return self.url_result('vevo:%s' % vevo_id, 'Vevo') diff --git a/youtube_dl/extractor/deezer.py b/youtube_dl/extractor/deezer.py index ec87b94db..a38b2683d 100644 --- a/youtube_dl/extractor/deezer.py +++ b/youtube_dl/extractor/deezer.py @@ -19,7 +19,7 @@ class DeezerPlaylistIE(InfoExtractor): 'id': '176747451', 'title': 'Best!', 'uploader': 'Anonymous', - 'thumbnail': r're:^https?://cdn-images.deezer.com/images/cover/.*\.jpg$', + 'thumbnail': r're:^https?://cdn-images\.deezer\.com/images/cover/.*\.jpg$', }, 'playlist_count': 30, 'skip': 'Only available in .de', diff --git a/youtube_dl/extractor/freespeech.py b/youtube_dl/extractor/freespeech.py index 0a70ca763..7fa271b51 100644 --- a/youtube_dl/extractor/freespeech.py +++ b/youtube_dl/extractor/freespeech.py @@ -27,7 +27,7 @@ class FreespeechIE(InfoExtractor): mobj = re.match(self._VALID_URL, url) title = mobj.group('title') webpage = self._download_webpage(url, title) - info_json = self._search_regex(r'jQuery.extend\(Drupal.settings, ({.*?})\);', webpage, 'info') + info_json = self._search_regex(r'jQuery\.extend\(Drupal\.settings, ({.*?})\);', webpage, 'info') info = json.loads(info_json) return { diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 1721a3dbd..68b633839 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -2206,7 +2206,7 @@ class GenericIE(InfoExtractor): # And then there are the jokers who advertise that they use RTA, # but actually don't. AGE_LIMIT_MARKERS = [ - r'Proudly Labeled RTA', + r'Proudly Labeled RTA', ] if any(re.search(marker, webpage) for marker in AGE_LIMIT_MARKERS): age_limit = 18 diff --git a/youtube_dl/extractor/googleplus.py b/youtube_dl/extractor/googleplus.py index 427499b11..6b927bb44 100644 --- a/youtube_dl/extractor/googleplus.py +++ b/youtube_dl/extractor/googleplus.py @@ -61,7 +61,7 @@ class GooglePlusIE(InfoExtractor): 'width': int(width), 'height': int(height), } for width, height, video_url in re.findall( - r'\d+,(\d+),(\d+),"(https?://[^.]+\.googleusercontent.com.*?)"', webpage)] + r'\d+,(\d+),(\d+),"(https?://[^.]+\.googleusercontent\.com.*?)"', webpage)] self._sort_formats(formats) return { diff --git a/youtube_dl/extractor/hrti.py b/youtube_dl/extractor/hrti.py index 4f0369433..7cef5f6ce 100644 --- a/youtube_dl/extractor/hrti.py +++ b/youtube_dl/extractor/hrti.py @@ -173,7 +173,7 @@ class HRTiIE(HRTiBaseIE): class HRTiPlaylistIE(HRTiBaseIE): - _VALID_URL = r'https?://hrti.hrt.hr/#/video/list/category/(?P[0-9]+)/(?P[^/]+)?' + _VALID_URL = r'https?://hrti\.hrt\.hr/#/video/list/category/(?P[0-9]+)/(?P[^/]+)?' _TESTS = [{ 'url': 'https://hrti.hrt.hr/#/video/list/category/212/ekumena', 'info_dict': { diff --git a/youtube_dl/extractor/ign.py b/youtube_dl/extractor/ign.py index c1367cf51..a96ea8010 100644 --- a/youtube_dl/extractor/ign.py +++ b/youtube_dl/extractor/ign.py @@ -203,7 +203,7 @@ class PCMagIE(IGNIE): _VALID_URL = r'https?://(?:www\.)?pcmag\.com/(?Pvideos|article2)(/.+)?/(?P.+)' IE_NAME = 'pcmag' - _EMBED_RE = r'iframe.setAttribute\("src",\s*__util.objToUrlString\("http://widgets\.ign\.com/video/embed/content.html?[^"]*url=([^"]+)["&]' + _EMBED_RE = r'iframe\.setAttribute\("src",\s*__util.objToUrlString\("http://widgets\.ign\.com/video/embed/content\.html?[^"]*url=([^"]+)["&]' _TESTS = [{ 'url': 'http://www.pcmag.com/videos/2015/01/06/010615-whats-new-now-is-gogo-snooping-on-your-data', diff --git a/youtube_dl/extractor/infoq.py b/youtube_dl/extractor/infoq.py index fe425e786..57c9b0cc4 100644 --- a/youtube_dl/extractor/infoq.py +++ b/youtube_dl/extractor/infoq.py @@ -69,9 +69,9 @@ class InfoQIE(BokeCCBaseIE): }] def _extract_cookies(self, webpage): - policy = self._search_regex(r'InfoQConstants.scp\s*=\s*\'([^\']+)\'', webpage, 'policy') - signature = self._search_regex(r'InfoQConstants.scs\s*=\s*\'([^\']+)\'', webpage, 'signature') - key_pair_id = self._search_regex(r'InfoQConstants.sck\s*=\s*\'([^\']+)\'', webpage, 'key-pair-id') + policy = self._search_regex(r'InfoQConstants\.scp\s*=\s*\'([^\']+)\'', webpage, 'policy') + signature = self._search_regex(r'InfoQConstants\.scs\s*=\s*\'([^\']+)\'', webpage, 'signature') + key_pair_id = self._search_regex(r'InfoQConstants\.sck\s*=\s*\'([^\']+)\'', webpage, 'key-pair-id') return 'CloudFront-Policy=%s; CloudFront-Signature=%s; CloudFront-Key-Pair-Id=%s' % ( policy, signature, key_pair_id) diff --git a/youtube_dl/extractor/jeuxvideo.py b/youtube_dl/extractor/jeuxvideo.py index 1a4227f6b..e9f4ed738 100644 --- a/youtube_dl/extractor/jeuxvideo.py +++ b/youtube_dl/extractor/jeuxvideo.py @@ -30,7 +30,7 @@ class JeuxVideoIE(InfoExtractor): webpage = self._download_webpage(url, title) title = self._html_search_meta('name', webpage) or self._og_search_title(webpage) config_url = self._html_search_regex( - r'data-src(?:set-video)?="(/contenu/medias/video.php.*?)"', + r'data-src(?:set-video)?="(/contenu/medias/video\.php.*?)"', webpage, 'config URL') config_url = 'http://www.jeuxvideo.com' + config_url diff --git a/youtube_dl/extractor/livestream.py b/youtube_dl/extractor/livestream.py index 7f946c6ed..317ebbc4e 100644 --- a/youtube_dl/extractor/livestream.py +++ b/youtube_dl/extractor/livestream.py @@ -338,7 +338,7 @@ class LivestreamOriginalIE(InfoExtractor): info = { 'title': self._og_search_title(webpage), 'description': self._og_search_description(webpage), - 'thumbnail': self._search_regex(r'channelLogo.src\s*=\s*"([^"]+)"', webpage, 'thumbnail', None), + 'thumbnail': self._search_regex(r'channelLogo\.src\s*=\s*"([^"]+)"', webpage, 'thumbnail', None), } video_data = self._download_json(stream_url, content_id) is_live = video_data.get('isLive') diff --git a/youtube_dl/extractor/makertv.py b/youtube_dl/extractor/makertv.py index 3c34d4604..8eda69cfc 100644 --- a/youtube_dl/extractor/makertv.py +++ b/youtube_dl/extractor/makertv.py @@ -5,7 +5,7 @@ from .common import InfoExtractor class MakerTVIE(InfoExtractor): - _VALID_URL = r'https?://(?:(?:www\.)?maker\.tv/(?:[^/]+/)*video|makerplayer.com/embed/maker)/(?P[a-zA-Z0-9]{12})' + _VALID_URL = r'https?://(?:(?:www\.)?maker\.tv/(?:[^/]+/)*video|makerplayer\.com/embed/maker)/(?P[a-zA-Z0-9]{12})' _TEST = { 'url': 'http://www.maker.tv/video/Fh3QgymL9gsc', 'md5': 'ca237a53a8eb20b6dc5bd60564d4ab3e', diff --git a/youtube_dl/extractor/mangomolo.py b/youtube_dl/extractor/mangomolo.py index 1885ac7df..dbd761a67 100644 --- a/youtube_dl/extractor/mangomolo.py +++ b/youtube_dl/extractor/mangomolo.py @@ -22,7 +22,7 @@ class MangomoloBaseIE(InfoExtractor): format_url = self._html_search_regex( [ - r'file\s*:\s*"(https?://[^"]+?/playlist.m3u8)', + r'file\s*:\s*"(https?://[^"]+?/playlist\.m3u8)', r']+href="(rtsp://[^"]+)"' ], webpage, 'format url') formats = self._extract_wowza_formats( diff --git a/youtube_dl/extractor/meipai.py b/youtube_dl/extractor/meipai.py index c8eacb4f4..2445b8b39 100644 --- a/youtube_dl/extractor/meipai.py +++ b/youtube_dl/extractor/meipai.py @@ -11,7 +11,7 @@ from ..utils import ( class MeipaiIE(InfoExtractor): IE_DESC = '美拍' - _VALID_URL = r'https?://(?:www\.)?meipai.com/media/(?P[0-9]+)' + _VALID_URL = r'https?://(?:www\.)?meipai\.com/media/(?P[0-9]+)' _TESTS = [{ # regular uploaded video 'url': 'http://www.meipai.com/media/531697625', diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index 25af5ddfd..1154a3536 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -258,7 +258,7 @@ class MTVServicesInfoExtractor(InfoExtractor): if mgid is None or ':' not in mgid: mgid = self._search_regex( - [r'data-mgid="(.*?)"', r'swfobject.embedSWF\(".*?(mgid:.*?)"'], + [r'data-mgid="(.*?)"', r'swfobject\.embedSWF\(".*?(mgid:.*?)"'], webpage, 'mgid', default=None) if not mgid: diff --git a/youtube_dl/extractor/myvideo.py b/youtube_dl/extractor/myvideo.py index 6bb64eb63..367e811db 100644 --- a/youtube_dl/extractor/myvideo.py +++ b/youtube_dl/extractor/myvideo.py @@ -160,7 +160,7 @@ class MyVideoIE(InfoExtractor): else: video_playpath = '' - video_swfobj = self._search_regex(r'swfobject.embedSWF\(\'(.+?)\'', webpage, 'swfobj') + video_swfobj = self._search_regex(r'swfobject\.embedSWF\(\'(.+?)\'', webpage, 'swfobj') video_swfobj = compat_urllib_parse_unquote(video_swfobj) video_title = self._html_search_regex("(.*?)", diff --git a/youtube_dl/extractor/nationalgeographic.py b/youtube_dl/extractor/nationalgeographic.py index b91d86528..9e8d28f48 100644 --- a/youtube_dl/extractor/nationalgeographic.py +++ b/youtube_dl/extractor/nationalgeographic.py @@ -111,7 +111,7 @@ class NationalGeographicIE(ThePlatformIE, AdobePassIE): release_url = self._search_regex( r'video_auth_playlist_url\s*=\s*"([^"]+)"', webpage, 'release url') - theplatform_path = self._search_regex(r'https?://link.theplatform.com/s/([^?]+)', release_url, 'theplatform path') + theplatform_path = self._search_regex(r'https?://link\.theplatform\.com/s/([^?]+)', release_url, 'theplatform path') video_id = theplatform_path.split('/')[-1] query = { 'mbr': 'true', diff --git a/youtube_dl/extractor/naver.py b/youtube_dl/extractor/naver.py index e8131333f..2047d4402 100644 --- a/youtube_dl/extractor/naver.py +++ b/youtube_dl/extractor/naver.py @@ -43,7 +43,7 @@ class NaverIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - m_id = re.search(r'var rmcPlayer = new nhn.rmcnmv.RMCVideoPlayer\("(.+?)", "(.+?)"', + m_id = re.search(r'var rmcPlayer = new nhn\.rmcnmv\.RMCVideoPlayer\("(.+?)", "(.+?)"', webpage) if m_id is None: error = self._html_search_regex( diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py index fa4ef20c5..b8fe24407 100644 --- a/youtube_dl/extractor/npo.py +++ b/youtube_dl/extractor/npo.py @@ -469,7 +469,7 @@ class SchoolTVIE(NPODataMidEmbedIE): class HetKlokhuisIE(NPODataMidEmbedIE): IE_NAME = 'hetklokhuis' - _VALID_URL = r'https?://(?:www\.)?hetklokhuis.nl/[^/]+/\d+/(?P[^/?#&]+)' + _VALID_URL = r'https?://(?:www\.)?hetklokhuis\.nl/[^/]+/\d+/(?P[^/?#&]+)' _TEST = { 'url': 'http://hetklokhuis.nl/tv-uitzending/3471/Zwaartekrachtsgolven', diff --git a/youtube_dl/extractor/ruhd.py b/youtube_dl/extractor/ruhd.py index 2b830cf47..3c8053a26 100644 --- a/youtube_dl/extractor/ruhd.py +++ b/youtube_dl/extractor/ruhd.py @@ -25,7 +25,7 @@ class RUHDIE(InfoExtractor): video_url = self._html_search_regex( r'([^<]+)   RUHD.ru - Видео Высокого качества №1 в России!', + r'([^<]+)   RUHD\.ru - Видео Высокого качества №1 в России!', webpage, 'title') description = self._html_search_regex( r'(?s)
(.+?)', diff --git a/youtube_dl/extractor/stanfordoc.py b/youtube_dl/extractor/stanfordoc.py index cce65fb10..ae3dd1380 100644 --- a/youtube_dl/extractor/stanfordoc.py +++ b/youtube_dl/extractor/stanfordoc.py @@ -66,7 +66,7 @@ class StanfordOpenClassroomIE(InfoExtractor): r'(?s)([^<]+)', coursepage, 'description', fatal=False) - links = orderedSet(re.findall(r'', coursepage)) + links = orderedSet(re.findall(r'', coursepage)) info['entries'] = [self.url_result( 'http://openclassroom.stanford.edu/MainFolder/%s' % unescapeHTML(l) ) for l in links] @@ -84,7 +84,7 @@ class StanfordOpenClassroomIE(InfoExtractor): rootpage = self._download_webpage(rootURL, info['id'], errnote='Unable to download course info page') - links = orderedSet(re.findall(r'', rootpage)) + links = orderedSet(re.findall(r'', rootpage)) info['entries'] = [self.url_result( 'http://openclassroom.stanford.edu/MainFolder/%s' % unescapeHTML(l) ) for l in links] diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index de236bbba..b1a985ff6 100644 --- a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -216,7 +216,7 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE): def hex_to_bytes(hex): return binascii.a2b_hex(hex.encode('ascii')) - relative_path = re.match(r'https?://link.theplatform.com/s/([^?]+)', url).group(1) + relative_path = re.match(r'https?://link\.theplatform\.com/s/([^?]+)', url).group(1) clear_text = hex_to_bytes(flags + expiration_date + str_to_hex(relative_path)) checksum = hmac.new(sig_key.encode('ascii'), clear_text, hashlib.sha1).hexdigest() sig = flags + expiration_date + checksum + str_to_hex(sig_secret) diff --git a/youtube_dl/extractor/thisav.py b/youtube_dl/extractor/thisav.py index 33683b139..dc3dd03c8 100644 --- a/youtube_dl/extractor/thisav.py +++ b/youtube_dl/extractor/thisav.py @@ -57,10 +57,10 @@ class ThisAVIE(InfoExtractor): info_dict = self._extract_jwplayer_data( webpage, video_id, require_title=False) uploader = self._html_search_regex( - r': ([^<]+)', + r': ([^<]+)', webpage, 'uploader name', fatal=False) uploader_id = self._html_search_regex( - r': (?:[^<]+)', + r': (?:[^<]+)', webpage, 'uploader id', fatal=False) info_dict.update({ diff --git a/youtube_dl/extractor/twitter.py b/youtube_dl/extractor/twitter.py index 0df3ad7c7..1b0b96371 100644 --- a/youtube_dl/extractor/twitter.py +++ b/youtube_dl/extractor/twitter.py @@ -174,7 +174,7 @@ class TwitterCardIE(TwitterBaseIE): webpage = self._download_webpage(url, video_id) iframe_url = self._html_search_regex( - r']+src="((?:https?:)?//(?:www.youtube.com/embed/[^"]+|(?:www\.)?vine\.co/v/\w+/card))"', + r']+src="((?:https?:)?//(?:www\.youtube\.com/embed/[^"]+|(?:www\.)?vine\.co/v/\w+/card))"', webpage, 'video iframe', default=None) if iframe_url: return self.url_result(iframe_url) diff --git a/youtube_dl/extractor/vice.py b/youtube_dl/extractor/vice.py index b8b8bf979..bcc28693a 100644 --- a/youtube_dl/extractor/vice.py +++ b/youtube_dl/extractor/vice.py @@ -198,7 +198,7 @@ class ViceShowIE(InfoExtractor): class ViceArticleIE(InfoExtractor): IE_NAME = 'vice:article' - _VALID_URL = r'https://www.vice.com/[^/]+/article/(?P[^?#]+)' + _VALID_URL = r'https://www\.vice\.com/[^/]+/article/(?P[^?#]+)' _TESTS = [{ 'url': 'https://www.vice.com/en_us/article/on-set-with-the-woman-making-mormon-porn-in-utah', diff --git a/youtube_dl/extractor/videopremium.py b/youtube_dl/extractor/videopremium.py index 5de8273c3..cf690d7b0 100644 --- a/youtube_dl/extractor/videopremium.py +++ b/youtube_dl/extractor/videopremium.py @@ -26,7 +26,7 @@ class VideoPremiumIE(InfoExtractor): webpage_url = 'http://videopremium.tv/' + video_id webpage = self._download_webpage(webpage_url, video_id) - if re.match(r'^]*>window.location\s*=', webpage): + if re.match(r'^]*>window\.location\s*=', webpage): # Download again, we need a cookie webpage = self._download_webpage( webpage_url, video_id, diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index edd8713b7..54f5d7279 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1683,7 +1683,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): video_uploader_id = None video_uploader_url = None mobj = re.search( - r'', + r'', video_webpage) if mobj is not None: video_uploader_id = mobj.group('uploader_id') -- cgit 1.4.1