From f802c4866017017afa642ca9ea1ea87edad081b4 Mon Sep 17 00:00:00 2001 From: runningbits Date: Fri, 10 Mar 2017 16:59:32 +0100 Subject: [wdr:maus] Fix extraction and update tests --- youtube_dl/extractor/wdr.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'youtube_dl/extractor/wdr.py') diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py index f7e6360a3..110999827 100644 --- a/youtube_dl/extractor/wdr.py +++ b/youtube_dl/extractor/wdr.py @@ -19,9 +19,9 @@ class WDRBaseIE(InfoExtractor): def _extract_wdr_video(self, webpage, display_id): # for wdr.de the data-extension is in a tag with the class "mediaLink" # for wdr.de radio players, in a tag with the class "wdrrPlayerPlayBtn" - # for wdrmaus its in a link to the page in a multiline "videoLink"-tag + # for wdrmaus it is in a link to the page in a multiline "videoLink"-tag json_metadata = self._html_search_regex( - r'class=(?:"(?:mediaLink|wdrrPlayerPlayBtn)\b[^"]*"[^>]+|"videoLink\b[^"]*"[\s]*>\n[^\n]*)data-extension="([^"]+)"', + r'class=(?:"(?:mediaLink|wdrrPlayerPlayBtn|videoButton)\b[^"]*"[^>]+|"videoLink\b[^"]*"[\s]*>\n[^\n]*)data-extension="([^"]+)"', webpage, 'media link', default=None, flags=re.MULTILINE) if not json_metadata: @@ -161,23 +161,23 @@ class WDRIE(WDRBaseIE): { 'url': 'http://www.wdrmaus.de/aktuelle-sendung/index.php5', 'info_dict': { - 'id': 'mdb-1096487', - 'ext': 'flv', + 'id': 'mdb-1323501', + 'ext': 'mp4', 'upload_date': 're:^[0-9]{8}$', 'title': 're:^Die Sendung mit der Maus vom [0-9.]{10}$', - 'description': '- Die Sendung mit der Maus -', + 'description': 'Die Seite mit der Maus -', }, 'skip': 'The id changes from week to week because of the new episode' }, { - 'url': 'http://www.wdrmaus.de/sachgeschichten/sachgeschichten/achterbahn.php5', + 'url': 'http://www.wdrmaus.de/filme/sachgeschichten/achterbahn.php5', 'md5': '803138901f6368ee497b4d195bb164f2', 'info_dict': { 'id': 'mdb-186083', 'ext': 'mp4', 'upload_date': '20130919', 
'title': 'Sachgeschichte - Achterbahn ', - 'description': '- Die Sendung mit der Maus -', + 'description': 'Die Seite mit der Maus -', }, }, { @@ -186,7 +186,7 @@ class WDRIE(WDRBaseIE): 'info_dict': { 'id': 'mdb-869971', 'ext': 'flv', - 'title': 'Funkhaus Europa Livestream', + 'title': 'COSMO Livestream', 'description': 'md5:2309992a6716c347891c045be50992e4', 'upload_date': '20160101', }, -- cgit 1.4.1 From bd34c32bd754f30dd34b2d43604de73681b7148b Mon Sep 17 00:00:00 2001 From: Sergey M․ Date: Fri, 10 Mar 2017 23:07:36 +0700 Subject: [wdr] Actualize comment --- youtube_dl/extractor/wdr.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'youtube_dl/extractor/wdr.py') diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py index 110999827..8bb7362bb 100644 --- a/youtube_dl/extractor/wdr.py +++ b/youtube_dl/extractor/wdr.py @@ -19,7 +19,8 @@ class WDRBaseIE(InfoExtractor): def _extract_wdr_video(self, webpage, display_id): # for wdr.de the data-extension is in a tag with the class "mediaLink" # for wdr.de radio players, in a tag with the class "wdrrPlayerPlayBtn" - # for wdrmaus it is in a link to the page in a multiline "videoLink"-tag + # for wdrmaus, in a tag with the class "videoButton" (previously a link + # to the page in a multiline "videoLink"-tag) json_metadata = self._html_search_regex( r'class=(?:"(?:mediaLink|wdrrPlayerPlayBtn|videoButton)\b[^"]*"[^>]+|"videoLink\b[^"]*"[\s]*>\n[^\n]*)data-extension="([^"]+)"', webpage, 'media link', default=None, flags=re.MULTILINE) @@ -32,7 +33,7 @@ class WDRBaseIE(InfoExtractor): jsonp_url = media_link_obj['mediaObj']['url'] metadata = self._download_json( - jsonp_url, 'metadata', transform_source=strip_jsonp) + jsonp_url, display_id, transform_source=strip_jsonp) metadata_tracker_data = metadata['trackerData'] metadata_media_resource = metadata['mediaResource'] -- cgit 1.4.1 From 89923316210f8e17bb1a085278940e1c56fcff48 Mon Sep 17 00:00:00 2001 From: Sergey M․ Date: Sun, 8 Oct 2017 
21:36:50 +0700 Subject: [wdr] Relax media link regex (closes #14447) --- youtube_dl/extractor/wdr.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'youtube_dl/extractor/wdr.py') diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py index 8bb7362bb..621de1e1e 100644 --- a/youtube_dl/extractor/wdr.py +++ b/youtube_dl/extractor/wdr.py @@ -22,8 +22,13 @@ class WDRBaseIE(InfoExtractor): # for wdrmaus, in a tag with the class "videoButton" (previously a link # to the page in a multiline "videoLink"-tag) json_metadata = self._html_search_regex( - r'class=(?:"(?:mediaLink|wdrrPlayerPlayBtn|videoButton)\b[^"]*"[^>]+|"videoLink\b[^"]*"[\s]*>\n[^\n]*)data-extension="([^"]+)"', - webpage, 'media link', default=None, flags=re.MULTILINE) + r'''(?sx)class= + (?: + (["\'])(?:mediaLink|wdrrPlayerPlayBtn|videoButton)\b.*?\1[^>]+| + (["\'])videoLink\b.*?\2[\s]*>\n[^\n]* + )data-extension=(["\'])(?P<data>(?:(?!\3).)+)\3 + ''', + webpage, 'media link', default=None, group='data') if not json_metadata: return -- cgit 1.4.1