From ff99fe529e52b2465f1d973e69df01a6391568d6 Mon Sep 17 00:00:00 2001 From: Sergey M․ Date: Tue, 25 Apr 2017 22:07:10 +0700 Subject: Don't list master m3u8 playlists in format list (closes #12832) --- youtube_dl/extractor/anvato.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'youtube_dl/extractor/anvato.py') diff --git a/youtube_dl/extractor/anvato.py b/youtube_dl/extractor/anvato.py index 623f44dce..9fd91c2f6 100644 --- a/youtube_dl/extractor/anvato.py +++ b/youtube_dl/extractor/anvato.py @@ -178,12 +178,7 @@ class AnvatoIE(InfoExtractor): } if ext == 'm3u8' or media_format in ('m3u8', 'm3u8-variant'): - # Not using _extract_m3u8_formats here as individual media - # playlists are also included in published_urls. - if tbr is None: - formats.append(self._m3u8_meta_format(video_url, ext='mp4', m3u8_id='hls')) - continue - else: + if tbr is not None: a_format.update({ 'format_id': '-'.join(filter(None, ['hls', compat_str(tbr)])), 'ext': 'mp4', -- cgit 1.4.1 From 7986c3abcdad819b61bdf0fb7111759f9fc1fc32 Mon Sep 17 00:00:00 2001 From: Sergey M․ Date: Sat, 29 Apr 2017 19:49:04 +0700 Subject: [anvato] Improve extraction (closes #12913) * Promote to regular shortcut based extractor * Add mcp to access key mapping table * Add support for embeds extraction * Add support for anvato embeds in generic extractor --- youtube_dl/extractor/anvato.py | 59 ++++++++++++++++++++++++++++++++++++-- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/generic.py | 16 +++++++++++ 3 files changed, 73 insertions(+), 3 deletions(-) (limited to 'youtube_dl/extractor/anvato.py') diff --git a/youtube_dl/extractor/anvato.py b/youtube_dl/extractor/anvato.py index 9fd91c2f6..ab8c237e9 100644 --- a/youtube_dl/extractor/anvato.py +++ b/youtube_dl/extractor/anvato.py @@ -5,6 +5,7 @@ import base64 import hashlib import json import random +import re import time from .common import InfoExtractor @@ -16,6 +17,7 @@ from ..utils import ( intlist_to_bytes, int_or_none, strip_jsonp, + unescapeHTML, ) @@ -26,6 +28,8 @@ def md5_text(s): class AnvatoIE(InfoExtractor): + _VALID_URL = r'anvato:(?P[^:]+):(?P\d+)' + # Copied from anvplayer.min.js _ANVACK_TABLE = { 'nbcu_nbcd_desktop_web_prod_93d8ead38ce2024f8f544b78306fbd15895ae5e6': 'NNemUkySjxLyPTKvZRiGntBIjEyK8uqicjMakIaQ', @@ -114,6 +118,22 @@ class AnvatoIE(InfoExtractor): 'nbcu_nbcd_desktop_web_prod_93d8ead38ce2024f8f544b78306fbd15895ae5e6_secure': 'NNemUkySjxLyPTKvZRiGntBIjEyK8uqicjMakIaQ' } + _MCP_TO_ACCESS_KEY_TABLE = { + 'qa': 'anvato_mcpqa_demo_web_stage_18b55e00db5a13faa8d03ae6e41f6f5bcb15b922', + 'lin': 'anvato_mcp_lin_web_prod_4c36fbfd4d8d8ecae6488656e21ac6d1ac972749', + 'univison': 'anvato_mcp_univision_web_prod_37fe34850c99a3b5cdb71dab10a417dd5cdecafa', + 'uni': 'anvato_mcp_univision_web_prod_37fe34850c99a3b5cdb71dab10a417dd5cdecafa', + 'dev': 'anvato_mcp_fs2go_web_prod_c7b90a93e171469cdca00a931211a2f556370d0a', + 'sps': 'anvato_mcp_sps_web_prod_54bdc90dd6ba21710e9f7074338365bba28da336', + 'spsstg': 'anvato_mcp_sps_web_prod_54bdc90dd6ba21710e9f7074338365bba28da336', + 'anv': 'anvato_mcp_anv_web_prod_791407490f4c1ef2a4bcb21103e0cb1bcb3352b3', + 'gray': 'anvato_mcp_gray_web_prod_4c10f067c393ed8fc453d3930f8ab2b159973900', + 'hearst': 'anvato_mcp_hearst_web_prod_5356c3de0fc7c90a3727b4863ca7fec3a4524a99', + 'cbs': 'anvato_mcp_cbs_web_prod_02f26581ff80e5bda7aad28226a8d369037f2cbe', + 'telemundo': 'anvato_mcp_telemundo_web_prod_c5278d51ad46fda4b6ca3d0ea44a7846a054f582' + } + + _ANVP_RE = r']+\bdata-anvp\s*=\s*(["\'])(?P(?:(?!\1).)+)\1' _AUTH_KEY = b'\x31\xc2\x42\x84\x9e\x73\xa0\xce' def __init__(self, *args, **kwargs): @@ -217,9 +237,42 @@ class AnvatoIE(InfoExtractor): 'subtitles': subtitles, } + @staticmethod + def _extract_urls(ie, webpage, video_id): + entries = [] + for mobj in re.finditer(AnvatoIE._ANVP_RE, webpage): + anvplayer_data = ie._parse_json( + mobj.group('anvp'), video_id, transform_source=unescapeHTML, + fatal=False) + if not anvplayer_data: + continue + video = anvplayer_data.get('video') + if not isinstance(video, compat_str) or not video.isdigit(): + continue + access_key = anvplayer_data.get('accessKey') + if not access_key: + mcp = anvplayer_data.get('mcp') + if mcp: + access_key = AnvatoIE._MCP_TO_ACCESS_KEY_TABLE.get( + mcp.lower()) + if not access_key: + continue + entries.append(ie.url_result( + 'anvato:%s:%s' % (access_key, video), ie=AnvatoIE.ie_key(), + video_id=video)) + return entries + def _extract_anvato_videos(self, webpage, video_id): - anvplayer_data = self._parse_json(self._html_search_regex( - r']+data-anvp=\'([^\']+)\'', webpage, - 'Anvato player data'), video_id) + anvplayer_data = self._parse_json( + self._html_search_regex( + self._ANVP_RE, webpage, 'Anvato player data', group='anvp'), + video_id) return self._get_anvato_videos( anvplayer_data['accessKey'], anvplayer_data['video']) + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + access_key, video_id = mobj.group('access_key_or_mcp', 'id') + if access_key not in self._ANVACK_TABLE: + access_key = self._MCP_TO_ACCESS_KEY_TABLE[access_key] + return self._get_anvato_videos(access_key, video_id) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 355a4e56f..39e5380b8 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -41,6 +41,7 @@ from .alphaporno import AlphaPornoIE from .amcnetworks import AMCNetworksIE from .animeondemand import AnimeOnDemandIE from .anitube import AnitubeIE +from .anvato import AnvatoIE from .anysex import AnySexIE from .aol import AolIE from .allocine import AllocineIE diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 67184bc5d..7f7c1ba29 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -86,6 +86,7 @@ from .openload import OpenloadIE from .videopress import VideoPressIE from .rutube import RutubeIE from .limelight import LimelightBaseIE +from .anvato import AnvatoIE class GenericIE(InfoExtractor): @@ -1677,6 +1678,15 @@ class GenericIE(InfoExtractor): }, 'playlist_mincount': 5, }, + { + 'url': 'http://kron4.com/2017/04/28/standoff-with-walnut-creek-murder-suspect-ends-with-arrest/', + 'info_dict': { + 'id': 'standoff-with-walnut-creek-murder-suspect-ends-with-arrest', + 'title': 'Standoff with Walnut Creek murder suspect ends', + 'description': 'md5:3ccc48a60fc9441eeccfc9c469ebf788', + }, + 'playlist_mincount': 4, + }, # { # # TODO: find another test # # http://schema.org/VideoObject @@ -2537,6 +2547,12 @@ class GenericIE(InfoExtractor): 'limelight:media:%s' % mobj.group('id'), {'source_url': url}), 'LimelightMedia', mobj.group('id')) + # Look for Anvato embeds + anvato_urls = AnvatoIE._extract_urls(self, webpage, video_id) + if anvato_urls: + return self.playlist_result( + anvato_urls, video_id, video_title, video_description) + # Look for AdobeTVVideo embeds mobj = re.search( r']+src=[\'"]((?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]', -- cgit 1.4.1 From 238cec17ae51c0f624588095d3a370a3b6964bdd Mon Sep 17 00:00:00 2001 From: Sergey M․ Date: Sun, 30 Apr 2017 22:04:21 +0700 Subject: [extractor/anvato] PEP 8 --- youtube_dl/extractor/anvato.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'youtube_dl/extractor/anvato.py') diff --git a/youtube_dl/extractor/anvato.py b/youtube_dl/extractor/anvato.py index ab8c237e9..8023da702 100644 --- a/youtube_dl/extractor/anvato.py +++ b/youtube_dl/extractor/anvato.py @@ -29,7 +29,7 @@ def md5_text(s): class AnvatoIE(InfoExtractor): _VALID_URL = r'anvato:(?P[^:]+):(?P\d+)' - + # Copied from anvplayer.min.js _ANVACK_TABLE = { 'nbcu_nbcd_desktop_web_prod_93d8ead38ce2024f8f544b78306fbd15895ae5e6': 'NNemUkySjxLyPTKvZRiGntBIjEyK8uqicjMakIaQ', -- cgit 1.4.1 From 210a2720bcdaf4f98561fefea021f42cae39462d Mon Sep 17 00:00:00 2001 From: Sergey M․ Date: Sun, 15 Oct 2017 01:44:57 +0700 Subject: [anvato] Process master m3u8 manifests >>> Individual m3u8 manifests are not always present, e.g. anvato:anvato_scripps_app_web_prod_0837996dbe373629133857ae9eb72e740424d80a:4173834 --- youtube_dl/extractor/anvato.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'youtube_dl/extractor/anvato.py') diff --git a/youtube_dl/extractor/anvato.py b/youtube_dl/extractor/anvato.py index 8023da702..e443ecff6 100644 --- a/youtube_dl/extractor/anvato.py +++ b/youtube_dl/extractor/anvato.py @@ -197,12 +197,16 @@ class AnvatoIE(InfoExtractor): 'tbr': tbr if tbr != 0 else None, } - if ext == 'm3u8' or media_format in ('m3u8', 'm3u8-variant'): - if tbr is not None: - a_format.update({ - 'format_id': '-'.join(filter(None, ['hls', compat_str(tbr)])), - 'ext': 'mp4', - }) + if media_format == 'm3u8' and tbr is not None: + a_format.update({ + 'format_id': '-'.join(filter(None, ['hls', compat_str(tbr)])), + 'ext': 'mp4', + }) + elif media_format == 'm3u8-variant' or ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + video_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False)) + continue elif ext == 'mp3' or media_format == 'mp3': a_format['vcodec'] = 'none' else: -- cgit 1.4.1 From ee093a0ea04d973cc6dbd0d53b57c976a9e68dad Mon Sep 17 00:00:00 2001 From: Sergey M․ Date: Sun, 15 Oct 2017 06:11:02 +0700 Subject: [anvato] Add ability to bypass geo restriction --- youtube_dl/extractor/anvato.py | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'youtube_dl/extractor/anvato.py') diff --git a/youtube_dl/extractor/anvato.py b/youtube_dl/extractor/anvato.py index e443ecff6..7a29cd2c6 100644 --- a/youtube_dl/extractor/anvato.py +++ b/youtube_dl/extractor/anvato.py @@ -18,6 +18,7 @@ from ..utils import ( int_or_none, strip_jsonp, unescapeHTML, + unsmuggle_url, ) @@ -275,6 +276,9 @@ class AnvatoIE(InfoExtractor): anvplayer_data['accessKey'], anvplayer_data['video']) def _real_extract(self, url): + url, smuggled_data = unsmuggle_url(url, {}) + self._initialize_geo_bypass(smuggled_data.get('geo_countries')) + mobj = re.match(self._VALID_URL, url) access_key, video_id = mobj.group('access_key_or_mcp', 'id') if access_key not in self._ANVACK_TABLE: -- cgit 1.4.1