From ff99fe529e52b2465f1d973e69df01a6391568d6 Mon Sep 17 00:00:00 2001
From: Sergey M․ <dstftw@gmail.com>
Date: Tue, 25 Apr 2017 22:07:10 +0700
Subject: Don't list master m3u8 playlists in format list (closes #12832)

---
 youtube_dl/extractor/anvato.py | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

(limited to 'youtube_dl/extractor/anvato.py')
diff --git a/youtube_dl/extractor/anvato.py b/youtube_dl/extractor/anvato.py
index 623f44dce..9fd91c2f6 100644
--- a/youtube_dl/extractor/anvato.py
+++ b/youtube_dl/extractor/anvato.py
@@ -178,12 +178,7 @@ class AnvatoIE(InfoExtractor):
             }
 
             if ext == 'm3u8' or media_format in ('m3u8', 'm3u8-variant'):
-                # Not using _extract_m3u8_formats here as individual media
-                # playlists are also included in published_urls.
-                if tbr is None:
-                    formats.append(self._m3u8_meta_format(video_url, ext='mp4', m3u8_id='hls'))
-                    continue
-                else:
+                if tbr is not None:
                     a_format.update({
                         'format_id': '-'.join(filter(None, ['hls', compat_str(tbr)])),
                         'ext': 'mp4',
-- 
cgit 1.4.1


From 7986c3abcdad819b61bdf0fb7111759f9fc1fc32 Mon Sep 17 00:00:00 2001
From: Sergey M․ <dstftw@gmail.com>
Date: Sat, 29 Apr 2017 19:49:04 +0700
Subject: [anvato] Improve extraction (closes #12913) * Promote to regular
 shortcut based extractor * Add mcp to access key mapping table * Add support
 for embeds extraction * Add support for anvato embeds in generic extractor

---
 youtube_dl/extractor/anvato.py     | 59 ++++++++++++++++++++++++++++++++++++--
 youtube_dl/extractor/extractors.py |  1 +
 youtube_dl/extractor/generic.py    | 16 +++++++++++
 3 files changed, 73 insertions(+), 3 deletions(-)

(limited to 'youtube_dl/extractor/anvato.py')

diff --git a/youtube_dl/extractor/anvato.py b/youtube_dl/extractor/anvato.py
index 9fd91c2f6..ab8c237e9 100644
--- a/youtube_dl/extractor/anvato.py
+++ b/youtube_dl/extractor/anvato.py
@@ -5,6 +5,7 @@ import base64
 import hashlib
 import json
 import random
+import re
 import time
 
 from .common import InfoExtractor
@@ -16,6 +17,7 @@ from ..utils import (
     intlist_to_bytes,
     int_or_none,
     strip_jsonp,
+    unescapeHTML,
 )
 
 
@@ -26,6 +28,8 @@ def md5_text(s):
 
 
 class AnvatoIE(InfoExtractor):
+    _VALID_URL = r'anvato:(?P<access_key_or_mcp>[^:]+):(?P<id>\d+)'
+    
     # Copied from anvplayer.min.js
     _ANVACK_TABLE = {
         'nbcu_nbcd_desktop_web_prod_93d8ead38ce2024f8f544b78306fbd15895ae5e6': 'NNemUkySjxLyPTKvZRiGntBIjEyK8uqicjMakIaQ',
@@ -114,6 +118,22 @@ class AnvatoIE(InfoExtractor):
         'nbcu_nbcd_desktop_web_prod_93d8ead38ce2024f8f544b78306fbd15895ae5e6_secure': 'NNemUkySjxLyPTKvZRiGntBIjEyK8uqicjMakIaQ'
     }
 
+    _MCP_TO_ACCESS_KEY_TABLE = {
+        'qa': 'anvato_mcpqa_demo_web_stage_18b55e00db5a13faa8d03ae6e41f6f5bcb15b922',
+        'lin': 'anvato_mcp_lin_web_prod_4c36fbfd4d8d8ecae6488656e21ac6d1ac972749',
+        'univison': 'anvato_mcp_univision_web_prod_37fe34850c99a3b5cdb71dab10a417dd5cdecafa',
+        'uni': 'anvato_mcp_univision_web_prod_37fe34850c99a3b5cdb71dab10a417dd5cdecafa',
+        'dev': 'anvato_mcp_fs2go_web_prod_c7b90a93e171469cdca00a931211a2f556370d0a',
+        'sps': 'anvato_mcp_sps_web_prod_54bdc90dd6ba21710e9f7074338365bba28da336',
+        'spsstg': 'anvato_mcp_sps_web_prod_54bdc90dd6ba21710e9f7074338365bba28da336',
+        'anv': 'anvato_mcp_anv_web_prod_791407490f4c1ef2a4bcb21103e0cb1bcb3352b3',
+        'gray': 'anvato_mcp_gray_web_prod_4c10f067c393ed8fc453d3930f8ab2b159973900',
+        'hearst': 'anvato_mcp_hearst_web_prod_5356c3de0fc7c90a3727b4863ca7fec3a4524a99',
+        'cbs': 'anvato_mcp_cbs_web_prod_02f26581ff80e5bda7aad28226a8d369037f2cbe',
+        'telemundo': 'anvato_mcp_telemundo_web_prod_c5278d51ad46fda4b6ca3d0ea44a7846a054f582'
+    }
+
+    _ANVP_RE = r'<script[^>]+\bdata-anvp\s*=\s*(["\'])(?P<anvp>(?:(?!\1).)+)\1'
     _AUTH_KEY = b'\x31\xc2\x42\x84\x9e\x73\xa0\xce'
 
     def __init__(self, *args, **kwargs):
@@ -217,9 +237,42 @@ class AnvatoIE(InfoExtractor):
             'subtitles': subtitles,
         }
 
+    @staticmethod
+    def _extract_urls(ie, webpage, video_id):
+        entries = []
+        for mobj in re.finditer(AnvatoIE._ANVP_RE, webpage):
+            anvplayer_data = ie._parse_json(
+                mobj.group('anvp'), video_id, transform_source=unescapeHTML,
+                fatal=False)
+            if not anvplayer_data:
+                continue
+            video = anvplayer_data.get('video')
+            if not isinstance(video, compat_str) or not video.isdigit():
+                continue
+            access_key = anvplayer_data.get('accessKey')
+            if not access_key:
+                mcp = anvplayer_data.get('mcp')
+                if mcp:
+                    access_key = AnvatoIE._MCP_TO_ACCESS_KEY_TABLE.get(
+                        mcp.lower())
+            if not access_key:
+                continue
+            entries.append(ie.url_result(
+                'anvato:%s:%s' % (access_key, video), ie=AnvatoIE.ie_key(),
+                video_id=video))
+        return entries
+
     def _extract_anvato_videos(self, webpage, video_id):
-        anvplayer_data = self._parse_json(self._html_search_regex(
-            r'<script[^>]+data-anvp=\'([^\']+)\'', webpage,
-            'Anvato player data'), video_id)
+        anvplayer_data = self._parse_json(
+            self._html_search_regex(
+                self._ANVP_RE, webpage, 'Anvato player data', group='anvp'),
+            video_id)
         return self._get_anvato_videos(
             anvplayer_data['accessKey'], anvplayer_data['video'])
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        access_key, video_id = mobj.group('access_key_or_mcp', 'id')
+        if access_key not in self._ANVACK_TABLE:
+            access_key = self._MCP_TO_ACCESS_KEY_TABLE[access_key]
+        return self._get_anvato_videos(access_key, video_id)
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 355a4e56f..39e5380b8 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -41,6 +41,7 @@ from .alphaporno import AlphaPornoIE
 from .amcnetworks import AMCNetworksIE
 from .animeondemand import AnimeOnDemandIE
 from .anitube import AnitubeIE
+from .anvato import AnvatoIE
 from .anysex import AnySexIE
 from .aol import AolIE
 from .allocine import AllocineIE
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index 67184bc5d..7f7c1ba29 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -86,6 +86,7 @@ from .openload import OpenloadIE
 from .videopress import VideoPressIE
 from .rutube import RutubeIE
 from .limelight import LimelightBaseIE
+from .anvato import AnvatoIE
 
 
 class GenericIE(InfoExtractor):
@@ -1677,6 +1678,15 @@ class GenericIE(InfoExtractor):
             },
             'playlist_mincount': 5,
         },
+        {
+            'url': 'http://kron4.com/2017/04/28/standoff-with-walnut-creek-murder-suspect-ends-with-arrest/',
+            'info_dict': {
+                'id': 'standoff-with-walnut-creek-murder-suspect-ends-with-arrest',
+                'title': 'Standoff with Walnut Creek murder suspect ends',
+                'description': 'md5:3ccc48a60fc9441eeccfc9c469ebf788',
+            },
+            'playlist_mincount': 4,
+        },
         # {
         #     # TODO: find another test
         #     # http://schema.org/VideoObject
@@ -2537,6 +2547,12 @@ class GenericIE(InfoExtractor):
                 'limelight:media:%s' % mobj.group('id'),
                 {'source_url': url}), 'LimelightMedia', mobj.group('id'))
 
+        # Look for Anvato embeds
+        anvato_urls = AnvatoIE._extract_urls(self, webpage, video_id)
+        if anvato_urls:
+            return self.playlist_result(
+                anvato_urls, video_id, video_title, video_description)
+
         # Look for AdobeTVVideo embeds
         mobj = re.search(
             r'<iframe[^>]+src=[\'"]((?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]',
-- 
cgit 1.4.1


From 238cec17ae51c0f624588095d3a370a3b6964bdd Mon Sep 17 00:00:00 2001
From: Sergey M․ <dstftw@gmail.com>
Date: Sun, 30 Apr 2017 22:04:21 +0700
Subject: [extractor/anvato] PEP 8

---
 youtube_dl/extractor/anvato.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'youtube_dl/extractor/anvato.py')

diff --git a/youtube_dl/extractor/anvato.py b/youtube_dl/extractor/anvato.py
index ab8c237e9..8023da702 100644
--- a/youtube_dl/extractor/anvato.py
+++ b/youtube_dl/extractor/anvato.py
@@ -29,7 +29,7 @@ def md5_text(s):
 
 class AnvatoIE(InfoExtractor):
     _VALID_URL = r'anvato:(?P<access_key_or_mcp>[^:]+):(?P<id>\d+)'
-    
+
     # Copied from anvplayer.min.js
     _ANVACK_TABLE = {
         'nbcu_nbcd_desktop_web_prod_93d8ead38ce2024f8f544b78306fbd15895ae5e6': 'NNemUkySjxLyPTKvZRiGntBIjEyK8uqicjMakIaQ',
-- 
cgit 1.4.1


From 210a2720bcdaf4f98561fefea021f42cae39462d Mon Sep 17 00:00:00 2001
From: Sergey M․ <dstftw@gmail.com>
Date: Sun, 15 Oct 2017 01:44:57 +0700
Subject: [anvato] Process master m3u8 manifests >>> Individual m3u8 manifests
 are not always present, e.g.
 anvato:anvato_scripps_app_web_prod_0837996dbe373629133857ae9eb72e740424d80a:4173834

---
 youtube_dl/extractor/anvato.py | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

(limited to 'youtube_dl/extractor/anvato.py')

diff --git a/youtube_dl/extractor/anvato.py b/youtube_dl/extractor/anvato.py
index 8023da702..e443ecff6 100644
--- a/youtube_dl/extractor/anvato.py
+++ b/youtube_dl/extractor/anvato.py
@@ -197,12 +197,16 @@ class AnvatoIE(InfoExtractor):
                 'tbr': tbr if tbr != 0 else None,
             }
 
-            if ext == 'm3u8' or media_format in ('m3u8', 'm3u8-variant'):
-                if tbr is not None:
-                    a_format.update({
-                        'format_id': '-'.join(filter(None, ['hls', compat_str(tbr)])),
-                        'ext': 'mp4',
-                    })
+            if media_format == 'm3u8' and tbr is not None:
+                a_format.update({
+                    'format_id': '-'.join(filter(None, ['hls', compat_str(tbr)])),
+                    'ext': 'mp4',
+                })
+            elif media_format == 'm3u8-variant' or ext == 'm3u8':
+                formats.extend(self._extract_m3u8_formats(
+                    video_url, video_id, 'mp4', entry_protocol='m3u8_native',
+                    m3u8_id='hls', fatal=False))
+                continue
             elif ext == 'mp3' or media_format == 'mp3':
                 a_format['vcodec'] = 'none'
             else:
-- 
cgit 1.4.1


From ee093a0ea04d973cc6dbd0d53b57c976a9e68dad Mon Sep 17 00:00:00 2001
From: Sergey M․ <dstftw@gmail.com>
Date: Sun, 15 Oct 2017 06:11:02 +0700
Subject: [anvato] Add ability to bypass geo restriction

---
 youtube_dl/extractor/anvato.py | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'youtube_dl/extractor/anvato.py')

diff --git a/youtube_dl/extractor/anvato.py b/youtube_dl/extractor/anvato.py
index e443ecff6..7a29cd2c6 100644
--- a/youtube_dl/extractor/anvato.py
+++ b/youtube_dl/extractor/anvato.py
@@ -18,6 +18,7 @@ from ..utils import (
     int_or_none,
     strip_jsonp,
     unescapeHTML,
+    unsmuggle_url,
 )
 
 
@@ -275,6 +276,9 @@ class AnvatoIE(InfoExtractor):
             anvplayer_data['accessKey'], anvplayer_data['video'])
 
     def _real_extract(self, url):
+        url, smuggled_data = unsmuggle_url(url, {})
+        self._initialize_geo_bypass(smuggled_data.get('geo_countries'))
+
         mobj = re.match(self._VALID_URL, url)
         access_key, video_id = mobj.group('access_key_or_mcp', 'id')
         if access_key not in self._ANVACK_TABLE:
-- 
cgit 1.4.1