summary refs log tree commit diff
diff options
context:
space:
mode:
author Sergey M․ <dstftw@gmail.com> 2017-06-24 18:33:31 +0700
committer Sergey M․ <dstftw@gmail.com> 2017-06-24 18:33:31 +0700
commit bd65f181532ab4e535b408d3ccf99723534eb326 (patch)
tree 7c71ade20e913ff9cb539d33fa85b2e42526ce7e
parent 73af5cc817ff19d21cb432c5a4e9e37dd35a353d (diff)
download youtube-dl-bd65f181532ab4e535b408d3ccf99723534eb326.tar.gz
youtube-dl-bd65f181532ab4e535b408d3ccf99723534eb326.tar.xz
youtube-dl-bd65f181532ab4e535b408d3ccf99723534eb326.zip
[onetpl] Add support for videos embedded via pulsembed (closes #13482)
-rw-r--r-- youtube_dl/extractor/onet.py 30
1 files changed, 28 insertions, 2 deletions
diff --git a/youtube_dl/extractor/onet.py b/youtube_dl/extractor/onet.py
index 94f57990b..58da1bc27 100644
--- a/youtube_dl/extractor/onet.py
+++ b/youtube_dl/extractor/onet.py
@@ -11,6 +11,7 @@ from ..utils import (
     get_element_by_class,
     int_or_none,
     js_to_json,
+    NO_DEFAULT,
     parse_iso8601,
     remove_start,
     strip_or_none,
@@ -199,6 +200,19 @@ class OnetPlIE(InfoExtractor):
             'timestamp': 1487078046,
         },
     }, {
+        # embedded via pulsembed
+        'url': 'http://film.onet.pl/pensjonat-nad-rozlewiskiem-relacja-z-planu-serialu/y428n0',
+        'info_dict': {
+            'id': '501235.965429946',
+            'ext': 'mp4',
+            'title': '"Pensjonat nad rozlewiskiem": relacja z planu serialu',
+            'upload_date': '20170622',
+            'timestamp': 1498159955,
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }, {
         'url': 'http://film.onet.pl/zwiastuny/ghost-in-the-shell-drugi-zwiastun-pl/5q6yl3',
         'only_matching': True,
     }, {
@@ -212,13 +226,25 @@ class OnetPlIE(InfoExtractor):
         'only_matching': True,
     }]
 
+    def _search_mvp_id(self, webpage, default=NO_DEFAULT):
+        return self._search_regex(
+            r'data-(?:params-)?mvp=["\'](\d+\.\d+)', webpage, 'mvp id',
+            default=default)
+
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
         webpage = self._download_webpage(url, video_id)
 
-        mvp_id = self._search_regex(
-            r'data-params-mvp=["\'](\d+\.\d+)', webpage, 'mvp id')
+        mvp_id = self._search_mvp_id(webpage, default=None)
+
+        if not mvp_id:
+            pulsembed_url = self._search_regex(
+                r'data-src=(["\'])(?P<url>(?:https?:)?//pulsembed\.eu/.+?)\1',
+                webpage, 'pulsembed url', group='url')
+            webpage = self._download_webpage(
+                pulsembed_url, video_id, 'Downloading pulsembed webpage')
+            mvp_id = self._search_mvp_id(webpage)
 
         return self.url_result(
             'onetmvp:%s' % mvp_id, OnetMVPIE.ie_key(), video_id=mvp_id)