about summary refs log tree commit diff
path: root/youtube_dl/extractor/webofstories.py
diff options
context:
space:
mode:
authorSergey M․ <dstftw@gmail.com>2016-02-28 03:37:48 +0600
committerSergey M․ <dstftw@gmail.com>2016-02-28 03:37:48 +0600
commit8870bb4653982a81c2ff332103499e12a825099c (patch)
tree391bed4f1f6616b376a3f36726796d4505a56c56 /youtube_dl/extractor/webofstories.py
parent7a0e7779fe571972b72edc6aab4b8c93db4b22e8 (diff)
downloadyoutube-dl-8870bb4653982a81c2ff332103499e12a825099c.tar.gz
youtube-dl-8870bb4653982a81c2ff332103499e12a825099c.tar.xz
youtube-dl-8870bb4653982a81c2ff332103499e12a825099c.zip
[webofstories] Tolerate malforder og:title (Closes #8417)
Diffstat (limited to 'youtube_dl/extractor/webofstories.py')
-rw-r--r--youtube_dl/extractor/webofstories.py64
1 files changed, 39 insertions, 25 deletions
diff --git a/youtube_dl/extractor/webofstories.py b/youtube_dl/extractor/webofstories.py
index 2037d9b3d..7aea47ed5 100644
--- a/youtube_dl/extractor/webofstories.py
+++ b/youtube_dl/extractor/webofstories.py
@@ -12,38 +12,52 @@ class WebOfStoriesIE(InfoExtractor):
     _VIDEO_DOMAIN = 'http://eu-mobile.webofstories.com/'
     _GREAT_LIFE_STREAMER = 'rtmp://eu-cdn1.webofstories.com/cfx/st/'
     _USER_STREAMER = 'rtmp://eu-users.webofstories.com/cfx/st/'
-    _TESTS = [
-        {
-            'url': 'http://www.webofstories.com/play/hans.bethe/71',
-            'md5': '373e4dd915f60cfe3116322642ddf364',
-            'info_dict': {
-                'id': '4536',
-                'ext': 'mp4',
-                'title': 'The temperature of the sun',
-                'thumbnail': 're:^https?://.*\.jpg$',
-                'description': 'Hans Bethe talks about calculating the temperature of the sun',
-                'duration': 238,
-            }
+    _TESTS = [{
+        'url': 'http://www.webofstories.com/play/hans.bethe/71',
+        'md5': '373e4dd915f60cfe3116322642ddf364',
+        'info_dict': {
+            'id': '4536',
+            'ext': 'mp4',
+            'title': 'The temperature of the sun',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'description': 'Hans Bethe talks about calculating the temperature of the sun',
+            'duration': 238,
+        }
+    }, {
+        'url': 'http://www.webofstories.com/play/55908',
+        'md5': '2985a698e1fe3211022422c4b5ed962c',
+        'info_dict': {
+            'id': '55908',
+            'ext': 'mp4',
+            'title': 'The story of Gemmata obscuriglobus',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'description': 'Planctomycete talks about The story of Gemmata obscuriglobus',
+            'duration': 169,
+        },
+        'skip': 'notfound',
+    }, {
+        # malformed og:title meta
+        'url': 'http://www.webofstories.com/play/54215?o=MS',
+        'info_dict': {
+            'id': '54215',
+            'ext': 'mp4',
+            'title': '"A Leg to Stand On"',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'description': 'Oliver Sacks talks about the death and resurrection of a limb',
+            'duration': 97,
         },
-        {
-            'url': 'http://www.webofstories.com/play/55908',
-            'md5': '2985a698e1fe3211022422c4b5ed962c',
-            'info_dict': {
-                'id': '55908',
-                'ext': 'mp4',
-                'title': 'The story of Gemmata obscuriglobus',
-                'thumbnail': 're:^https?://.*\.jpg$',
-                'description': 'Planctomycete talks about The story of Gemmata obscuriglobus',
-                'duration': 169,
-            }
+        'params': {
+            'skip_download': True,
         },
-    ]
+    }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
         webpage = self._download_webpage(url, video_id)
-        title = self._og_search_title(webpage)
+        # Sometimes og:title meta is malformed
+        title = self._og_search_title(webpage, default=None) or self._html_search_regex(
+            r'(?s)<strong>Title:\s*</strong>(.+?)<', webpage, 'title')
         description = self._html_search_meta('description', webpage)
         thumbnail = self._og_search_thumbnail(webpage)