[yahoo] Extract all <iframe>s

Fixes test_yahoo_6 (https://ca.finance.yahoo.com/news/hackers-sony-more-trouble-well-154609075.html)
author: Yen Chi Hsuan <yan12125@gmail.com> 2016-04-24 17:46:25 +0800
committer: Yen Chi Hsuan <yan12125@gmail.com> 2016-04-24 17:46:25 +0800
commit: d9ed362116969362e1c404aea63d9f6f3e833478 (patch)
tree: 9b6826a2e56ee02b72c065a1d57504c7d77a1f48 /youtube_dl/extractor/yahoo.py
parent: 4f549580977ab94364fd404cdebba22575c74b91 (diff)
download: youtube-dl-d9ed362116969362e1c404aea63d9f6f3e833478.tar.gz youtube-dl-d9ed362116969362e1c404aea63d9f6f3e833478.tar.xz youtube-dl-d9ed362116969362e1c404aea63d9f6f3e833478.zip
1 file changed, 30 insertions, 11 deletions
diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py
index e4f3d8937..73396f9f8 100644
--- a/youtube_dl/extractor/yahoo.py
+++ b/youtube_dl/extractor/yahoo.py
@@ -92,14 +92,28 @@ class YahooIE(InfoExtractor):
             }
         }, {
             'url': 'https://ca.finance.yahoo.com/news/hackers-sony-more-trouble-well-154609075.html',
-            'md5': '226a895aae7e21b0129e2a2006fe9690',
             'info_dict': {
-                'id': 'e624c4bc-3389-34de-9dfc-025f74943409',
-                'ext': 'mp4',
-                'title': '\'The Interview\' TV Spot: War',
-                'description': 'The Interview',
-                'duration': 30,
-            }
+                'id': '154609075',
+            },
+            'playlist': [{
+                'md5': 'f8e336c6b66f503282e5f719641d6565',
+                'info_dict': {
+                    'id': 'e624c4bc-3389-34de-9dfc-025f74943409',
+                    'ext': 'mp4',
+                    'title': '\'The Interview\' TV Spot: War',
+                    'description': 'The Interview',
+                    'duration': 30,
+                },
+            }, {
+                'md5': '958bcb90b4d6df71c56312137ee1cd5a',
+                'info_dict': {
+                    'id': '1fc8ada0-718e-3abe-a450-bf31f246d1a9',
+                    'ext': 'mp4',
+                    'title': '\'The Interview\' TV Spot: Guys',
+                    'description': 'The Interview',
+                    'duration': 30,
+                },
+            }],
         }, {
             'url': 'http://news.yahoo.com/video/china-moses-crazy-blues-104538833.html',
             'md5': '88e209b417f173d86186bef6e4d1f160',
@@ -191,16 +205,21 @@ class YahooIE(InfoExtractor):
         webpage = self._download_webpage(url, display_id)
 
         # Look for iframed media first
-        iframe_m = re.search(r'<iframe[^>]+src="(/video/.+?-\d+\.html\?format=embed.*?)"', webpage)
-        if iframe_m:
+        entries = []
+        iframe_urls = re.findall(r'<iframe[^>]+src="(/video/.+?-\d+\.html\?format=embed.*?)"', webpage)
+        for idx, iframe_url in enumerate(iframe_urls):
             iframepage = self._download_webpage(
-                host + iframe_m.group(1), display_id, 'Downloading iframe webpage')
+                host + iframe_url, display_id,
+                note='Downloading iframe webpage for video #%d' % idx)
             items_json = self._search_regex(
                 r'mediaItems: (\[.+?\])$', iframepage, 'items', flags=re.MULTILINE, default=None)
             if items_json:
                 items = json.loads(items_json)
                 video_id = items[0]['id']
-                return self._get_info(video_id, display_id, webpage)
+                entries.append(self._get_info(video_id, display_id, webpage))
+        if entries:
+            return self.playlist_result(entries, page_id)
+
         # Look for NBCSports iframes
         nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
         if nbc_sports_url:
author	Yen Chi Hsuan <yan12125@gmail.com>	2016-04-24 17:46:25 +0800
committer	Yen Chi Hsuan <yan12125@gmail.com>	2016-04-24 17:46:25 +0800
commit	d9ed362116969362e1c404aea63d9f6f3e833478 (patch)
tree	9b6826a2e56ee02b72c065a1d57504c7d77a1f48 /youtube_dl/extractor/yahoo.py
parent	4f549580977ab94364fd404cdebba22575c74b91 (diff)
download	youtube-dl-d9ed362116969362e1c404aea63d9f6f3e833478.tar.gz youtube-dl-d9ed362116969362e1c404aea63d9f6f3e833478.tar.xz youtube-dl-d9ed362116969362e1c404aea63d9f6f3e833478.zip