about summary refs log tree commit diff
diff options
context:
space:
mode:
authorPhilipp Hagemeister <phihag@phihag.de>2015-03-02 15:21:11 +0100
committerPhilipp Hagemeister <phihag@phihag.de>2015-03-02 18:21:31 +0100
commit76c73715fb1e0eee61ace5ff7855d8237abdcd54 (patch)
tree18b355f14ccb055e1bc540f36b64a261a0deac78
parentc75f0b361a1b00f6ac1298615d6fee101994b2b9 (diff)
downloadyoutube-dl-76c73715fb1e0eee61ace5ff7855d8237abdcd54.tar.gz
youtube-dl-76c73715fb1e0eee61ace5ff7855d8237abdcd54.tar.xz
youtube-dl-76c73715fb1e0eee61ace5ff7855d8237abdcd54.zip
[generic] Parse RSS enclosure URLs (Fixes #5091)
-rw-r--r--youtube_dl/extractor/generic.py34
1 files changed, 29 insertions, 5 deletions
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index 27e2bc300..5dc53685c 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -26,6 +26,7 @@ from ..utils import (
     unsmuggle_url,
     UnsupportedError,
     url_basename,
+    xpath_text,
 )
 from .brightcove import BrightcoveIE
 from .ooyala import OoyalaIE
@@ -569,6 +570,16 @@ class GenericIE(InfoExtractor):
                 'title': 'John Carlson Postgame 2/25/15',
             },
         },
+        # RSS feed with enclosure
+        {
+            'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
+            'info_dict': {
+                'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
+                'ext': 'm4v',
+                'upload_date': '20150228',
+                'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
+            }
+        }
     ]
 
     def report_following_redirect(self, new_url):
@@ -580,11 +591,24 @@ class GenericIE(InfoExtractor):
         playlist_desc_el = doc.find('./channel/description')
         playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text
 
-        entries = [{
-            '_type': 'url',
-            'url': e.find('link').text,
-            'title': e.find('title').text,
-        } for e in doc.findall('./channel/item')]
+        entries = []
+        for it in doc.findall('./channel/item'):
+            next_url = xpath_text(it, 'link', fatal=False)
+            if not next_url:
+                enclosure_nodes = it.findall('./enclosure')
+                for e in enclosure_nodes:
+                    next_url = e.attrib.get('url')
+                    if next_url:
+                        break
+
+            if not next_url:
+                continue
+
+            entries.append({
+                '_type': 'url',
+                'url': next_url,
+                'title': it.find('title').text,
+            })
 
         return {
             '_type': 'playlist',