about summary refs log tree commit diff
path: root/youtube_dl/extractor/ard.py
diff options
context:
space:
mode:
author Philipp Hagemeister <phihag@phihag.de> 2013-06-23 20:24:07 +0200
committer Philipp Hagemeister <phihag@phihag.de> 2013-06-23 20:24:07 +0200
commit d5822b96b00fce48e04a14953c4cb25cef1cdbaf (patch)
tree 600e8a81e4b95e2316bd2f36e81618d1ef91adeb /youtube_dl/extractor/ard.py
parent b3d14cbfa7d593a74cf1423ec58265abff6758a3 (diff)
download youtube-dl-d5822b96b00fce48e04a14953c4cb25cef1cdbaf.tar.gz
youtube-dl-d5822b96b00fce48e04a14953c4cb25cef1cdbaf.tar.xz
youtube-dl-d5822b96b00fce48e04a14953c4cb25cef1cdbaf.zip
Move ARD, Arte, ZDF into their own files
Diffstat (limited to 'youtube_dl/extractor/ard.py')
-rw-r--r-- youtube_dl/extractor/ard.py 45
1 files changed, 45 insertions, 0 deletions
diff --git a/youtube_dl/extractor/ard.py b/youtube_dl/extractor/ard.py
new file mode 100644
index 000000000..e1ecdf4d3
--- /dev/null
+++ b/youtube_dl/extractor/ard.py
@@ -0,0 +1,45 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+)
+
+class ARDIE(InfoExtractor):
+    """Information extractor for ARD Mediathek / Das Erste Mediathek pages.
+
+    The video page lists its streams as JavaScript calls of the form
+    ``mediaCollection.addMediaStream(type, quality, "rtmp", "video", "...")``.
+    The extractor scrapes those calls and picks the highest-quality stream
+    of media type 0.
+    """
+
+    _VALID_URL = r'^(?:https?://)?(?:(?:www\.)?ardmediathek\.de|mediathek\.daserste\.de)/(?:.*/)(?P<video_id>[^/\?]+)(?:\?.*)?'
+    # Page headline, optionally carrying the "boxTopHeadline" class.
+    _TITLE = r'<h1(?: class="boxTopHeadline")?>(?P<title>.*)</h1>'
+    # One addMediaStream(...) call: media type, quality, RTMP URL, video URL.
+    _MEDIA_STREAM = r'mediaCollection\.addMediaStream\((?P<media_type>\d+), (?P<quality>\d+), "(?P<rtmp_url>[^"]*)", "(?P<video_url>[^"]*)", "[^"]*"\)'
+
+    def _real_extract(self, url):
+        """Extract the video information for *url*.
+
+        Returns a one-element list holding a dict with the keys ``id``,
+        ``title``, ``ext`` and ``url``, plus ``play_path`` when the chosen
+        stream is served over RTMP.
+
+        Raises ExtractorError when the URL, the title or the media streams
+        cannot be parsed, or when the page offers no usable stream.
+        """
+        # determine video id from url; a numeric documentId takes precedence
+        numid = re.search(r'documentId=([0-9]+)', url)
+        if numid:
+            video_id = numid.group(1)
+        else:
+            url_match = re.match(self._VALID_URL, url)
+            if url_match is None:
+                raise ExtractorError(u'Invalid URL: %s' % url)
+            video_id = url_match.group('video_id')
+
+        # determine title and media streams from webpage
+        html = self._download_webpage(url, video_id)
+        title_match = re.search(self._TITLE, html)
+        if title_match is None:
+            raise ExtractorError(u'Unable to extract title')
+        title = title_match.group('title')
+
+        streams = [mobj.groupdict()
+                   for mobj in re.finditer(self._MEDIA_STREAM, html)]
+        if not streams:
+            # pages carrying an "fsk" marker list no streams at all
+            if '"fsk"' in html:
+                raise ExtractorError(u'This video is only available after 8:00 pm')
+            raise ExtractorError(u'Unable to extract media streams')
+
+        # choose default media type and highest quality for now
+        default_streams = [s for s in streams if int(s['media_type']) == 0]
+        if not default_streams:
+            raise ExtractorError(u'No stream of the default media type found')
+        stream = max(default_streams, key=lambda s: int(s['quality']))
+
+        # there's two possibilities: RTMP stream or HTTP download
+        info = {'id': video_id, 'title': title, 'ext': 'mp4'}
+        if stream['rtmp_url']:
+            self.to_screen(u'RTMP download detected')
+            # explicit checks instead of assert: asserts vanish under -O
+            if not stream['video_url'].startswith('mp4:'):
+                raise ExtractorError(
+                    u'Unexpected RTMP play path: %s' % stream['video_url'])
+            info['url'] = stream['rtmp_url']
+            info['play_path'] = stream['video_url']
+        else:
+            if not stream['video_url'].endswith('.mp4'):
+                raise ExtractorError(
+                    u'Unexpected video URL: %s' % stream['video_url'])
+            info['url'] = stream['video_url']
+        return [info]