about summary refs log tree commit diff
path: root/youtube_dl/extractor/esri.py
diff options
context:
space:
mode:
authorSergey M․ <dstftw@gmail.com>2015-08-15 15:59:35 +0600
committerSergey M․ <dstftw@gmail.com>2015-08-15 15:59:35 +0600
commit9c21f229236c77a8865c857b43c6cbd95dcc6f23 (patch)
tree15213f9cb6762b1f64964591bc90f3bb22f4b91f /youtube_dl/extractor/esri.py
parent3aa697f993e3719cf032c5b1e192a034100b0534 (diff)
downloadyoutube-dl-9c21f229236c77a8865c857b43c6cbd95dcc6f23.tar.gz
youtube-dl-9c21f229236c77a8865c857b43c6cbd95dcc6f23.tar.xz
youtube-dl-9c21f229236c77a8865c857b43c6cbd95dcc6f23.zip
[esri:video] Rename extractor
Diffstat (limited to 'youtube_dl/extractor/esri.py')
-rw-r--r--youtube_dl/extractor/esri.py74
1 files changed, 74 insertions, 0 deletions
diff --git a/youtube_dl/extractor/esri.py b/youtube_dl/extractor/esri.py
new file mode 100644
index 000000000..bf5d2019f
--- /dev/null
+++ b/youtube_dl/extractor/esri.py
@@ -0,0 +1,74 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_urlparse
+from ..utils import (
+    int_or_none,
+    parse_filesize,
+    unified_strdate,
+)
+
+
+class EsriVideoIE(InfoExtractor):
+    _VALID_URL = r'https?://video\.esri\.com/watch/(?P<id>[0-9]+)'
+    _TEST = {
+        'url': 'https://video.esri.com/watch/1124/arcgis-online-_dash_-developing-applications',
+        'md5': 'd4aaf1408b221f1b38227a9bbaeb95bc',
+        'info_dict': {
+            'id': '1124',
+            'ext': 'mp4',
+            'title': 'ArcGIS Online - Developing Applications',
+            'description': 'Jeremy Bartley demonstrates how to develop applications with ArcGIS Online.',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'duration': 185,
+            'upload_date': '20120419',
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        formats = []
+        for width, height, content in re.findall(
+                r'(?s)<li><strong>(\d+)x(\d+):</strong>(.+?)</li>', webpage):
+            for video_url, ext, filesize in re.findall(
+                    r'<a[^>]+href="([^"]+)">([^<]+)&nbsp;\(([^<]+)\)</a>', content):
+                formats.append({
+                    'url': compat_urlparse.urljoin(url, video_url),
+                    'ext': ext.lower(),
+                    'format_id': '%s-%s' % (ext.lower(), height),
+                    'width': int(width),
+                    'height': int(height),
+                    'filesize_approx': parse_filesize(filesize),
+                })
+        self._sort_formats(formats)
+
+        title = self._html_search_meta('title', webpage, 'title')
+        description = self._html_search_meta(
+            'description', webpage, 'description', fatal=False)
+
+        thumbnail = self._html_search_meta('thumbnail', webpage, 'thumbnail', fatal=False)
+        if thumbnail:
+            thumbnail = re.sub(r'_[st]\.jpg$', '_x.jpg', thumbnail)
+
+        duration = int_or_none(self._search_regex(
+            [r'var\s+videoSeconds\s*=\s*(\d+)', r"'duration'\s*:\s*(\d+)"],
+            webpage, 'duration', fatal=False))
+
+        upload_date = unified_strdate(self._html_search_meta(
+            'last-modified', webpage, 'upload date', fatal=None))
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'duration': duration,
+            'upload_date': upload_date,
+            'formats': formats
+        }