about summary refs log tree commit diff
path: root/youtube_dl/extractor/closertotruth.py
diff options
context:
space:
mode:
authorSteven Gosseling <steven@stevengosseling.nl>2016-02-26 13:31:52 +0100
committerSergey M․ <dstftw@gmail.com>2016-06-18 23:19:56 +0700
commit41c1023300596f62dff93d9275f5e4d7a6762e66 (patch)
tree29fe978678aa6c3e5b1c578fc00ae0105123c3b3 /youtube_dl/extractor/closertotruth.py
parent90b6288cce3e5a433a521bc862d98d31be9624c2 (diff)
downloadyoutube-dl-41c1023300596f62dff93d9275f5e4d7a6762e66.tar.gz
youtube-dl-41c1023300596f62dff93d9275f5e4d7a6762e66.tar.xz
youtube-dl-41c1023300596f62dff93d9275f5e4d7a6762e66.zip
[closertotruth] Add extractor
Removed print statement from code.

Replaced two regex searches with the correct ones.

Removed some unnecessary semicolons

fixed title extraction

refactored everything to search_regex

processed comments on commit 5650b0d, fixed feedback from flake8

Improved regexes and returns info dict now.

Added support for closertotruth interview URL

Added support for episodes page
Diffstat (limited to 'youtube_dl/extractor/closertotruth.py')
-rw-r--r--youtube_dl/extractor/closertotruth.py69
1 files changed, 69 insertions, 0 deletions
diff --git a/youtube_dl/extractor/closertotruth.py b/youtube_dl/extractor/closertotruth.py
new file mode 100644
index 000000000..d04ff5e4f
--- /dev/null
+++ b/youtube_dl/extractor/closertotruth.py
@@ -0,0 +1,69 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class CloserToTruthIE(InfoExtractor):
+    """Extractor for Kaltura-hosted videos on closertotruth.com."""
+
+    # 'id' is the numeric video id for series/interviews URLs and the page slug
+    # for episodes URLs, so _match_id() never returns None.
+    _VALID_URL = r'https?://(?:www\.)?closertotruth\.com/(?:episodes/|(?:series|interviews)/(?:[^#]+#video-)?)(?P<id>(?<=/episodes/)[\w-]+|\d+)'
+    _TESTS = [{
+        'url': 'http://closertotruth.com/series/solutions-the-mind-body-problem#video-3688',
+        'md5': '5c548bde260a9247ddfdc07c7458ed29',
+        'info_dict': {
+            'id': '0_zof1ktre',
+            'ext': 'mov',
+            'title': 'Solutions to the Mind-Body Problem?',
+            'upload_date': '20140221',
+            'timestamp': 1392956007,
+            'uploader_id': 'CTTXML'
+        }
+    }, {
+        'url': 'http://closertotruth.com/interviews/1725',
+        'md5': 'b00598fd6a38372edb976408f72c5792',
+        'info_dict': {
+            'id': '0_19qv5rn1',
+            'ext': 'mov',
+            'title': 'AyaFr-002 - Francisco J. Ayala',
+            'upload_date': '20140307',
+            'timestamp': 1394236431,
+            'uploader_id': 'CTTXML'
+        }
+    }, {
+        'url': 'http://closertotruth.com/episodes/how-do-brains-work',
+        'md5': '4dd96aa0a5c296afa5c0bd24895c2f16',
+        'info_dict': {
+            'id': '0_iuxai6g6',
+            'ext': 'mov',
+            'title': 'How do Brains Work?',
+            'upload_date': '20140221',
+            'timestamp': 1392956024,
+            'uploader_id': 'CTTXML'
+        }
+    }]
+
+    def _real_extract(self, url):
+        """Return a url_transparent result that hands the entry to the Kaltura extractor."""
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        video_title = self._search_regex(r'<title>(.+) \|.+</title>', webpage, 'video title')
+        # The player anchor is identified either as "video-<id>" or as "embed-kaltura".
+        entry_id = self._search_regex(r'<a[^>]+id="(?:video-%s|embed-kaltura)"[^>]+data-kaltura="([^"]+)' % video_id, webpage, 'video entry_id')
+        # Best effort: a page without an interviewee heading is not an error.
+        interviewee_name = self._search_regex(r'<div id="(?:node_interview_full_group_white_wrapper|node_interview_series_full_group_ajax_content)"[\s\S]*<h3>(.*)</h3>.+', webpage, 'video interviewee_name', default=None)
+        if interviewee_name:
+            video_title = video_title + ' - ' + interviewee_name
+
+        p_id = self._search_regex(r'<script[^>]+src=["\'].+?partner_id/(\d+)', webpage, 'kaltura partner_id')
+
+        return {
+            '_type': 'url_transparent',
+            'id': entry_id,
+            'url': 'kaltura:%s:%s' % (p_id, entry_id),
+            'ie_key': 'Kaltura',
+            'title': video_title,
+        }