summary refs log tree commit diff
diff options
context:
space:
mode:
author: Philipp Hagemeister <phihag@phihag.de> 2013-12-16 22:18:27 +0100
committer: Philipp Hagemeister <phihag@phihag.de> 2013-12-16 22:18:27 +0100
commit: d90df974c3164ea377f2ce2b04742e6ff21379e8 (patch)
tree: 85d062c4bc25a3e665330f48973a8dab4d04dece
parent: 87a28127d225f698c3cbf77a7f943338a3499d6e (diff)
download: youtube-dl-d90df974c3164ea377f2ce2b04742e6ff21379e8.tar.gz
youtube-dl-d90df974c3164ea377f2ce2b04742e6ff21379e8.tar.xz
youtube-dl-d90df974c3164ea377f2ce2b04742e6ff21379e8.zip
[academicearth] Add support for courses (#1976)
-rw-r--r-- test/test_playlists.py 12
-rw-r--r-- youtube_dl/extractor/__init__.py 3
-rw-r--r-- youtube_dl/extractor/academicearth.py 36
3 files changed, 50 insertions, 1 deletions
diff --git a/test/test_playlists.py b/test/test_playlists.py
index 87ca401e5..b7c6850fd 100644
--- a/test/test_playlists.py
+++ b/test/test_playlists.py
@@ -12,6 +12,7 @@ from test.helper import FakeYDL
 
 
 from youtube_dl.extractor import (
+    AcademicEarthCourseIE,
     DailymotionPlaylistIE,
     DailymotionUserIE,
     VimeoChannelIE,
@@ -158,5 +159,16 @@ class TestPlaylists(unittest.TestCase):
         self.assertEqual(result['title'], u'Inspector')
         self.assertTrue(len(result['entries']) >= 9)
 
+    def test_AcademicEarthCourse(self):
+        dl = FakeYDL()
+        ie = AcademicEarthCourseIE(dl)
+        result = ie.extract(u'http://academicearth.org/courses/building-dynamic-websites/')
+        self.assertIsPlaylist(result)
+        self.assertEqual(result['id'], u'building-dynamic-websites')
+        self.assertEqual(result['title'], u'Building Dynamic Websites')
+        self.assertEqual(result['description'], "Today's websites are increasingly dynamic. Pages are no longer static HTML files but instead generated by scripts and database calls. User interfaces are more seamless, with technologies like Ajax replacing traditional page reloads. This course teaches students how to build dynamic websites with Ajax and with Linux, Apache, MySQL, and PHP (LAMP), one of today's most popular frameworks. Students learn how to set up domain names with DNS, how to structure pages with XHTML and CSS, how to program in JavaScript and PHP, how to configure Apache and MySQL, how to design and query databases with SQL, how to use Ajax with both XML and JSON, and how to build mashups. The course explores issues of security, scalability, and cross-browser support and also discusses enterprise-level deployments of websites, including third-party hosting, virtualization, colocation in data centers, firewalling, and load-balancing.")
+        self.assertEqual(len(result['entries']), 10)
+
+
 if __name__ == '__main__':
     unittest.main()
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 2761b5439..7f2f8806e 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -1,6 +1,7 @@
-from .appletrailers import AppleTrailersIE
+from .academicearth import AcademicEarthCourseIE
 from .addanime import AddAnimeIE
 from .anitube import AnitubeIE
+from .appletrailers import AppleTrailersIE
 from .archiveorg import ArchiveOrgIE
 from .ard import ARDIE
 from .arte import (
diff --git a/youtube_dl/extractor/academicearth.py b/youtube_dl/extractor/academicearth.py
new file mode 100644
index 000000000..5045e7332
--- /dev/null
+++ b/youtube_dl/extractor/academicearth.py
@@ -0,0 +1,36 @@
+import datetime
+import json
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    remove_start,
+)
+
+
+class AcademicEarthCourseIE(InfoExtractor):
+    _VALID_URL = r'^https?://(?:www\.)?academicearth\.org/courses/(?P<id>[^?#/]+)'
+    IE_NAME = u'AcademicEarth:Course'
+
+    def _real_extract(self, url):
+        m = re.match(self._VALID_URL, url)
+        playlist_id = m.group('id')
+
+        webpage = self._download_webpage(url, playlist_id)
+        title = self._html_search_regex(
+            r'<h1 class="playlist-name">(.*?)</h1>', webpage, u'title')
+        description = self._html_search_regex(
+            r'<p class="excerpt">(.*?)</p>',
+            webpage, u'description', fatal=False)
+        urls = re.findall(
+            r'<h3 class="lecture-title"><a target="_blank" href="([^"]+)">',
+            webpage)
+        entries = [self.url_result(u) for u in urls]
+
+        return {
+            '_type': 'playlist',
+            'id': playlist_id,
+            'title': title,
+            'description': description,
+            'entries': entries,
+        }