about summary refs log tree commit diff
diff options
context:
space:
mode:
authorMartin Weinelt <mweinelt@users.noreply.github.com>2018-01-06 17:33:40 +0100
committerSergey M <dstftw@gmail.com>2018-01-06 23:33:40 +0700
commit45283afdec81af21ba50ff3aca3d86fb6d2584b0 (patch)
treea9b157f84b04204caac6d22e0d44bcbbce46a952
parentb7c74c04036c07f8a81d3048b482afd6ef384b40 (diff)
downloadyoutube-dl-45283afdec81af21ba50ff3aca3d86fb6d2584b0.tar.gz
youtube-dl-45283afdec81af21ba50ff3aca3d86fb6d2584b0.tar.xz
youtube-dl-45283afdec81af21ba50ff3aca3d86fb6d2584b0.zip
[motherless] Add support for groups
-rw-r--r--youtube_dl/extractor/extractors.py5
-rw-r--r--youtube_dl/extractor/motherless.py73
2 files changed, 77 insertions, 1 deletions
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index e64defe62..fb0997d39 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -609,7 +609,10 @@ from .mofosex import MofosexIE
 from .mojvideo import MojvideoIE
 from .moniker import MonikerIE
 from .morningstar import MorningstarIE
-from .motherless import MotherlessIE
+from .motherless import (
+    MotherlessIE,
+    MotherlessGroupIE
+)
 from .motorsport import MotorsportIE
 from .movieclips import MovieClipsIE
 from .moviezine import MoviezineIE
diff --git a/youtube_dl/extractor/motherless.py b/youtube_dl/extractor/motherless.py
index 6fe3b6049..90ed91ba6 100644
--- a/youtube_dl/extractor/motherless.py
+++ b/youtube_dl/extractor/motherless.py
@@ -4,8 +4,11 @@ import datetime
 import re
 
 from .common import InfoExtractor
+from ..compat import compat_urlparse
 from ..utils import (
     ExtractorError,
+    InAdvancePagedList,
+    orderedSet,
     str_to_int,
     unified_strdate,
 )
@@ -114,3 +117,73 @@ class MotherlessIE(InfoExtractor):
             'age_limit': age_limit,
             'url': video_url,
         }
+
+
+class MotherlessGroupIE(InfoExtractor):
+    _VALID_URL = 'https?://(?:www\.)?motherless\.com/gv?/(?P<id>[a-z0-9_]+)'
+    _TESTS = [{
+        'url': 'http://motherless.com/g/movie_scenes',
+        'info_dict': {
+            'id': 'movie_scenes',
+            'title': 'Movie Scenes',
+            'description': 'Hot and sexy scenes from "regular" movies... '
+                           'Beautiful actresses fully nude... A looot of '
+                           'skin! :)Enjoy!',
+        },
+        'playlist_mincount': 662,
+    }, {
+        'url': 'http://motherless.com/gv/sex_must_be_funny',
+        'info_dict': {
+            'id': 'sex_must_be_funny',
+            'title': 'Sex must be funny',
+            'description': 'Sex can be funny. Wide smiles,laugh, games, fun of '
+                           'any kind!'
+        },
+        'playlist_mincount': 9,
+    }]
+
+    @classmethod
+    def suitable(cls, url):
+        return (False if MotherlessIE.suitable(url)
+                else super(MotherlessGroupIE, cls).suitable(url))
+
+    def _extract_entries(self, webpage, base):
+        return [
+            self.url_result(
+                compat_urlparse.urljoin(base, video_path),
+                MotherlessIE.ie_key(), video_title=title)
+            for video_path, title in orderedSet(re.findall(
+                r'href="/([^"]+)"[^>]+>\s+<img[^>]+alt="[^-]+-\s([^"]+)"',
+                webpage))
+        ]
+
+    def _real_extract(self, url):
+        group_id = self._match_id(url)
+        page_url = compat_urlparse.urljoin(url, '/gv/%s' % group_id)
+        webpage = self._download_webpage(page_url, group_id)
+        title = self._search_regex(
+            r'<title>([\w\s]+\w)\s+-', webpage, 'title', fatal=False)
+        description = self._html_search_meta(
+            'description', webpage, fatal=False)
+        page_count = self._int(self._search_regex(
+            r'(\d+)</(?:a|span)><(?:a|span)[^>]+>\s*NEXT',
+            webpage, 'page_count'), 'page_count')
+        PAGE_SIZE = 80
+
+        def _get_page(idx):
+            webpage = self._download_webpage(
+                page_url, group_id, query={'page': idx + 1},
+                note='Downloading page %d/%d' % (idx + 1, page_count)
+            )
+            for entry in self._extract_entries(webpage, url):
+                yield entry
+
+        playlist = InAdvancePagedList(_get_page, page_count, PAGE_SIZE)
+
+        return {
+            '_type': 'playlist',
+            'id': group_id,
+            'title': title,
+            'description': description,
+            'entries': playlist
+        }