From 45283afdec81af21ba50ff3aca3d86fb6d2584b0 Mon Sep 17 00:00:00 2001
From: Martin Weinelt <mweinelt@users.noreply.github.com>
Date: Sat, 6 Jan 2018 17:33:40 +0100
Subject: [motherless] Add support for groups

---
 youtube_dl/extractor/motherless.py | 73 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 73 insertions(+)

(limited to 'youtube_dl/extractor/motherless.py')
diff --git a/youtube_dl/extractor/motherless.py b/youtube_dl/extractor/motherless.py
index 6fe3b6049..90ed91ba6 100644
--- a/youtube_dl/extractor/motherless.py
+++ b/youtube_dl/extractor/motherless.py
@@ -4,8 +4,11 @@ import datetime
 import re
 
 from .common import InfoExtractor
+from ..compat import compat_urlparse
 from ..utils import (
     ExtractorError,
+    InAdvancePagedList,
+    orderedSet,
     str_to_int,
     unified_strdate,
 )
@@ -114,3 +117,73 @@ class MotherlessIE(InfoExtractor):
             'age_limit': age_limit,
             'url': video_url,
         }
+
+
+class MotherlessGroupIE(InfoExtractor):  # playlist extractor for motherless.com group listings (/g/ and /gv/ URLs)
+    _VALID_URL = r'https?://(?:www\.)?motherless\.com/gv?/(?P<id>[a-z0-9_]+)'
+    _TESTS = [{
+        'url': 'http://motherless.com/g/movie_scenes',
+        'info_dict': {
+            'id': 'movie_scenes',
+            'title': 'Movie Scenes',
+            'description': 'Hot and sexy scenes from "regular" movies... '
+                           'Beautiful actresses fully nude... A looot of '
+                           'skin! :)Enjoy!',
+        },
+        'playlist_mincount': 662,
+    }, {
+        'url': 'http://motherless.com/gv/sex_must_be_funny',
+        'info_dict': {
+            'id': 'sex_must_be_funny',
+            'title': 'Sex must be funny',
+            'description': 'Sex can be funny. Wide smiles,laugh, games, fun of '
+                           'any kind!'
+        },
+        'playlist_mincount': 9,
+    }]
+
+    @classmethod
+    def suitable(cls, url):  # decline any URL that MotherlessIE already accepts
+        return (False if MotherlessIE.suitable(url)
+                else super(MotherlessGroupIE, cls).suitable(url))
+
+    def _extract_entries(self, webpage, base):
+        return [
+            self.url_result(
+                compat_urlparse.urljoin(base, video_path),
+                MotherlessIE.ie_key(), video_title=title)
+            for video_path, title in orderedSet(re.findall(
+                r'href="/([^"]+)"[^>]+>\s+<img[^>]+alt="[^-]+-\s([^"]+)"',
+                webpage))
+        ]
+
+    def _real_extract(self, url):
+        group_id = self._match_id(url)
+        page_url = compat_urlparse.urljoin(url, '/gv/%s' % group_id)  # always list via /gv/, whichever form the input URL used
+        webpage = self._download_webpage(page_url, group_id)
+        title = self._search_regex(
+            r'<title>([\w\s]+\w)\s+-', webpage, 'title', fatal=False)
+        description = self._html_search_meta(
+            'description', webpage, fatal=False)
+        page_count = self._int(self._search_regex(
+            r'(\d+)</(?:a|span)><(?:a|span)[^>]+>\s*NEXT',
+            webpage, 'page_count'), 'page_count')  # NOTE(review): presumably fails when no NEXT link exists - confirm single-page groups
+        PAGE_SIZE = 80  # handed to InAdvancePagedList; presumably entries per listing page - TODO confirm
+
+        def _get_page(idx):
+            webpage = self._download_webpage(
+                page_url, group_id, query={'page': idx + 1},
+                note='Downloading page %d/%d' % (idx + 1, page_count)
+            )
+            for entry in self._extract_entries(webpage, url):
+                yield entry
+
+        playlist = InAdvancePagedList(_get_page, page_count, PAGE_SIZE)
+
+        return {
+            '_type': 'playlist',
+            'id': group_id,
+            'title': title,
+            'description': description,
+            'entries': playlist
+        }
-- 
cgit 1.4.1


From a133eb7764594b830cb975e3925972214e932704 Mon Sep 17 00:00:00 2001
From: Sergey M․ <dstftw@gmail.com>
Date: Sun, 7 Jan 2018 00:02:41 +0700
Subject: [motherless:group] Capture leading slash of video path

---
 youtube_dl/extractor/motherless.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'youtube_dl/extractor/motherless.py')

diff --git a/youtube_dl/extractor/motherless.py b/youtube_dl/extractor/motherless.py
index 90ed91ba6..4adac691c 100644
--- a/youtube_dl/extractor/motherless.py
+++ b/youtube_dl/extractor/motherless.py
@@ -153,7 +153,7 @@ class MotherlessGroupIE(InfoExtractor):
                 compat_urlparse.urljoin(base, video_path),
                 MotherlessIE.ie_key(), video_title=title)
             for video_path, title in orderedSet(re.findall(
-                r'href="/([^"]+)"[^>]+>\s+<img[^>]+alt="[^-]+-\s([^"]+)"',
+                r'href="(/[^"]+)"[^>]+>\s+<img[^>]+alt="[^-]+-\s([^"]+)"',
                 webpage))
         ]
 
-- 
cgit 1.4.1


From 0a5b1295b7c1aa6395b65ee137087c540b37b32b Mon Sep 17 00:00:00 2001
From: Sergey M․ <dstftw@gmail.com>
Date: Sun, 7 Jan 2018 00:31:53 +0700
Subject: [motherless:group] Relax entry extraction and add a fallback scenario

---
 youtube_dl/extractor/motherless.py | 29 +++++++++++++++++++++--------
 1 file changed, 21 insertions(+), 8 deletions(-)

(limited to 'youtube_dl/extractor/motherless.py')

diff --git a/youtube_dl/extractor/motherless.py b/youtube_dl/extractor/motherless.py
index 4adac691c..e24396e79 100644
--- a/youtube_dl/extractor/motherless.py
+++ b/youtube_dl/extractor/motherless.py
@@ -148,14 +148,27 @@ class MotherlessGroupIE(InfoExtractor):
                 else super(MotherlessGroupIE, cls).suitable(url))
 
     def _extract_entries(self, webpage, base):
-        return [
-            self.url_result(
-                compat_urlparse.urljoin(base, video_path),
-                MotherlessIE.ie_key(), video_title=title)
-            for video_path, title in orderedSet(re.findall(
-                r'href="(/[^"]+)"[^>]+>\s+<img[^>]+alt="[^-]+-\s([^"]+)"',
-                webpage))
-        ]
+        entries = []
+        for mobj in re.finditer(
+                r'href="(?P<href>/[^"]+)"[^>]*>(?:\s*<img[^>]+alt="[^-]+-\s(?P<title>[^"]+)")?',
+                webpage):
+            video_url = compat_urlparse.urljoin(base, mobj.group('href'))
+            if not MotherlessIE.suitable(video_url):  # keep only links that MotherlessIE accepts
+                continue
+            video_id = MotherlessIE._match_id(video_url)
+            title = mobj.group('title')  # None when the optional <img ... alt=...> part did not match
+            entries.append(self.url_result(
+                video_url, ie=MotherlessIE.ie_key(), video_id=video_id,
+                video_title=title))
+        # Alternative fallback: no suitable link was found above, so build URLs from data-codename attributes
+        if not entries:
+            entries = [
+                self.url_result(
+                    compat_urlparse.urljoin(base, '/' + video_id),
+                    ie=MotherlessIE.ie_key(), video_id=video_id)
+                for video_id in orderedSet(re.findall(
+                    r'data-codename=["\']([A-Z0-9]+)', webpage))]
+        return entries
 
     def _real_extract(self, url):
         group_id = self._match_id(url)
-- 
cgit 1.4.1