[pornhub] Implement lazy playlist extraction

author: Sergey M․ <dstftw@gmail.com> 2021-02-04 04:36:57 +0700
committer: Sergey M․ <dstftw@gmail.com> 2021-02-04 04:42:14 +0700
commit: 89c5a7d5aabd138a14c76453d79d5d66ef573bde (patch)
tree: 66c02f3f699d8d267b4235810c21046ceb6dce8c
parent: 2adc0c51cdf38e039fba0ede11f65bbd9c71bde8 (diff)
download: youtube-dl-89c5a7d5aabd138a14c76453d79d5d66ef573bde.tar.gz
youtube-dl-89c5a7d5aabd138a14c76453d79d5d66ef573bde.tar.xz
youtube-dl-89c5a7d5aabd138a14c76453d79d5d66ef573bde.zip
1 file changed, 11 insertions, 10 deletions
diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py
index 83773aebb..b7631e4e1 100644
--- a/youtube_dl/extractor/pornhub.py
+++ b/youtube_dl/extractor/pornhub.py
@@ -547,13 +547,7 @@ class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE):
                 <button[^>]+\bid=["\']moreDataBtn
             ''', webpage) is not None
 
-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        host = mobj.group('host')
-        item_id = mobj.group('id')
-
-        self._login(host)
-
+    def _entries(self, url, host, item_id):
         page = self._extract_page(url)
 
         VIDEOS = '/videos'
@@ -566,7 +560,6 @@ class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE):
         def is_404(e):
             return isinstance(e.cause, compat_HTTPError) and e.cause.code == 404
 
-        entries = []
         base_url = url
         has_page = page is not None
         first_page = page if has_page else 1
@@ -590,11 +583,19 @@ class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE):
             page_entries = self._extract_entries(webpage, host)
             if not page_entries:
                 break
-            entries.extend(page_entries)
+            for e in page_entries:
+                yield e
             if not self._has_more(webpage):
                 break
 
-        return self.playlist_result(orderedSet(entries), item_id)
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        host = mobj.group('host')
+        item_id = mobj.group('id')
+
+        self._login(host)
+
+        return self.playlist_result(self._entries(url, host, item_id), item_id)
 
 
 class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE):
author	Sergey M․ <dstftw@gmail.com>	2021-02-04 04:36:57 +0700
committer	Sergey M․ <dstftw@gmail.com>	2021-02-04 04:42:14 +0700
commit	89c5a7d5aabd138a14c76453d79d5d66ef573bde (patch)
tree	66c02f3f699d8d267b4235810c21046ceb6dce8c
parent	2adc0c51cdf38e039fba0ede11f65bbd9c71bde8 (diff)
download	youtube-dl-89c5a7d5aabd138a14c76453d79d5d66ef573bde.tar.gz youtube-dl-89c5a7d5aabd138a14c76453d79d5d66ef573bde.tar.xz youtube-dl-89c5a7d5aabd138a14c76453d79d5d66ef573bde.zip