summary refs log tree commit diff
diff options
context:
space:
mode:
author: Sergey M․ <dstftw@gmail.com> 2014-09-15 19:19:37 +0700
committer: Sergey M․ <dstftw@gmail.com> 2014-09-15 19:19:37 +0700
commit: 59d284c3161797e31d7ca36da71464696adb8620 (patch)
tree: 7e9754f6054e1b3266d00744e1bd1e3ed10fcc94
parent: b04c8f735805ea2671429ac8d683c2887a6b4db8 (diff)
download: youtube-dl-59d284c3161797e31d7ca36da71464696adb8620.tar.gz
youtube-dl-59d284c3161797e31d7ca36da71464696adb8620.tar.xz
youtube-dl-59d284c3161797e31d7ca36da71464696adb8620.zip
[vporn] Make video URL regex more strict
There is garbage instead of a proper URL for some HD videos
-rw-r--r-- youtube_dl/extractor/vporn.py 60
1 file changed, 43 insertions, 17 deletions
diff --git a/youtube_dl/extractor/vporn.py b/youtube_dl/extractor/vporn.py
index 426369c51..2d23effcc 100644
--- a/youtube_dl/extractor/vporn.py
+++ b/youtube_dl/extractor/vporn.py
@@ -11,22 +11,48 @@ from ..utils import (
 
 class VpornIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?vporn\.com/[^/]+/(?P<display_id>[^/]+)/(?P<id>\d+)'
-    _TEST = {
-        'url': 'http://www.vporn.com/masturbation/violet-on-her-th-birthday/497944/',
-        'md5': 'facf37c1b86546fa0208058546842c55',
-        'info_dict': {
-            'id': '497944',
-            'display_id': 'violet-on-her-th-birthday',
-            'ext': 'mp4',
-            'title': 'Violet on her 19th birthday',
-            'description': 'Violet dances in front of the camera which is sure to get you horny.',
-            'thumbnail': 're:^https?://.*\.jpg$',
-            'uploader': 'kileyGrope',
-            'categories': ['Masturbation', 'Teen'],
-            'duration': 393,
-            'age_limit': 18,
-        }
-    }
+    _TESTS = [
+        {
+            'url': 'http://www.vporn.com/masturbation/violet-on-her-th-birthday/497944/',
+            'md5': 'facf37c1b86546fa0208058546842c55',
+            'info_dict': {
+                'id': '497944',
+                'display_id': 'violet-on-her-th-birthday',
+                'ext': 'mp4',
+                'title': 'Violet on her 19th birthday',
+                'description': 'Violet dances in front of the camera which is sure to get you horny.',
+                'thumbnail': 're:^https?://.*\.jpg$',
+                'uploader': 'kileyGrope',
+                'categories': ['Masturbation', 'Teen'],
+                'duration': 393,
+                'age_limit': 18,
+                'view_count': int,
+                'like_count': int,
+                'dislike_count': int,
+                'comment_count': int,
+            }
+        },
+        {
+            'url': 'http://www.vporn.com/female/hana-shower/523564/',
+            'md5': 'ced35a4656198a1664cf2cda1575a25f',
+            'info_dict': {
+                'id': '523564',
+                'display_id': 'hana-shower',
+                'ext': 'mp4',
+                'title': 'Hana Shower',
+                'description': 'Hana showers at the bathroom.',
+                'thumbnail': 're:^https?://.*\.jpg$',
+                'uploader': 'Hmmmmm',
+                'categories': ['Big Boobs', 'Erotic', 'Teen', 'Female'],
+                'duration': 588,
+                'age_limit': 18,
+                'view_count': int,
+                'like_count': int,
+                'dislike_count': int,
+                'comment_count': int,
+            }
+        },
+    ]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
@@ -64,7 +90,7 @@ class VpornIE(InfoExtractor):
 
         formats = []
 
-        for video in re.findall(r'flashvars\.videoUrl([^=]+?)\s*=\s*"([^"]+)"', webpage):
+        for video in re.findall(r'flashvars\.videoUrl([^=]+?)\s*=\s*"(https?://[^"]+)"', webpage):
             video_url = video[1]
             fmt = {
                 'url': video_url,