summary refs log tree commit diff
diff options
context:
space:
mode:
author Sergey M․ <dstftw@gmail.com> 2016-07-09 03:29:07 +0700
committer Sergey M․ <dstftw@gmail.com> 2016-07-09 03:29:07 +0700
commit 0de168f7ed2da440f6a1bcb614abd26ff73bb840 (patch)
tree 0e0aa51b7bf3e780b8731c30745f0eeae2a5c220
parent 95b31e266b930dc753b8bf5a1673ced9b50fd519 (diff)
download youtube-dl-0de168f7ed2da440f6a1bcb614abd26ff73bb840.tar.gz
youtube-dl-0de168f7ed2da440f6a1bcb614abd26ff73bb840.tar.xz
youtube-dl-0de168f7ed2da440f6a1bcb614abd26ff73bb840.zip
[extractor/generic] Detect schema.org/VideoObject embeds
-rw-r--r-- youtube_dl/extractor/generic.py 30
1 file changed, 30 insertions, 0 deletions
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index 31527d1c6..62da9bbc0 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -1313,6 +1313,23 @@ class GenericIE(InfoExtractor):
             },
             'add_ie': ['Kaltura'],
         },
+        {
+            # TODO: find another test
+            # http://schema.org/VideoObject
+            # 'url': 'https://flipagram.com/f/nyvTSJMKId',
+            # 'md5': '888dcf08b7ea671381f00fab74692755',
+            # 'info_dict': {
+            #     'id': 'nyvTSJMKId',
+            #     'ext': 'mp4',
+            #     'title': 'Flipagram by sjuria101 featuring Midnight Memories by One Direction',
+            #     'description': '#love for cats.',
+            #     'timestamp': 1461244995,
+            #     'upload_date': '20160421',
+            # },
+            # 'params': {
+            #     'force_generic_extractor': True,
+            # },
+        }
     ]
 
     def report_following_redirect(self, new_url):
@@ -2157,6 +2174,19 @@ class GenericIE(InfoExtractor):
         if embed_url:
             return self.url_result(embed_url)
 
+        # Looking for http://schema.org/VideoObject
+        json_ld = self._search_json_ld(
+            webpage, video_id, default=None, expected_type='VideoObject')
+        if json_ld and json_ld.get('url'):
+            info_dict.update({
+                'title': video_title or info_dict['title'],
+                'description': video_description,
+                'thumbnail': video_thumbnail,
+                'age_limit': age_limit
+            })
+            info_dict.update(json_ld)
+            return info_dict
+
         def check_video(vurl):
             if YoutubeIE.suitable(vurl):
                 return True