summary refs log tree commit diff
diff options
context:
space:
mode:
author: Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com> 2013-10-27 14:40:25 +0100
committer: Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com> 2013-10-27 14:40:25 +0100
commit: c19f7764a5499b0f1e1914dd5101619b8d57d7cf (patch)
tree: 704df4061a4fc1ed3741dc69eff090d49ac29b7f
parent: bc63d9d3294072e2b355c3363c0fb5c33756d3af (diff)
download: youtube-dl-c19f7764a5499b0f1e1914dd5101619b8d57d7cf.tar.gz
youtube-dl-c19f7764a5499b0f1e1914dd5101619b8d57d7cf.tar.xz
youtube-dl-c19f7764a5499b0f1e1914dd5101619b8d57d7cf.zip
[generic] Detect bandcamp pages that use custom domains (closes #1662)
They embed the original url in the 'og:url' property.
-rw-r--r-- youtube_dl/extractor/generic.py 18
1 file changed, 17 insertions, 1 deletion
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index ab4a5b7de..2c8fcf5ae 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -41,7 +41,17 @@ class GenericIE(InfoExtractor):
                 u"uploader_id": u"skillsmatter",
                 u"uploader": u"Skills Matter",
             }
-        }
+        },
+        # bandcamp page with custom domain
+        {
+            u'url': u'http://bronyrock.com/track/the-pony-mash',
+            u'file': u'3235767654.mp3',
+            u'info_dict': {
+                u'title': u'The Pony Mash',
+                u'uploader': u'M_Pallante',
+            },
+            u'skip': u'There is a limit of 200 free downloads / month for the test song',
+        },
     ]
 
     def report_download_webpage(self, video_id):
@@ -155,6 +165,12 @@ class GenericIE(InfoExtractor):
             surl = unescapeHTML(mobj.group(1))
             return self.url_result(surl, 'Youtube')
 
+        # Look for Bandcamp pages with custom domain
+        mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
+        if mobj is not None:
+            burl = unescapeHTML(mobj.group(1))
+            return self.url_result(burl, 'Bandcamp')
+
         # Start with something easy: JW Player in SWFObject
         mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
         if mobj is None: