summary refs log tree commit diff
diff options
context:
space:
mode:
author Philipp Hagemeister <phihag@phihag.de> 2014-10-25 08:55:12 +0200
committer Philipp Hagemeister <phihag@phihag.de> 2014-10-25 08:55:12 +0200
commit 7fc54e5262966370762ca7f186e9c709990b354d (patch)
tree 1f9a19ffa6eab4379b23ab485edc1f710f5080cc
parent ec9c978481eee1035ac478926a73f729232be204 (diff)
parent d36cae46d877ebcc656f23a41b53e31731d2b77e (diff)
download youtube-dl-7fc54e5262966370762ca7f186e9c709990b354d.tar.gz
youtube-dl-7fc54e5262966370762ca7f186e9c709990b354d.tar.xz
youtube-dl-7fc54e5262966370762ca7f186e9c709990b354d.zip
Merge remote-tracking branch 'xavierbeynon/audiomack'
-rw-r--r-- youtube_dl/__init__.py 1
-rw-r--r-- youtube_dl/extractor/__init__.py 1
-rw-r--r-- youtube_dl/extractor/audiomack.py 67
3 files changed, 69 insertions, 0 deletions
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
index 59b851056..4f5ce604f 100644
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -82,6 +82,7 @@ __authors__  = (
     'Dennis Scheiba',
     'Damon Timm',
     'winwon',
+    'Xavier Beynon'
 )
 
 __license__ = 'Public Domain'
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index ab4af2079..691fef5ca 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -20,6 +20,7 @@ from .arte import (
     ArteTVDDCIE,
     ArteTVEmbedIE,
 )
+from .audiomack import AudiomackIE
 from .auengine import AUEngineIE
 from .bambuser import BambuserIE, BambuserChannelIE
 from .bandcamp import BandcampIE, BandcampAlbumIE
diff --git a/youtube_dl/extractor/audiomack.py b/youtube_dl/extractor/audiomack.py
new file mode 100644
index 000000000..2f32253af
--- /dev/null
+++ b/youtube_dl/extractor/audiomack.py
@@ -0,0 +1,67 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from .soundcloud import SoundcloudIE
+from ..utils import ExtractorError
+import datetime
+import time
+
+
+class AudiomackIE(InfoExtractor):  # extractor for audiomack.com /song/ pages
+    _VALID_URL = r'https?://(?:www\.)?audiomack\.com/song/(?P<id>[\w/-]+)'
+    IE_NAME = 'audiomack'
+    _TESTS = [
+        # Song hosted on Audiomack itself
+        {
+            'url': 'http://www.audiomack.com/song/roosh-williams/extraordinary',
+            'info_dict':
+            {
+                'id' : 'roosh-williams/extraordinary',
+                'ext': 'mp3',
+                'title': 'Roosh Williams - Extraordinary'
+            }
+        },
+        # Song hosted on SoundCloud and served via Audiomack
+        {
+            'url': 'http://www.audiomack.com/song/xclusiveszone/take-kare',
+            'file': '172419696.mp3',
+            'info_dict':
+            {
+                'ext': 'mp3',
+                'title': 'Young Thug ft Lil Wayne - Take Kare',
+                "upload_date": "20141016",
+                "description": "New track produced by London On Da Track called “Take Kare\"\n\nhttp://instagram.com/theyoungthugworld\nhttps://www.facebook.com/ThuggerThuggerCashMoney\n",
+                "uploader": "Young Thug World"
+            }
+        }
+    ]
+
+    def _real_extract(self, url):  # returns an info dict, or a 'url' result handing off to Soundcloud
+        # The id is whatever follows /song/ in the URL, usually "<uploader name>/<title>"
+        id = self._match_id(url)
+
+        # Call the API, which returns a JSON document with the real URL inside
+        rightnow = int(time.time())  # sent as the "_" query parameter (presumably a cache-buster)
+        apiresponse = self._download_json("http://www.audiomack.com/api/music/url/song/"+id+"?_="+str(rightnow), id)
+
+        if "url" not in apiresponse:  # the API answered without a media URL
+            raise ExtractorError("Unable to deduce api url of song")
+        realurl = apiresponse["url"]
+
+        # Audiomack wraps a lot of SoundCloud tracks in its own branded wrapper;
+        # if the real URL is one of those, pass the work off to the SoundCloud extractor
+        if SoundcloudIE.suitable(realurl):
+            return {'_type': 'url', 'url': realurl, 'ie_key': 'Soundcloud'}
+        else:
+            # Otherwise scrape the artist and song title from the song's web page
+            page = self._download_webpage(url, id)
+            artist = self._html_search_regex(r'<span class="artist">(.*)</span>', page, "artist")
+            songtitle = self._html_search_regex(r'<h1 class="profile-title song-title"><span class="artist">.*</span>(.*)</h1>', page, "title")
+            title = artist+" - "+songtitle
+            return {
+                'id': id,  # the URL path matched after /song/; the title comes from the page instead
+                'title': title,
+                'url': realurl,
+                'ext': 'mp3'
+            }