summary refs log tree commit diff
diff options
context:
space:
mode:
author: codesparkle <adam.thalhammer@outlook.com> 2014-05-19 22:17:54 +1000
committer: codesparkle <adam.thalhammer@outlook.com> 2014-05-19 22:17:54 +1000
commit: 77541837e5cb27e8e5d322d7c2ecb08f8823a757 (patch)
tree: e32e086c122f15f17198b73e965cf0b2b75b2432
parent: 89bb8e97ee82263a0f31eb3ace696d2bf7540c31 (diff)
download: youtube-dl-77541837e5cb27e8e5d322d7c2ecb08f8823a757.tar.gz
youtube-dl-77541837e5cb27e8e5d322d7c2ecb08f8823a757.tar.xz
youtube-dl-77541837e5cb27e8e5d322d7c2ecb08f8823a757.zip
The opening curly brace, '{', is a regex metacharacter (it introduces a repetition quantifier), so it should be escaped when it is meant to match a literal brace (see http://stackoverflow.com/a/400316/1106367).
Minor improvements:
There is no need to sort the whole list when all we need is the maximum element. Also, instead of reinventing the wheel, we can use the `qualities` helper from utils to get indices from qualities.
-rw-r--r-- youtube_dl/extractor/ndr.py 15
1 file changed, 8 insertions, 7 deletions
diff --git a/youtube_dl/extractor/ndr.py b/youtube_dl/extractor/ndr.py
index 53b34f5e6..3d6096e46 100644
--- a/youtube_dl/extractor/ndr.py
+++ b/youtube_dl/extractor/ndr.py
@@ -7,6 +7,7 @@ from .common import InfoExtractor
 from ..utils import (
     ExtractorError,
     int_or_none,
+    qualities,
 )
 
 
@@ -57,7 +58,7 @@ class NDRIE(InfoExtractor):
 
         formats = []
 
-        mp3_url = re.search(r'''{src:'(?P<audio>[^']+)', type:"audio/mp3"},''', page)
+        mp3_url = re.search(r'''\{src:'(?P<audio>[^']+)', type:"audio/mp3"},''', page)
         if mp3_url:
             formats.append({
                 'url': mp3_url.group('audio'),
@@ -66,15 +67,15 @@ class NDRIE(InfoExtractor):
 
         thumbnail = None
 
-        video_url = re.search(r'''3: {src:'(?P<video>.+?)\.hi\.mp4', type:"video/mp4"},''', page)
+        video_url = re.search(r'''3: \{src:'(?P<video>.+?)\.hi\.mp4', type:"video/mp4"},''', page)
         if video_url:
-            thumbnails = re.findall(r'''\d+: {src: "([^"]+)"(?: \|\| '[^']+')?, quality: '([^']+)'}''', page)
+            thumbnails = re.findall(r'''\d+: \{src: "([^"]+)"(?: \|\| '[^']+')?, quality: '([^']+)'}''', page)
             if thumbnails:
-                QUALITIES = ['xs', 's', 'm', 'l', 'xl']
-                thumbnails.sort(key=lambda thumb: QUALITIES.index(thumb[1]) if thumb[1] in QUALITIES else -1)
-                thumbnail = 'http://www.ndr.de' + thumbnails[-1][0]
+                quality_key = qualities(['xs', 's', 'm', 'l', 'xl'])
+                largest = max(thumbnails, key=lambda thumb: quality_key(thumb[1]))
+                thumbnail = 'http://www.ndr.de' + largest[0]
 
-            for format_id in ['lo', 'hi', 'hq']:
+            for format_id in 'lo', 'hi', 'hq':
                 formats.append({
                     'url': '%s.%s.mp4' % (video_url.group('video'), format_id),
                     'format_id': format_id,