summary refs log tree commit diff
diff options
context:
space:
mode:
authorPhilipp Hagemeister <phihag@phihag.de>2015-01-23 00:04:05 +0100
committerPhilipp Hagemeister <phihag@phihag.de>2015-01-23 00:04:05 +0100
commit083c9df93b08a24e967b68fbdd2f4a71ae74c8c8 (patch)
tree8b92a6762709d80c3f3c2d78f42bd181789cac32
parent50789175edbe3aaad77f45e6fe883ba09580cc6f (diff)
downloadyoutube-dl-083c9df93b08a24e967b68fbdd2f4a71ae74c8c8.tar.gz
youtube-dl-083c9df93b08a24e967b68fbdd2f4a71ae74c8c8.tar.xz
youtube-dl-083c9df93b08a24e967b68fbdd2f4a71ae74c8c8.zip
[YoutubeDL] Allow filtering by properties (Fixes #4584)
-rw-r--r--test/test_YoutubeDL.py55
-rwxr-xr-xyoutube_dl/YoutubeDL.py54
-rw-r--r--youtube_dl/options.py11
3 files changed, 120 insertions, 0 deletions
diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py
index 85d87f2c3..678b9f7d1 100644
--- a/test/test_YoutubeDL.py
+++ b/test/test_YoutubeDL.py
@@ -281,6 +281,61 @@ class TestFormatSelection(unittest.TestCase):
             downloaded = ydl.downloaded_info_dicts[0]
             self.assertEqual(downloaded['format_id'], f1id)
 
+    def test_format_filtering(self):
+        formats = [
+            {'format_id': 'A', 'filesize': 500, 'width': 1000},
+            {'format_id': 'B', 'filesize': 1000, 'width': 500},
+            {'format_id': 'C', 'filesize': 1000, 'width': 400},
+            {'format_id': 'D', 'filesize': 2000, 'width': 600},
+            {'format_id': 'E', 'filesize': 3000},
+            {'format_id': 'F'},
+            {'format_id': 'G', 'filesize': 1000000},
+        ]
+        for f in formats:
+            f['url'] = 'http://_/'
+            f['ext'] = 'unknown'
+        info_dict = _make_result(formats)
+
+        ydl = YDL({'format': 'best[filesize<3000]'})
+        ydl.process_ie_result(info_dict)
+        downloaded = ydl.downloaded_info_dicts[0]
+        self.assertEqual(downloaded['format_id'], 'D')
+
+        ydl = YDL({'format': 'best[filesize<=3000]'})
+        ydl.process_ie_result(info_dict)
+        downloaded = ydl.downloaded_info_dicts[0]
+        self.assertEqual(downloaded['format_id'], 'E')
+
+        ydl = YDL({'format': 'best[filesize <= ? 3000]'})
+        ydl.process_ie_result(info_dict)
+        downloaded = ydl.downloaded_info_dicts[0]
+        self.assertEqual(downloaded['format_id'], 'F')
+
+        ydl = YDL({'format': 'best [filesize = 1000] [width>450]'})
+        ydl.process_ie_result(info_dict)
+        downloaded = ydl.downloaded_info_dicts[0]
+        self.assertEqual(downloaded['format_id'], 'B')
+
+        ydl = YDL({'format': 'best [filesize = 1000] [width!=450]'})
+        ydl.process_ie_result(info_dict)
+        downloaded = ydl.downloaded_info_dicts[0]
+        self.assertEqual(downloaded['format_id'], 'C')
+
+        ydl = YDL({'format': '[filesize>?1]'})
+        ydl.process_ie_result(info_dict)
+        downloaded = ydl.downloaded_info_dicts[0]
+        self.assertEqual(downloaded['format_id'], 'G')
+
+        ydl = YDL({'format': '[filesize<1M]'})
+        ydl.process_ie_result(info_dict)
+        downloaded = ydl.downloaded_info_dicts[0]
+        self.assertEqual(downloaded['format_id'], 'E')
+
+        ydl = YDL({'format': '[filesize<1MiB]'})
+        ydl.process_ie_result(info_dict)
+        downloaded = ydl.downloaded_info_dicts[0]
+        self.assertEqual(downloaded['format_id'], 'G')
+
     def test_add_extra_info(self):
         test_dict = {
             'extractor': 'Foo',
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 772fddd45..8ef74e414 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -10,6 +10,7 @@ import io
 import itertools
 import json
 import locale
+import operator
 import os
 import platform
 import re
@@ -49,6 +50,7 @@ from .utils import (
     make_HTTPS_handler,
     MaxDownloadsReached,
     PagedList,
+    parse_filesize,
     PostProcessingError,
     platform_name,
     preferredencoding,
@@ -768,7 +770,59 @@ class YoutubeDL(object):
         else:
             raise Exception('Invalid result type: %s' % result_type)
 
+    def _apply_format_filter(self, format_spec, available_formats):
+        " Returns a tuple of the remaining format_spec and filtered formats "
+
+        OPERATORS = {
+            '<': operator.lt,
+            '<=': operator.le,
+            '>': operator.gt,
+            '>=': operator.ge,
+            '=': operator.eq,
+            '!=': operator.ne,
+        }
+        operator_rex = re.compile(r'''(?x)\s*\[
+            (?P<key>width|height|tbr|abr|vbr|filesize)
+            \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
+            (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
+            \]$
+            ''' % '|'.join(map(re.escape, OPERATORS.keys())))
+        m = operator_rex.search(format_spec)
+        if not m:
+            raise ValueError('Invalid format specification %r' % format_spec)
+
+        try:
+            comparison_value = int(m.group('value'))
+        except ValueError:
+            comparison_value = parse_filesize(m.group('value'))
+            if comparison_value is None:
+                comparison_value = parse_filesize(m.group('value') + 'B')
+            if comparison_value is None:
+                raise ValueError(
+                    'Invalid value %r in format specification %r' % (
+                        m.group('value'), format_spec))
+        op = OPERATORS[m.group('op')]
+
+        def _filter(f):
+            actual_value = f.get(m.group('key'))
+            if actual_value is None:
+                return m.group('none_inclusive')
+            return op(actual_value, comparison_value)
+        new_formats = [f for f in available_formats if _filter(f)]
+
+        new_format_spec = format_spec[:-len(m.group(0))]
+        if not new_format_spec:
+            new_format_spec = 'best'
+
+        return (new_format_spec, new_formats)
+
     def select_format(self, format_spec, available_formats):
+        while format_spec.endswith(']'):
+            format_spec, available_formats = self._apply_format_filter(
+                format_spec, available_formats)
+        if not available_formats:
+            return None
+
         if format_spec == 'best' or format_spec is None:
             return available_formats[-1]
         elif format_spec == 'worst':
diff --git a/youtube_dl/options.py b/youtube_dl/options.py
index f25c12e52..fd7b400b2 100644
--- a/youtube_dl/options.py
+++ b/youtube_dl/options.py
@@ -289,6 +289,17 @@ def parseOpts(overrideArguments=None):
             'extensions aac, m4a, mp3, mp4, ogg, wav, webm. '
             'You can also use the special names "best",'
             ' "bestvideo", "bestaudio", "worst". '
+            ' You can filter the video results by putting a condition in'
+            ' brackets, as in -f "best[height=720]"'
+            ' (or -f "[filesize>10M]"). '
+            ' This works for filesize, height, width, tbr, abr, and vbr'
+            ' and the comparisons <, <=, >, >=, =, != .'
+            ' Formats for which the value is not known are excluded unless you'
+            ' put a question mark (?) after the operator.'
+            ' You can combine format filters, so  '
+            '-f "[height <=? 720][tbr>500]" '
+            'selects up to 720p videos (or videos where the height is not '
+            'known) with a bitrate of at least 500 KBit/s.'
             ' By default, youtube-dl will pick the best quality.'
             ' Use commas to download multiple audio formats, such as'
             ' -f  136/137/mp4/bestvideo,140/m4a/bestaudio.'