diff options
author | dirkf <fieldhouse@gmx.net> | 2022-01-27 00:59:08 +0000 |
---|---|---|
committer | dirkf <fieldhouse@gmx.net> | 2022-01-27 05:38:14 +0000 |
commit | e0a1fe44eee68c0caf34476824c5406b1c4cabef (patch) | |
tree | 40aa84feb4194cfe5afc448ead65769d8605bb9c | |
parent | 70b7b0f3e3f64de60d6df48f7c21f3665098569a (diff) | |
download | youtube-dl-e0a1fe44eee68c0caf34476824c5406b1c4cabef.tar.gz youtube-dl-e0a1fe44eee68c0caf34476824c5406b1c4cabef.tar.xz youtube-dl-e0a1fe44eee68c0caf34476824c5406b1c4cabef.zip |
Improve parse_count() with single regex, based on yt-dlp 352d5da81219e2675ef8cac9383ab0dfbd161a19
-rw-r--r-- | test/test_utils.py | 7 | ||||
-rw-r--r-- | youtube_dl/utils.py | 19 |
2 files changed, 21 insertions, 5 deletions
diff --git a/test/test_utils.py b/test/test_utils.py
index 20183da1a..88d2da0c3 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -1116,9 +1116,16 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(parse_count('1000'), 1000)
         self.assertEqual(parse_count('1.000'), 1000)
         self.assertEqual(parse_count('1.1k'), 1100)
+        self.assertEqual(parse_count('1.1 k'), 1100)
+        self.assertEqual(parse_count('1,1 k'), 1100)
+        self.assertEqual(parse_count('1,1kk'), 1100000)
+        self.assertEqual(parse_count('100 views'), 100)
+        self.assertEqual(parse_count('1,100 views'), 1100)
         self.assertEqual(parse_count('1.1kk'), 1100000)
         self.assertEqual(parse_count('1.1kk '), 1100000)
         self.assertEqual(parse_count('1.1kk views'), 1100000)
+        self.assertEqual(parse_count('10M views'), 10000000)
+        self.assertEqual(parse_count('has 10M views'), 10000000)
 
     def test_parse_resolution(self):
         self.assertEqual(parse_resolution(None), {})
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 349d97f4c..ba85a90af 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -3542,10 +3542,11 @@ def parse_count(s):
     if s is None:
         return None
 
-    s = s.strip()
-
-    if re.match(r'^[\d,.]+$', s):
-        return str_to_int(s)
+    m = re.match(r'^(?:[^\d]+\s+)?(?P<val>(?P<num>[\d,.]+)(?P<rest>[\w\s]+?)?)(?:\s|\s*$)', s)
+    if m:
+        if not m.group('rest'):
+            return str_to_int(m.group('num'))
+        s = m.group('val')
 
     _UNIT_TABLE = {
         'k': 1000,
@@ -3554,9 +3555,17 @@ def parse_count(s):
         'M': 1000 ** 2,
         'kk': 1000 ** 2,
         'KK': 1000 ** 2,
+        'b': 1000 ** 3,
+        'B': 1000 ** 3,
     }
 
-    return lookup_unit_table(_UNIT_TABLE, s)
+    ret = lookup_unit_table(_UNIT_TABLE, s)
+    if ret is not None:
+        return ret
+
+    s = m and m.group('num')
+    if s is not None:
+        return str_to_int(s)
 
 
 def parse_resolution(s):