about summary refs log tree commit diff
diff options
context:
space:
mode:
author dirkf <fieldhouse@gmx.net> 2022-01-27 00:59:08 +0000
committer dirkf <fieldhouse@gmx.net> 2022-01-27 05:38:14 +0000
commit e0a1fe44eee68c0caf34476824c5406b1c4cabef (patch)
tree 40aa84feb4194cfe5afc448ead65769d8605bb9c
parent 70b7b0f3e3f64de60d6df48f7c21f3665098569a (diff)
download youtube-dl-e0a1fe44eee68c0caf34476824c5406b1c4cabef.tar.gz
youtube-dl-e0a1fe44eee68c0caf34476824c5406b1c4cabef.tar.xz
youtube-dl-e0a1fe44eee68c0caf34476824c5406b1c4cabef.zip
Improve parse_count() with single regex, based on yt-dlp 352d5da81219e2675ef8cac9383ab0dfbd161a19
-rw-r--r-- test/test_utils.py 7
-rw-r--r-- youtube_dl/utils.py 19
2 files changed, 21 insertions, 5 deletions
diff --git a/test/test_utils.py b/test/test_utils.py
index 20183da1a..88d2da0c3 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -1116,9 +1116,16 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(parse_count('1000'), 1000)
         self.assertEqual(parse_count('1.000'), 1000)
         self.assertEqual(parse_count('1.1k'), 1100)
+        self.assertEqual(parse_count('1.1 k'), 1100)
+        self.assertEqual(parse_count('1,1 k'), 1100)
+        self.assertEqual(parse_count('1,1kk'), 1100000)
+        self.assertEqual(parse_count('100 views'), 100)
+        self.assertEqual(parse_count('1,100 views'), 1100)
         self.assertEqual(parse_count('1.1kk'), 1100000)
         self.assertEqual(parse_count('1.1kk '), 1100000)
         self.assertEqual(parse_count('1.1kk views'), 1100000)
+        self.assertEqual(parse_count('10M views'), 10000000)
+        self.assertEqual(parse_count('has 10M views'), 10000000)
 
     def test_parse_resolution(self):
         self.assertEqual(parse_resolution(None), {})
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 349d97f4c..ba85a90af 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -3542,10 +3542,11 @@ def parse_count(s):
     if s is None:
         return None
 
-    s = s.strip()
-
-    if re.match(r'^[\d,.]+$', s):
-        return str_to_int(s)
+    m = re.match(r'^(?:[^\d]+\s+)?(?P<val>(?P<num>[\d,.]+)(?P<rest>[\w\s]+?)?)(?:\s|\s*$)', s)
+    if m:
+        if not m.group('rest'):
+            return str_to_int(m.group('num'))
+        s = m.group('val')
 
     _UNIT_TABLE = {
         'k': 1000,
@@ -3554,9 +3555,17 @@ def parse_count(s):
         'M': 1000 ** 2,
         'kk': 1000 ** 2,
         'KK': 1000 ** 2,
+        'b': 1000 ** 3,
+        'B': 1000 ** 3,
     }
 
-    return lookup_unit_table(_UNIT_TABLE, s)
+    ret = lookup_unit_table(_UNIT_TABLE, s)
+    if ret is not None:
+        return ret
+
+    s = m and m.group('num')
+    if s is not None:
+        return str_to_int(s)
 
 
 def parse_resolution(s):