about summary refs log tree commit diff
diff options
context:
space:
mode:
author df <fieldhouse@gmx.net> 2021-09-13 01:00:04 +0100
committer df <fieldhouse@gmx.net> 2021-09-13 01:15:48 +0100
commit f798b40cf332c0a00f167fcfc9d560fa8f795c13 (patch)
tree b1dab6a942bd4bd11bbf39016dd3c44eebacdbc8
parent 1e222005ba6bb9c288dd3a2b777ca5664b220c81 (diff)
download youtube-dl-f798b40cf332c0a00f167fcfc9d560fa8f795c13.tar.gz
youtube-dl-f798b40cf332c0a00f167fcfc9d560fa8f795c13.tar.xz
youtube-dl-f798b40cf332c0a00f167fcfc9d560fa8f795c13.zip
Disambiguate 4-digit year and time-zone suffix
Restore check omitted from extract_timezone(); adjust DATE_FORMATS_DAY/MONTH_FIRST; add tests.
-rw-r--r-- test/test_utils.py 6
-rw-r--r-- youtube_dl/utils.py 24
2 files changed, 26 insertions, 4 deletions
diff --git a/test/test_utils.py b/test/test_utils.py
index 44a4f6ff7..14607f6b8 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -371,6 +371,12 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(unified_timestamp('Sep 11, 2013 | 5:49 AM'), 1378878540)
         self.assertEqual(unified_timestamp('December 15, 2017 at 7:49 am'), 1513324140)
         self.assertEqual(unified_timestamp('2018-03-14T08:32:43.1493874+00:00'), 1521016363)
+        self.assertEqual(unified_timestamp('11:31 17-Jun-2021'), 1623929460)
+        self.assertEqual(unified_timestamp('11:31 17-Jun-2021-0000'), 1623929460)
+        from youtube_dl.utils import DATE_FORMATS_DAY_FIRST
+        DATE_FORMATS_DAY_FIRST.append('%H:%M %d-%m-%Y')
+        self.assertEqual(unified_timestamp('17:30 27-02-2016'), 1456594200)
+        self.assertEqual(unified_timestamp('17:30 27-02-2016-0000'), 1456594200)
 
     def test_determine_ext(self):
         self.assertEqual(determine_ext('http://example.com/foo/bar.mp4/?download'), 'mp4')
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 5dde9768d..90eb9f93c 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -1,4 +1,3 @@
-#!/usr/bin/env python
 # coding: utf-8
 
 from __future__ import unicode_literals
@@ -1717,8 +1716,6 @@ ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙ
                                         'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
 
 DATE_FORMATS = (
-    '%d %B %Y',
-    '%d %b %Y',
     '%B %d %Y',
     '%B %dst %Y',
     '%B %dnd %Y',
@@ -1763,6 +1760,11 @@ DATE_FORMATS_DAY_FIRST.extend([
     '%d/%m/%Y',
     '%d/%m/%y',
     '%d/%m/%Y %H:%M:%S',
+    '%d %B %Y',
+    '%d %b %Y',
+    '%d-%b-%Y',
+    '%H:%M %d-%b-%Y',
+    '%H:%M:%S %d-%b-%Y',
 ])
 
 DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
@@ -1772,6 +1774,11 @@ DATE_FORMATS_MONTH_FIRST.extend([
     '%m/%d/%Y',
     '%m/%d/%y',
     '%m/%d/%Y %H:%M:%S',
+    '%B %d %Y',
+    '%b %d %Y',
+    '%b-%d-%Y',
+    '%H:%M %b-%d-%Y',
+    '%H:%M:%S %b-%d-%Y',
 ])
 
 PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
@@ -2939,7 +2946,16 @@ class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
 
 def extract_timezone(date_str):
     m = re.search(
-        r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
+        r'''(?x)
+            ^.{8,}?                                              # >=8 char non-TZ prefix, if present
+            (?P<tz>Z|                                            # just the UTC Z, or
+                (?:(?<=.\b\d{4}|\b\d{2}:\d\d)|                   # preceded by 4 digits or hh:mm or
+                   (?<!.\b[a-zA-Z]{3}|[a-zA-Z]{4}|..\b\d\d))     # not preceded by 3 alpha word or >= 4 alpha or 2 digits
+                   [ ]?                                          # optional space
+                (?P<sign>\+|-)                                   # +/-
+                (?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})       # hh[:]mm
+            $)
+        ''',
         date_str)
     if not m:
         timezone = datetime.timedelta()