diff options
author | df <fieldhouse@gmx.net> | 2021-09-13 01:00:04 +0100 |
---|---|---|
committer | df <fieldhouse@gmx.net> | 2021-09-13 01:15:48 +0100 |
commit | f798b40cf332c0a00f167fcfc9d560fa8f795c13 (patch) | |
tree | b1dab6a942bd4bd11bbf39016dd3c44eebacdbc8 | |
parent | 1e222005ba6bb9c288dd3a2b777ca5664b220c81 (diff) | |
download | youtube-dl-f798b40cf332c0a00f167fcfc9d560fa8f795c13.tar.gz youtube-dl-f798b40cf332c0a00f167fcfc9d560fa8f795c13.tar.xz youtube-dl-f798b40cf332c0a00f167fcfc9d560fa8f795c13.zip |
Disambiguate 4-digit year and time-zone suffix
Restore check omitted from extract_timezone(); adjust DATE_FORMATS_DAY_FIRST and DATE_FORMATS_MONTH_FIRST; add tests.
-rw-r--r-- | test/test_utils.py | 6 |
-rw-r--r-- | youtube_dl/utils.py | 24 |
2 files changed, 26 insertions, 4 deletions
diff --git a/test/test_utils.py b/test/test_utils.py index 44a4f6ff7..14607f6b8 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -371,6 +371,12 @@ class TestUtil(unittest.TestCase): self.assertEqual(unified_timestamp('Sep 11, 2013 | 5:49 AM'), 1378878540) self.assertEqual(unified_timestamp('December 15, 2017 at 7:49 am'), 1513324140) self.assertEqual(unified_timestamp('2018-03-14T08:32:43.1493874+00:00'), 1521016363) + self.assertEqual(unified_timestamp('11:31 17-Jun-2021'), 1623929460) + self.assertEqual(unified_timestamp('11:31 17-Jun-2021-0000'), 1623929460) + from youtube_dl.utils import DATE_FORMATS_DAY_FIRST + DATE_FORMATS_DAY_FIRST.append('%H:%M %d-%m-%Y') + self.assertEqual(unified_timestamp('17:30 27-02-2016'), 1456594200) + self.assertEqual(unified_timestamp('17:30 27-02-2016-0000'), 1456594200) def test_determine_ext(self): self.assertEqual(determine_ext('http://example.com/foo/bar.mp4/?download'), 'mp4') diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 5dde9768d..90eb9f93c 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python # coding: utf-8 from __future__ import unicode_literals @@ -1717,8 +1716,6 @@ ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐ،٠'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y'))) DATE_FORMATS = ( - '%d %B %Y', - '%d %b %Y', '%B %d %Y', '%B %dst %Y', '%B %dnd %Y', @@ -1763,6 +1760,11 @@ DATE_FORMATS_DAY_FIRST.extend([ '%d/%m/%Y', '%d/%m/%y', '%d/%m/%Y %H:%M:%S', + '%d %B %Y', + '%d %b %Y', + '%d-%b-%Y', + '%H:%M %d-%b-%Y', + '%H:%M:%S %d-%b-%Y', ]) DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS) @@ -1772,6 +1774,11 @@ DATE_FORMATS_MONTH_FIRST.extend([ '%m/%d/%Y', '%m/%d/%y', '%m/%d/%Y %H:%M:%S', + '%B %d %Y', + '%b %d %Y', + '%b-%d-%Y', + '%H:%M %b-%d-%Y', + '%H:%M:%S %b-%d-%Y', ]) PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)" @@ -2939,7 +2946,16 @@ class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler): 
def extract_timezone(date_str): m = re.search( - r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)', + r'''(?x) + ^.{8,}? # >=8 char non-TZ prefix, if present + (?P<tz>Z| # just the UTC Z, or + (?:(?<=.\b\d{4}|\b\d{2}:\d\d)| # preceded by 4 digits or hh:mm or + (?<!.\b[a-zA-Z]{3}|[a-zA-Z]{4}|..\b\d\d)) # not preceded by 3 alpha word or >= 4 alpha or 2 digits + [ ]? # optional space + (?P<sign>\+|-) # +/- + (?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2}) # hh[:]mm + $) + ''', date_str) if not m: timezone = datetime.timedelta() |