about summary refs log tree commit diff
diff options
context:
space:
mode:
author Ricardo Garcia <sarbalap+freshmeat@gmail.com> 2010-11-19 19:31:26 +0100
committer Ricardo Garcia <sarbalap+freshmeat@gmail.com> 2010-11-19 19:31:26 +0100
commit 138b11f36ee5e8018c29621d39c324d98d8291cc (patch)
tree 81b53e6075fdc59dd16957de36e085cdb0665c2c
parent 05df0c1d4acbc7077187579f16c6661260e62014 (diff)
download youtube-dl-138b11f36ee5e8018c29621d39c324d98d8291cc.tar.gz
youtube-dl-138b11f36ee5e8018c29621d39c324d98d8291cc.tar.xz
youtube-dl-138b11f36ee5e8018c29621d39c324d98d8291cc.zip
Rework upload date mechanism after detecting problems in several tests
-rwxr-xr-x youtube-dl 49
1 file changed, 34 insertions, 15 deletions
diff --git a/youtube-dl b/youtube-dl
index e164d5c8c..3d20a9d6d 100755
--- a/youtube-dl
+++ b/youtube-dl
@@ -5,7 +5,6 @@
 # Author: Benjamin Johnson
 # License: Public domain code
 import cookielib
-import datetime
 import htmlentitydefs
 import httplib
 import locale
@@ -37,6 +36,21 @@ std_headers = {
 
 simple_title_chars = string.ascii_letters.decode('ascii') + string.digits.decode('ascii')
 
+month_name_to_number = {
+	'January':	'01',
+	'February':	'02',
+	'March':	'03',
+	'April':	'04',
+	'May':		'05',
+	'June':		'06',
+	'July':		'07',
+	'August':	'08',
+	'September':	'09',
+	'October':	'10',
+	'November':	'11',
+	'December':	'12',
+}
+
 def preferredencoding():
 	"""Get preferred encoding.
 
@@ -899,13 +913,18 @@ class YoutubeIE(InfoExtractor):
 		upload_date = u'NA'
 		mobj = re.search(r'id="eow-date".*?>(.*?)</span>', video_webpage, re.DOTALL)
 		if mobj is not None:
-			upload_date = mobj.group(1).split()
-			format_expressions = ['%d %B %Y', '%B %d, %Y']
-			for expression in format_expressions:
-				try:
-					upload_date = datetime.datetime.strptime(upload_date, expression).strftime('%Y%m%d')
-				except:
-					pass
+			try:
+				if ',' in mobj.group(1):
+					# Month Day, Year
+					m, d, y = mobj.group(1).replace(',', '').split()
+				else:
+					# Day Month Year, we'll suppose
+					d, m, y = mobj.group(1).split()
+				m = month_name_to_number[m]
+				d = '%02d' % (long(d))
+				upload_date = '%s%s%s' % (y, m, d)
+			except:
+				upload_date = u'NA'
 
 		# description
 		video_description = 'No description available.'
@@ -961,7 +980,7 @@ class YoutubeIE(InfoExtractor):
 					'id':		video_id.decode('utf-8'),
 					'url':		video_real_url.decode('utf-8'),
 					'uploader':	video_uploader.decode('utf-8'),
-					'uploaddate':	upload_date,
+					'upload_date':	upload_date,
 					'title':	video_title,
 					'stitle':	simple_title,
 					'ext':		video_extension.decode('utf-8'),
@@ -1108,7 +1127,7 @@ class MetacafeIE(InfoExtractor):
 				'id':		video_id.decode('utf-8'),
 				'url':		video_url.decode('utf-8'),
 				'uploader':	video_uploader.decode('utf-8'),
-				'uploaddate':	u'NA',
+				'upload_date':	u'NA',
 				'title':	video_title,
 				'stitle':	simple_title,
 				'ext':		video_extension.decode('utf-8'),
@@ -1197,7 +1216,7 @@ class DailymotionIE(InfoExtractor):
 				'id':		video_id.decode('utf-8'),
 				'url':		video_url.decode('utf-8'),
 				'uploader':	video_uploader.decode('utf-8'),
-				'uploaddate':	u'NA',
+				'upload_date':	u'NA',
 				'title':	video_title,
 				'stitle':	simple_title,
 				'ext':		video_extension.decode('utf-8'),
@@ -1307,7 +1326,7 @@ class GoogleIE(InfoExtractor):
 				'id':		video_id.decode('utf-8'),
 				'url':		video_url.decode('utf-8'),
 				'uploader':	u'NA',
-				'uploaddate':	u'NA',
+				'upload_date':	u'NA',
 				'title':	video_title,
 				'stitle':	simple_title,
 				'ext':		video_extension.decode('utf-8'),
@@ -1389,7 +1408,7 @@ class PhotobucketIE(InfoExtractor):
 				'id':		video_id.decode('utf-8'),
 				'url':		video_url.decode('utf-8'),
 				'uploader':	video_uploader,
-				'uploaddate':	u'NA',
+				'upload_date':	u'NA',
 				'title':	video_title,
 				'stitle':	simple_title,
 				'ext':		video_extension.decode('utf-8'),
@@ -1544,7 +1563,7 @@ class YahooIE(InfoExtractor):
 				'id':		video_id.decode('utf-8'),
 				'url':		video_url,
 				'uploader':	video_uploader,
-				'uploaddate':	u'NA',
+				'upload_date':	u'NA',
 				'title':	video_title,
 				'stitle':	simple_title,
 				'ext':		video_extension.decode('utf-8'),
@@ -1647,7 +1666,7 @@ class GenericIE(InfoExtractor):
 				'id':		video_id.decode('utf-8'),
 				'url':		video_url.decode('utf-8'),
 				'uploader':	video_uploader,
-				'uploaddate':	u'NA',
+				'upload_date':	u'NA',
 				'title':	video_title,
 				'stitle':	simple_title,
 				'ext':		video_extension.decode('utf-8'),