From 75a24854073e590f4efc9f037b57dee348f52b61 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 28 Jun 2016 18:07:50 +0100 Subject: [fragment,hls,f4m,dash,ism] improve fragment downloading - resume immediately - no need to concatenate segments and decrypt them on every resume - no need to save temp files for segments and for hls downloader: - no need to download keys for segments that already downloaded --- youtube_dl/downloader/f4m.py | 33 ++++++++++----------------------- 1 file changed, 10 insertions(+), 23 deletions(-) (limited to 'youtube_dl/downloader/f4m.py') diff --git a/youtube_dl/downloader/f4m.py b/youtube_dl/downloader/f4m.py index 688e086eb..e456ed58f 100644 --- a/youtube_dl/downloader/f4m.py +++ b/youtube_dl/downloader/f4m.py @@ -3,7 +3,6 @@ from __future__ import division, unicode_literals import base64 import io import itertools -import os import time from .fragment import FragmentFD @@ -16,9 +15,7 @@ from ..compat import ( compat_struct_unpack, ) from ..utils import ( - encodeFilename, fix_xml_ampersands, - sanitize_open, xpath_text, ) @@ -366,17 +363,21 @@ class F4mFD(FragmentFD): dest_stream = ctx['dest_stream'] - write_flv_header(dest_stream) - if not live: - write_metadata_tag(dest_stream, metadata) + if ctx['complete_frags_downloaded_bytes'] == 0: + write_flv_header(dest_stream) + if not live: + write_metadata_tag(dest_stream, metadata) base_url_parsed = compat_urllib_parse_urlparse(base_url) self._start_frag_download(ctx) - frags_filenames = [] + frag_index = 0 while fragments_list: seg_i, frag_i = fragments_list.pop(0) + frag_index += 1 + if frag_index <= ctx['frag_index']: + continue name = 'Seg%d-Frag%d' % (seg_i, frag_i) query = [] if base_url_parsed.query: @@ -386,17 +387,10 @@ class F4mFD(FragmentFD): if info_dict.get('extra_param_to_segment_url'): query.append(info_dict['extra_param_to_segment_url']) url_parsed = base_url_parsed._replace(path=base_url_parsed.path + name, query='&'.join(query)) - frag_filename = '%s-%s' % 
(ctx['tmpfilename'], name) try: - success = ctx['dl'].download(frag_filename, { - 'url': url_parsed.geturl(), - 'http_headers': info_dict.get('http_headers'), - }) + success, down_data = self._download_fragment(ctx, url_parsed.geturl(), info_dict) if not success: return False - (down, frag_sanitized) = sanitize_open(frag_filename, 'rb') - down_data = down.read() - down.close() reader = FlvReader(down_data) while True: try: @@ -411,12 +405,8 @@ class F4mFD(FragmentFD): break raise if box_type == b'mdat': - dest_stream.write(box_data) + self._append_fragment(ctx, box_data) break - if live: - os.remove(encodeFilename(frag_sanitized)) - else: - frags_filenames.append(frag_sanitized) except (compat_urllib_error.HTTPError, ) as err: if live and (err.code == 404 or err.code == 410): # We didn't keep up with the live window. Continue @@ -436,7 +426,4 @@ class F4mFD(FragmentFD): self._finish_frag_download(ctx) - for frag_file in frags_filenames: - os.remove(encodeFilename(frag_file)) - return True -- cgit 1.4.1 From 3e0304fe6e3a194cfb04f21aa261effb0850da40 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sat, 22 Apr 2017 16:42:24 +0100 Subject: [downloader/fragment] use the documented names for fragment progress_hooks fields --- youtube_dl/downloader/dash.py | 2 +- youtube_dl/downloader/f4m.py | 2 +- youtube_dl/downloader/fragment.py | 18 ++++++++++-------- youtube_dl/downloader/hls.py | 2 +- youtube_dl/downloader/ism.py | 2 +- 5 files changed, 14 insertions(+), 12 deletions(-) (limited to 'youtube_dl/downloader/f4m.py') diff --git a/youtube_dl/downloader/dash.py b/youtube_dl/downloader/dash.py index 94a13a543..7491fdad8 100644 --- a/youtube_dl/downloader/dash.py +++ b/youtube_dl/downloader/dash.py @@ -28,7 +28,7 @@ class DashSegmentsFD(FragmentFD): frag_index = 0 for i, segment in enumerate(segments): frag_index += 1 - if frag_index <= ctx['frag_index']: + if frag_index <= ctx['fragment_index']: continue # In DASH, the first segment contains necessary headers to # 
generate a valid MP4 file, so always abort for the first segment diff --git a/youtube_dl/downloader/f4m.py b/youtube_dl/downloader/f4m.py index e456ed58f..c8fde9a89 100644 --- a/youtube_dl/downloader/f4m.py +++ b/youtube_dl/downloader/f4m.py @@ -376,7 +376,7 @@ class F4mFD(FragmentFD): while fragments_list: seg_i, frag_i = fragments_list.pop(0) frag_index += 1 - if frag_index <= ctx['frag_index']: + if frag_index <= ctx['fragment_index']: continue name = 'Seg%d-Frag%d' % (seg_i, frag_i) query = [] diff --git a/youtube_dl/downloader/fragment.py b/youtube_dl/downloader/fragment.py index 80bb14d61..6c02cfc98 100644 --- a/youtube_dl/downloader/fragment.py +++ b/youtube_dl/downloader/fragment.py @@ -66,7 +66,9 @@ class FragmentFD(FileDownloader): if not (ctx.get('live') or ctx['tmpfilename'] == '-'): frag_index_stream, _ = sanitize_open(self.ytdl_filename(ctx['filename']), 'w') frag_index_stream.write(json.dumps({ - 'frag_index': ctx['frag_index'] + 'download': { + 'last_fragment_index': ctx['fragment_index'] + }, })) frag_index_stream.close() @@ -100,7 +102,7 @@ class FragmentFD(FileDownloader): ytdl_filename = encodeFilename(self.ytdl_filename(ctx['filename'])) if os.path.isfile(ytdl_filename): frag_index_stream, _ = sanitize_open(ytdl_filename, 'r') - frag_index = json.loads(frag_index_stream.read())['frag_index'] + frag_index = json.loads(frag_index_stream.read())['download']['last_fragment_index'] frag_index_stream.close() dest_stream, tmpfilename = sanitize_open(tmpfilename, open_mode) @@ -108,7 +110,7 @@ class FragmentFD(FileDownloader): 'dl': dl, 'dest_stream': dest_stream, 'tmpfilename': tmpfilename, - 'frag_index': frag_index, + 'fragment_index': frag_index, # Total complete fragments downloaded so far in bytes 'complete_frags_downloaded_bytes': resume_len, }) @@ -120,8 +122,8 @@ class FragmentFD(FileDownloader): state = { 'status': 'downloading', 'downloaded_bytes': ctx['complete_frags_downloaded_bytes'], - 'frag_index': ctx['frag_index'], - 'frag_count': 
total_frags, + 'fragment_index': ctx['fragment_index'], + 'fragment_count': total_frags, 'filename': ctx['filename'], 'tmpfilename': ctx['tmpfilename'], } @@ -144,12 +146,12 @@ class FragmentFD(FileDownloader): if not ctx['live']: estimated_size = ( (ctx['complete_frags_downloaded_bytes'] + frag_total_bytes) / - (state['frag_index'] + 1) * total_frags) + (state['fragment_index'] + 1) * total_frags) state['total_bytes_estimate'] = estimated_size if s['status'] == 'finished': - state['frag_index'] += 1 - ctx['frag_index'] = state['frag_index'] + state['fragment_index'] += 1 + ctx['fragment_index'] = state['fragment_index'] state['downloaded_bytes'] += frag_total_bytes - ctx['prev_frag_downloaded_bytes'] ctx['complete_frags_downloaded_bytes'] = state['downloaded_bytes'] ctx['prev_frag_downloaded_bytes'] = 0 diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index 9a87d7ca8..0e29c8a2a 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -106,7 +106,7 @@ class HlsFD(FragmentFD): if line: if not line.startswith('#'): frag_index += 1 - if frag_index <= ctx['frag_index']: + if frag_index <= ctx['fragment_index']: continue frag_url = ( line diff --git a/youtube_dl/downloader/ism.py b/youtube_dl/downloader/ism.py index 9f0fc36b3..338820e71 100644 --- a/youtube_dl/downloader/ism.py +++ b/youtube_dl/downloader/ism.py @@ -227,7 +227,7 @@ class IsmFD(FragmentFD): frag_index = 0 for i, segment in enumerate(segments): frag_index += 1 - if frag_index <= ctx['frag_index']: + if frag_index <= ctx['fragment_index']: continue count = 0 while count <= fragment_retries: -- cgit 1.4.1 From 48107c198bd76e611e3d4c2486cdc5403829a05a Mon Sep 17 00:00:00 2001 From: Sergey M․ Date: Sat, 4 Nov 2017 22:10:55 +0700 Subject: [f4m] Prefer baseURL for relative URLs (closes #14660) --- youtube_dl/downloader/f4m.py | 25 +++++++++++++++++-------- youtube_dl/extractor/common.py | 14 +++++++------- 2 files changed, 24 insertions(+), 15 deletions(-) (limited 
to 'youtube_dl/downloader/f4m.py') diff --git a/youtube_dl/downloader/f4m.py b/youtube_dl/downloader/f4m.py index c8fde9a89..fdb80f42a 100644 --- a/youtube_dl/downloader/f4m.py +++ b/youtube_dl/downloader/f4m.py @@ -243,8 +243,17 @@ def remove_encrypted_media(media): media)) -def _add_ns(prop): - return '{http://ns.adobe.com/f4m/1.0}%s' % prop +def _add_ns(prop, ver=1): + return '{http://ns.adobe.com/f4m/%d.0}%s' % (ver, prop) + + +def get_base_url(manifest): + base_url = xpath_text( + manifest, [_add_ns('baseURL'), _add_ns('baseURL', 2)], + 'base URL', default=None) + if base_url: + base_url = base_url.strip() + return base_url class F4mFD(FragmentFD): @@ -330,13 +339,13 @@ class F4mFD(FragmentFD): rate, media = list(filter( lambda f: int(f[0]) == requested_bitrate, formats))[0] - base_url = compat_urlparse.urljoin(man_url, media.attrib['url']) + # Prefer baseURL for relative URLs as per 11.2 of F4M 3.0 spec. + man_base_url = get_base_url(doc) or man_url + + base_url = compat_urlparse.urljoin(man_base_url, media.attrib['url']) bootstrap_node = doc.find(_add_ns('bootstrapInfo')) - # From Adobe F4M 3.0 spec: - # The <baseURL> element SHALL be the base URL for all relative - # (HTTP-based) URLs in the manifest. If <baseURL> is not present, said - # URLs should be relative to the location of the containing document. 
- boot_info, bootstrap_url = self._parse_bootstrap_node(bootstrap_node, man_url) + boot_info, bootstrap_url = self._parse_bootstrap_node( + bootstrap_node, man_base_url) live = boot_info['live'] metadata_node = media.find(_add_ns('metadata')) if metadata_node is not None: diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index a67ac4411..64fb869aa 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -29,7 +29,10 @@ from ..compat import ( compat_urlparse, compat_xml_parse_error, ) -from ..downloader.f4m import remove_encrypted_media +from ..downloader.f4m import ( + get_base_url, + remove_encrypted_media, +) from ..utils import ( NO_DEFAULT, age_restricted, @@ -1239,11 +1242,8 @@ class InfoExtractor(object): media_nodes = remove_encrypted_media(media_nodes) if not media_nodes: return formats - base_url = xpath_text( - manifest, ['{http://ns.adobe.com/f4m/1.0}baseURL', '{http://ns.adobe.com/f4m/2.0}baseURL'], - 'base URL', default=None) - if base_url: - base_url = base_url.strip() + + manifest_base_url = get_base_url(manifest) bootstrap_info = xpath_element( manifest, ['{http://ns.adobe.com/f4m/1.0}bootstrapInfo', '{http://ns.adobe.com/f4m/2.0}bootstrapInfo'], @@ -1275,7 +1275,7 @@ class InfoExtractor(object): continue manifest_url = ( media_url if media_url.startswith('http://') or media_url.startswith('https://') - else ((base_url or '/'.join(manifest_url.split('/')[:-1])) + '/' + media_url)) + else ((manifest_base_url or '/'.join(manifest_url.split('/')[:-1])) + '/' + media_url)) # If media_url is itself a f4m manifest do the recursive extraction # since bitrates in parent manifest (this one) and media_url manifest # may differ leading to inability to resolve the format by requested -- cgit 1.4.1