From 75a24854073e590f4efc9f037b57dee348f52b61 Mon Sep 17 00:00:00 2001
From: Remita Amine <remitamine@gmail.com>
Date: Tue, 28 Jun 2016 18:07:50 +0100
Subject: [fragment,hls,f4m,dash,ism] improve fragment downloading

- resume immediately
- no need to concatenate segments and decrypt them on every resume
- no need to save temp files for segments

and for hls downloader:
- no need to download keys for segments that are already downloaded
---
 youtube_dl/downloader/f4m.py | 33 ++++++++++-----------------------
 1 file changed, 10 insertions(+), 23 deletions(-)

(limited to 'youtube_dl/downloader/f4m.py')

diff --git a/youtube_dl/downloader/f4m.py b/youtube_dl/downloader/f4m.py
index 688e086eb..e456ed58f 100644
--- a/youtube_dl/downloader/f4m.py
+++ b/youtube_dl/downloader/f4m.py
@@ -3,7 +3,6 @@ from __future__ import division, unicode_literals
 import base64
 import io
 import itertools
-import os
 import time
 
 from .fragment import FragmentFD
@@ -16,9 +15,7 @@ from ..compat import (
     compat_struct_unpack,
 )
 from ..utils import (
-    encodeFilename,
     fix_xml_ampersands,
-    sanitize_open,
     xpath_text,
 )
 
@@ -366,17 +363,21 @@ class F4mFD(FragmentFD):
 
         dest_stream = ctx['dest_stream']
 
-        write_flv_header(dest_stream)
-        if not live:
-            write_metadata_tag(dest_stream, metadata)
+        if ctx['complete_frags_downloaded_bytes'] == 0:
+            write_flv_header(dest_stream)
+            if not live:
+                write_metadata_tag(dest_stream, metadata)
 
         base_url_parsed = compat_urllib_parse_urlparse(base_url)
 
         self._start_frag_download(ctx)
 
-        frags_filenames = []
+        frag_index = 0
         while fragments_list:
             seg_i, frag_i = fragments_list.pop(0)
+            frag_index += 1
+            if frag_index <= ctx['frag_index']:
+                continue
             name = 'Seg%d-Frag%d' % (seg_i, frag_i)
             query = []
             if base_url_parsed.query:
@@ -386,17 +387,10 @@ class F4mFD(FragmentFD):
             if info_dict.get('extra_param_to_segment_url'):
                 query.append(info_dict['extra_param_to_segment_url'])
             url_parsed = base_url_parsed._replace(path=base_url_parsed.path + name, query='&'.join(query))
-            frag_filename = '%s-%s' % (ctx['tmpfilename'], name)
             try:
-                success = ctx['dl'].download(frag_filename, {
-                    'url': url_parsed.geturl(),
-                    'http_headers': info_dict.get('http_headers'),
-                })
+                success, down_data = self._download_fragment(ctx, url_parsed.geturl(), info_dict)
                 if not success:
                     return False
-                (down, frag_sanitized) = sanitize_open(frag_filename, 'rb')
-                down_data = down.read()
-                down.close()
                 reader = FlvReader(down_data)
                 while True:
                     try:
@@ -411,12 +405,8 @@ class F4mFD(FragmentFD):
                             break
                         raise
                     if box_type == b'mdat':
-                        dest_stream.write(box_data)
+                        self._append_fragment(ctx, box_data)
                         break
-                if live:
-                    os.remove(encodeFilename(frag_sanitized))
-                else:
-                    frags_filenames.append(frag_sanitized)
             except (compat_urllib_error.HTTPError, ) as err:
                 if live and (err.code == 404 or err.code == 410):
                     # We didn't keep up with the live window. Continue
@@ -436,7 +426,4 @@ class F4mFD(FragmentFD):
 
         self._finish_frag_download(ctx)
 
-        for frag_file in frags_filenames:
-            os.remove(encodeFilename(frag_file))
-
         return True
-- 
cgit 1.4.1


From 3e0304fe6e3a194cfb04f21aa261effb0850da40 Mon Sep 17 00:00:00 2001
From: Remita Amine <remitamine@gmail.com>
Date: Sat, 22 Apr 2017 16:42:24 +0100
Subject: [downloader/fragment] use the documented names for fragment
 progress_hooks fields

---
 youtube_dl/downloader/dash.py     |  2 +-
 youtube_dl/downloader/f4m.py      |  2 +-
 youtube_dl/downloader/fragment.py | 18 ++++++++++--------
 youtube_dl/downloader/hls.py      |  2 +-
 youtube_dl/downloader/ism.py      |  2 +-
 5 files changed, 14 insertions(+), 12 deletions(-)

(limited to 'youtube_dl/downloader/f4m.py')

diff --git a/youtube_dl/downloader/dash.py b/youtube_dl/downloader/dash.py
index 94a13a543..7491fdad8 100644
--- a/youtube_dl/downloader/dash.py
+++ b/youtube_dl/downloader/dash.py
@@ -28,7 +28,7 @@ class DashSegmentsFD(FragmentFD):
         frag_index = 0
         for i, segment in enumerate(segments):
             frag_index += 1
-            if frag_index <= ctx['frag_index']:
+            if frag_index <= ctx['fragment_index']:
                 continue
             # In DASH, the first segment contains necessary headers to
             # generate a valid MP4 file, so always abort for the first segment
diff --git a/youtube_dl/downloader/f4m.py b/youtube_dl/downloader/f4m.py
index e456ed58f..c8fde9a89 100644
--- a/youtube_dl/downloader/f4m.py
+++ b/youtube_dl/downloader/f4m.py
@@ -376,7 +376,7 @@ class F4mFD(FragmentFD):
         while fragments_list:
             seg_i, frag_i = fragments_list.pop(0)
             frag_index += 1
-            if frag_index <= ctx['frag_index']:
+            if frag_index <= ctx['fragment_index']:
                 continue
             name = 'Seg%d-Frag%d' % (seg_i, frag_i)
             query = []
diff --git a/youtube_dl/downloader/fragment.py b/youtube_dl/downloader/fragment.py
index 80bb14d61..6c02cfc98 100644
--- a/youtube_dl/downloader/fragment.py
+++ b/youtube_dl/downloader/fragment.py
@@ -66,7 +66,9 @@ class FragmentFD(FileDownloader):
         if not (ctx.get('live') or ctx['tmpfilename'] == '-'):
             frag_index_stream, _ = sanitize_open(self.ytdl_filename(ctx['filename']), 'w')
             frag_index_stream.write(json.dumps({
-                'frag_index': ctx['frag_index']
+                'download': {
+                    'last_fragment_index': ctx['fragment_index']
+                },
             }))
             frag_index_stream.close()
 
@@ -100,7 +102,7 @@ class FragmentFD(FileDownloader):
             ytdl_filename = encodeFilename(self.ytdl_filename(ctx['filename']))
             if os.path.isfile(ytdl_filename):
                 frag_index_stream, _ = sanitize_open(ytdl_filename, 'r')
-                frag_index = json.loads(frag_index_stream.read())['frag_index']
+                frag_index = json.loads(frag_index_stream.read())['download']['last_fragment_index']
                 frag_index_stream.close()
         dest_stream, tmpfilename = sanitize_open(tmpfilename, open_mode)
 
@@ -108,7 +110,7 @@ class FragmentFD(FileDownloader):
             'dl': dl,
             'dest_stream': dest_stream,
             'tmpfilename': tmpfilename,
-            'frag_index': frag_index,
+            'fragment_index': frag_index,
             # Total complete fragments downloaded so far in bytes
             'complete_frags_downloaded_bytes': resume_len,
         })
@@ -120,8 +122,8 @@ class FragmentFD(FileDownloader):
         state = {
             'status': 'downloading',
             'downloaded_bytes': ctx['complete_frags_downloaded_bytes'],
-            'frag_index': ctx['frag_index'],
-            'frag_count': total_frags,
+            'fragment_index': ctx['fragment_index'],
+            'fragment_count': total_frags,
             'filename': ctx['filename'],
             'tmpfilename': ctx['tmpfilename'],
         }
@@ -144,12 +146,12 @@ class FragmentFD(FileDownloader):
             if not ctx['live']:
                 estimated_size = (
                     (ctx['complete_frags_downloaded_bytes'] + frag_total_bytes) /
-                    (state['frag_index'] + 1) * total_frags)
+                    (state['fragment_index'] + 1) * total_frags)
                 state['total_bytes_estimate'] = estimated_size
 
             if s['status'] == 'finished':
-                state['frag_index'] += 1
-                ctx['frag_index'] = state['frag_index']
+                state['fragment_index'] += 1
+                ctx['fragment_index'] = state['fragment_index']
                 state['downloaded_bytes'] += frag_total_bytes - ctx['prev_frag_downloaded_bytes']
                 ctx['complete_frags_downloaded_bytes'] = state['downloaded_bytes']
                 ctx['prev_frag_downloaded_bytes'] = 0
diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py
index 9a87d7ca8..0e29c8a2a 100644
--- a/youtube_dl/downloader/hls.py
+++ b/youtube_dl/downloader/hls.py
@@ -106,7 +106,7 @@ class HlsFD(FragmentFD):
             if line:
                 if not line.startswith('#'):
                     frag_index += 1
-                    if frag_index <= ctx['frag_index']:
+                    if frag_index <= ctx['fragment_index']:
                         continue
                     frag_url = (
                         line
diff --git a/youtube_dl/downloader/ism.py b/youtube_dl/downloader/ism.py
index 9f0fc36b3..338820e71 100644
--- a/youtube_dl/downloader/ism.py
+++ b/youtube_dl/downloader/ism.py
@@ -227,7 +227,7 @@ class IsmFD(FragmentFD):
         frag_index = 0
         for i, segment in enumerate(segments):
             frag_index += 1
-            if frag_index <= ctx['frag_index']:
+            if frag_index <= ctx['fragment_index']:
                 continue
             count = 0
             while count <= fragment_retries:
-- 
cgit 1.4.1


From 48107c198bd76e611e3d4c2486cdc5403829a05a Mon Sep 17 00:00:00 2001
From: Sergey M․ <dstftw@gmail.com>
Date: Sat, 4 Nov 2017 22:10:55 +0700
Subject: [f4m] Prefer baseURL for relative URLs (closes #14660)

---
 youtube_dl/downloader/f4m.py   | 25 +++++++++++++++++--------
 youtube_dl/extractor/common.py | 14 +++++++-------
 2 files changed, 24 insertions(+), 15 deletions(-)

(limited to 'youtube_dl/downloader/f4m.py')

diff --git a/youtube_dl/downloader/f4m.py b/youtube_dl/downloader/f4m.py
index c8fde9a89..fdb80f42a 100644
--- a/youtube_dl/downloader/f4m.py
+++ b/youtube_dl/downloader/f4m.py
@@ -243,8 +243,17 @@ def remove_encrypted_media(media):
                        media))
 
 
-def _add_ns(prop):
-    return '{http://ns.adobe.com/f4m/1.0}%s' % prop
+def _add_ns(prop, ver=1):
+    return '{http://ns.adobe.com/f4m/%d.0}%s' % (ver, prop)
+
+
+def get_base_url(manifest):
+    base_url = xpath_text(
+        manifest, [_add_ns('baseURL'), _add_ns('baseURL', 2)],
+        'base URL', default=None)
+    if base_url:
+        base_url = base_url.strip()
+    return base_url
 
 
 class F4mFD(FragmentFD):
@@ -330,13 +339,13 @@ class F4mFD(FragmentFD):
             rate, media = list(filter(
                 lambda f: int(f[0]) == requested_bitrate, formats))[0]
 
-        base_url = compat_urlparse.urljoin(man_url, media.attrib['url'])
+        # Prefer baseURL for relative URLs as per 11.2 of F4M 3.0 spec.
+        man_base_url = get_base_url(doc) or man_url
+
+        base_url = compat_urlparse.urljoin(man_base_url, media.attrib['url'])
         bootstrap_node = doc.find(_add_ns('bootstrapInfo'))
-        # From Adobe F4M 3.0 spec:
-        # The <baseURL> element SHALL be the base URL for all relative
-        # (HTTP-based) URLs in the manifest. If <baseURL> is not present, said
-        # URLs should be relative to the location of the containing document.
-        boot_info, bootstrap_url = self._parse_bootstrap_node(bootstrap_node, man_url)
+        boot_info, bootstrap_url = self._parse_bootstrap_node(
+            bootstrap_node, man_base_url)
         live = boot_info['live']
         metadata_node = media.find(_add_ns('metadata'))
         if metadata_node is not None:
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index a67ac4411..64fb869aa 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -29,7 +29,10 @@ from ..compat import (
     compat_urlparse,
     compat_xml_parse_error,
 )
-from ..downloader.f4m import remove_encrypted_media
+from ..downloader.f4m import (
+    get_base_url,
+    remove_encrypted_media,
+)
 from ..utils import (
     NO_DEFAULT,
     age_restricted,
@@ -1239,11 +1242,8 @@ class InfoExtractor(object):
         media_nodes = remove_encrypted_media(media_nodes)
         if not media_nodes:
             return formats
-        base_url = xpath_text(
-            manifest, ['{http://ns.adobe.com/f4m/1.0}baseURL', '{http://ns.adobe.com/f4m/2.0}baseURL'],
-            'base URL', default=None)
-        if base_url:
-            base_url = base_url.strip()
+
+        manifest_base_url = get_base_url(manifest)
 
         bootstrap_info = xpath_element(
             manifest, ['{http://ns.adobe.com/f4m/1.0}bootstrapInfo', '{http://ns.adobe.com/f4m/2.0}bootstrapInfo'],
@@ -1275,7 +1275,7 @@ class InfoExtractor(object):
                     continue
                 manifest_url = (
                     media_url if media_url.startswith('http://') or media_url.startswith('https://')
-                    else ((base_url or '/'.join(manifest_url.split('/')[:-1])) + '/' + media_url))
+                    else ((manifest_base_url or '/'.join(manifest_url.split('/')[:-1])) + '/' + media_url))
                 # If media_url is itself a f4m manifest do the recursive extraction
                 # since bitrates in parent manifest (this one) and media_url manifest
                 # may differ leading to inability to resolve the format by requested
-- 
cgit 1.4.1