about summary refs log tree commit diff
diff options
context:
space:
mode:
author: Sergey M․ <dstftw@gmail.com>, 2017-12-29 23:14:15 +0700
committer: Sergey M․ <dstftw@gmail.com>, 2017-12-29 23:14:56 +0700
commit: 9d6ac71c27b1dfb662c795ef598dbfd0286682da (patch)
tree: bdc88e44ae6e8dcd99e3ee3e03be15617107fd2f
parent: 84f085d4bdb66ee025fb337bcd571eab7469da97 (diff)
download: youtube-dl-9d6ac71c27b1dfb662c795ef598dbfd0286682da.tar.gz
youtube-dl-9d6ac71c27b1dfb662c795ef598dbfd0286682da.tar.xz
youtube-dl-9d6ac71c27b1dfb662c795ef598dbfd0286682da.zip
[extractor/common] Fix extraction of DASH formats with the same representation id (closes #15111)
-rw-r--r-- test/test_InfoExtractor.py 11
-rw-r--r-- youtube_dl/extractor/common.py 18
2 files changed, 19 insertions, 10 deletions
diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py
index 8a372d2c9..7b31d5198 100644
--- a/test/test_InfoExtractor.py
+++ b/test/test_InfoExtractor.py
@@ -493,10 +493,21 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
         _TEST_CASES = [
             (
                 # https://github.com/rg3/youtube-dl/issues/13919
+                # Also tests duplicate representation ids, see
+                # https://github.com/rg3/youtube-dl/issues/15111
                 'float_duration',
                 'http://unknown/manifest.mpd',
                 [{
                     'manifest_url': 'http://unknown/manifest.mpd',
+                    'ext': 'm4a',
+                    'format_id': '318597',
+                    'format_note': 'DASH audio',
+                    'protocol': 'http_dash_segments',
+                    'acodec': 'mp4a.40.2',
+                    'vcodec': 'none',
+                    'tbr': 61.587,
+                }, {
+                    'manifest_url': 'http://unknown/manifest.mpd',
                     'ext': 'mp4',
                     'format_id': '318597',
                     'format_note': 'DASH video',
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 3b79b8cb4..35d427eec 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -2007,16 +2007,14 @@ class InfoExtractor(object):
                                     f['url'] = initialization_url
                                 f['fragments'].append({location_key(initialization_url): initialization_url})
                             f['fragments'].extend(representation_ms_info['fragments'])
-                        try:
-                            existing_format = next(
-                                fo for fo in formats
-                                if fo['format_id'] == representation_id)
-                        except StopIteration:
-                            full_info = formats_dict.get(representation_id, {}).copy()
-                            full_info.update(f)
-                            formats.append(full_info)
-                        else:
-                            existing_format.update(f)
+                        # According to [1, 5.3.5.2, Table 7, page 35] @id of Representation
+                        # is not necessarily unique within a Period thus formats with
+                        # the same `format_id` are quite possible. There are numerous examples
+                        # of such manifests (see https://github.com/rg3/youtube-dl/issues/15111,
+                        # https://github.com/rg3/youtube-dl/issues/13919)
+                        full_info = formats_dict.get(representation_id, {}).copy()
+                        full_info.update(f)
+                        formats.append(full_info)
                     else:
                         self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
         return formats