From fc61aff41beae0063b306dd9d74cc4ff27f0eff7 Mon Sep 17 00:00:00 2001 From: "Lauren N. Liberda" Date: Thu, 4 Aug 2022 02:42:12 +0200 Subject: [PATCH] Determine merge container better (See desc) (#1482) * Determine the container early. Closes #4069 * Use codecs instead of just file extensions * Obey `--prefer-free-formats` * Allow fallbacks in `--merge-output` Authored by: pukkandan, selfisekai --- README.md | 8 ++++---- test/test_utils.py | 26 ++++++++++++++++++++++++++ yt_dlp/YoutubeDL.py | 43 ++++++++++--------------------------------- yt_dlp/__init__.py | 3 ++- yt_dlp/options.py | 3 ++- yt_dlp/utils.py | 40 ++++++++++++++++++++++++++++++++++++++++ 6 files changed, 84 insertions(+), 39 deletions(-) diff --git a/README.md b/README.md index 9fac6048e..4e806e14c 100644 --- a/README.md +++ b/README.md @@ -858,10 +858,10 @@ ## Video Format Options: downloadable -F, --list-formats List available formats of each video. Simulate unless --no-simulate is used - --merge-output-format FORMAT Container to use when merging formats (e.g. - bestvideo+bestaudio). Ignored if no merge is - required. (currently supported: avi, flv, - mkv, mov, mp4, webm) + --merge-output-format FORMAT Containers that may be used when merging + formats, separated by "/" (Eg: "mp4/mkv"). + Ignored if no merge is required. 
def test_get_compatible_ext(self):
    """Exercise get_compatible_ext with a table of stream combinations."""
    # Each entry pairs the keyword arguments with the container extension
    # that get_compatible_ext is expected to return for them.
    cases = (
        # More streams of one kind than a single one
        (dict(vcodecs=[None], acodecs=[None, None], vexts=['mp4'], aexts=['m4a', 'm4a']), 'mkv'),
        (dict(vcodecs=[None], acodecs=[None], vexts=['flv'], aexts=['flv']), 'flv'),

        # Codecs unknown: only the extensions are available
        (dict(vcodecs=[None], acodecs=[None], vexts=['mp4'], aexts=['m4a']), 'mp4'),
        (dict(vcodecs=[None], acodecs=[None], vexts=['mp4'], aexts=['webm']), 'mkv'),
        (dict(vcodecs=[None], acodecs=[None], vexts=['webm'], aexts=['m4a']), 'mkv'),
        (dict(vcodecs=[None], acodecs=[None], vexts=['webm'], aexts=['webm']), 'webm'),

        # Codecs known
        (dict(vcodecs=['h264'], acodecs=['mp4a'], vexts=['mov'], aexts=['m4a']), 'mp4'),
        (dict(vcodecs=['av01.0.12M.08'], acodecs=['opus'], vexts=['mp4'], aexts=['webm']), 'webm'),

        # Explicit preferences
        (dict(vcodecs=['vp9'], acodecs=['opus'], vexts=['webm'], aexts=['webm'],
              preferences=['flv', 'mp4']), 'mp4'),
        (dict(vcodecs=['av1'], acodecs=['mp4a'], vexts=['webm'], aexts=['m4a'],
              preferences=('webm', 'mkv')), 'mkv'),
    )
    for kwargs, expected in cases:
        self.assertEqual(get_compatible_ext(**kwargs), expected)
a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 0d7564088..25473611b 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -102,6 +102,7 @@ format_decimal_suffix, format_field, formatSeconds, + get_compatible_ext, get_domain, int_or_none, iri_to_uri, @@ -134,6 +135,7 @@ timetuple_from_msec, to_high_limit_path, traverse_obj, + try_call, try_get, url_basename, variadic, @@ -372,7 +374,7 @@ class YoutubeDL: Progress hooks are guaranteed to be called at least twice (with status "started" and "finished") if the processing is successful. - merge_output_format: Extension to use when merging formats. + merge_output_format: "/" separated list of extensions to use when merging formats. final_ext: Expected final extension; used to detect when the file was already downloaded and converted fixup: Automatically correct known faults of the file. @@ -2088,14 +2090,13 @@ def _merge(formats_pair): the_only_video = video_fmts[0] if len(video_fmts) == 1 else None the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None - output_ext = self.params.get('merge_output_format') - if not output_ext: - if the_only_video: - output_ext = the_only_video['ext'] - elif the_only_audio and not video_fmts: - output_ext = the_only_audio['ext'] - else: - output_ext = 'mkv' + output_ext = get_compatible_ext( + vcodecs=[f.get('vcodec') for f in video_fmts], + acodecs=[f.get('acodec') for f in audio_fmts], + vexts=[f['ext'] for f in video_fmts], + aexts=[f['ext'] for f in audio_fmts], + preferences=(try_call(lambda: self.params['merge_output_format'].split('/')) + or self.params.get('prefer_free_formats') and ('webm', 'mkv'))) filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info)) @@ -3067,33 +3068,9 @@ def existing_video_file(*filepaths): return if info_dict.get('requested_formats') is not None: - - def compatible_formats(formats): - # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them. 
- video_formats = [format for format in formats if format.get('vcodec') != 'none'] - audio_formats = [format for format in formats if format.get('acodec') != 'none'] - if len(video_formats) > 2 or len(audio_formats) > 2: - return False - - # Check extension - exts = {format.get('ext') for format in formats} - COMPATIBLE_EXTS = ( - {'mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma'}, - {'webm'}, - ) - for ext_sets in COMPATIBLE_EXTS: - if ext_sets.issuperset(exts): - return True - # TODO: Check acodec/vcodec - return False - requested_formats = info_dict['requested_formats'] old_ext = info_dict['ext'] if self.params.get('merge_output_format') is None: - if not compatible_formats(requested_formats): - info_dict['ext'] = 'mkv' - self.report_warning( - 'Requested formats are incompatible for merge and will be merged into mkv') if (info_dict['ext'] == 'webm' and info_dict.get('thumbnails') # check with type instead of pp_key, __name__, or isinstance diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index 4024b6ba1..317dd2623 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -228,7 +228,8 @@ def validate_minmax(min_val, max_val, min_name, max_name=None): validate_regex('format sorting', f, InfoExtractor.FormatSort.regex) # Postprocessor formats - validate_in('merge output format', opts.merge_output_format, FFmpegMergerPP.SUPPORTED_EXTS) + validate_regex('merge output format', opts.merge_output_format, + r'({0})(/({0}))*'.format('|'.join(map(re.escape, FFmpegMergerPP.SUPPORTED_EXTS)))) validate_regex('audio format', opts.audioformat, FFmpegExtractAudioPP.FORMAT_RE) validate_in('subtitle format', opts.convertsubtitles, FFmpegSubtitlesConvertorPP.SUPPORTED_EXTS) validate_regex('thumbnail format', opts.convertthumbnails, FFmpegThumbnailsConvertorPP.FORMAT_RE) diff --git a/yt_dlp/options.py b/yt_dlp/options.py index 236cc714b..b70f5798e 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -782,7 +782,8 @@ def _alias_callback(option, opt_str, 
def get_compatible_ext(*, vcodecs, acodecs, vexts, aexts, preferences=None):
    """Choose the container extension to merge the given streams into.

    @param vcodecs      Codec strings of the video streams (entries may be None)
    @param acodecs      Codec strings of the audio streams (entries may be None)
    @param vexts        File extensions of the video streams, parallel to vcodecs
    @param aexts        File extensions of the audio streams, parallel to acodecs
    @param preferences  Optional ordered sequence of acceptable extensions.
                        Any falsy value means "no preference".
    @returns            The chosen extension. Selection order:
                        1. 'mkv' if it is allowed and either kind has more
                           than one stream
                        2. the first candidate whose codec table contains both
                           the first video and the first audio codec
                        3. the first candidate compatible with all the stream
                           extensions
                        4. 'mkv' if allowed, else the last preference
    """
    assert len(vcodecs) == len(vexts) and len(acodecs) == len(aexts)

    allow_mkv = not preferences or 'mkv' in preferences

    if allow_mkv and max(len(acodecs), len(vcodecs)) > 1:
        return 'mkv'  # TODO: any other format allows this?

    # TODO: All codecs supported by parse_codecs isn't handled here
    COMPATIBLE_CODECS = {
        'mp4': {
            'av1', 'hevc', 'avc1', 'mp4a',  # fourcc (m3u8, mpd)
            'h264', 'aacl',  # Set in ISM
        },
        'webm': {
            'av1', 'vp9', 'vp8', 'opus', 'vrbs',
            'vp9x', 'vp8x',  # in the webm spec
        },
    }

    def sanitize_codec(codecs):
        # Only the first stream of each kind is inspected. An empty list
        # (audio-only or video-only merge) or a non-string codec yields None
        # so that the codec check simply fails instead of raising.
        codec = codecs[0] if codecs else None
        if not isinstance(codec, str):
            return None
        # "av01.0.12M.08" -> "av1"; lower-cased so that upper-case fourccs
        # (e.g. "H264", "AACL") match the table above
        return codec.split('.')[0].replace('0', '').lower()

    vcodec, acodec = sanitize_codec(vcodecs), sanitize_codec(acodecs)

    for ext in preferences or COMPATIBLE_CODECS:
        codec_set = COMPATIBLE_CODECS.get(ext, set())
        if ext == 'mkv' or codec_set.issuperset((vcodec, acodec)):
            return ext

    # Codecs were inconclusive; fall back to comparing file extensions
    COMPATIBLE_EXTS = (
        {'mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma', 'mov'},
        {'webm'},
    )
    for ext in preferences or vexts:
        current_exts = {ext, *vexts, *aexts}
        if ext == 'mkv' or current_exts == {ext} or any(
                ext_sets.issuperset(current_exts) for ext_sets in COMPATIBLE_EXTS):
            return ext
    return 'mkv' if allow_mkv else preferences[-1]