Determine merge container better (See desc) (#1482)

* Determine the container early. Closes #4069
* Use codecs instead of just file extensions
* Obey `--prefer-free-formats`
* Allow fallbacks in `--merge-output-format`

Authored by: pukkandan, selfisekai
This commit is contained in:
Lauren N. Liberda 2022-08-04 02:42:12 +02:00 committed by GitHub
parent fe0918bb65
commit fc61aff41b
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 84 additions and 39 deletions

View file

@ -858,10 +858,10 @@ ## Video Format Options:
downloadable downloadable
-F, --list-formats List available formats of each video. -F, --list-formats List available formats of each video.
Simulate unless --no-simulate is used Simulate unless --no-simulate is used
--merge-output-format FORMAT Container to use when merging formats (e.g. bestvideo+bestaudio). Ignored if no merge is required. (currently supported: avi, flv, mkv, mov, mp4, webm)
--merge-output-format FORMAT Containers that may be used when merging formats, separated by "/" (Eg: "mp4/mkv"). Ignored if no merge is required. (currently supported: avi, flv, mkv, mov, mp4, webm)
## Subtitle Options: ## Subtitle Options:
--write-subs Write subtitle file --write-subs Write subtitle file

View file

@ -53,6 +53,7 @@
fix_xml_ampersands, fix_xml_ampersands,
float_or_none, float_or_none,
format_bytes, format_bytes,
get_compatible_ext,
get_element_by_attribute, get_element_by_attribute,
get_element_by_class, get_element_by_class,
get_element_html_by_attribute, get_element_html_by_attribute,
@ -1843,6 +1844,31 @@ def test_determine_file_encoding(self):
self.assertEqual(determine_file_encoding('# coding: utf-32-be'.encode('utf-32-be')), ('utf-32-be', 0)) self.assertEqual(determine_file_encoding('# coding: utf-32-be'.encode('utf-32-be')), ('utf-32-be', 0))
self.assertEqual(determine_file_encoding('# coding: utf-16-le'.encode('utf-16-le')), ('utf-16-le', 0)) self.assertEqual(determine_file_encoding('# coding: utf-16-le'.encode('utf-16-le')), ('utf-16-le', 0))
def test_get_compatible_ext(self):
    """Check the container chosen by get_compatible_ext for a range of stream combinations."""
    # Each entry is (keyword arguments, expected extension); entries are checked in order
    # and the first mismatch fails the test.
    cases = (
        # Unknown codecs: the decision falls back to the file extensions
        ({'vcodecs': [None], 'acodecs': [None, None], 'vexts': ['mp4'], 'aexts': ['m4a', 'm4a']}, 'mkv'),
        ({'vcodecs': [None], 'acodecs': [None], 'vexts': ['flv'], 'aexts': ['flv']}, 'flv'),
        ({'vcodecs': [None], 'acodecs': [None], 'vexts': ['mp4'], 'aexts': ['m4a']}, 'mp4'),
        ({'vcodecs': [None], 'acodecs': [None], 'vexts': ['mp4'], 'aexts': ['webm']}, 'mkv'),
        ({'vcodecs': [None], 'acodecs': [None], 'vexts': ['webm'], 'aexts': ['m4a']}, 'mkv'),
        ({'vcodecs': [None], 'acodecs': [None], 'vexts': ['webm'], 'aexts': ['webm']}, 'webm'),
        # Known codecs take precedence over the extensions
        ({'vcodecs': ['h264'], 'acodecs': ['mp4a'], 'vexts': ['mov'], 'aexts': ['m4a']}, 'mp4'),
        ({'vcodecs': ['av01.0.12M.08'], 'acodecs': ['opus'], 'vexts': ['mp4'], 'aexts': ['webm']}, 'webm'),
        # Explicit preferences restrict the candidates
        ({'vcodecs': ['vp9'], 'acodecs': ['opus'], 'vexts': ['webm'], 'aexts': ['webm'],
          'preferences': ['flv', 'mp4']}, 'mp4'),
        ({'vcodecs': ['av1'], 'acodecs': ['mp4a'], 'vexts': ['webm'], 'aexts': ['m4a'],
          'preferences': ('webm', 'mkv')}, 'mkv'),
    )
    for kwargs, expected_ext in cases:
        self.assertEqual(get_compatible_ext(**kwargs), expected_ext)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

View file

@ -102,6 +102,7 @@
format_decimal_suffix, format_decimal_suffix,
format_field, format_field,
formatSeconds, formatSeconds,
get_compatible_ext,
get_domain, get_domain,
int_or_none, int_or_none,
iri_to_uri, iri_to_uri,
@ -134,6 +135,7 @@
timetuple_from_msec, timetuple_from_msec,
to_high_limit_path, to_high_limit_path,
traverse_obj, traverse_obj,
try_call,
try_get, try_get,
url_basename, url_basename,
variadic, variadic,
@ -372,7 +374,7 @@ class YoutubeDL:
Progress hooks are guaranteed to be called at least twice Progress hooks are guaranteed to be called at least twice
(with status "started" and "finished") if the processing is successful. (with status "started" and "finished") if the processing is successful.
merge_output_format: Extension to use when merging formats. merge_output_format: "/" separated list of extensions to use when merging formats.
final_ext: Expected final extension; used to detect when the file was final_ext: Expected final extension; used to detect when the file was
already downloaded and converted already downloaded and converted
fixup: Automatically correct known faults of the file. fixup: Automatically correct known faults of the file.
@ -2088,14 +2090,13 @@ def _merge(formats_pair):
the_only_video = video_fmts[0] if len(video_fmts) == 1 else None the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
output_ext = self.params.get('merge_output_format') output_ext = get_compatible_ext(
if not output_ext: vcodecs=[f.get('vcodec') for f in video_fmts],
if the_only_video: acodecs=[f.get('acodec') for f in audio_fmts],
output_ext = the_only_video['ext'] vexts=[f['ext'] for f in video_fmts],
elif the_only_audio and not video_fmts: aexts=[f['ext'] for f in audio_fmts],
output_ext = the_only_audio['ext'] preferences=(try_call(lambda: self.params['merge_output_format'].split('/'))
else: or self.params.get('prefer_free_formats') and ('webm', 'mkv')))
output_ext = 'mkv'
filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info)) filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))
@ -3067,33 +3068,9 @@ def existing_video_file(*filepaths):
return return
if info_dict.get('requested_formats') is not None: if info_dict.get('requested_formats') is not None:
def compatible_formats(formats):
    """Return True if all of `formats` can share one container, judged by extension only.

    A format counts as video unless its 'vcodec' is 'none', and as audio unless
    its 'acodec' is 'none'. More than two video or more than two audio formats
    is always reported as incompatible.
    """
    # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
    video_count = sum(1 for fmt in formats if fmt.get('vcodec') != 'none')
    audio_count = sum(1 for fmt in formats if fmt.get('acodec') != 'none')
    if max(video_count, audio_count) > 2:
        return False

    # Extension families whose members may be merged with each other
    extension_families = (
        {'mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma'},
        {'webm'},
    )
    used_exts = {fmt.get('ext') for fmt in formats}
    # TODO: Check acodec/vcodec
    return any(family.issuperset(used_exts) for family in extension_families)
requested_formats = info_dict['requested_formats'] requested_formats = info_dict['requested_formats']
old_ext = info_dict['ext'] old_ext = info_dict['ext']
if self.params.get('merge_output_format') is None: if self.params.get('merge_output_format') is None:
if not compatible_formats(requested_formats):
info_dict['ext'] = 'mkv'
self.report_warning(
'Requested formats are incompatible for merge and will be merged into mkv')
if (info_dict['ext'] == 'webm' if (info_dict['ext'] == 'webm'
and info_dict.get('thumbnails') and info_dict.get('thumbnails')
# check with type instead of pp_key, __name__, or isinstance # check with type instead of pp_key, __name__, or isinstance

View file

@ -228,7 +228,8 @@ def validate_minmax(min_val, max_val, min_name, max_name=None):
validate_regex('format sorting', f, InfoExtractor.FormatSort.regex) validate_regex('format sorting', f, InfoExtractor.FormatSort.regex)
# Postprocessor formats # Postprocessor formats
validate_in('merge output format', opts.merge_output_format, FFmpegMergerPP.SUPPORTED_EXTS) validate_regex('merge output format', opts.merge_output_format,
r'({0})(/({0}))*'.format('|'.join(map(re.escape, FFmpegMergerPP.SUPPORTED_EXTS))))
validate_regex('audio format', opts.audioformat, FFmpegExtractAudioPP.FORMAT_RE) validate_regex('audio format', opts.audioformat, FFmpegExtractAudioPP.FORMAT_RE)
validate_in('subtitle format', opts.convertsubtitles, FFmpegSubtitlesConvertorPP.SUPPORTED_EXTS) validate_in('subtitle format', opts.convertsubtitles, FFmpegSubtitlesConvertorPP.SUPPORTED_EXTS)
validate_regex('thumbnail format', opts.convertthumbnails, FFmpegThumbnailsConvertorPP.FORMAT_RE) validate_regex('thumbnail format', opts.convertthumbnails, FFmpegThumbnailsConvertorPP.FORMAT_RE)

View file

@ -782,7 +782,8 @@ def _alias_callback(option, opt_str, value, parser, opts, nargs):
'--merge-output-format', '--merge-output-format',
action='store', dest='merge_output_format', metavar='FORMAT', default=None, action='store', dest='merge_output_format', metavar='FORMAT', default=None,
help=( help=(
'Container to use when merging formats (e.g. bestvideo+bestaudio). Ignored if no merge is required. ' 'Containers that may be used when merging formats, separated by "/" (Eg: "mp4/mkv"). '
'Ignored if no merge is required. '
f'(currently supported: {", ".join(sorted(FFmpegMergerPP.SUPPORTED_EXTS))})')) f'(currently supported: {", ".join(sorted(FFmpegMergerPP.SUPPORTED_EXTS))})'))
video_format.add_option( video_format.add_option(
'--allow-unplayable-formats', '--allow-unplayable-formats',

View file

@ -3456,6 +3456,46 @@ def parse_codecs(codecs_str):
return {} return {}
def get_compatible_ext(*, vcodecs, acodecs, vexts, aexts, preferences=None):
    """Choose the container extension to use when merging the given streams.

    @param vcodecs      Codec name (or None if unknown) of each video stream
    @param acodecs      Codec name (or None if unknown) of each audio stream
    @param vexts        File extension of each video stream; same length as vcodecs
    @param aexts        File extension of each audio stream; same length as acodecs
    @param preferences  Optional sequence of allowed extensions, most preferred first.
                        When empty or None, any extension may be returned
    @returns            The first candidate extension judged compatible, first by codec
                        and then by file extension. If none matches, 'mkv' when it is
                        allowed, otherwise the last entry of preferences

    Only the first video codec and first audio codec are inspected. Either list may be
    empty (e.g. an audio-only merge); the missing codec is then treated as unknown.
    """
    assert len(vcodecs) == len(vexts) and len(acodecs) == len(aexts)

    allow_mkv = not preferences or 'mkv' in preferences

    if allow_mkv and max(len(acodecs), len(vcodecs)) > 1:
        return 'mkv'  # TODO: any other format allows this?

    # TODO: All codecs supported by parse_codecs isn't handled here
    COMPATIBLE_CODECS = {
        'mp4': {
            'av1', 'hevc', 'avc1', 'mp4a',  # fourcc (m3u8, mpd)
            'h264', 'aacl',  # Set in ISM
        },
        'webm': {
            'av1', 'vp9', 'vp8', 'opus', 'vrbs',
            'vp9x', 'vp8x',  # in the webm spec
        },
    }

    def sanitize_codec(codecs):
        # Reduce the first codec to its family name: drop everything after the first '.'
        # and remove zeros, so that e.g. 'av01.0.12M.08' becomes 'av1'.
        # An empty list or a non-string entry yields None instead of raising.
        try:
            return codecs[0].split('.')[0].replace('0', '')
        except (AttributeError, IndexError, TypeError):
            return None

    vcodec, acodec = sanitize_codec(vcodecs), sanitize_codec(acodecs)

    # First pass: decide by codec. An unknown codec (None) never matches a codec set
    for ext in preferences or COMPATIBLE_CODECS.keys():
        codec_set = COMPATIBLE_CODECS.get(ext, set())
        if ext == 'mkv' or codec_set.issuperset((vcodec, acodec)):
            return ext

    # Second pass: decide by the extensions of the source files
    COMPATIBLE_EXTS = (
        {'mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma', 'mov'},
        {'webm'},
    )
    for ext in preferences or vexts:
        current_exts = {ext, *vexts, *aexts}
        if ext == 'mkv' or current_exts == {ext} or any(
                ext_sets.issuperset(current_exts) for ext_sets in COMPATIBLE_EXTS):
            return ext
    return 'mkv' if allow_mkv else preferences[-1]
def urlhandle_detect_ext(url_handle): def urlhandle_detect_ext(url_handle):
getheader = url_handle.headers.get getheader = url_handle.headers.get