From a439a3a45ca884956cacc104680e0a32aa3faba5 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Thu, 15 Apr 2021 18:01:16 +0530 Subject: [PATCH] Improve output template (see desc) * Objects can be traversed like `%(field.key1.key2)s` * A number can be added to the field as `%(field+n)s` * Deprecates `--autonumber-start` --- README.md | 19 +++++++++++--- test/test_YoutubeDL.py | 9 +++++++ yt_dlp/YoutubeDL.py | 59 ++++++++++++++++++++++++++---------------- yt_dlp/options.py | 2 +- yt_dlp/utils.py | 25 ++++++++++++------ 5 files changed, 78 insertions(+), 36 deletions(-) diff --git a/README.md b/README.md index b4e60eff6..9442c1ba7 100644 --- a/README.md +++ b/README.md @@ -395,8 +395,6 @@ ## Filesystem Options: --output-na-placeholder TEXT Placeholder value for unavailable meta fields in output filename template (default: "NA") - --autonumber-start NUMBER Specify the start value for %(autonumber)s - (default is 1) --restrict-filenames Restrict filenames to only ASCII characters, and avoid "&" and spaces in filenames @@ -833,7 +831,19 @@ # OUTPUT TEMPLATE **tl;dr:** [navigate me to examples](#output-template-examples). -The basic usage of `-o` is not to set any template arguments when downloading a single file, like in `yt-dlp -o funny_video.flv "https://some/video"` (hard-coding file extension like this is not recommended). However, it may contain special sequences that will be replaced when downloading each video. The special sequences may be formatted according to [python string formatting operations](https://docs.python.org/2/library/stdtypes.html#string-formatting). For example, `%(NAME)s` or `%(NAME)05d`. To clarify, that is a percent symbol followed by a name in parentheses, followed by formatting operations. Date/time fields can also be formatted according to [strftime formatting](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes) by specifying it inside the parantheses separated from the field name using a `>`. 
For example, `%(duration>%H-%M-%S)s`. +The simplest usage of `-o` is not to set any template arguments when downloading a single file, like in `yt-dlp -o funny_video.flv "https://some/video"` (hard-coding file extension like this is _not_ recommended and could break certain postprocessing). + +It may however also contain special sequences that will be replaced when downloading each video. The special sequences may be formatted according to [python string formatting operations](https://docs.python.org/2/library/stdtypes.html#string-formatting). For example, `%(NAME)s` or `%(NAME)05d`. To clarify, that is a percent symbol followed by a name in parentheses, followed by formatting operations. + +The field names themselves (the part inside the parentheses) can also have some special formatting: +1. **Date/time Formatting**: Date/time fields can be formatted according to [strftime formatting](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes) by specifying it separated from the field name using a `>`. Eg: `%(duration>%H-%M-%S)s` or `%(upload_date>%Y-%m-%d)s` +2. **Offset numbers**: Numeric fields can have an initial offset specified by using a `+` separator. Eg: `%(playlist_index+10)03d`. This can also be used in conjunction with the datetime formatting. Eg: `%(epoch+-3600>%H-%M-%S)s` +3. **Object traversal**: The dictionaries and lists available in metadata can be traversed by using a `.` (dot) separator. Eg: `%(tags.0)s` or `%(subtitles.en.-1.ext)s`. Note that the fields that become available using this method are not listed below. Use `-j` to see such fields. + +To summarize, the general syntax for a field is: +``` +%(name[.keys][+offset][>strf])[flags][width][.precision][length]type +``` Additionally, you can set different output templates for the various metadata files separately from the general output template by specifying the type of file followed by the template separated by a colon `:`. 
The different filetypes supported are `subtitle`, `thumbnail`, `description`, `annotation`, `infojson`, `pl_description`, `pl_infojson`, `chapter`. For example, `-o '%(title)s.%(ext)s' -o 'thumbnail:%(title)s\%(title)s.%(ext)s'` will put the thumbnails in a folder with the same name as the video. @@ -992,7 +1002,7 @@ # FORMAT SELECTION **tl;dr:** [navigate me to examples](#format-selection-examples). -The simplest case is requesting a specific format, for example with `-f 22` you can download the format with format code equal to 22. You can get the list of available format codes for particular video using `--list-formats` or `-F`. Note that these format codes are extractor specific. +The simplest case is requesting a specific format, for example with `-f 22` you can download the format with format code equal to 22. You can get the list of available format codes for particular video using `--list-formats` or `-F`. Note that these format codes are extractor specific. You can also use a file extension (currently `3gp`, `aac`, `flv`, `m4a`, `mp3`, `mp4`, `ogg`, `wav`, `webm` are supported) to download the best quality format of a particular file extension served as a single file, e.g. `-f webm` will download the best quality format with the `webm` extension served as a single file. @@ -1263,6 +1273,7 @@ # DEPRECATED OPTIONS --all-formats -f all --all-subs --sub-langs all --write-subs --autonumber-size NUMBER Use string formatting. 
Eg: %(autonumber)03d + --autonumber-start NUMBER Use internal field formatting like %(autonumber+NUMBER)s --metadata-from-title FORMAT --parse-metadata "%(title)s:FORMAT" --prefer-avconv avconv is no longer officially supported (Alias: --no-prefer-ffmpeg) --prefer-ffmpeg Default (Alias: --no-prefer-avconv) diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index 405db9058..5454dcf5e 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -655,6 +655,8 @@ def test_prepare_filename(self): 'height': 1080, 'title1': '$PATH', 'title2': '%PATH%', + 'timestamp': 1618488000, + 'formats': [{'id': 'id1'}, {'id': 'id2'}] } def fname(templ, na_placeholder='NA'): @@ -671,6 +673,7 @@ def fname(templ, na_placeholder='NA'): # Or by provided placeholder self.assertEqual(fname(NA_TEST_OUTTMPL, na_placeholder='none'), 'none-none-1234.mp4') self.assertEqual(fname(NA_TEST_OUTTMPL, na_placeholder=''), '--1234.mp4') + self.assertEqual(fname('%(height)s.%(ext)s'), '1080.mp4') self.assertEqual(fname('%(height)d.%(ext)s'), '1080.mp4') self.assertEqual(fname('%(height)6d.%(ext)s'), ' 1080.mp4') self.assertEqual(fname('%(height)-6d.%(ext)s'), '1080 .mp4') @@ -688,6 +691,12 @@ def fname(templ, na_placeholder='NA'): self.assertEqual(fname('%%(width)06d.%(ext)s'), '%(width)06d.mp4') self.assertEqual(fname('Hello %(title1)s'), 'Hello $PATH') self.assertEqual(fname('Hello %(title2)s'), 'Hello %PATH%') + self.assertEqual(fname('%(timestamp+-1000>%H-%M-%S)s'), '11-43-20') + self.assertEqual(fname('%(id+1)05d'), '01235') + self.assertEqual(fname('%(width+100)05d'), 'NA') + self.assertEqual(fname('%(formats.0)s').replace("u", ""), "{'id' - 'id1'}") + self.assertEqual(fname('%(formats.-1.id)s'), 'id2') + self.assertEqual(fname('%(formats.2)s'), 'NA') def test_format_note(self): ydl = YoutubeDL() diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index a3d7968ff..5aea1b807 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -99,6 +99,7 @@ strftime_or_none, 
subtitles_filename, to_high_limit_path, + traverse_dict, UnavailableVideoError, url_basename, version_tuple, @@ -796,6 +797,7 @@ def parse_outtmpl(self): def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None): """ Make the template and info_dict suitable for substitution (outtmpl % info_dict)""" template_dict = dict(info_dict) + na = self.params.get('outtmpl_na_placeholder', 'NA') # duration_string template_dict['duration_string'] = ( # %(duration>%H-%M-%S)s is wrong if duration > 24hrs @@ -821,18 +823,10 @@ def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None): elif template_dict.get('width'): template_dict['resolution'] = '%dx?' % template_dict['width'] - if sanitize is None: - sanitize = lambda k, v: v - template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v)) - for k, v in template_dict.items() - if v is not None and not isinstance(v, (list, tuple, dict))) - na = self.params.get('outtmpl_na_placeholder', 'NA') - template_dict = collections.defaultdict(lambda: na, template_dict) - # For fields playlist_index and autonumber convert all occurrences # of %(field)s to %(field)0Nd for backward compatibility field_size_compat_map = { - 'playlist_index': len(str(template_dict['n_entries'])), + 'playlist_index': len(str(template_dict.get('n_entries', na))), 'autonumber': autonumber_size, } FIELD_SIZE_COMPAT_RE = r'(?autonumber|playlist_index)\)s' @@ -844,32 +838,51 @@ def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None): outtmpl) numeric_fields = list(self._NUMERIC_FIELDS) + if sanitize is None: + sanitize = lambda k, v: v - # Format date - FORMAT_DATE_RE = FORMAT_RE.format(r'(?P(?P\w+)>(?P.+?))') - for mobj in re.finditer(FORMAT_DATE_RE, outtmpl): - conv_type, field, frmt, key = mobj.group('type', 'field', 'format', 'key') - if key in template_dict: - continue - value = strftime_or_none(template_dict.get(field), frmt, na) - if conv_type in 'crs': # string - value = sanitize(field, value) - else: # number - 
numeric_fields.append(key) - value = float_or_none(value, default=None) + # Internal Formatting = name.key1.key2+number>strf + INTERNAL_FORMAT_RE = FORMAT_RE.format( + r'''(?P + (?P\w+(?:\.[-\w]+)*) + (?:\+(?P-?\d+(?:\.\d+)?))? + (?:>(?P.+?))? + )''') + for mobj in re.finditer(INTERNAL_FORMAT_RE, outtmpl): + mobj = mobj.groupdict() + # Object traversal + fields = mobj['fields'].split('.') + final_key = mobj['final_key'] + value = traverse_dict(template_dict, fields) + # Offset the value + if mobj['add']: + value = float_or_none(value) + if value is not None: + value = value + float(mobj['add']) + # Datetime formatting + if mobj['strf_format']: + value = strftime_or_none(value, mobj['strf_format']) + if mobj['type'] in 'crs' and value is not None: # string + value = sanitize('%{}'.format(mobj['type']) % fields[-1], value) + else: # numeric + numeric_fields.append(final_key) + value = float_or_none(value) if value is not None: - template_dict[key] = value + template_dict[final_key] = value # Missing numeric fields used together with integer presentation types # in format specification will break the argument substitution since # string NA placeholder is returned for missing fields. We will patch # output template for missing fields to meet string presentation type. 
for numeric_field in numeric_fields: - if numeric_field not in template_dict: + if template_dict.get(numeric_field) is None: outtmpl = re.sub( FORMAT_RE.format(re.escape(numeric_field)), r'%({0})s'.format(numeric_field), outtmpl) + template_dict = collections.defaultdict(lambda: na, ( + (k, v if isinstance(v, compat_numeric_types) else sanitize(k, v)) + for k, v in template_dict.items() if v is not None)) return outtmpl, template_dict def _prepare_filename(self, info_dict, tmpl_type='default'): diff --git a/yt_dlp/options.py b/yt_dlp/options.py index c21c453ed..60ae6c7fe 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -908,7 +908,7 @@ def _dict_from_multiple_values_options_callback( filesystem.add_option( '--autonumber-start', dest='autonumber_start', metavar='NUMBER', default=1, type=int, - help='Specify the start value for %(autonumber)s (default is %default)') + help=optparse.SUPPRESS_HELP) filesystem.add_option( '--restrict-filenames', action='store_true', dest='restrictfilenames', default=False, diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 3e566285f..461f64db0 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -6092,11 +6092,20 @@ def load_plugins(name, type, namespace): def traverse_dict(dictn, keys, casesense=True): - if not isinstance(dictn, dict): - return None - first_key = keys[0] - if not casesense: - dictn = {key.lower(): val for key, val in dictn.items()} - first_key = first_key.lower() - value = dictn.get(first_key, None) - return value if len(keys) < 2 else traverse_dict(value, keys[1:], casesense) + keys = list(keys)[::-1] + while keys: + key = keys.pop() + if isinstance(dictn, dict): + if not casesense: + dictn = {k.lower(): v for k, v in dictn.items()} + key = key.lower() + dictn = dictn.get(key) + elif isinstance(dictn, (list, tuple, compat_str)): + key, n = int_or_none(key), len(dictn) + if key is not None and -n <= key < n: + dictn = dictn[key] + else: + dictn = None + else: + return None + return dictn