[downloader/aria2c] Native progress for aria2c via RPC (#3724)

Authored by: Lesmiscore, pukkandan

Closes #2038
This commit is contained in:
Lesmiscore 2023-01-02 02:16:25 +09:00 committed by GitHub
parent 193fb150b7
commit 8c53322cda
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 119 additions and 8 deletions

View file

@ -153,6 +153,7 @@ ### Differences in default behavior
* When `--embed-subs` and `--write-subs` are used together, the subtitles are written to disk and also embedded in the media file. You can use just `--embed-subs` to embed the subs and automatically delete the separate file. See [#630 (comment)](https://github.com/yt-dlp/yt-dlp/issues/630#issuecomment-893659460) for more info. `--compat-options no-keep-subs` can be used to revert this
* `certifi` will be used for SSL root certificates, if installed. If you want to use system certificates (e.g. self-signed), use `--compat-options no-certifi`
* yt-dlp's sanitization of invalid characters in filenames is different/smarter than in youtube-dl. You can use `--compat-options filename-sanitization` to revert to youtube-dl's behavior
* yt-dlp tries to parse the external downloader outputs into the standard progress output if possible (Currently implemented: `aria2c`). You can use `--compat-options no-external-downloader-progress` to get the downloader output as-is
For ease of use, a few more compat options are available:
@ -160,7 +161,7 @@ ### Differences in default behavior
* `--compat-options youtube-dl`: Same as `--compat-options all,-multistreams`
* `--compat-options youtube-dlc`: Same as `--compat-options all,-no-live-chat,-no-youtube-channel-redirect`
* `--compat-options 2021`: Same as `--compat-options 2022,no-certifi,filename-sanitization,no-youtube-prefer-utc-upload-date`
* `--compat-options 2022`: Currently does nothing. Use this to enable all future compat options
* `--compat-options 2022`: Same as `--compat-options no-external-downloader-progress`. Use this to enable all future compat options
# INSTALLATION

View file

@ -1,9 +1,11 @@
import enum
import json
import os.path
import re
import subprocess
import sys
import time
import uuid
from .fragment import FragmentFD
from ..compat import functools
@ -20,8 +22,10 @@
determine_ext,
encodeArgument,
encodeFilename,
find_available_port,
handle_youtubedl_headers,
remove_end,
sanitized_Request,
traverse_obj,
)
@ -60,7 +64,6 @@ def real_download(self, filename, info_dict):
}
if filename != '-':
fsize = os.path.getsize(encodeFilename(tmpfilename))
self.to_screen(f'\r[{self.get_basename()}] Downloaded {fsize} bytes')
self.try_rename(tmpfilename, filename)
status.update({
'downloaded_bytes': fsize,
@ -129,8 +132,7 @@ def _call_downloader(self, tmpfilename, info_dict):
self._debug_cmd(cmd)
if 'fragments' not in info_dict:
_, stderr, returncode = Popen.run(
cmd, text=True, stderr=subprocess.PIPE if self._CAPTURE_STDERR else None)
_, stderr, returncode = self._call_process(cmd, info_dict)
if returncode and stderr:
self.to_stderr(stderr)
return returncode
@ -140,7 +142,7 @@ def _call_downloader(self, tmpfilename, info_dict):
retry_manager = RetryManager(self.params.get('fragment_retries'), self.report_retry,
frag_index=None, fatal=not skip_unavailable_fragments)
for retry in retry_manager:
_, stderr, returncode = Popen.run(cmd, text=True, stderr=subprocess.PIPE)
_, stderr, returncode = self._call_process(cmd, info_dict)
if not returncode:
break
# TODO: Decide whether to retry based on error code
@ -172,6 +174,9 @@ def _call_downloader(self, tmpfilename, info_dict):
self.try_remove(encodeFilename('%s.frag.urls' % tmpfilename))
return 0
def _call_process(self, cmd, info_dict):
return Popen.run(cmd, text=True, stderr=subprocess.PIPE)
class CurlFD(ExternalFD):
AVAILABLE_OPT = '-V'
@ -256,6 +261,14 @@ def supports_manifest(manifest):
def _aria2c_filename(fn):
return fn if os.path.isabs(fn) else f'.{os.path.sep}{fn}'
def _call_downloader(self, tmpfilename, info_dict):
if 'no-external-downloader-progress' not in self.params.get('compat_opts', []):
info_dict['__rpc'] = {
'port': find_available_port() or 19190,
'secret': str(uuid.uuid4()),
}
return super()._call_downloader(tmpfilename, info_dict)
def _make_cmd(self, tmpfilename, info_dict):
cmd = [self.exe, '-c',
'--console-log-level=warn', '--summary-interval=0', '--download-result=hide',
@ -276,6 +289,12 @@ def _make_cmd(self, tmpfilename, info_dict):
cmd += self._bool_option('--show-console-readout', 'noprogress', 'false', 'true', '=')
cmd += self._configuration_args()
if '__rpc' in info_dict:
cmd += [
'--enable-rpc',
f'--rpc-listen-port={info_dict["__rpc"]["port"]}',
f'--rpc-secret={info_dict["__rpc"]["secret"]}']
# aria2c strips out spaces from the beginning/end of filenames and paths.
# We work around this issue by adding a "./" to the beginning of the
# filename and relative path, and adding a "/" at the end of the path.
@ -304,6 +323,88 @@ def _make_cmd(self, tmpfilename, info_dict):
cmd += ['--', info_dict['url']]
return cmd
def aria2c_rpc(self, rpc_port, rpc_secret, method, params=()):
# Does not actually need to be UUID, just unique
sanitycheck = str(uuid.uuid4())
d = json.dumps({
'jsonrpc': '2.0',
'id': sanitycheck,
'method': method,
'params': [f'token:{rpc_secret}', *params],
}).encode('utf-8')
request = sanitized_Request(
f'http://localhost:{rpc_port}/jsonrpc',
data=d, headers={
'Content-Type': 'application/json',
'Content-Length': f'{len(d)}',
'Ytdl-request-proxy': '__noproxy__',
})
with self.ydl.urlopen(request) as r:
resp = json.load(r)
assert resp.get('id') == sanitycheck, 'Something went wrong with RPC server'
return resp['result']
def _call_process(self, cmd, info_dict):
if '__rpc' not in info_dict:
return super()._call_process(cmd, info_dict)
send_rpc = functools.partial(self.aria2c_rpc, info_dict['__rpc']['port'], info_dict['__rpc']['secret'])
started = time.time()
fragmented = 'fragments' in info_dict
frag_count = len(info_dict['fragments']) if fragmented else 1
status = {
'filename': info_dict.get('_filename'),
'status': 'downloading',
'elapsed': 0,
'downloaded_bytes': 0,
'fragment_count': frag_count if fragmented else None,
'fragment_index': 0 if fragmented else None,
}
self._hook_progress(status, info_dict)
def get_stat(key, *obj, average=False):
val = tuple(filter(None, map(float, traverse_obj(obj, (..., ..., key))))) or [0]
return sum(val) / (len(val) if average else 1)
with Popen(cmd, text=True, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE) as p:
# Add a small sleep so that RPC client can receive response,
# or the connection stalls infinitely
time.sleep(0.2)
retval = p.poll()
while retval is None:
# We don't use tellStatus as we won't know the GID without reading stdout
# Ref: https://aria2.github.io/manual/en/html/aria2c.html#aria2.tellActive
active = send_rpc('aria2.tellActive')
completed = send_rpc('aria2.tellStopped', [0, frag_count])
downloaded = get_stat('totalLength', completed) + get_stat('completedLength', active)
speed = get_stat('downloadSpeed', active)
total = frag_count * get_stat('totalLength', active, completed, average=True)
if total < downloaded:
total = None
status.update({
'downloaded_bytes': int(downloaded),
'speed': speed,
'total_bytes': None if fragmented else total,
'total_bytes_estimate': total,
'eta': (total - downloaded) / (speed or 1),
'fragment_index': min(frag_count, len(completed) + 1) if fragmented else None,
'elapsed': time.time() - started
})
self._hook_progress(status, info_dict)
if not active and len(completed) >= frag_count:
send_rpc('aria2.shutdown')
retval = p.wait()
break
time.sleep(0.1)
retval = p.poll()
return '', p.stderr.read(), retval
class HttpieFD(ExternalFD):
AVAILABLE_OPT = '--version'

View file

@ -464,14 +464,14 @@ def _alias_callback(option, opt_str, value, parser, opts, nargs):
'allowed_values': {
'filename', 'filename-sanitization', 'format-sort', 'abort-on-error', 'format-spec', 'no-playlist-metafiles',
'multistreams', 'no-live-chat', 'playlist-index', 'list-formats', 'no-direct-merge',
'no-attach-info-json', 'embed-metadata', 'embed-thumbnail-atomicparsley',
'seperate-video-versions', 'no-clean-infojson', 'no-keep-subs', 'no-certifi',
'no-attach-info-json', 'embed-thumbnail-atomicparsley', 'no-external-downloader-progress',
'embed-metadata', 'seperate-video-versions', 'no-clean-infojson', 'no-keep-subs', 'no-certifi',
'no-youtube-channel-redirect', 'no-youtube-unavailable-videos', 'no-youtube-prefer-utc-upload-date',
}, 'aliases': {
'youtube-dl': ['all', '-multistreams'],
'youtube-dlc': ['all', '-no-youtube-channel-redirect', '-no-live-chat'],
'2021': ['2022', 'no-certifi', 'filename-sanitization', 'no-youtube-prefer-utc-upload-date'],
'2022': [],
'2022': ['no-external-downloader-progress'],
}
}, help=(
'Options that can help keep compatibility with youtube-dl or youtube-dlc '

View file

@ -5243,6 +5243,15 @@ def random_birthday(year_field, month_field, day_field):
}
def find_available_port(interface=''):
try:
with socket.socket() as sock:
sock.bind((interface, 0))
return sock.getsockname()[1]
except OSError:
return None
# Templates for internet shortcut files, which are plain text files.
DOT_URL_LINK_TEMPLATE = '''\
[InternetShortcut]