Add brotli content-encoding support (#2433)

Authored by: coletdjnz
This commit is contained in:
coletdev 2022-03-09 05:44:05 +13:00 committed by GitHub
parent 9e0e6adb2d
commit 4390d5ec12
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 36 additions and 4 deletions

View file

@ -268,6 +268,7 @@ ## DEPENDENCIES
* [**websockets**](https://github.com/aaugustin/websockets) - For downloading over websocket. Licensed under [BSD3](https://github.com/aaugustin/websockets/blob/main/LICENSE) * [**websockets**](https://github.com/aaugustin/websockets) - For downloading over websocket. Licensed under [BSD3](https://github.com/aaugustin/websockets/blob/main/LICENSE)
* [**secretstorage**](https://github.com/mitya57/secretstorage) - For accessing the Gnome keyring while decrypting cookies of Chromium-based browsers on Linux. Licensed under [BSD](https://github.com/mitya57/secretstorage/blob/master/LICENSE) * [**secretstorage**](https://github.com/mitya57/secretstorage) - For accessing the Gnome keyring while decrypting cookies of Chromium-based browsers on Linux. Licensed under [BSD](https://github.com/mitya57/secretstorage/blob/master/LICENSE)
* [**AtomicParsley**](https://github.com/wez/atomicparsley) - For embedding thumbnail in mp4/m4a if mutagen is not present. Licensed under [GPLv2+](https://github.com/wez/atomicparsley/blob/master/COPYING) * [**AtomicParsley**](https://github.com/wez/atomicparsley) - For embedding thumbnail in mp4/m4a if mutagen is not present. Licensed under [GPLv2+](https://github.com/wez/atomicparsley/blob/master/COPYING)
* [**brotli**](https://github.com/google/brotli) or [**brotlicffi**](https://github.com/python-hyper/brotlicffi) - [Brotli](https://en.wikipedia.org/wiki/Brotli) content encoding support. Both licensed under MIT <sup>[1](https://github.com/google/brotli/blob/master/LICENSE) [2](https://github.com/python-hyper/brotlicffi/blob/master/LICENSE) </sup>
* [**rtmpdump**](http://rtmpdump.mplayerhq.hu) - For downloading `rtmp` streams. ffmpeg will be used as a fallback. Licensed under [GPLv2+](http://rtmpdump.mplayerhq.hu) * [**rtmpdump**](http://rtmpdump.mplayerhq.hu) - For downloading `rtmp` streams. ffmpeg will be used as a fallback. Licensed under [GPLv2+](http://rtmpdump.mplayerhq.hu)
* [**mplayer**](http://mplayerhq.hu/design7/info.html) or [**mpv**](https://mpv.io) - For downloading `rtsp` streams. ffmpeg will be used as a fallback. Licensed under [GPLv2+](https://github.com/mpv-player/mpv/blob/master/Copyright) * [**mplayer**](http://mplayerhq.hu/design7/info.html) or [**mpv**](https://mpv.io) - For downloading `rtsp` streams. ffmpeg will be used as a fallback. Licensed under [GPLv2+](https://github.com/mpv-player/mpv/blob/master/Copyright)
* [**phantomjs**](https://github.com/ariya/phantomjs) - Used in extractors where javascript needs to be run. Licensed under [BSD3](https://github.com/ariya/phantomjs/blob/master/LICENSE.BSD) * [**phantomjs**](https://github.com/ariya/phantomjs) - Used in extractors where javascript needs to be run. Licensed under [BSD3](https://github.com/ariya/phantomjs/blob/master/LICENSE.BSD)
@ -284,7 +285,7 @@ ## DEPENDENCIES
## COMPILE ## COMPILE
**For Windows**: **For Windows**:
To build the Windows executable, you must have pyinstaller (and optionally mutagen, pycryptodomex, websockets). Once you have all the necessary dependencies installed, (optionally) build lazy extractors using `devscripts/make_lazy_extractors.py`, and then just run `pyinst.py`. The executable will be built for the same architecture (32/64 bit) as the python used to build it. To build the Windows executable, you must have pyinstaller (and any of yt-dlp's optional dependencies if needed). Once you have all the necessary dependencies installed, (optionally) build lazy extractors using `devscripts/make_lazy_extractors.py`, and then just run `pyinst.py`. The executable will be built for the same architecture (32/64 bit) as the python used to build it.
py -m pip install -U pyinstaller -r requirements.txt py -m pip install -U pyinstaller -r requirements.txt
py devscripts/make_lazy_extractors.py py devscripts/make_lazy_extractors.py

View file

@ -74,7 +74,7 @@ def version_to_list(version):
def dependency_options(): def dependency_options():
dependencies = [pycryptodome_module(), 'mutagen'] + collect_submodules('websockets') dependencies = [pycryptodome_module(), 'mutagen', 'brotli'] + collect_submodules('websockets')
excluded_modules = ['test', 'ytdlp_plugins', 'youtube-dl', 'youtube-dlc'] excluded_modules = ['test', 'ytdlp_plugins', 'youtube-dl', 'youtube-dlc']
yield from (f'--hidden-import={module}' for module in dependencies) yield from (f'--hidden-import={module}' for module in dependencies)

View file

@ -1,3 +1,5 @@
mutagen mutagen
pycryptodomex pycryptodomex
websockets websockets
brotli; platform_python_implementation=='CPython'
brotlicffi; platform_python_implementation!='CPython'

View file

@ -23,7 +23,7 @@
'**PS**: Some links in this document will not work since this is a copy of the README.md from Github', '**PS**: Some links in this document will not work since this is a copy of the README.md from Github',
open('README.md', 'r', encoding='utf-8').read())) open('README.md', 'r', encoding='utf-8').read()))
REQUIREMENTS = ['mutagen', 'pycryptodomex', 'websockets'] REQUIREMENTS = open('requirements.txt').read().splitlines()
if sys.argv[1:2] == ['py2exe']: if sys.argv[1:2] == ['py2exe']:

View file

@ -32,6 +32,7 @@
from .compat import ( from .compat import (
compat_basestring, compat_basestring,
compat_brotli,
compat_get_terminal_size, compat_get_terminal_size,
compat_kwargs, compat_kwargs,
compat_numeric_types, compat_numeric_types,
@ -3675,6 +3676,7 @@ def python_implementation():
from .cookies import SQLITE_AVAILABLE, SECRETSTORAGE_AVAILABLE from .cookies import SQLITE_AVAILABLE, SECRETSTORAGE_AVAILABLE
lib_str = join_nonempty( lib_str = join_nonempty(
compat_brotli and compat_brotli.__name__,
compat_pycrypto_AES and compat_pycrypto_AES.__name__.split('.')[0], compat_pycrypto_AES and compat_pycrypto_AES.__name__.split('.')[0],
SECRETSTORAGE_AVAILABLE and 'secretstorage', SECRETSTORAGE_AVAILABLE and 'secretstorage',
has_mutagen and 'mutagen', has_mutagen and 'mutagen',

View file

@ -170,6 +170,13 @@ def compat_expanduser(path):
except ImportError: except ImportError:
compat_pycrypto_AES = None compat_pycrypto_AES = None
try:
import brotlicffi as compat_brotli
except ImportError:
try:
import brotli as compat_brotli
except ImportError:
compat_brotli = None
WINDOWS_VT_MODE = False if compat_os_name == 'nt' else None WINDOWS_VT_MODE = False if compat_os_name == 'nt' else None
@ -258,6 +265,7 @@ def windows_enable_vt_mode(): # TODO: Do this the proper way https://bugs.pytho
'compat_asyncio_run', 'compat_asyncio_run',
'compat_b64decode', 'compat_b64decode',
'compat_basestring', 'compat_basestring',
'compat_brotli',
'compat_chr', 'compat_chr',
'compat_collections_abc', 'compat_collections_abc',
'compat_cookiejar', 'compat_cookiejar',

View file

@ -47,6 +47,7 @@
compat_HTMLParser, compat_HTMLParser,
compat_HTTPError, compat_HTTPError,
compat_basestring, compat_basestring,
compat_brotli,
compat_chr, compat_chr,
compat_cookiejar, compat_cookiejar,
compat_ctypes_WINFUNCTYPE, compat_ctypes_WINFUNCTYPE,
@ -143,10 +144,16 @@ def random_user_agent():
return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS) return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)
# Content codings this module is prepared to decode. 'br' is listed only when
# a brotli implementation was importable (compat_brotli is truthy).
SUPPORTED_ENCODINGS = ['gzip', 'deflate'] + (['br'] if compat_brotli else [])
std_headers = { std_headers = {
'User-Agent': random_user_agent(), 'User-Agent': random_user_agent(),
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
'Accept-Encoding': 'gzip, deflate', 'Accept-Encoding': ', '.join(SUPPORTED_ENCODINGS),
'Accept-Language': 'en-us,en;q=0.5', 'Accept-Language': 'en-us,en;q=0.5',
'Sec-Fetch-Mode': 'navigate', 'Sec-Fetch-Mode': 'navigate',
} }
@ -1357,6 +1364,12 @@ def deflate(data):
except zlib.error: except zlib.error:
return zlib.decompress(data) return zlib.decompress(data)
@staticmethod
def brotli(data):
    """Decompress a brotli-encoded payload.

    Falsy input (e.g. an empty body) is handed back untouched without
    invoking the decoder; anything else is passed to
    ``compat_brotli.decompress``.
    """
    return compat_brotli.decompress(data) if data else data
def http_request(self, req): def http_request(self, req):
# According to RFC 3986, URLs can not contain non-ASCII characters, however this is not # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
# always respected by websites, some tend to give out URLs with non percent-encoded # always respected by websites, some tend to give out URLs with non percent-encoded
@ -1417,6 +1430,12 @@ def http_response(self, req, resp):
resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code) resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
resp.msg = old_resp.msg resp.msg = old_resp.msg
del resp.headers['Content-encoding'] del resp.headers['Content-encoding']
# brotli
if resp.headers.get('Content-encoding', '') == 'br':
resp = compat_urllib_request.addinfourl(
io.BytesIO(self.brotli(resp.read())), old_resp.headers, old_resp.url, old_resp.code)
resp.msg = old_resp.msg
del resp.headers['Content-encoding']
# Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
# https://github.com/ytdl-org/youtube-dl/issues/6457). # https://github.com/ytdl-org/youtube-dl/issues/6457).
if 300 <= resp.code < 400: if 300 <= resp.code < 400: