diff --git a/test/test_download.py b/test/test_download.py index fd7752cdd..6f00a4ded 100755 --- a/test/test_download.py +++ b/test/test_download.py @@ -160,7 +160,7 @@ def try_rm_tcs_files(tcs=None): force_generic_extractor=params.get('force_generic_extractor', False)) except (DownloadError, ExtractorError) as err: # Check if the exception is not a network related one - if not isinstance(err.exc_info[1], (TransportError, UnavailableVideoError)) or (isinstance(err.exc_info[1], HTTPError) and err.exc_info[1].code == 503): + if not isinstance(err.exc_info[1], (TransportError, UnavailableVideoError)) or (isinstance(err.exc_info[1], HTTPError) and err.exc_info[1].status == 503): err.msg = f'{getattr(err, "msg", err)} ({tname})' raise diff --git a/test/test_networking.py b/test/test_networking.py index 147a4ff49..b60ed283b 100644 --- a/test/test_networking.py +++ b/test/test_networking.py @@ -1057,14 +1057,15 @@ def test_compat_request(self): urllib_req = urllib.request.Request('http://foo.bar', data=b'test', method='PUT', headers={'X-Test': '1'}) urllib_req.add_unredirected_header('Cookie', 'bob=bob') urllib_req.timeout = 2 - - req = ydl.urlopen(urllib_req).request - assert req.url == urllib_req.get_full_url() - assert req.data == urllib_req.data - assert req.method == urllib_req.get_method() - assert 'X-Test' in req.headers - assert 'Cookie' in req.headers - assert req.extensions.get('timeout') == 2 + with warnings.catch_warnings(): + warnings.simplefilter('ignore', category=DeprecationWarning) + req = ydl.urlopen(urllib_req).request + assert req.url == urllib_req.get_full_url() + assert req.data == urllib_req.data + assert req.method == urllib_req.get_method() + assert 'X-Test' in req.headers + assert 'Cookie' in req.headers + assert req.extensions.get('timeout') == 2 with pytest.raises(AssertionError): ydl.urlopen(None) @@ -1362,7 +1363,9 @@ def test_get_header(self): def test_compat(self): res = Response(io.BytesIO(b''), url='test://', status=404, headers={'test': 
'test'}) - assert res.code == res.getcode() == res.status - assert res.geturl() == res.url - assert res.info() is res.headers - assert res.getheader('test') == res.get_header('test') + with warnings.catch_warnings(): + warnings.simplefilter('ignore', category=DeprecationWarning) + assert res.code == res.getcode() == res.status + assert res.geturl() == res.url + assert res.info() is res.headers + assert res.getheader('test') == res.get_header('test') diff --git a/test/test_networking_utils.py b/test/test_networking_utils.py index f9f876af3..ef46f79ed 100644 --- a/test/test_networking_utils.py +++ b/test/test_networking_utils.py @@ -8,11 +8,13 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +import contextlib import io import platform import random import ssl import urllib.error +import warnings from yt_dlp.cookies import YoutubeDLCookieJar from yt_dlp.dependencies import certifi @@ -202,20 +204,58 @@ def test_compat_http_error(self): assert isinstance(error, HTTPError) assert isinstance(error, urllib.error.HTTPError) - assert error.code == 403 - assert error.getcode() == 403 - assert error.hdrs is error.response.headers - assert error.info() is error.response.headers - assert error.headers is error.response.headers - assert error.filename == error.response.url - assert error.url == error.response.url - assert error.geturl() == error.response.url + @contextlib.contextmanager + def raises_deprecation_warning(): + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter('always') + yield + + if len(w) == 0: + pytest.fail('Did not raise DeprecationWarning') + if len(w) > 1: + pytest.fail(f'Raised multiple warnings: {w}') + + if not issubclass(w[-1].category, DeprecationWarning): + pytest.fail(f'Expected DeprecationWarning, got {w[-1].category}') + w.clear() + + with raises_deprecation_warning(): + assert error.code == 403 + + with raises_deprecation_warning(): + assert error.getcode() == 403 + + with 
raises_deprecation_warning(): + assert error.hdrs is error.response.headers + + with raises_deprecation_warning(): + assert error.info() is error.response.headers + + with raises_deprecation_warning(): + assert error.headers is error.response.headers + + with raises_deprecation_warning(): + assert error.filename == error.response.url + + with raises_deprecation_warning(): + assert error.url == error.response.url + + with raises_deprecation_warning(): + assert error.geturl() == error.response.url # Passthrough file operations - assert error.read() == b'test' - assert not error.closed - # Technically Response operations are also passed through, which should not be used. - assert error.get_header('test') == 'test' + with raises_deprecation_warning(): + assert error.read() == b'test' + + with raises_deprecation_warning(): + assert not error.closed + + with raises_deprecation_warning(): + # Technically Response operations are also passed through, which should not be used. + assert error.get_header('test') == 'test' + + # Should not raise a warning + error.close() @pytest.mark.skipif( platform.python_implementation() == 'PyPy', reason='garbage collector works differently in pypy') diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 29a18aef0..850eb8ae0 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -33,7 +33,7 @@ from .extractor.common import UnsupportedURLIE from .extractor.openload import PhantomJSwrapper from .minicurses import format_text -from .networking import Request, RequestDirector +from .networking import HEADRequest, Request, RequestDirector from .networking.common import _REQUEST_HANDLERS from .networking.exceptions import ( HTTPError, @@ -41,6 +41,7 @@ RequestError, SSLError, _CompatHTTPError, + network_exceptions, ) from .plugins import directories as plugin_directories from .postprocessor import _PLUGIN_CLASSES as plugin_pps @@ -80,7 +81,6 @@ ExtractorError, FormatSorter, GeoRestrictedError, - HEADRequest, ISO3166Utils, LazyList, 
MaxDownloadsReached, @@ -122,7 +122,6 @@ locked_file, make_archive_id, make_dir, - network_exceptions, number_of_digits, orderedSet, orderedSet_from_options, @@ -135,7 +134,6 @@ sanitize_filename, sanitize_path, sanitize_url, - std_headers, str_or_none, strftime_or_none, subtitles_filename, @@ -158,6 +156,7 @@ HTTPHeaderDict, clean_headers, clean_proxies, + std_headers, ) from .version import CHANNEL, RELEASE_GIT_HEAD, VARIANT, __version__ @@ -4019,6 +4018,9 @@ def urlopen(self, req): if isinstance(req, str): req = Request(req) elif isinstance(req, urllib.request.Request): + self.deprecation_warning( + 'Passing a urllib.request.Request object to YoutubeDL.urlopen() is deprecated. ' + 'Use yt_dlp.networking.common.Request instead.') req = urllib_req_to_req(req) assert isinstance(req, Request) @@ -4242,7 +4244,7 @@ def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None ret.append((thumb_filename, thumb_filename_final)) t['filepath'] = thumb_filename except network_exceptions as err: - if isinstance(err, urllib.error.HTTPError) and err.code == 404: + if isinstance(err, HTTPError) and err.status == 404: self.to_screen(f'[info] {thumb_display_id.title()} does not exist') else: self.report_warning(f'Unable to download {thumb_display_id}: {err}') diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index b81277a57..991dbcda7 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -57,11 +57,11 @@ read_stdin, render_table, setproctitle, - std_headers, traverse_obj, variadic, write_string, ) +from .utils.networking import std_headers from .YoutubeDL import YoutubeDL _IN_CLI = False diff --git a/yt_dlp/compat/_deprecated.py b/yt_dlp/compat/_deprecated.py index 342f1f80d..14d37b236 100644 --- a/yt_dlp/compat/_deprecated.py +++ b/yt_dlp/compat/_deprecated.py @@ -8,7 +8,6 @@ compat_b64decode = base64.b64decode -compat_HTTPError = urllib.error.HTTPError compat_urlparse = urllib.parse compat_parse_qs = urllib.parse.parse_qs 
compat_urllib_parse_unquote = urllib.parse.unquote diff --git a/yt_dlp/compat/_legacy.py b/yt_dlp/compat/_legacy.py index 83bf869a8..912907a02 100644 --- a/yt_dlp/compat/_legacy.py +++ b/yt_dlp/compat/_legacy.py @@ -70,6 +70,7 @@ def compat_setenv(key, value, env=os.environ): compat_HTMLParser = compat_html_parser_HTMLParser = html.parser.HTMLParser compat_http_client = http.client compat_http_server = http.server +compat_HTTPError = urllib.error.HTTPError compat_input = input compat_integer_types = (int, ) compat_itertools_count = itertools.count diff --git a/yt_dlp/downloader/external.py b/yt_dlp/downloader/external.py index d4045e58f..e307502db 100644 --- a/yt_dlp/downloader/external.py +++ b/yt_dlp/downloader/external.py @@ -10,6 +10,7 @@ from .fragment import FragmentFD from ..compat import functools +from ..networking import Request from ..postprocessor.ffmpeg import EXT_TO_OUT_FORMATS, FFmpegPostProcessor from ..utils import ( Popen, @@ -25,7 +26,6 @@ encodeFilename, find_available_port, remove_end, - sanitized_Request, traverse_obj, ) @@ -357,13 +357,12 @@ def aria2c_rpc(self, rpc_port, rpc_secret, method, params=()): 'method': method, 'params': [f'token:{rpc_secret}', *params], }).encode('utf-8') - request = sanitized_Request( + request = Request( f'http://localhost:{rpc_port}/jsonrpc', data=d, headers={ 'Content-Type': 'application/json', 'Content-Length': f'{len(d)}', - 'Ytdl-request-proxy': '__noproxy__', - }) + }, proxies={'all': None}) with self.ydl.urlopen(request) as r: resp = json.load(r) assert resp.get('id') == sanitycheck, 'Something went wrong with RPC server' diff --git a/yt_dlp/downloader/f4m.py b/yt_dlp/downloader/f4m.py index 306f92192..28cbba016 100644 --- a/yt_dlp/downloader/f4m.py +++ b/yt_dlp/downloader/f4m.py @@ -3,11 +3,11 @@ import itertools import struct import time -import urllib.error import urllib.parse from .fragment import FragmentFD from ..compat import compat_etree_fromstring +from ..networking.exceptions import HTTPError 
from ..utils import fix_xml_ampersands, xpath_text @@ -312,7 +312,7 @@ def real_download(self, filename, info_dict): self.to_screen('[%s] Downloading f4m manifest' % self.FD_NAME) urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url)) - man_url = urlh.geturl() + man_url = urlh.url # Some manifests may be malformed, e.g. prosiebensat1 generated manifests # (see https://github.com/ytdl-org/youtube-dl/issues/6215#issuecomment-121704244 # and https://github.com/ytdl-org/youtube-dl/issues/7823) @@ -407,8 +407,8 @@ def real_download(self, filename, info_dict): if box_type == b'mdat': self._append_fragment(ctx, box_data) break - except urllib.error.HTTPError as err: - if live and (err.code == 404 or err.code == 410): + except HTTPError as err: + if live and (err.status == 404 or err.status == 410): # We didn't keep up with the live window. Continue # with the next available fragment. msg = 'Fragment %d unavailable' % frag_i diff --git a/yt_dlp/downloader/fragment.py b/yt_dlp/downloader/fragment.py index 069815326..b4b680dae 100644 --- a/yt_dlp/downloader/fragment.py +++ b/yt_dlp/downloader/fragment.py @@ -1,24 +1,19 @@ import concurrent.futures import contextlib -import http.client import json import math import os import struct import time -import urllib.error from .common import FileDownloader from .http import HttpFD from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7 from ..compat import compat_os_name -from ..utils import ( - DownloadError, - RetryManager, - encodeFilename, - sanitized_Request, - traverse_obj, -) +from ..networking import Request +from ..networking.exceptions import HTTPError, IncompleteRead +from ..utils import DownloadError, RetryManager, encodeFilename, traverse_obj +from ..utils.networking import HTTPHeaderDict class HttpQuietDownloader(HttpFD): @@ -75,7 +70,7 @@ def report_skip_fragment(self, frag_index, err=None): def _prepare_url(self, info_dict, url): headers = info_dict.get('http_headers') - return sanitized_Request(url, None, 
headers) if headers else url + return Request(url, None, headers) if headers else url def _prepare_and_start_frag_download(self, ctx, info_dict): self._prepare_frag_download(ctx) @@ -457,7 +452,7 @@ def download_fragment(fragment, ctx): frag_index = ctx['fragment_index'] = fragment['frag_index'] ctx['last_error'] = None - headers = info_dict.get('http_headers', {}).copy() + headers = HTTPHeaderDict(info_dict.get('http_headers')) byte_range = fragment.get('byte_range') if byte_range: headers['Range'] = 'bytes=%d-%d' % (byte_range['start'], byte_range['end'] - 1) @@ -477,7 +472,7 @@ def error_callback(err, count, retries): if not self._download_fragment( ctx, fragment['url'], info_dict, headers, info_dict.get('request_data')): return - except (urllib.error.HTTPError, http.client.IncompleteRead) as err: + except (HTTPError, IncompleteRead) as err: retry.error = err continue except DownloadError: # has own retry settings diff --git a/yt_dlp/downloader/hls.py b/yt_dlp/downloader/hls.py index ab7d496d4..d4b3f0320 100644 --- a/yt_dlp/downloader/hls.py +++ b/yt_dlp/downloader/hls.py @@ -75,7 +75,7 @@ def real_download(self, filename, info_dict): self.to_screen('[%s] Downloading m3u8 manifest' % self.FD_NAME) urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url)) - man_url = urlh.geturl() + man_url = urlh.url s = urlh.read().decode('utf-8', 'ignore') can_download, message = self.can_download(s, info_dict, self.params.get('allow_unplayable_formats')), None diff --git a/yt_dlp/downloader/http.py b/yt_dlp/downloader/http.py index 45d094721..f5237443e 100644 --- a/yt_dlp/downloader/http.py +++ b/yt_dlp/downloader/http.py @@ -1,10 +1,14 @@ import os import random import time -import urllib.error from .common import FileDownloader -from ..networking.exceptions import CertificateVerifyError, TransportError +from ..networking import Request +from ..networking.exceptions import ( + CertificateVerifyError, + HTTPError, + TransportError, +) from ..utils import ( 
ContentTooShortError, RetryManager, @@ -14,10 +18,10 @@ encodeFilename, int_or_none, parse_http_range, - sanitized_Request, try_call, write_xattr, ) +from ..utils.networking import HTTPHeaderDict class HttpFD(FileDownloader): @@ -36,10 +40,7 @@ class DownloadContext(dict): ctx.stream = None # Disable compression - headers = {'Accept-Encoding': 'identity'} - add_headers = info_dict.get('http_headers') - if add_headers: - headers.update(add_headers) + headers = HTTPHeaderDict({'Accept-Encoding': 'identity'}, info_dict.get('http_headers')) is_test = self.params.get('test', False) chunk_size = self._TEST_FILE_SIZE if is_test else ( @@ -110,10 +111,10 @@ def establish_connection(): if try_call(lambda: range_end >= ctx.content_len): range_end = ctx.content_len - 1 - request = sanitized_Request(url, request_data, headers) + request = Request(url, request_data, headers) has_range = range_start is not None if has_range: - request.add_header('Range', f'bytes={int(range_start)}-{int_or_none(range_end) or ""}') + request.headers['Range'] = f'bytes={int(range_start)}-{int_or_none(range_end) or ""}' # Establish connection try: ctx.data = self.ydl.urlopen(request) @@ -144,17 +145,17 @@ def establish_connection(): self.report_unable_to_resume() ctx.resume_len = 0 ctx.open_mode = 'wb' - ctx.data_len = ctx.content_len = int_or_none(ctx.data.info().get('Content-length', None)) - except urllib.error.HTTPError as err: - if err.code == 416: + ctx.data_len = ctx.content_len = int_or_none(ctx.data.headers.get('Content-length', None)) + except HTTPError as err: + if err.status == 416: # Unable to resume (requested range not satisfiable) try: # Open the connection again without the range header ctx.data = self.ydl.urlopen( - sanitized_Request(url, request_data, headers)) - content_length = ctx.data.info()['Content-Length'] - except urllib.error.HTTPError as err: - if err.code < 500 or err.code >= 600: + Request(url, request_data, headers)) + content_length = 
ctx.data.headers['Content-Length'] + except HTTPError as err: + if err.status < 500 or err.status >= 600: raise else: # Examine the reported length @@ -182,7 +183,7 @@ def establish_connection(): ctx.resume_len = 0 ctx.open_mode = 'wb' return - elif err.code < 500 or err.code >= 600: + elif err.status < 500 or err.status >= 600: # Unexpected HTTP error raise raise RetryDownload(err) @@ -198,9 +199,9 @@ def close_stream(): ctx.stream = None def download(): - data_len = ctx.data.info().get('Content-length') + data_len = ctx.data.headers.get('Content-length') - if ctx.data.info().get('Content-encoding'): + if ctx.data.headers.get('Content-encoding'): # Content-encoding is present, Content-length is not reliable anymore as we are # doing auto decompression. (See: https://github.com/yt-dlp/yt-dlp/pull/6176) data_len = None @@ -345,7 +346,7 @@ def retry(e): # Update file modification time if self.params.get('updatetime', True): - info_dict['filetime'] = self.try_utime(ctx.filename, ctx.data.info().get('last-modified', None)) + info_dict['filetime'] = self.try_utime(ctx.filename, ctx.data.headers.get('last-modified', None)) self._hook_progress({ 'downloaded_bytes': byte_counter, diff --git a/yt_dlp/downloader/ism.py b/yt_dlp/downloader/ism.py index a157a8ad9..dd688f586 100644 --- a/yt_dlp/downloader/ism.py +++ b/yt_dlp/downloader/ism.py @@ -2,9 +2,9 @@ import io import struct import time -import urllib.error from .fragment import FragmentFD +from ..networking.exceptions import HTTPError from ..utils import RetryManager u8 = struct.Struct('>B') @@ -271,7 +271,7 @@ def real_download(self, filename, info_dict): write_piff_header(ctx['dest_stream'], info_dict['_download_params']) extra_state['ism_track_written'] = True self._append_fragment(ctx, frag_content) - except urllib.error.HTTPError as err: + except HTTPError as err: retry.error = err continue diff --git a/yt_dlp/downloader/niconico.py b/yt_dlp/downloader/niconico.py index 7d8575c2a..5720f6eb8 100644 --- 
a/yt_dlp/downloader/niconico.py +++ b/yt_dlp/downloader/niconico.py @@ -5,13 +5,8 @@ from . import get_suitable_downloader from .common import FileDownloader from .external import FFmpegFD -from ..utils import ( - DownloadError, - WebSocketsWrapper, - sanitized_Request, - str_or_none, - try_get, -) +from ..networking import Request +from ..utils import DownloadError, WebSocketsWrapper, str_or_none, try_get class NiconicoDmcFD(FileDownloader): @@ -33,7 +28,7 @@ def real_download(self, filename, info_dict): heartbeat_data = heartbeat_info_dict['data'].encode() heartbeat_interval = heartbeat_info_dict.get('interval', 30) - request = sanitized_Request(heartbeat_url, heartbeat_data) + request = Request(heartbeat_url, heartbeat_data) def heartbeat(): try: diff --git a/yt_dlp/downloader/youtube_live_chat.py b/yt_dlp/downloader/youtube_live_chat.py index 5928fecf0..c7a86374a 100644 --- a/yt_dlp/downloader/youtube_live_chat.py +++ b/yt_dlp/downloader/youtube_live_chat.py @@ -1,8 +1,8 @@ import json import time -import urllib.error from .fragment import FragmentFD +from ..networking.exceptions import HTTPError from ..utils import ( RegexNotFoundError, RetryManager, @@ -10,6 +10,7 @@ int_or_none, try_get, ) +from ..utils.networking import HTTPHeaderDict class YoutubeLiveChatFD(FragmentFD): @@ -37,10 +38,7 @@ def real_download(self, filename, info_dict): start_time = int(time.time() * 1000) def dl_fragment(url, data=None, headers=None): - http_headers = info_dict.get('http_headers', {}) - if headers: - http_headers = http_headers.copy() - http_headers.update(headers) + http_headers = HTTPHeaderDict(info_dict.get('http_headers'), headers) return self._download_fragment(ctx, url, info_dict, http_headers, data) def parse_actions_replay(live_chat_continuation): @@ -129,7 +127,7 @@ def download_and_parse_fragment(url, frag_index, request_data=None, headers=None or frag_index == 1 and try_refresh_replay_beginning or parse_actions_replay) return (True, *func(live_chat_continuation)) 
- except urllib.error.HTTPError as err: + except HTTPError as err: retry.error = err continue return False, None, None, None diff --git a/yt_dlp/extractor/abematv.py b/yt_dlp/extractor/abematv.py index c9166b6b8..98ece8da7 100644 --- a/yt_dlp/extractor/abematv.py +++ b/yt_dlp/extractor/abematv.py @@ -22,7 +22,6 @@ int_or_none, intlist_to_bytes, OnDemandPagedList, - request_to_url, time_seconds, traverse_obj, update_url_query, @@ -137,7 +136,7 @@ def _get_videokey_from_ticket(self, ticket): return intlist_to_bytes(aes_ecb_decrypt(encvideokey, enckey)) def abematv_license_open(self, url): - url = request_to_url(url) + url = url.get_full_url() if isinstance(url, urllib.request.Request) else url ticket = urllib.parse.urlparse(url).netloc response_data = self._get_videokey_from_ticket(ticket) return urllib.response.addinfourl(io.BytesIO(response_data), headers={ diff --git a/yt_dlp/extractor/adn.py b/yt_dlp/extractor/adn.py index f1f55e87f..b59dbc850 100644 --- a/yt_dlp/extractor/adn.py +++ b/yt_dlp/extractor/adn.py @@ -6,10 +6,8 @@ from .common import InfoExtractor from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7 -from ..compat import ( - compat_HTTPError, - compat_b64decode, -) +from ..compat import compat_b64decode +from ..networking.exceptions import HTTPError from ..utils import ( ass_subtitles_timecode, bytes_to_intlist, @@ -142,9 +140,9 @@ def _perform_login(self, username, password): self._HEADERS = {'authorization': 'Bearer ' + access_token} except ExtractorError as e: message = None - if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401: + if isinstance(e.cause, HTTPError) and e.cause.status == 401: resp = self._parse_json( - e.cause.read().decode(), None, fatal=False) or {} + e.cause.response.read().decode(), None, fatal=False) or {} message = resp.get('message') or resp.get('code') self.report_warning(message or self._LOGIN_ERR_MESSAGE) @@ -195,14 +193,14 @@ def _real_extract(self, url): }) break except ExtractorError as e: - if not 
isinstance(e.cause, compat_HTTPError): + if not isinstance(e.cause, HTTPError): raise e - if e.cause.code == 401: # This usually goes away with a different random pkcs1pad, so retry continue - error = self._parse_json(e.cause.read(), video_id) + if e.cause.status == 401: # This usually goes away with a different random pkcs1pad, so retry continue + error = self._parse_json(e.cause.response.read(), video_id) message = error.get('message') - if e.cause.code == 403 and error.get('code') == 'player-bad-geolocation-country': + if e.cause.status == 403 and error.get('code') == 'player-bad-geolocation-country': self.raise_geo_restricted(msg=message) diff --git a/yt_dlp/extractor/adobepass.py b/yt_dlp/extractor/adobepass.py index 722a534ed..5eed0ca22 100644 --- a/yt_dlp/extractor/adobepass.py +++ b/yt_dlp/extractor/adobepass.py @@ -2,11 +2,11 @@ import json import re import time -import urllib.error import xml.etree.ElementTree as etree from .common import InfoExtractor from ..compat import compat_urlparse +from ..networking.exceptions import HTTPError from ..utils import ( NO_DEFAULT, ExtractorError, @@ -1394,7 +1394,7 @@ def post_form(form_page_res, note, data={}): form_page, urlh = form_page_res post_url = self._html_search_regex(r'