[extractor/generic] Use Accept-Encoding: identity for initial request

The existing comment seems to imply this was the desired behavior from the beginning. Partial fix for https://github.com/yt-dlp/yt-dlp/issues/5855, https://github.com/yt-dlp/yt-dlp/issues/5851, https://github.com/yt-dlp/yt-dlp/issues/4748
2024-09-20 04:53:58 +02:00 · 2023-01-01 18:40:26 +13:00 · 2023-01-01 18:40:26 +13:00 · 3e01ce744a
commit 3e01ce744a
parent 8e40b9d1ec
1 changed files with 16 additions and 1 deletions
--- a/yt_dlp/extractor/generic.py
+++ b/yt_dlp/extractor/generic.py
@ -2154,6 +2154,21 @@ class GenericIE(InfoExtractor):
                'age_limit': 0,
                'direct': True,
            }
        }, {
            'note': 'server returns data in brotli compression by default if `accept-encoding: *` is specified.',
            'url': 'https://www.extra.cz/cauky-lidi-70-dil-babis-predstavil-pohadky-prymulanek-nebo-andrejovy-nove-saty-ac867',
            'info_dict': {
                'id': 'cauky-lidi-70-dil-babis-predstavil-pohadky-prymulanek-nebo-andrejovy-nove-saty-ac867',
                'ext': 'mp4',
                'title': 'čauky lidi 70 finall',
                'description': 'čauky lidi 70 finall',
                'thumbnail': 'h',
                'upload_date': '20220606',
                'timestamp': 1654513791,
                'duration': 318.0,
                'direct': True,
                'age_limit': 0,
            }
        }
    ]
@ -2312,7 +2327,7 @@ def _real_extract(self, url):
        # It would probably be better to solve this by checking Content-Type for application/octet-stream
        # after a HEAD request, but we are not sure if we can rely on this.
        full_response = self._request_webpage(url, video_id, headers={
-            'Accept-Encoding': '*',
+            'Accept-Encoding': 'identity',
            **smuggled_data.get('http_headers', {})
        })
        new_url = full_response.geturl()