[youtube] Extract data from multiple clients (#536)

* `player_client` accepts multiple clients
* default `player_client` = `android,web`
* music clients can be specifically requested
* Add `ios` `player_client`
* Hide live DASH formats since they cannot be downloaded

Closes #501

Authored-by: pukkandan, colethedj
pukkandan committed on 2021-07-21 09:22:34 +05:30 (via GitHub)
parent c84aeac6b5
commit 11f9be0912
3 changed files with 393 additions and 320 deletions

@@ -1337,8 +1337,8 @@ # EXTRACTOR ARGUMENTS
 The following extractors use this feature:
 * **youtube**
     * `skip`: `hls` or `dash` (or both) to skip download of the respective manifests
-    * `player_client`: `web` (default) or `android` (force use the android client fallbacks for video extraction)
-    * `player_skip`: `configs` - skip requests if applicable for client configs and use defaults
+    * `player_client`: Clients to extract video data from - one or more of `web`, `android`, `ios`, `web_music`, `android_music`, `ios_music`. By default, `android,web` is used. If the URL is from `music.youtube.com`, `android,web,android_music,web_music` is used
+    * `player_skip`: `configs` - skip any requests for client configs and use defaults
     * `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side).
     * `max_comments`: maximum amount of comments to download (default all).
     * `max_comment_depth`: maximum depth for nested comments. YouTube supports depths 1 or 2 (default).
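For illustration (not part of this commit's diff): a minimal sketch of selecting player clients through the Python API, assuming the standard `extractor_args` parameter of `YoutubeDL`; the video URL is only a placeholder.

```python
# Minimal sketch: request extraction from specific player clients.
# Rough CLI equivalent: yt-dlp --extractor-args "youtube:player_client=android,web" URL
from yt_dlp import YoutubeDL

opts = {
    # Values are lists of strings, mirroring the comma-separated CLI syntax
    'extractor_args': {'youtube': {'player_client': ['android', 'web'],
                                   'player_skip': ['configs']}},
}
with YoutubeDL(opts) as ydl:
    # Placeholder URL; download=False only extracts metadata and formats
    info = ydl.extract_info('https://www.youtube.com/watch?v=BaW_jenozKc', download=False)
    print(info.get('title'))
```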

@@ -116,7 +116,7 @@ def download_and_parse_fragment(url, frag_index, request_data=None, headers=None
             if not success:
                 return False, None, None, None
             try:
-                data = ie._extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace'))
+                data = ie.extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace'))
             except RegexNotFoundError:
                 data = None
             if not data:
@@ -146,7 +146,7 @@ def download_and_parse_fragment(url, frag_index, request_data=None, headers=None
         if not success:
             return False
         try:
-            data = ie._extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace'))
+            data = ie.extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace'))
         except RegexNotFoundError:
             return False
         continuation_id = try_get(
@@ -155,7 +155,7 @@ def download_and_parse_fragment(url, frag_index, request_data=None, headers=None
         # no data yet but required to call _append_fragment
         self._append_fragment(ctx, b'')
-        ytcfg = ie._extract_ytcfg(video_id, raw_fragment.decode('utf-8', 'replace'))
+        ytcfg = ie.extract_ytcfg(video_id, raw_fragment.decode('utf-8', 'replace'))
         if not ytcfg:
             return False
@@ -183,7 +183,7 @@ def download_and_parse_fragment(url, frag_index, request_data=None, headers=None
             request_data['currentPlayerState'] = {'playerOffsetMs': str(max(offset - 5000, 0))}
             if click_tracking_params:
                 request_data['context']['clickTracking'] = {'clickTrackingParams': click_tracking_params}
-            headers = ie._generate_api_headers(ytcfg, visitor_data=visitor_data)
+            headers = ie.generate_api_headers(ytcfg, visitor_data=visitor_data)
             headers.update({'content-type': 'application/json'})
             fragment_request_data = json.dumps(request_data, ensure_ascii=False).encode('utf-8') + b'\n'
             success, continuation_id, offset, click_tracking_params = download_and_parse_fragment(
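The hunks in this file only track the rename of several YouTube extractor helpers from private to public names (`_extract_yt_initial_data` → `extract_yt_initial_data`, `_extract_ytcfg` → `extract_ytcfg`, `_generate_api_headers` → `generate_api_headers`). A hedged sketch of the resulting call pattern, with `ie` (the YouTube extractor instance), `video_id`, `raw_fragment` and `visitor_data` assumed to be supplied by the surrounding downloader code as in the hunks above:

```python
# Illustrative only -- a hypothetical helper, not code from this commit.
def parse_fragment_and_build_headers(ie, video_id, raw_fragment, visitor_data=None):
    webpage = raw_fragment.decode('utf-8', 'replace')
    # Renamed from ie._extract_yt_initial_data / ie._extract_ytcfg
    data = ie.extract_yt_initial_data(video_id, webpage)
    ytcfg = ie.extract_ytcfg(video_id, webpage)
    # Renamed from ie._generate_api_headers
    headers = ie.generate_api_headers(ytcfg, visitor_data=visitor_data)
    headers.update({'content-type': 'application/json'})
    return data, ytcfg, headers
```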

File diff suppressed because it is too large.