[ie/soundcloud] Fix download format extraction (#10125)

Authored by: bashonly
This commit is contained in:
bashonly 2024-06-13 18:01:19 -05:00 committed by GitHub
parent 92a1c4abae
commit e53e56b735
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -95,7 +95,7 @@ def _update_client_id(self):
return return
raise ExtractorError('Unable to extract client id') raise ExtractorError('Unable to extract client id')
def _download_json(self, *args, **kwargs): def _call_api(self, *args, **kwargs):
non_fatal = kwargs.get('fatal') is False non_fatal = kwargs.get('fatal') is False
if non_fatal: if non_fatal:
del kwargs['fatal'] del kwargs['fatal']
@ -104,7 +104,7 @@ def _download_json(self, *args, **kwargs):
query['client_id'] = self._CLIENT_ID query['client_id'] = self._CLIENT_ID
kwargs['query'] = query kwargs['query'] = query
try: try:
return super()._download_json(*args, **kwargs) return self._download_json(*args, **kwargs)
except ExtractorError as e: except ExtractorError as e:
if isinstance(e.cause, HTTPError) and e.cause.status in (401, 403): if isinstance(e.cause, HTTPError) and e.cause.status in (401, 403):
self._store_client_id(None) self._store_client_id(None)
@ -163,7 +163,7 @@ def genNumBlock():
'user_agent': self._USER_AGENT 'user_agent': self._USER_AGENT
} }
response = self._download_json( response = self._call_api(
self._API_AUTH_URL_PW % (self._API_AUTH_QUERY_TEMPLATE % self._CLIENT_ID), self._API_AUTH_URL_PW % (self._API_AUTH_QUERY_TEMPLATE % self._CLIENT_ID),
None, note='Verifying login token...', fatal=False, None, note='Verifying login token...', fatal=False,
data=json.dumps(payload).encode()) data=json.dumps(payload).encode())
@ -217,12 +217,26 @@ def _extract_info_dict(self, info, full_title=None, secret_token=None, extract_f
query['secret_token'] = secret_token query['secret_token'] = secret_token
if not extract_flat and info.get('downloadable') and info.get('has_downloads_left'): if not extract_flat and info.get('downloadable') and info.get('has_downloads_left'):
download_url = update_url_query( try:
self._API_V2_BASE + 'tracks/' + track_id + '/download', query) # Do not use _call_api(); HTTP Error codes have different meanings for this request
redirect_url = (self._download_json(download_url, track_id, fatal=False) or {}).get('redirectUri') download_data = self._download_json(
if redirect_url: f'{self._API_V2_BASE}tracks/{track_id}/download', track_id,
'Downloading original download format info JSON', query=query, headers=self._HEADERS)
except ExtractorError as e:
if isinstance(e.cause, HTTPError) and e.cause.status == 401:
self.report_warning(
'Original download format is only available '
f'for registered users. {self._login_hint()}')
elif isinstance(e.cause, HTTPError) and e.cause.status == 403:
self.write_debug('Original download format is not available for this client')
else:
self.report_warning(e.msg)
download_data = None
if redirect_url := traverse_obj(download_data, ('redirectUri', {url_or_none})):
urlh = self._request_webpage( urlh = self._request_webpage(
HEADRequest(redirect_url), track_id, 'Checking for original download format', fatal=False) HEADRequest(redirect_url), track_id, 'Checking original download format availability',
'Original download format is not available', fatal=False)
if urlh: if urlh:
format_url = urlh.url format_url = urlh.url
format_urls.add(format_url) format_urls.add(format_url)
@ -303,7 +317,7 @@ def add_format(f, protocol, is_preview=False):
stream = None stream = None
for retry in self.RetryManager(fatal=False): for retry in self.RetryManager(fatal=False):
try: try:
stream = self._download_json( stream = self._call_api(
format_url, track_id, f'Downloading {identifier} format info JSON', format_url, track_id, f'Downloading {identifier} format info JSON',
query=query, headers=self._HEADERS) query=query, headers=self._HEADERS)
except ExtractorError as e: except ExtractorError as e:
@ -630,7 +644,7 @@ def _real_extract(self, url):
resolve_title += f'/{token}' resolve_title += f'/{token}'
info_json_url = self._resolv_url(self._BASE_URL + resolve_title) info_json_url = self._resolv_url(self._BASE_URL + resolve_title)
info = self._download_json( info = self._call_api(
info_json_url, full_title, 'Downloading info JSON', query=query, headers=self._HEADERS) info_json_url, full_title, 'Downloading info JSON', query=query, headers=self._HEADERS)
return self._extract_info_dict(info, full_title, token) return self._extract_info_dict(info, full_title, token)
@ -641,7 +655,7 @@ def _extract_set(self, playlist, token=None):
playlist_id = str(playlist['id']) playlist_id = str(playlist['id'])
tracks = playlist.get('tracks') or [] tracks = playlist.get('tracks') or []
if not all(t.get('permalink_url') for t in tracks) and token: if not all(t.get('permalink_url') for t in tracks) and token:
tracks = self._download_json( tracks = self._call_api(
self._API_V2_BASE + 'tracks', playlist_id, self._API_V2_BASE + 'tracks', playlist_id,
'Downloading tracks', query={ 'Downloading tracks', query={
'ids': ','.join([str(t['id']) for t in tracks]), 'ids': ','.join([str(t['id']) for t in tracks]),
@ -699,7 +713,7 @@ def _real_extract(self, url):
if token: if token:
full_title += '/' + token full_title += '/' + token
info = self._download_json(self._resolv_url( info = self._call_api(self._resolv_url(
self._BASE_URL + full_title), full_title, headers=self._HEADERS) self._BASE_URL + full_title), full_title, headers=self._HEADERS)
if 'errors' in info: if 'errors' in info:
@ -730,7 +744,7 @@ def _entries(self, url, playlist_id):
for i in itertools.count(): for i in itertools.count():
for retry in self.RetryManager(): for retry in self.RetryManager():
try: try:
response = self._download_json( response = self._call_api(
url, playlist_id, query=query, headers=self._HEADERS, url, playlist_id, query=query, headers=self._HEADERS,
note=f'Downloading track page {i + 1}') note=f'Downloading track page {i + 1}')
break break
@ -838,7 +852,7 @@ def _real_extract(self, url):
mobj = self._match_valid_url(url) mobj = self._match_valid_url(url)
uploader = mobj.group('user') uploader = mobj.group('user')
user = self._download_json( user = self._call_api(
self._resolv_url(self._BASE_URL + uploader), self._resolv_url(self._BASE_URL + uploader),
uploader, 'Downloading user info', headers=self._HEADERS) uploader, 'Downloading user info', headers=self._HEADERS)
@ -864,7 +878,7 @@ class SoundcloudUserPermalinkIE(SoundcloudPagedPlaylistBaseIE):
def _real_extract(self, url): def _real_extract(self, url):
user_id = self._match_id(url) user_id = self._match_id(url)
user = self._download_json( user = self._call_api(
self._resolv_url(url), user_id, 'Downloading user info', headers=self._HEADERS) self._resolv_url(url), user_id, 'Downloading user info', headers=self._HEADERS)
return self._extract_playlist( return self._extract_playlist(
@ -886,7 +900,7 @@ class SoundcloudTrackStationIE(SoundcloudPagedPlaylistBaseIE):
def _real_extract(self, url): def _real_extract(self, url):
track_name = self._match_id(url) track_name = self._match_id(url)
track = self._download_json(self._resolv_url(url), track_name, headers=self._HEADERS) track = self._call_api(self._resolv_url(url), track_name, headers=self._HEADERS)
track_id = self._search_regex( track_id = self._search_regex(
r'soundcloud:track-stations:(\d+)', track['id'], 'track id') r'soundcloud:track-stations:(\d+)', track['id'], 'track id')
@ -930,7 +944,7 @@ class SoundcloudRelatedIE(SoundcloudPagedPlaylistBaseIE):
def _real_extract(self, url): def _real_extract(self, url):
slug, relation = self._match_valid_url(url).group('slug', 'relation') slug, relation = self._match_valid_url(url).group('slug', 'relation')
track = self._download_json( track = self._call_api(
self._resolv_url(self._BASE_URL + slug), self._resolv_url(self._BASE_URL + slug),
slug, 'Downloading track info', headers=self._HEADERS) slug, 'Downloading track info', headers=self._HEADERS)
@ -965,7 +979,7 @@ def _real_extract(self, url):
if token: if token:
query['secret_token'] = token query['secret_token'] = token
data = self._download_json( data = self._call_api(
self._API_V2_BASE + 'playlists/' + playlist_id, self._API_V2_BASE + 'playlists/' + playlist_id,
playlist_id, 'Downloading playlist', query=query, headers=self._HEADERS) playlist_id, 'Downloading playlist', query=query, headers=self._HEADERS)
@ -1000,7 +1014,7 @@ def _get_collection(self, endpoint, collection_id, **query):
next_url = update_url_query(self._API_V2_BASE + endpoint, query) next_url = update_url_query(self._API_V2_BASE + endpoint, query)
for i in itertools.count(1): for i in itertools.count(1):
response = self._download_json( response = self._call_api(
next_url, collection_id, f'Downloading page {i}', next_url, collection_id, f'Downloading page {i}',
'Unable to download API page', headers=self._HEADERS) 'Unable to download API page', headers=self._HEADERS)