[extractor/BilibiliSpace] Fix extractor, better error message (#5043)

Closes #5038 Authored by: lockmatrix
2024-11-10 01:02:13 +01:00 · 2022-10-04 10:59:05 +08:00 · 2022-10-04 10:59:05 +08:00 · 12f153a827
commit 12f153a827
parent 0d887f273a
1 changed files with 18 additions and 8 deletions
--- a/yt_dlp/extractor/bilibili.py
+++ b/yt_dlp/extractor/bilibili.py
@ -4,6 +4,7 @@
 import functools
 import math
 import re
+import urllib

 from .common import InfoExtractor, SearchInfoExtractor
 from ..compat import (
@ -508,11 +509,11 @@ def _real_extract(self, url):

 class BilibiliSpaceBaseIE(InfoExtractor):
    def _extract_playlist(self, fetch_page, get_metadata, get_entries):
-        first_page = fetch_page(1)
+        first_page = fetch_page(0)
        metadata = get_metadata(first_page)

        paged_list = InAdvancePagedList(
-            lambda idx: get_entries(fetch_page(idx) if idx > 1 else first_page),
+            lambda idx: get_entries(fetch_page(idx) if idx else first_page),
            metadata['page_count'], metadata['page_size'])

        return metadata, paged_list
@ -535,10 +536,19 @@ def _real_extract(self, url):
                           'To download audios, add a "/audio" to the URL')

        def fetch_page(page_idx):
-            return self._download_json(
-                'https://api.bilibili.com/x/space/arc/search', playlist_id,
-                note=f'Downloading page {page_idx}',
-                query={'mid': playlist_id, 'pn': page_idx, 'jsonp': 'jsonp'})['data']
+            try:
+                response = self._download_json('https://api.bilibili.com/x/space/arc/search',
+                                               playlist_id, note=f'Downloading page {page_idx}',
+                                               query={'mid': playlist_id, 'pn': page_idx + 1, 'jsonp': 'jsonp'})
+            except ExtractorError as e:
+                if isinstance(e.cause, urllib.error.HTTPError) and e.cause.code == 412:
+                    raise ExtractorError(
+                        'Request is blocked by server (412), please add cookies, wait and try later.', expected=True)
+                raise
+            if response['code'] == -401:
+                raise ExtractorError(
+                    'Request is blocked by server (401), please add cookies, wait and try later.', expected=True)
+            return response['data']

        def get_metadata(page_data):
            page_size = page_data['page']['ps']
@ -573,7 +583,7 @@ def fetch_page(page_idx):
            return self._download_json(
                'https://api.bilibili.com/audio/music-service/web/song/upper', playlist_id,
                note=f'Downloading page {page_idx}',
-                query={'uid': playlist_id, 'pn': page_idx, 'ps': 30, 'order': 1, 'jsonp': 'jsonp'})['data']
+                query={'uid': playlist_id, 'pn': page_idx + 1, 'ps': 30, 'order': 1, 'jsonp': 'jsonp'})['data']

        def get_metadata(page_data):
            return {
@ -608,7 +618,7 @@ def fetch_page(page_idx):
            return self._download_json(
                'https://api.bilibili.com/x/polymer/space/seasons_archives_list',
                playlist_id, note=f'Downloading page {page_idx}',
-                query={'mid': mid, 'season_id': sid, 'page_num': page_idx, 'page_size': 30})['data']
+                query={'mid': mid, 'season_id': sid, 'page_num': page_idx + 1, 'page_size': 30})['data']

        def get_metadata(page_data):
            page_size = page_data['page']['page_size']