From e763efd36cd3496ecd5f110f37ad905e7063aa41 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mike=20F=C3=A4hrmann?=
Date: Thu, 14 Nov 2024 23:06:26 +0100
Subject: [PATCH] [bilibili] add workarounds for getting rate-limited (#6443)

- set 3-6 second request_interval by default
- retry request after waiting 5 minutes
---
 docs/configuration.rst           |  6 +++++-
 gallery_dl/extractor/bilibili.py | 15 ++++++++++++---
 2 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/docs/configuration.rst b/docs/configuration.rst
index f5ef9bdd..ebbd3a0c 100644
--- a/docs/configuration.rst
+++ b/docs/configuration.rst
@@ -394,7 +394,11 @@ Default
     * ``"2.0-4.0"``
         ``behance``, ``imagefap``, ``[Nijie]``
     * ``"3.0-6.0"``
-        ``exhentai``, ``idolcomplex``, ``[reactor]``, ``readcomiconline``
+        ``bilibili``,
+        ``exhentai``,
+        ``idolcomplex``,
+        ``[reactor]``,
+        ``readcomiconline``
     * ``"6.0-6.1"``
         ``twibooru``
     * ``"6.0-12.0"``
diff --git a/gallery_dl/extractor/bilibili.py b/gallery_dl/extractor/bilibili.py
index 718bbf37..d5c419eb 100644
--- a/gallery_dl/extractor/bilibili.py
+++ b/gallery_dl/extractor/bilibili.py
@@ -14,6 +14,7 @@ class BilibiliExtractor(Extractor):
     """Base class for bilibili extractors"""
     category = "bilibili"
     root = "https://www.bilibili.com"
+    request_interval = (3.0, 6.0)
 
     def _init(self):
         self.api = BilibiliAPI(self)
@@ -102,6 +103,14 @@ class BilibiliAPI():
 
     def article(self, article_id):
         url = "https://www.bilibili.com/opus/" + article_id
-        response = self.extractor.request(url)
-        return util.json_loads(text.extr(
-            response.text, "window.__INITIAL_STATE__=", "};") + "}")
+
+        while True:
+            page = self.extractor.request(url).text
+            try:
+                return util.json_loads(text.extr(
+                    page, "window.__INITIAL_STATE__=", "};") + "}")
+            except Exception:
+                if "window._riskdata_" not in page:
+                    raise exception.StopExtraction(
+                        "%s: Unable to extract INITIAL_STATE data", article_id)
+                self.extractor.wait(seconds=300)