From a9671f160b0fdd5dc84c71e5363721a6bf8db569 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Sat, 5 Oct 2024 23:16:46 +0200 Subject: [PATCH] [deviantart] support converting 'tiptap' markup to HTML (#6207) --- gallery_dl/extractor/deviantart.py | 107 +++++++++++++++++++++++++++-- 1 file changed, 100 insertions(+), 7 deletions(-) diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py index 5d778cec..d971219e 100644 --- a/gallery_dl/extractor/deviantart.py +++ b/gallery_dl/extractor/deviantart.py @@ -385,21 +385,97 @@ class DeviantartExtractor(Extractor): state = util.json_loads(text.extr( page, 'window.__INITIAL_STATE__ = JSON.parse("', '");') .replace("\\\\", "\\").replace("\\'", "'").replace('\\"', '"')) - deviations = state["@@entities"]["deviation"] content = deviations.popitem()[1]["textContent"] - html = content["html"]["markup"] - if html.startswith("{"): - self.log.warning("%s: Unsupported '%s' markup.", - deviation["index"], content["html"]["type"]) - html = content["excerpt"].replace("\n", "
") - return {"html": html} + html = self._textcontent_to_html(deviation, content) + if html: + return {"html": html} + return {"html": content["excerpt"].replace("\n", "
")} if "body" in deviation: return {"html": deviation.pop("body")} return None + def _textcontent_to_html(self, deviation, content): + html = content["html"] + markup = html["markup"] + + if not markup.startswith("{"): + return markup + + if html["type"] == "tiptap": + return self._tiptap_to_html(markup) + + self.log.warning("%s: Unsupported '%s' markup.", + deviation["index"], html["type"]) + + def _tiptap_to_html(self, markup): + html = [] + + html.append('
') + data = util.json_loads(markup) + for block in data["document"]["content"]: + self._tiptap_process_content(html, block) + html.append("
") + + return "".join(html) + + def _tiptap_process_content(self, html, content): + type = content["type"] + + if type == "paragraph": + html.append('

') + + for block in content["content"]: + self._tiptap_process_content(html, block) + + html.append("

") + + elif type == "text": + html.append(text.escape(content["text"])) + + elif type == "hardBreak": + html.append("

") + + elif type == "da-deviation": + dev = content["attrs"]["deviation"] + url, formats = self._eclipse_media(dev["media"]) + full = formats["fullview"] + + html.append('
') + + html.append('
') + + html.append('') + + html.append('')
+            html.append(text.escape(dev[') + + html.append("
") + + else: + self.log.warning("Unsupported content type '%s'", type) + def _extract_content(self, deviation): content = deviation["content"] @@ -577,6 +653,23 @@ class DeviantartExtractor(Extractor): self.log.info("Unwatching %s", username) self.api.user_friends_unwatch(username) + def _eclipse_media(self, media, format="preview"): + url = [media["baseUri"], ] + + formats = { + fmt["t"]: fmt + for fmt in media["types"] + } + + tokens = media["token"] + if len(tokens) == 1: + fmt = formats[format] + url.append(fmt["c"].replace("", media["prettyName"])) + url.append("?token=") + url.append(tokens[-1]) + + return "".join(url), formats + def _eclipse_to_oauth(self, eclipse_api, deviations): for obj in deviations: deviation = obj["deviation"] if "deviation" in obj else obj