From cfb7b3dd7147c2148e65b4361885c4bcd562fcfa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Wed, 9 Oct 2024 20:59:36 +0200 Subject: [PATCH] [deviantart] improve 'tiptap' conversion (#6207) - support literature link embeds - support @ mentions - support more text styles --- gallery_dl/extractor/deviantart.py | 175 +++++++++++++++++++++-------- 1 file changed, 129 insertions(+), 46 deletions(-) diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py index d971219e..60846f52 100644 --- a/gallery_dl/extractor/deviantart.py +++ b/gallery_dl/extractor/deviantart.py @@ -373,13 +373,13 @@ class DeviantartExtractor(Extractor): html = text.extr( page, "

Literature Text

", - "
") + "") if html: return {"html": html} - self.log.warning("%s: Failed to extract journal HTML from " - "webpage. Falling back to __INITIAL_STATE__ " - "markup.", deviation["index"]) + self.log.debug("%s: Failed to extract journal HTML from webpage. " + "Falling back to __INITIAL_STATE__ markup.", + deviation["index"]) # parse __INITIAL_STATE__ as fallback state = util.json_loads(text.extr( @@ -405,7 +405,12 @@ class DeviantartExtractor(Extractor): return markup if html["type"] == "tiptap": - return self._tiptap_to_html(markup) + try: + return self._tiptap_to_html(markup) + except Exception as exc: + self.log.debug("", exc_info=exc) + self.log.error("%s: '%s: %s'", deviation["index"], + exc.__class__.__name__, exc) self.log.warning("%s: Unsupported '%s' markup.", deviation["index"], html["type"]) @@ -426,37 +431,89 @@ class DeviantartExtractor(Extractor): type = content["type"] if type == "paragraph": - html.append('

') + attrs = content["attrs"] + if "textAlign" in attrs: + html.append("text-align:") + html.append(attrs["textAlign"]) + html.append(";") + html.append('margin-inline-start:0px">') - for block in content["content"]: - self._tiptap_process_content(html, block) - - html.append("

") + for block in children: + self._tiptap_process_content(html, block) + html.append("

") + else: + html.append('


') elif type == "text": - html.append(text.escape(content["text"])) + self._tiptap_process_text(html, content) elif type == "hardBreak": html.append("

") + elif type == "horizontalRule": + html.append("
") + elif type == "da-deviation": - dev = content["attrs"]["deviation"] - url, formats = self._eclipse_media(dev["media"]) + self._tiptap_process_deviation(html, content) + + elif type == "da-mention": + user = content["attrs"]["user"]["username"] + html.append('@') + html.append(user) + html.append('') + + else: + self.log.warning("Unsupported content type '%s'", type) + + def _tiptap_process_text(self, html, content): + marks = content.get("marks") + if marks: + close = [] + for mark in marks: + type = mark["type"] + if type == "link": + html.append('') + close.append("") + elif type == "bold": + html.append("") + close.append("") + elif type == "italic": + html.append("") + close.append("") + elif type == "underline": + html.append("") + close.append("") + elif type == "textStyle" and len(mark) <= 1: + pass + else: + self.log.warning("Unsupported text marker '%s'", type) + close.reverse() + html.append(text.escape(content["text"])) + html.extend(close) + else: + html.append(text.escape(content["text"])) + + def _tiptap_process_deviation(self, html, content): + dev = content["attrs"]["deviation"] + media = dev.get("media") or () + + html.append('
') + html.append('
') + + if "baseUri" in media: + url, formats = self._eclipse_media(media) full = formats["fullview"] - html.append('
') - - html.append('
') - html.append('') + html.append("") - html.append("
") + elif "textContent" in dev: + html.append('') + + html.append('
') def _extract_content(self, deviation): content = deviation["content"] @@ -1938,25 +2018,28 @@ JOURNAL_TEMPLATE_HTML = """text: {title} - - - - - - + + + + + + - - - + + + +