From cfb7b3dd7147c2148e65b4361885c4bcd562fcfa Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mike=20F=C3=A4hrmann?=
Date: Wed, 9 Oct 2024 20:59:36 +0200
Subject: [PATCH] [deviantart] improve 'tiptap' conversion (#6207)
- support literature link embeds
- support @ mentions
- support more text styles
---
gallery_dl/extractor/deviantart.py | 175 +++++++++++++++++++++--------
1 file changed, 129 insertions(+), 46 deletions(-)
diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py
index d971219e..60846f52 100644
--- a/gallery_dl/extractor/deviantart.py
+++ b/gallery_dl/extractor/deviantart.py
@@ -373,13 +373,13 @@ class DeviantartExtractor(Extractor):
html = text.extr(
page,
"Literature Text
",
- "
")
+ "")
if html:
return {"html": html}
- self.log.warning("%s: Failed to extract journal HTML from "
- "webpage. Falling back to __INITIAL_STATE__ "
- "markup.", deviation["index"])
+ self.log.debug("%s: Failed to extract journal HTML from webpage. "
+ "Falling back to __INITIAL_STATE__ markup.",
+ deviation["index"])
# parse __INITIAL_STATE__ as fallback
state = util.json_loads(text.extr(
@@ -405,7 +405,12 @@ class DeviantartExtractor(Extractor):
return markup
if html["type"] == "tiptap":
- return self._tiptap_to_html(markup)
+ try:
+ return self._tiptap_to_html(markup)
+ except Exception as exc:
+ self.log.debug("", exc_info=exc)
+ self.log.error("%s: '%s: %s'", deviation["index"],
+ exc.__class__.__name__, exc)
self.log.warning("%s: Unsupported '%s' markup.",
deviation["index"], html["type"])
@@ -426,37 +431,89 @@ class DeviantartExtractor(Extractor):
type = content["type"]
if type == "paragraph":
- html.append('')
+ attrs = content["attrs"]
+ if "textAlign" in attrs:
+ html.append("text-align:")
+ html.append(attrs["textAlign"])
+ html.append(";")
+ html.append('margin-inline-start:0px">')
- for block in content["content"]:
- self._tiptap_process_content(html, block)
-
- html.append("
")
+ for block in children:
+ self._tiptap_process_content(html, block)
+ html.append("
")
+ else:
+ html.append('
')
elif type == "text":
- html.append(text.escape(content["text"]))
+ self._tiptap_process_text(html, content)
elif type == "hardBreak":
html.append("
")
+ elif type == "horizontalRule":
+ html.append("
")
+
elif type == "da-deviation":
- dev = content["attrs"]["deviation"]
- url, formats = self._eclipse_media(dev["media"])
+ self._tiptap_process_deviation(html, content)
+
+ elif type == "da-mention":
+ user = content["attrs"]["user"]["username"]
+ html.append('@')
+ html.append(user)
+ html.append('')
+
+ else:
+ self.log.warning("Unsupported content type '%s'", type)
+
+ def _tiptap_process_text(self, html, content):
+ marks = content.get("marks")
+ if marks:
+ close = []
+ for mark in marks:
+ type = mark["type"]
+ if type == "link":
+ html.append('')
+ close.append("")
+ elif type == "bold":
+ html.append("")
+ close.append("")
+ elif type == "italic":
+ html.append("")
+ close.append("")
+ elif type == "underline":
+ html.append("")
+ close.append("")
+ elif type == "textStyle" and len(mark) <= 1:
+ pass
+ else:
+ self.log.warning("Unsupported text marker '%s'", type)
+ close.reverse()
+ html.append(text.escape(content["text"]))
+ html.extend(close)
+ else:
+ html.append(text.escape(content["text"]))
+
+ def _tiptap_process_deviation(self, html, content):
+ dev = content["attrs"]["deviation"]
+ media = dev.get("media") or ()
+
+ html.append('')
+ html.append('
')
def _extract_content(self, deviation):
content = deviation["content"]
@@ -1938,25 +2018,28 @@ JOURNAL_TEMPLATE_HTML = """text:
{title}
-
-
-
-
-
-
+
+
+
+
+
+
-
-
-
+
+
+
+