[furaffinity] add 'descriptions' option (#1231)

2024-11-25 04:02:32 +01:00 · 2021-01-19 19:09:29 +01:00 · 2021-01-19 19:09:29 +01:00 · 89a2bcbb2d
commit 89a2bcbb2d
parent 36f281330a
2 changed files with 24 additions and 4 deletions
--- a/docs/configuration.rst
+++ b/docs/configuration.rst
@@ -969,6 +969,19 @@ Description
      ``"l"``, ...) to use as an upper limit.


+extractor.furaffinity.descriptions
+----------------------------------
+Type
+    ``string``
+Default
+    ``"text"``
+Description
+    Controls the format of ``description`` metadata fields.
+
+    * ``"text"``: Plain text with HTML tags removed
+    * ``"html"``: Raw HTML content
+
+
 extractor.furaffinity.include
 -----------------------------
 Type
--- a/gallery_dl/extractor/furaffinity.py
+++ b/gallery_dl/extractor/furaffinity.py
@@ -29,6 +29,9 @@ class FuraffinityExtractor(Extractor):
        self.user = match.group(1)
        self.offset = 0

+        if self.config("descriptions") == "html":
+            self._process_description = lambda x: x.strip()
+
    def items(self):
        metadata = self.metadata()
        for post_id in util.advance(self.posts(), self.offset):
@@ -83,8 +86,8 @@ class FuraffinityExtractor(Extractor):
        if tags:
            # new site layout
            data["tags"] = text.split_html(tags)
-            data["description"] = text.unescape(rh(extr(
-                'class="section-body">', '</div>'), "", ""))
+            data["description"] = self._process_description(extr(
+                'class="section-body">', '</div>'))
            data["views"] = pi(rh(extr('class="views">', '</span>')))
            data["favorites"] = pi(rh(extr('class="favorites">', '</span>')))
            data["comments"] = pi(rh(extr('class="comments">', '</span>')))
@@ -109,12 +112,16 @@ class FuraffinityExtractor(Extractor):
            data["tags"] = text.split_html(extr(
                'id="keywords">', '</div>'))[::2]
            data["rating"] = extr('<img alt="', ' ')
-            data["description"] = text.unescape(text.remove_html(extr(
-                "</table>", "</table>"), "", ""))
+            data["description"] = self._process_description(extr(
+                "</table>", "</table>"))
        data["date"] = text.parse_timestamp(data["filename"].partition(".")[0])

        return data

+    @staticmethod
+    def _process_description(description):
+        return text.unescape(text.remove_html(description, "", ""))
+
    def _pagination(self):
        num = 1