[pillowfort] add 'inline' option (#846)

Adds support for images that are present in a post's 'content' but not listed in its 'media'. Also separates the file hash at the beginning of each 'filename' into its own 'hash' field.
2024-11-22 10:42:34 +01:00 · 2021-05-17 02:57:02 +02:00 · 2021-05-17 02:57:02 +02:00 · a7e4917ee1
commit a7e4917ee1
parent efa6cc8ec3
3 changed files with 38 additions and 6 deletions
--- a/docs/configuration.rst
+++ b/docs/configuration.rst
@ -1316,6 +1316,16 @@ Description
    Follow links to external sites, e.g. Twitter,
 extractor.pillowfort.inline
 ---------------------------
 Type
    ``bool``
 Default
    ``true``
 Description
    Extract inline images.
 extractor.pillowfort.reblogs
 ----------------------------
 Type
--- a/docs/gallery-dl.conf
+++ b/docs/gallery-dl.conf
@ -177,6 +177,7 @@
        "pillowfort":
        {
            "external": false,
            "inline": true,
            "reblogs": false
        },
        "pinterest":
--- a/gallery_dl/extractor/pillowfort.py
+++ b/gallery_dl/extractor/pillowfort.py
@ -10,6 +10,7 @@
 from .common import Extractor, Message
 from .. import text
 import re
 BASE_PATTERN = r"(?:https?://)?www\.pillowfort\.social"
@ -28,18 +29,26 @@ class PillowfortExtractor(Extractor):
        self.item = match.group(1)
    def items(self):
        inline = self.config("inline", True)
        reblogs = self.config("reblogs", False)
        external = self.config("external", False)
        if inline:
            inline = re.compile(r'src="(https://img\d+\.pillowfort\.social'
                                r'/posts/[^"]+)').findall
        for post in self.posts():
            if "original_post" in post and not reblogs:
                continue
-            files = post["media"]
+            files = post.pop("media")
-            del post["media"]
+            if inline:
                for url in inline(post["content"]):
                    files.append({"url": url})
            post["date"] = text.parse_datetime(
                post["created_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
            post["post_id"] = post.pop("id")
            yield Message.Directory, post
            post["num"] = 0
@ -57,9 +66,17 @@ class PillowfortExtractor(Extractor):
                    msgtype = Message.Url
                post.update(file)
-                post["date"] = text.parse_datetime(
+                text.nameext_from_url(url, post)
-                    file["created_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
+                post["hash"], _, post["filename"] = \
-                yield msgtype, url, text.nameext_from_url(url, post)
+                    post["filename"].partition("_")
                if "id" not in file:
                    post["id"] = post["hash"]
                if "created_at" in file:
                    post["date"] = text.parse_datetime(
                        file["created_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
                yield msgtype, url, post
 class PillowfortPostExtractor(PillowfortExtractor):
@ -120,10 +137,14 @@ class PillowfortPostExtractor(PillowfortExtractor):
            },
        }),
        ("https://www.pillowfort.social/posts/1557500", {
-            "options": (("external", True),),
+            "options": (("external", True), ("inline", False)),
            "pattern": r"https://twitter\.com/Aliciawitdaart/status"
                       r"/1282862493841457152",
        }),
        ("https://www.pillowfort.social/posts/1672518", {
            "options": (("inline", True),),
            "count": 3,
        }),
    )
    def posts(self):