1
0
mirror of https://github.com/mikf/gallery-dl.git synced 2024-11-26 04:32:51 +01:00

[pillowfort] add 'inline' option (#846)

This adds support for images that are present in a post's 'content'
but not listed in 'media'.

Also separates the file hash present at the beginning
of each 'filename' into its own field.
This commit is contained in:
Mike Fährmann 2021-05-17 02:57:02 +02:00
parent efa6cc8ec3
commit a7e4917ee1
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88
3 changed files with 38 additions and 6 deletions

View File

@ -1316,6 +1316,16 @@ Description
Follow links to external sites, e.g. Twitter, Follow links to external sites, e.g. Twitter,
extractor.pillowfort.inline
---------------------------
Type
``bool``
Default
``true``
Description
Extract inline images.
extractor.pillowfort.reblogs extractor.pillowfort.reblogs
---------------------------- ----------------------------
Type Type

View File

@ -177,6 +177,7 @@
"pillowfort": "pillowfort":
{ {
"external": false, "external": false,
"inline": true,
"reblogs": false "reblogs": false
}, },
"pinterest": "pinterest":

View File

@ -10,6 +10,7 @@
from .common import Extractor, Message from .common import Extractor, Message
from .. import text from .. import text
import re
BASE_PATTERN = r"(?:https?://)?www\.pillowfort\.social" BASE_PATTERN = r"(?:https?://)?www\.pillowfort\.social"
@ -28,18 +29,26 @@ class PillowfortExtractor(Extractor):
self.item = match.group(1) self.item = match.group(1)
def items(self): def items(self):
inline = self.config("inline", True)
reblogs = self.config("reblogs", False) reblogs = self.config("reblogs", False)
external = self.config("external", False) external = self.config("external", False)
if inline:
inline = re.compile(r'src="(https://img\d+\.pillowfort\.social'
r'/posts/[^"]+)').findall
for post in self.posts(): for post in self.posts():
if "original_post" in post and not reblogs: if "original_post" in post and not reblogs:
continue continue
files = post["media"] files = post.pop("media")
del post["media"] if inline:
for url in inline(post["content"]):
files.append({"url": url})
post["date"] = text.parse_datetime( post["date"] = text.parse_datetime(
post["created_at"], "%Y-%m-%dT%H:%M:%S.%f%z") post["created_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
post["post_id"] = post.pop("id")
yield Message.Directory, post yield Message.Directory, post
post["num"] = 0 post["num"] = 0
@ -57,9 +66,17 @@ class PillowfortExtractor(Extractor):
msgtype = Message.Url msgtype = Message.Url
post.update(file) post.update(file)
text.nameext_from_url(url, post)
post["hash"], _, post["filename"] = \
post["filename"].partition("_")
if "id" not in file:
post["id"] = post["hash"]
if "created_at" in file:
post["date"] = text.parse_datetime( post["date"] = text.parse_datetime(
file["created_at"], "%Y-%m-%dT%H:%M:%S.%f%z") file["created_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
yield msgtype, url, text.nameext_from_url(url, post)
yield msgtype, url, post
class PillowfortPostExtractor(PillowfortExtractor): class PillowfortPostExtractor(PillowfortExtractor):
@ -120,10 +137,14 @@ class PillowfortPostExtractor(PillowfortExtractor):
}, },
}), }),
("https://www.pillowfort.social/posts/1557500", { ("https://www.pillowfort.social/posts/1557500", {
"options": (("external", True),), "options": (("external", True), ("inline", False)),
"pattern": r"https://twitter\.com/Aliciawitdaart/status" "pattern": r"https://twitter\.com/Aliciawitdaart/status"
r"/1282862493841457152", r"/1282862493841457152",
}), }),
("https://www.pillowfort.social/posts/1672518", {
"options": (("inline", True),),
"count": 3,
}),
) )
def posts(self): def posts(self):