2021-01-24 23:44:03 +01:00
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
|
2023-09-11 16:30:55 +02:00
|
|
|
# Copyright 2021-2023 Mike Fährmann
|
2021-01-24 23:44:03 +01:00
|
|
|
#
|
|
|
|
# This program is free software; you can redistribute it and/or modify
|
|
|
|
# it under the terms of the GNU General Public License version 2 as
|
|
|
|
# published by the Free Software Foundation.
|
|
|
|
|
|
|
|
"""Extractors for https://www.pillowfort.social/"""
|
|
|
|
|
|
|
|
from .common import Extractor, Message
|
2021-05-19 02:57:36 +02:00
|
|
|
from ..cache import cache
|
|
|
|
from .. import text, exception
|
2021-05-17 02:57:02 +02:00
|
|
|
import re
|
2021-01-24 23:44:03 +01:00
|
|
|
|
|
|
|
# Regex prefix reused by the extractor URL patterns in this module:
# an optional "http://" or "https://" scheme followed by the literal
# host name "www.pillowfort.social".
BASE_PATTERN = r"(?:https?://)?www\.pillowfort\.social"
|
|
|
|
|
|
|
|
|
|
|
|
class PillowfortExtractor(Extractor):
    """Base class for pillowfort extractors

    Implements login handling and the conversion of post objects into
    extractor messages.  `self.posts()` is not defined here; it has to be
    supplied by the concrete extractor and must return an iterable of
    post dictionaries.
    """
    category = "pillowfort"
    root = "https://www.pillowfort.social"
    directory_fmt = ("{category}", "{username}")
    filename_fmt = ("{post_id} {title|original_post[title]:?/ /}"
                    "{num:>02}.{extension}")
    archive_fmt = "{id}"
    cookies_domain = "www.pillowfort.social"

    def __init__(self, match):
        """Store the first capture group of the URL match as `self.item`."""
        Extractor.__init__(self, match)
        self.item = match.group(1)

    def items(self):
        """Yield a directory message per post and one message per file.

        Options read via `self.config()`:
          inline   (default True)  - also collect image URLs found in
                                     `src="..."` attributes of the post
                                     content
          reblogs  (default False) - include posts that carry an
                                     "original_post" entry
          external (default False) - emit files that have an "embed_code"
                                     as `Message.Queue` entries instead of
                                     skipping them
        """
        self.login()

        reblogs = self.config("reblogs", False)
        external = self.config("external", False)

        # Matcher for images embedded in the post's HTML content;
        # stays None when the "inline" option is disabled.
        find_inline = None
        if self.config("inline", True):
            find_inline = re.compile(
                r'src="(https://img\d+\.pillowfort\.social'
                r'/posts/[^"]+)').findall

        date_format = "%Y-%m-%dT%H:%M:%S.%f%z"

        for post in self.posts():
            if "original_post" in post and not reblogs:
                continue

            files = post.pop("media")
            if find_inline:
                for url in find_inline(post["content"]):
                    files.append({"url": url})

            post["date"] = text.parse_datetime(
                post["created_at"], date_format)
            # Free the "id" key: it is reused below for the per-file ID.
            post["post_id"] = post.pop("id")
            yield Message.Directory, post

            post["num"] = 0
            for file in files:
                # Use .get() so an entry without a "url" key falls back to
                # "b2_lg_url" instead of raising KeyError.
                url = file.get("url") or file.get("b2_lg_url")
                if not url:
                    continue

                if file.get("embed_code"):
                    if not external:
                        continue
                    msgtype = Message.Queue
                else:
                    # Only directly downloadable files are numbered.
                    post["num"] += 1
                    msgtype = Message.Url

                # File fields override post fields of the same name.
                post.update(file)
                text.nameext_from_url(url, post)
                # Split the file name at its first underscore: the leading
                # part is stored as "hash", the remainder as "filename".
                post["hash"], _, post["filename"] = \
                    post["filename"].partition("_")

                if "id" not in file:
                    # Entries without their own ID (e.g. inline images
                    # added above) use the hash as archive ID.
                    post["id"] = post["hash"]
                if "created_at" in file:
                    post["date"] = text.parse_datetime(
                        file["created_at"], date_format)

                yield msgtype, url, post

    def login(self):
        """Log in with configured credentials unless already authenticated.

        Nothing is done when a "_Pf_new_session" or "remember_user_token"
        cookie is present for `cookies_domain`, or when no username is
        configured.
        """
        for name in ("_Pf_new_session", "remember_user_token"):
            if self.cookies.get(name, domain=self.cookies_domain):
                return

        username, password = self._get_auth_info()
        if username:
            self.cookies_update(self._login_impl(username, password))

    # NOTE(review): maxage is 14 days in seconds; keyarg=1 presumably
    # selects `username` as the cache key - confirm against `cache`.
    @cache(maxage=14*24*3600, keyarg=1)
    def _login_impl(self, username, password):
        """Submit the sign-in form and return the resulting cookies.

        Returns a dict mapping cookie names to values, taken from the
        first response of the redirect chain.

        Raises exception.AuthenticationError when the POST request was
        not redirected.
        """
        self.log.info("Logging in as %s", username)

        # Build the URL from `self.root` so it stays in sync with the
        # "Origin" header below.
        url = self.root + "/users/sign_in"
        page = self.request(url).text
        auth = text.extr(page, 'name="authenticity_token" value="', '"')

        headers = {"Origin": self.root, "Referer": url}
        data = {
            "utf8"              : "✓",
            "authenticity_token": auth,
            "user[email]"       : username,
            "user[password]"    : password,
            "user[remember_me]" : "1",
        }
        response = self.request(url, method="POST", headers=headers, data=data)

        # An empty redirect history is treated as a failed sign-in.
        if not response.history:
            raise exception.AuthenticationError()

        return {
            cookie.name: cookie.value
            for cookie in response.history[0].cookies
        }
|
|
|
|
|
2021-01-24 23:44:03 +01:00
|
|
|
|
|
|
|
class PillowfortPostExtractor(PillowfortExtractor):
    """Extractor for one pillowfort post addressed by its numeric ID"""
    subcategory = "post"
    pattern = BASE_PATTERN + r"/posts/(\d+)"
    example = "https://www.pillowfort.social/posts/12345"

    def posts(self):
        """Return a 1-tuple holding the post's decoded JSON object."""
        # `self.item` is the first capture group of `pattern`.
        endpoint = "{}/posts/{}/json/".format(self.root, self.item)
        post = self.request(endpoint).json()
        return (post,)
|
|
|
|
|
|
|
|
|
|
|
|
class PillowfortUserExtractor(PillowfortExtractor):
    """Extractor for every post listed for a pillowfort user"""
    subcategory = "user"
    pattern = BASE_PATTERN + r"/(?!posts/)([^/?#]+(?:/tagged/[^/?#]+)?)"
    example = "https://www.pillowfort.social/USER"

    def posts(self):
        """Yield post objects page by page, starting at page 1.

        Pagination stops after the first page containing fewer than
        20 posts.
        """
        # `self.item` is the first capture group of `pattern`
        # (a user name, optionally followed by "/tagged/<tag>").
        endpoint = "{}/{}/json/".format(self.root, self.item)
        query = {"p": 1}
        page_size = 20

        while True:
            batch = self.request(endpoint, params=query).json()["posts"]
            yield from batch

            if len(batch) >= page_size:
                query["p"] += 1
            else:
                # A short page is taken as the end of the listing.
                return
|