2022-06-20 11:25:42 +02:00
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
|
2023-01-05 12:29:31 +01:00
|
|
|
# Copyright 2022-2023 Mike Fährmann
|
2022-06-20 11:25:42 +02:00
|
|
|
#
|
|
|
|
# This program is free software; you can redistribute it and/or modify
|
|
|
|
# it under the terms of the GNU General Public License version 2 as
|
|
|
|
# published by the Free Software Foundation.
|
|
|
|
|
|
|
|
"""Extractors for https://poipiku.com/"""
|
|
|
|
|
|
|
|
from .common import Extractor, Message
|
|
|
|
from .. import text
|
|
|
|
|
|
|
|
# Common URL prefix of all extractor patterns (the scheme is optional)
BASE_PATTERN = r"(?:https?://)?poipiku\.com"
|
|
|
|
|
|
|
|
|
|
|
|
class PoipikuExtractor(Extractor):
    """Common base for the poipiku extractors

    Subclasses supply posts(), which produces post URLs or root-relative
    post paths; items() converts each of them into download messages.
    """
    category = "poipiku"
    root = "https://poipiku.com"
    directory_fmt = ("{category}", "{user_id} {user_name}")
    filename_fmt = "{post_id}_{num}.{extension}"
    archive_fmt = "{post_id}_{num}"
    request_interval = (0.5, 1.5)

    @staticmethod
    def _original_url(thumb):
        """Return the 'img-org' URL derived from a thumbnail 'src' value"""
        # The trailing 8 characters are cut off (presumably a thumbnail
        # size suffix -- confirm) and only the first '//img.' is rewritten.
        trimmed = text.ensure_http_scheme(thumb[:-8])
        return trimmed.replace("//img.", "//img-org.", 1)

    def items(self):
        """Yield one Directory message and the Url messages of every post

        For each entry from posts(), the post page is fetched and scraped
        for metadata and thumbnails. If the page contains a ' show all(+'
        marker, the remaining files are requested from the
        'ShowAppendFileF.jsp' endpoint, sending the 'password' config
        value along as the 'PAS' form field.
        """
        password = self.config("password", "")

        for post_url in self.posts():
            # user ID and post ID are the last two path segments
            segments = post_url.split("/")
            if post_url[0] == "/":
                post_url = self.root + post_url
            response = self.request(post_url)
            extr = text.extract_from(response.text)

            # 'extr' is called in the same order as the data it scrapes
            # appears in the original dict literal
            post_category = extr("<title>[", "]")
            count = extr("(", " ")
            user_name = extr('<h2 class="UserInfoUserName">', '</')
            description = extr('class="IllustItemDesc" >', '<')

            post = {
                "post_category": post_category,
                "count"      : count,
                "post_id"    : segments[-1].partition(".")[0],
                "user_id"    : segments[-2],
                "user_name"  : text.unescape(user_name.rpartition(">")[2]),
                "description": text.unescape(description),
                "_http_headers": {"Referer": post_url},
            }

            yield Message.Directory, post
            post["num"] = 0

            # thumbnails embedded in the post page itself
            while True:
                thumb = extr('class="IllustItemThumbImg" src="', '"')
                if not thumb:
                    break
                if thumb.startswith(("//img.poipiku.com/img/", "/img/")):
                    # skipped without being numbered (presumably
                    # site-provided placeholder graphics -- confirm)
                    continue
                post["num"] += 1
                url = self._original_url(thumb)
                yield Message.Url, url, text.nameext_from_url(url, post)

            # without a ' show all(+' marker there is nothing more to fetch
            if not extr(' show all(+', '<'):
                continue

            endpoint = self.root + "/f/ShowAppendFileF.jsp"
            headers = {
                "Accept" : "application/json, text/javascript, */*; q=0.01",
                "X-Requested-With": "XMLHttpRequest",
                "Origin" : self.root,
                "Referer": post_url,
            }
            form = {
                "UID": post["user_id"],
                "IID": post["post_id"],
                "PAS": password,
                "MD" : "0",
                "TWF": "-1",
            }
            response = self.request(
                endpoint, method="POST", headers=headers, data=form)
            page = response.json()["html"]

            # these responses carry a message instead of image markup;
            # it is logged and extraction below is still attempted
            if page.startswith(("You need to", "Password is incorrect")):
                self.log.warning("'%s'", page)

            thumbs = text.extract_iter(
                page, 'class="IllustItemThumbImg" src="', '"')
            for thumb in thumbs:
                post["num"] += 1
                url = self._original_url(thumb)
                yield Message.Url, url, text.nameext_from_url(url, post)
|
|
|
|
|
|
|
|
|
|
|
|
class PoipikuUserExtractor(PoipikuExtractor):
    """Extractor for all posts listed for one poipiku user"""
    subcategory = "user"
    pattern = (BASE_PATTERN + r"/(?:IllustListPcV\.jsp\?PG=(\d+)&ID=)?"
               r"(\d+)/?(?:$|[?&#])")
    test = (
        ("https://poipiku.com/25049/", {
            "pattern": r"https://img-org\.poipiku\.com/user_img\d+/000025049"
                       r"/\d+_\w+\.(jpe?g|png)$",
            "range": "1-10",
            "count": 10,
        }),
        "https://poipiku.com/IllustListPcV.jsp?PG=1&ID=25049&KWD=",
    )

    def __init__(self, match):
        """Store the optional page number and the user ID from 'match'"""
        PoipikuExtractor.__init__(self, match)
        page, user_id = match.groups()
        self._page = page
        self.user_id = user_id

    def posts(self):
        """Yield post paths from the user's 'IllustListPcV.jsp' pages

        Starts at the page number taken from the URL (0 is passed to
        parse_int() as fallback) and advances one page at a time.
        """
        url = self.root + "/IllustListPcV.jsp"
        params = {
            "PG" : text.parse_int(self._page, 0),
            "ID" : self.user_id,
            "KWD": "",
        }

        while True:
            page = self.request(url, params=params).text

            found = 0
            paths = text.extract_iter(
                page, 'class="IllustInfo" href="', '"')
            for found, path in enumerate(paths, 1):
                yield path

            # a page with fewer than 48 entries ends the pagination
            # (48 is presumably the site's page size -- confirm)
            if found < 48:
                return
            params["PG"] += 1
|
|
|
|
|
|
|
|
|
|
|
|
class PoipikuPostExtractor(PoipikuExtractor):
    """Extractor for a single poipiku post"""
    subcategory = "post"
    pattern = BASE_PATTERN + r"/(\d+)/(\d+)"
    test = (
        ("https://poipiku.com/25049/5864576.html", {
            "pattern": r"https://img-org\.poipiku\.com/user_img\d+/000025049"
                       r"/005864576_EWN1Y65gQ\.png$",
            "keyword": {
                "count": "1",
                "description": "",
                "extension": "png",
                "filename": "005864576_EWN1Y65gQ",
                "num": 1,
                "post_category": "DOODLE",
                "post_id": "5864576",
                "user_id": "25049",
                "user_name": "ユキウサギ",
            },
        }),
        ("https://poipiku.com/2166245/6411749.html", {
            "pattern": r"https://img-org\.poipiku\.com/user_img\d+/002166245"
                       r"/006411749_\w+\.jpeg$",
            "count": 4,
            "keyword": {
                "count": "4",
                "description": "絵茶の産物ネタバレあるやつ",
                "num": int,
                "post_category": "SPOILER",
                "post_id": "6411749",
                "user_id": "2166245",
                "user_name": "wadahito",
            },
        }),
        # different warning button style
        ("https://poipiku.com/3572553/5776587.html", {
            # the dot in 'poipiku\.com' is escaped so that it only matches
            # a literal '.', like the other patterns in this tuple
            "pattern": r"https://img-org\.poipiku\.com/user_img\d+/003572553"
                       r"/005776587_(\d+_)?\w+\.jpeg$",
            "count": 3,
            "keyword": {
                "count": "3",
                "description": "ORANGE OASISボスネタバレ",
                "num": int,
                "post_category": "SPOILER",
                "post_id": "5776587",
                "user_id": "3572553",
                "user_name": "nagakun",
            },
        }),
    )

    def __init__(self, match):
        """Store the user ID and post ID captured by 'pattern'"""
        PoipikuExtractor.__init__(self, match)
        self.user_id, self.post_id = match.groups()

    def posts(self):
        """Return a 1-tuple holding the root-relative path of the post"""
        return ("/{}/{}.html".format(self.user_id, self.post_id),)
|