mirror of
https://github.com/mikf/gallery-dl.git
synced 2024-11-22 10:42:34 +01:00
[poipiku] add 'user' and 'post' extractors (#1602)
This commit is contained in:
parent
1aae9f2b71
commit
27e8078fb7
@ -631,6 +631,12 @@ Consider all sites to be NSFW unless otherwise known.
|
||||
<td>Posts, Timelines</td>
|
||||
<td></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Poipiku</td>
|
||||
<td>https://poipiku.com/</td>
|
||||
<td>Posts, User Profiles</td>
|
||||
<td></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Porn Image</td>
|
||||
<td>https://porn-images-xxx.com/</td>
|
||||
|
@ -107,6 +107,7 @@ modules = [
|
||||
"pixiv",
|
||||
"pixnet",
|
||||
"plurk",
|
||||
"poipiku",
|
||||
"pornhub",
|
||||
"pururin",
|
||||
"reactor",
|
||||
|
167
gallery_dl/extractor/poipiku.py
Normal file
167
gallery_dl/extractor/poipiku.py
Normal file
@ -0,0 +1,167 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2022 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
"""Extractors for https://poipiku.com/"""
|
||||
|
||||
from .common import Extractor, Message
|
||||
from .. import text
|
||||
|
||||
# Regex prefix matching the poipiku.com host with an optional http(s)
# scheme; the 'pattern' attributes of the extractors below build on it.
BASE_PATTERN = r"(?:https?://)?poipiku\.com"
|
||||
|
||||
|
||||
class PoipikuExtractor(Extractor):
    """Base class for poipiku extractors

    Subclasses implement posts(), which returns an iterable of post
    locations: either full URLs or root-relative paths whose last two
    segments are '<user_id>/<post_id>.html'.
    """
    category = "poipiku"
    root = "https://poipiku.com"
    directory_fmt = ("{category}", "{user_id} {user_name}")
    filename_fmt = "{post_id}_{num}.{extension}"
    archive_fmt = "{post_id}_{num}"
    # NOTE(review): presumably a (min, max) delay in seconds between
    # requests -- confirm against the Extractor base class
    request_interval = (0.5, 1.5)

    def items(self):
        """Yield one Directory message per post, then its Url messages

        Each post page is fetched and scraped for metadata and for the
        thumbnails embedded in it. When the page advertises additional
        images ("show all"), they are requested from the site's
        ShowAppendFileF.jsp endpoint and yielded as well.
        """
        for post_url in self.posts():
            # An empty entry carries neither a user ID nor a post ID;
            # indexing into it would raise IndexError, so skip it.
            if not post_url:
                continue

            # split before prefixing the root, so that the last two
            # segments are always '<user_id>' and '<post_id>.html'
            parts = post_url.split("/")
            if post_url.startswith("/"):
                post_url = self.root + post_url
            page = self.request(post_url).text
            extr = text.extract_from(page)

            # 'extr' consumes the page sequentially, therefore the order
            # of the extr() calls below has to follow the page layout
            post = {
                "post_category": extr("<title>[", "]"),
                "count"      : extr("(", " "),
                "post_id"    : parts[-1].partition(".")[0],
                "user_id"    : parts[-2],
                "user_name"  : text.unescape(extr(
                    '<h2 class="UserInfoUserName">', '</').rpartition(">")[2]),
                "description": text.unescape(extr(
                    'class="IllustItemDesc" >', '<')),
            }

            yield Message.Directory, post
            post["num"] = 0

            while True:
                thumb = extr('class="IllustItemThumbImg" src="', '"')
                if not thumb:
                    break
                elif thumb.startswith("/img/"):
                    # NOTE(review): presumably a site-provided placeholder
                    # graphic rather than an uploaded image -- confirm
                    continue
                yield self._file_message(thumb, post)

            if not extr('</i> show all', '<'):
                continue

            # the remaining images of this post are only available
            # through a separate POST request
            url = self.root + "/f/ShowAppendFileF.jsp"
            headers = {
                "Accept" : "application/json, text/javascript, */*; q=0.01",
                "X-Requested-With": "XMLHttpRequest",
                "Origin" : self.root,
                "Referer": post_url,
            }
            data = {
                "UID": post["user_id"],
                "IID": post["post_id"],
                "PAS": "",
                "MD" : "0",
                "TWF": "-1",
            }
            page = self.request(
                url, method="POST", headers=headers, data=data).json()["html"]

            for thumb in text.extract_iter(
                    page, 'class="IllustItemThumbImg" src="', '"'):
                yield self._file_message(thumb, post)

    @staticmethod
    def _file_message(thumb, post):
        """Advance post["num"] and build the Url message for 'thumb'"""
        post["num"] += 1
        # Drop the trailing 8 characters of the thumbnail address to get
        # the file URL (presumably a size suffix such as '_640.jpg' --
        # TODO confirm against the live markup).
        url = text.ensure_http_scheme(thumb[:-8])
        return Message.Url, url, text.nameext_from_url(url, post)
|
||||
|
||||
|
||||
class PoipikuUserExtractor(PoipikuExtractor):
    """Extractor for all posts listed on a poipiku user's profile"""
    subcategory = "user"
    pattern = (BASE_PATTERN + r"/(?:IllustListPcV\.jsp\?PG=(\d+)&ID=)?"
               r"(\d+)/?(?:$|[?&#])")
    test = (
        ("https://poipiku.com/25049/", {
            "pattern": r"https://img\.poipiku\.com/user_img\d+/000025049"
                       r"/\d+_\w+\.(jpe?g|png)$",
            "range": "1-10",
            "count": 10,
        }),
        ("https://poipiku.com/IllustListPcV.jsp?PG=1&ID=25049&KWD=")
    )

    def __init__(self, match):
        PoipikuExtractor.__init__(self, match)
        # group 1: optional page number, group 2: numeric user ID
        groups = match.groups()
        self._page, self.user_id = groups

    def posts(self):
        """Yield the post paths found on each listing page of the user

        Starts at the page number given in the URL (0 if there is none)
        and requests consecutive pages until one of them contains fewer
        than 48 entries.
        """
        endpoint = self.root + "/IllustListPcV.jsp"
        query = {
            "PG" : text.parse_int(self._page, 0),
            "ID" : self.user_id,
            "KWD": "",
        }

        while True:
            listing = self.request(endpoint, params=query).text

            # 'found' keeps the number of entries seen on this page;
            # it stays 0 when the page lists nothing at all
            found = 0
            for found, path in enumerate(text.extract_iter(
                    listing, 'class="IllustInfo" href="', '"'), 1):
                yield path

            # a page with fewer than 48 entries is treated as the last one
            if found < 48:
                return
            query["PG"] += 1
|
||||
|
||||
|
||||
class PoipikuPostExtractor(PoipikuExtractor):
    """Extractor for a single poipiku post"""
    subcategory = "post"
    pattern = BASE_PATTERN + r"/(\d+)/(\d+)"
    test = (
        ("https://poipiku.com/25049/5864576.html", {
            "pattern": r"https://img\.poipiku\.com/user_img03/000025049"
                       r"/005864576_EWN1Y65gQ\.png$",
            "keyword": {
                "count": "1",
                "description": "",
                "extension": "png",
                "filename": "005864576_EWN1Y65gQ",
                "num": 1,
                "post_category": "DOODLE",
                "post_id": "5864576",
                "user_id": "25049",
                "user_name": "ユキウサギ",
            },
        }),
        ("https://poipiku.com/2166245/6411749.html", {
            "pattern": r"https://img\.poipiku\.com/user_img01/002166245"
                       r"/006411749_\w+\.jpeg$",
            "count": 4,
            "keyword": {
                "count": "4",
                "description": "絵茶の産物ネタバレあるやつ",
                "num": int,
                "post_category": "SPOILER",
                "post_id": "6411749",
                "user_id": "2166245",
                "user_name": "wadahito",
            },
        }),
    )

    def __init__(self, match):
        PoipikuExtractor.__init__(self, match)
        # group 1: numeric user ID, group 2: numeric post ID
        groups = match.groups()
        self.user_id, self.post_id = groups

    def posts(self):
        """Return a 1-tuple holding the root-relative path of the post"""
        post_path = "/{}/{}.html".format(self.user_id, self.post_id)
        return (post_path,)
|
Loading…
Reference in New Issue
Block a user