1
0
mirror of https://github.com/mikf/gallery-dl.git synced 2024-11-25 04:02:32 +01:00

[8chan] support '/last/' thread URLs (#6318)

This commit is contained in:
Mike Fährmann 2024-10-13 20:42:41 +02:00
parent c7f0d8945b
commit 93265db9b3
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88
2 changed files with 36 additions and 43 deletions

View File

@ -64,16 +64,14 @@ class _8chanThreadExtractor(_8chanExtractor):
"{threadId} {subject[:50]}")
filename_fmt = "{postId}{num:?-//} {filename[:200]}.{extension}"
archive_fmt = "{boardUri}_{postId}_{num}"
pattern = BASE_PATTERN + r"/([^/?#]+)/res/(\d+)"
pattern = BASE_PATTERN + r"/([^/?#]+)/(?:res|last)/(\d+)"
example = "https://8chan.moe/a/res/12345.html"
def __init__(self, match):
_8chanExtractor.__init__(self, match)
_, self.board, self.thread = match.groups()
def items(self):
_, board, thread = self.groups
# fetch thread data
url = "{}/{}/res/{}.".format(self.root, self.board, self.thread)
url = "{}/{}/res/{}.".format(self.root, board, thread)
self.session.headers["Referer"] = url + "html"
thread = self.request(url + "json").json()
thread["postId"] = thread["threadId"]
@ -106,25 +104,22 @@ class _8chanBoardExtractor(_8chanExtractor):
pattern = BASE_PATTERN + r"/([^/?#]+)/(?:(\d+)\.html)?$"
example = "https://8chan.moe/a/"
def __init__(self, match):
_8chanExtractor.__init__(self, match)
_, self.board, self.page = match.groups()
def items(self):
page = text.parse_int(self.page, 1)
url = "{}/{}/{}.json".format(self.root, self.board, page)
board = self.request(url).json()
threads = board["threads"]
_, board, pnum = self.groups
pnum = text.parse_int(pnum, 1)
url = "{}/{}/{}.json".format(self.root, board, pnum)
data = self.request(url).json()
threads = data["threads"]
while True:
for thread in threads:
thread["_extractor"] = _8chanThreadExtractor
url = "{}/{}/res/{}.html".format(
self.root, self.board, thread["threadId"])
self.root, board, thread["threadId"])
yield Message.Queue, url, thread
page += 1
if page > board["pageCount"]:
pnum += 1
if pnum > data["pageCount"]:
return
url = "{}/{}/{}.json".format(self.root, self.board, page)
url = "{}/{}/{}.json".format(self.root, board, pnum)
threads = self.request(url).json()["threads"]

View File

@ -11,7 +11,6 @@ _8chan = getattr(gallery_dl.extractor, "8chan")
__tests__ = (
{
"#url" : "https://8chan.moe/vhs/res/4.html",
"#category": ("", "8chan", "thread"),
"#class": _8chan._8chanThreadExtractor,
"#pattern": r"https://8chan\.moe/\.media/[0-9a-f]{64}\.\w+$",
"#count" : 14,
@ -50,27 +49,28 @@ __tests__ = (
"?wssPort" : int,
},
{
"#url" : "https://8chan.moe/vhs/last/4.html",
"#class": _8chan._8chanThreadExtractor,
},
{
"#url" : "https://8chan.se/vhs/res/4.html",
"#category": ("", "8chan", "thread"),
"#class": _8chan._8chanThreadExtractor,
},
{
"#url" : "https://8chan.cc/vhs/res/4.html",
"#category": ("", "8chan", "thread"),
"#class": _8chan._8chanThreadExtractor,
},
{
"#url" : "https://8chan.moe/vhs/",
"#category": ("", "8chan", "board"),
"#class": _8chan._8chanBoardExtractor,
},
{
"#url" : "https://8chan.moe/vhs/2.html",
"#category": ("", "8chan", "board"),
"#class": _8chan._8chanBoardExtractor,
"#pattern": _8chan._8chanThreadExtractor.pattern,
"#count" : range(24, 32),
@ -78,13 +78,11 @@ __tests__ = (
{
"#url" : "https://8chan.se/vhs/",
"#category": ("", "8chan", "board"),
"#class": _8chan._8chanBoardExtractor,
},
{
"#url" : "https://8chan.cc/vhs/",
"#category": ("", "8chan", "board"),
"#class": _8chan._8chanBoardExtractor,
},