mirror of
https://github.com/mikf/gallery-dl.git
synced 2024-11-23 03:02:50 +01:00
[foolfuuka] use BaseExtractor
This commit is contained in:
parent
745a114c61
commit
671a95cae5
@ -8,21 +8,21 @@
|
||||
|
||||
"""Extractors for 4chan archives based on FoolFuuka"""
|
||||
|
||||
from .common import Extractor, Message, generate_extractors
|
||||
from .common import BaseExtractor, Message
|
||||
from .. import text
|
||||
import itertools
|
||||
|
||||
|
||||
class FoolfuukaExtractor(Extractor):
|
||||
class FoolfuukaExtractor(BaseExtractor):
|
||||
"""Base extractor for FoolFuuka based boards/archives"""
|
||||
basecategory = "foolfuuka"
|
||||
archive_fmt = "{board[shortname]}_{num}_{timestamp}"
|
||||
external = "default"
|
||||
|
||||
def __init__(self, match):
|
||||
Extractor.__init__(self, match)
|
||||
BaseExtractor.__init__(self, match)
|
||||
self.session.headers["Referer"] = self.root
|
||||
if self.external == "direct":
|
||||
if self.category == "b4k":
|
||||
self.remote = self._remote_direct
|
||||
|
||||
def items(self):
|
||||
@ -43,7 +43,7 @@ class FoolfuukaExtractor(Extractor):
|
||||
yield Message.Url, url, post
|
||||
|
||||
def metadata(self):
|
||||
""" """
|
||||
"""Return general metadata"""
|
||||
|
||||
def posts(self):
|
||||
"""Return an iterable with all relevant posts"""
|
||||
@ -59,16 +59,90 @@ class FoolfuukaExtractor(Extractor):
|
||||
return media["remote_media_link"]
|
||||
|
||||
|
||||
BASE_PATTERN = FoolfuukaExtractor.update({
|
||||
"4plebs": {
|
||||
"root": "https://archive.4plebs.org",
|
||||
"pattern": r"(?:archive\.)?4plebs\.org",
|
||||
},
|
||||
"archivedmoe": {
|
||||
"root": "https://archived.moe",
|
||||
},
|
||||
"archiveofsins": {
|
||||
"root": "https://archiveofsins.com",
|
||||
"pattern": r"(?:www\.)?archiveofsins\.com",
|
||||
},
|
||||
"b4k": {
|
||||
"root": "https://arch.b4k.co",
|
||||
},
|
||||
"desuarchive": {
|
||||
"root": "https://desuarchive.org",
|
||||
},
|
||||
"fireden": {
|
||||
"root": "https://boards.fireden.net",
|
||||
},
|
||||
"nyafuu": {
|
||||
"root": "https://archive.nyafuu.org",
|
||||
"pattern": r"(?:archive\.)?nyafuu\.org",
|
||||
},
|
||||
"rbt": {
|
||||
"root": "https://rbt.asia",
|
||||
"pattern": r"(?:rbt\.asia|(?:archive\.)?rebeccablacktech\.com)",
|
||||
},
|
||||
"thebarchive": {
|
||||
"root": "https://thebarchive.com",
|
||||
"pattern": r"thebarchive\.com",
|
||||
},
|
||||
})
|
||||
|
||||
|
||||
class FoolfuukaThreadExtractor(FoolfuukaExtractor):
|
||||
"""Base extractor for threads on FoolFuuka based boards/archives"""
|
||||
subcategory = "thread"
|
||||
directory_fmt = ("{category}", "{board[shortname]}",
|
||||
"{thread_num}{title:? - //}")
|
||||
pattern_fmt = r"/([^/?#]+)/thread/(\d+)"
|
||||
pattern = BASE_PATTERN + r"/([^/?#]+)/thread/(\d+)"
|
||||
test = (
|
||||
("https://archive.4plebs.org/tg/thread/54059290", {
|
||||
"url": "07452944164b602502b02b24521f8cee5c484d2a",
|
||||
}),
|
||||
("https://archived.moe/gd/thread/309639/", {
|
||||
"url": "fdd533840e2d535abd162c02d6dfadbc12e2dcd8",
|
||||
"content": "c27e2a7be3bc989b5dd859f7789cc854db3f5573",
|
||||
}),
|
||||
("https://archived.moe/a/thread/159767162/", {
|
||||
"url": "ffec05a1a1b906b5ca85992513671c9155ee9e87",
|
||||
}),
|
||||
("https://archiveofsins.com/h/thread/4668813/", {
|
||||
"url": "f612d287087e10a228ef69517cf811539db9a102",
|
||||
"content": "0dd92d0d8a7bf6e2f7d1f5ac8954c1bcf18c22a4",
|
||||
}),
|
||||
("https://arch.b4k.co/meta/thread/196/", {
|
||||
"url": "d309713d2f838797096b3e9cb44fe514a9c9d07a",
|
||||
}),
|
||||
("https://desuarchive.org/a/thread/159542679/", {
|
||||
"url": "3ae1473f6916ac831efe5cc4d4e7d3298ce79406",
|
||||
}),
|
||||
("https://boards.fireden.net/sci/thread/11264294/", {
|
||||
"url": "3adfe181ee86a8c23021c705f623b3657a9b0a43",
|
||||
}),
|
||||
("https://archive.nyafuu.org/c/thread/2849220/", {
|
||||
"url": "bbe6f82944a45e359f5c8daf53f565913dc13e4f",
|
||||
}),
|
||||
("https://rbt.asia/g/thread/61487650/", {
|
||||
"url": "61896d9d9a2edb556b619000a308a984307b6d30",
|
||||
}),
|
||||
("https://archive.rebeccablacktech.com/g/thread/61487650/", {
|
||||
"url": "61896d9d9a2edb556b619000a308a984307b6d30",
|
||||
}),
|
||||
("https://thebarchive.com/b/thread/739772332/", {
|
||||
"url": "e8b18001307d130d67db31740ce57c8561b5d80c",
|
||||
}),
|
||||
)
|
||||
|
||||
def __init__(self, match):
|
||||
FoolfuukaExtractor.__init__(self, match)
|
||||
self.board, self.thread = match.groups()
|
||||
self.board = match.group(match.lastindex-1)
|
||||
self.thread = match.group(match.lastindex)
|
||||
self.data = None
|
||||
|
||||
def metadata(self):
|
||||
@ -78,23 +152,34 @@ class FoolfuukaThreadExtractor(FoolfuukaExtractor):
|
||||
return self.data["op"]
|
||||
|
||||
def posts(self):
|
||||
op = (self.data["op"],)
|
||||
posts = self.data.get("posts")
|
||||
if posts:
|
||||
posts = list(posts.values())
|
||||
posts.sort(key=lambda p: p["timestamp"])
|
||||
else:
|
||||
posts = ()
|
||||
return itertools.chain((self.data["op"],), posts)
|
||||
return itertools.chain(op, posts)
|
||||
return op
|
||||
|
||||
|
||||
class FoolfuukaBoardExtractor(FoolfuukaExtractor):
|
||||
"""Base extractor for FoolFuuka based boards/archives"""
|
||||
subcategory = "board"
|
||||
pattern_fmt = r"/([^/?#]+)/\d*$"
|
||||
pattern = BASE_PATTERN + r"/([^/?#]+)/\d*$"
|
||||
test = (
|
||||
("https://archive.4plebs.org/tg/"),
|
||||
("https://archived.moe/gd/"),
|
||||
("https://archiveofsins.com/h/"),
|
||||
("https://arch.b4k.co/meta/"),
|
||||
("https://desuarchive.org/a/"),
|
||||
("https://boards.fireden.net/sci/"),
|
||||
("https://archive.nyafuu.org/c/"),
|
||||
("https://rbt.asia/g/"),
|
||||
("https://thebarchive.com/b/"),
|
||||
)
|
||||
|
||||
def __init__(self, match):
|
||||
FoolfuukaExtractor.__init__(self, match)
|
||||
self.board = match.group(1)
|
||||
self.board = match.group(match.lastindex)
|
||||
|
||||
def items(self):
|
||||
index_base = "{}/_/api/chan/index/?board={}&page=".format(
|
||||
@ -113,7 +198,7 @@ class FoolfuukaBoardExtractor(FoolfuukaExtractor):
|
||||
|
||||
for num, thread in threads.items():
|
||||
thread["url"] = thread_base + format(num)
|
||||
thread["_extractor"] = self.childclass
|
||||
thread["_extractor"] = FoolfuukaThreadExtractor
|
||||
yield Message.Queue, thread["url"], thread
|
||||
|
||||
|
||||
@ -121,15 +206,24 @@ class FoolfuukaSearchExtractor(FoolfuukaExtractor):
|
||||
"""Base extractor for search results on FoolFuuka based boards/archives"""
|
||||
subcategory = "search"
|
||||
directory_fmt = ("{category}", "search", "{search}")
|
||||
pattern_fmt = r"/([^/?#]+)/search((?:/[^/?#]+/[^/?#]+)+)"
|
||||
pattern = BASE_PATTERN + r"/([^/?#]+)/search((?:/[^/?#]+/[^/?#]+)+)"
|
||||
request_interval = 1.0
|
||||
test = (
|
||||
("https://archive.4plebs.org/_/search/text/test/"),
|
||||
("https://archived.moe/_/search/text/test/"),
|
||||
("https://archiveofsins.com/_/search/text/test/"),
|
||||
("https://archiveofsins.com/_/search/text/test/"),
|
||||
("https://desuarchive.org/_/search/text/test/"),
|
||||
("https://boards.fireden.net/_/search/text/test/"),
|
||||
("https://archive.nyafuu.org/_/search/text/test/"),
|
||||
("https://rbt.asia/_/search/text/test/"),
|
||||
("https://thebarchive.com/_/search/text/test/"),
|
||||
)
|
||||
|
||||
def __init__(self, match):
|
||||
FoolfuukaExtractor.__init__(self, match)
|
||||
board, search = match.groups()
|
||||
|
||||
self.params = params = {}
|
||||
args = search.split("/")
|
||||
args = match.group(match.lastindex).split("/")
|
||||
key = None
|
||||
|
||||
for arg in args:
|
||||
@ -138,6 +232,8 @@ class FoolfuukaSearchExtractor(FoolfuukaExtractor):
|
||||
key = None
|
||||
else:
|
||||
key = arg
|
||||
|
||||
board = match.group(match.lastindex-1)
|
||||
if board != "_":
|
||||
params["boards"] = board
|
||||
|
||||
@ -170,105 +266,3 @@ class FoolfuukaSearchExtractor(FoolfuukaExtractor):
|
||||
if len(posts) <= 3:
|
||||
return
|
||||
params["page"] += 1
|
||||
|
||||
|
||||
EXTRACTORS = {
|
||||
"4plebs": {
|
||||
"name": "_4plebs",
|
||||
"root": "https://archive.4plebs.org",
|
||||
"pattern": r"(?:archive\.)?4plebs\.org",
|
||||
"test-thread": ("https://archive.4plebs.org/tg/thread/54059290", {
|
||||
"url": "07452944164b602502b02b24521f8cee5c484d2a",
|
||||
}),
|
||||
"test-board": ("https://archive.4plebs.org/tg/",),
|
||||
"test-search": ("https://archive.4plebs.org/_/search/text/test/",),
|
||||
},
|
||||
"archivedmoe": {
|
||||
"root": "https://archived.moe",
|
||||
"test-thread": (
|
||||
("https://archived.moe/gd/thread/309639/", {
|
||||
"url": "fdd533840e2d535abd162c02d6dfadbc12e2dcd8",
|
||||
"content": "c27e2a7be3bc989b5dd859f7789cc854db3f5573",
|
||||
}),
|
||||
("https://archived.moe/a/thread/159767162/", {
|
||||
"url": "ffec05a1a1b906b5ca85992513671c9155ee9e87",
|
||||
}),
|
||||
),
|
||||
"test-board": ("https://archived.moe/gd/",),
|
||||
"test-search": ("https://archived.moe/_/search/text/test/",),
|
||||
},
|
||||
"archiveofsins": {
|
||||
"root": "https://archiveofsins.com",
|
||||
"pattern": r"(?:www\.)?archiveofsins\.com",
|
||||
"test-thread": ("https://archiveofsins.com/h/thread/4668813/", {
|
||||
"url": "f612d287087e10a228ef69517cf811539db9a102",
|
||||
"content": "0dd92d0d8a7bf6e2f7d1f5ac8954c1bcf18c22a4",
|
||||
}),
|
||||
"test-board": ("https://archiveofsins.com/h/",),
|
||||
"test-search": ("https://archiveofsins.com/_/search/text/test/",),
|
||||
},
|
||||
"b4k": {
|
||||
"root": "https://arch.b4k.co",
|
||||
"extra": {"external": "direct"},
|
||||
"test-thread": ("https://arch.b4k.co/meta/thread/196/", {
|
||||
"url": "d309713d2f838797096b3e9cb44fe514a9c9d07a",
|
||||
}),
|
||||
"test-board": ("https://arch.b4k.co/meta/",),
|
||||
"test-search": ("https://arch.b4k.co/_/search/text/test/",),
|
||||
},
|
||||
"desuarchive": {
|
||||
"root": "https://desuarchive.org",
|
||||
"test-thread": ("https://desuarchive.org/a/thread/159542679/", {
|
||||
"url": "3ae1473f6916ac831efe5cc4d4e7d3298ce79406",
|
||||
}),
|
||||
"test-board": ("https://desuarchive.org/a/",),
|
||||
"test-search": ("https://desuarchive.org/_/search/text/test/",),
|
||||
},
|
||||
"fireden": {
|
||||
"root": "https://boards.fireden.net",
|
||||
"test-thread": ("https://boards.fireden.net/sci/thread/11264294/", {
|
||||
"url": "3adfe181ee86a8c23021c705f623b3657a9b0a43",
|
||||
}),
|
||||
"test-board": ("https://boards.fireden.net/sci/",),
|
||||
"test-search": ("https://boards.fireden.net/_/search/text/test/",),
|
||||
},
|
||||
"nyafuu": {
|
||||
"root": "https://archive.nyafuu.org",
|
||||
"pattern": r"(?:archive\.)?nyafuu\.org",
|
||||
"test-thread": ("https://archive.nyafuu.org/c/thread/2849220/", {
|
||||
"url": "bbe6f82944a45e359f5c8daf53f565913dc13e4f",
|
||||
}),
|
||||
"test-board": ("https://archive.nyafuu.org/c/",),
|
||||
"test-search": ("https://archive.nyafuu.org/_/search/text/test/",),
|
||||
},
|
||||
"rbt": {
|
||||
"root": "https://rbt.asia",
|
||||
"pattern": r"(?:rbt\.asia|(?:archive\.)?rebeccablacktech\.com)",
|
||||
"test-thread": (
|
||||
("https://rbt.asia/g/thread/61487650/", {
|
||||
"url": "61896d9d9a2edb556b619000a308a984307b6d30",
|
||||
}),
|
||||
("https://archive.rebeccablacktech.com/g/thread/61487650/", {
|
||||
"url": "61896d9d9a2edb556b619000a308a984307b6d30",
|
||||
}),
|
||||
),
|
||||
"test-board": ("https://rbt.asia/g/",),
|
||||
"test-search": ("https://rbt.asia/_/search/text/test/",),
|
||||
},
|
||||
"thebarchive": {
|
||||
"root": "https://thebarchive.com",
|
||||
"pattern": r"thebarchive\.com",
|
||||
"test-thread": ("https://thebarchive.com/b/thread/739772332/", {
|
||||
"url": "e8b18001307d130d67db31740ce57c8561b5d80c",
|
||||
}),
|
||||
"test-board": ("https://thebarchive.com/b/",),
|
||||
"test-search": ("https://thebarchive.com/_/search/text/test/",),
|
||||
},
|
||||
"_ckey": "childclass",
|
||||
}
|
||||
|
||||
generate_extractors(EXTRACTORS, globals(), (
|
||||
FoolfuukaThreadExtractor,
|
||||
FoolfuukaBoardExtractor,
|
||||
FoolfuukaSearchExtractor,
|
||||
))
|
||||
|
Loading…
Reference in New Issue
Block a user