mirror of
https://github.com/mikf/gallery-dl.git
synced 2024-11-22 10:42:34 +01:00
[foolfuuka] add support for more sites (#18)
- https://arch.b4k.co
- https://archive.whatisthisimnotgoodwithcomputers.com
- https://archive.yeet.net

Notes:
- The name "whatisthisimnotgoodwithcomputers" is way too long ...
- archive.yeet.net is out of date and also blocked by 4chan servers
  - newest threads are 2 weeks old
  - using "https://archive.yeet.net" as Referer header results in "403 Forbidden" when accessing 4chan
This commit is contained in:
parent
84d4450410
commit
cebf800a7f
@ -7,6 +7,7 @@ Site URL Capabilities
|
|||||||
4chan https://www.4chan.org/ Threads
|
4chan https://www.4chan.org/ Threads
|
||||||
4plebs https://archive.4plebs.org/ Threads
|
4plebs https://archive.4plebs.org/ Threads
|
||||||
8chan https://8ch.net/ Threads
|
8chan https://8ch.net/ Threads
|
||||||
|
arch.b4k.co https://arch.b4k.co/ Threads
|
||||||
Archive of Sins https://archiveofsins.com/ Threads
|
Archive of Sins https://archiveofsins.com/ Threads
|
||||||
Archived.Moe https://archived.moe/ Threads
|
Archived.Moe https://archived.moe/ Threads
|
||||||
Batoto https://bato.to/ Chapters, Manga Optional
|
Batoto https://bato.to/ Chapters, Manga Optional
|
||||||
@ -73,6 +74,7 @@ Twitter https://twitter.com/ Tweets
|
|||||||
Warosu https://warosu.org/ Threads
|
Warosu https://warosu.org/ Threads
|
||||||
World Three http://www.slide.world-three.org/ Chapters, Manga
|
World Three http://www.slide.world-three.org/ Chapters, Manga
|
||||||
Yandere https://yande.re/ Pools, Popular Images, Posts, Tag-Searches
|
Yandere https://yande.re/ Pools, Popular Images, Posts, Tag-Searches
|
||||||
|
YEET Archive https://archive.yeet.net/ Threads
|
||||||
Acidimg https://acidimg.cc/ individual Images
|
Acidimg https://acidimg.cc/ individual Images
|
||||||
Chronos http://chronos.to/ individual Images
|
Chronos http://chronos.to/ individual Images
|
||||||
Coreimg http://coreimg.net/ individual Images
|
Coreimg http://coreimg.net/ individual Images
|
||||||
|
@ -18,6 +18,7 @@ modules = [
|
|||||||
"8chan",
|
"8chan",
|
||||||
"archivedmoe",
|
"archivedmoe",
|
||||||
"archiveofsins",
|
"archiveofsins",
|
||||||
|
"b4k",
|
||||||
"batoto",
|
"batoto",
|
||||||
"danbooru",
|
"danbooru",
|
||||||
"desuarchive",
|
"desuarchive",
|
||||||
@ -78,8 +79,10 @@ modules = [
|
|||||||
"tumblr",
|
"tumblr",
|
||||||
"twitter",
|
"twitter",
|
||||||
"warosu",
|
"warosu",
|
||||||
|
"whatisthisimnotgoodwithcomputers",
|
||||||
"worldthree",
|
"worldthree",
|
||||||
"yandere",
|
"yandere",
|
||||||
|
"yeet",
|
||||||
"imagehosts",
|
"imagehosts",
|
||||||
"directlink",
|
"directlink",
|
||||||
"recursive",
|
"recursive",
|
||||||
|
24
gallery_dl/extractor/b4k.py
Normal file
24
gallery_dl/extractor/b4k.py
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
# Copyright 2017 Mike Fährmann
|
||||||
|
#
|
||||||
|
# This program is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU General Public License version 2 as
|
||||||
|
# published by the Free Software Foundation.
|
||||||
|
|
||||||
|
"""Extract images from https://arch.b4k.co/"""
|
||||||
|
|
||||||
|
from . import chan
|
||||||
|
|
||||||
|
|
||||||
|
class BfourkThreadExtractor(chan.FoolfuukaThreadExtractor):
|
||||||
|
"""Extractor for images from threads on arch.b4k.co"""
|
||||||
|
category = "b4k"
|
||||||
|
root = "https://arch.b4k.co"
|
||||||
|
pattern = [r"(?:https?://)?arch\.b4k\.co/([^/]+)/thread/(\d+)"]
|
||||||
|
test = [("http://arch.b4k.co/meta/thread/196/", {
|
||||||
|
"url": "cdd4931ac1cd00264b0b54e2e3b0d8f6ae48957e",
|
||||||
|
})]
|
||||||
|
|
||||||
|
def remote(self, media):
|
||||||
|
return media["remote_media_link"]
|
@ -70,12 +70,14 @@ class FoolfuukaThreadExtractor(SharedConfigExtractor):
|
|||||||
"{thread_num} - {title}"]
|
"{thread_num} - {title}"]
|
||||||
filename_fmt = "{media[media]}"
|
filename_fmt = "{media[media]}"
|
||||||
root = ""
|
root = ""
|
||||||
|
referer = True
|
||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
SharedConfigExtractor.__init__(self)
|
SharedConfigExtractor.__init__(self)
|
||||||
self.board, self.thread = match.groups()
|
self.board, self.thread = match.groups()
|
||||||
self.session.headers["User-Agent"] = "Mozilla 5.0"
|
self.session.headers["User-Agent"] = "Mozilla 5.0"
|
||||||
self.session.headers["Referer"] = self.root
|
if self.referer:
|
||||||
|
self.session.headers["Referer"] = self.root
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
op = True
|
op = True
|
||||||
@ -91,9 +93,9 @@ class FoolfuukaThreadExtractor(SharedConfigExtractor):
|
|||||||
url = media["media_link"]
|
url = media["media_link"]
|
||||||
|
|
||||||
if not url and "remote_media_link" in media:
|
if not url and "remote_media_link" in media:
|
||||||
needle = '<meta http-equiv="Refresh" content="0; url='
|
url = self.remote(media)
|
||||||
page = self.request(media["remote_media_link"]).text
|
if url.startswith("/"):
|
||||||
url = text.extract(page, needle, '"')[0]
|
url = self.root + url
|
||||||
|
|
||||||
post["extension"] = url.rpartition(".")[2]
|
post["extension"] = url.rpartition(".")[2]
|
||||||
yield Message.Url, url, post
|
yield Message.Url, url, post
|
||||||
@ -104,7 +106,12 @@ class FoolfuukaThreadExtractor(SharedConfigExtractor):
|
|||||||
data = self.request(url, params=params).json()[self.thread]
|
data = self.request(url, params=params).json()[self.thread]
|
||||||
|
|
||||||
# sort post-objects by their key
|
# sort post-objects by their key
|
||||||
posts = sorted(data["posts"].items(), key=operator.itemgetter(0))
|
posts = sorted(data.get("posts", {}).items())
|
||||||
posts = map(operator.itemgetter(1), posts)
|
posts = map(operator.itemgetter(1), posts)
|
||||||
|
|
||||||
return itertools.chain((data["op"],), posts)
|
return itertools.chain((data["op"],), posts)
|
||||||
|
|
||||||
|
def remote(self, media):
|
||||||
|
needle = '<meta http-equiv="Refresh" content="0; url='
|
||||||
|
page = self.request(media["remote_media_link"]).text
|
||||||
|
return text.extract(page, needle, '"')[0]
|
||||||
|
23
gallery_dl/extractor/whatisthisimnotgoodwithcomputers.py
Normal file
23
gallery_dl/extractor/whatisthisimnotgoodwithcomputers.py
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
# Copyright 2017 Mike Fährmann
|
||||||
|
#
|
||||||
|
# This program is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU General Public License version 2 as
|
||||||
|
# published by the Free Software Foundation.
|
||||||
|
|
||||||
|
"""Extract images from https://archive.whatisthisimnotgoodwithcomputers.com"""
|
||||||
|
|
||||||
|
from . import chan
|
||||||
|
|
||||||
|
|
||||||
|
class WitingwcThreadExtractor(chan.FoolfuukaThreadExtractor):
|
||||||
|
"""Extractor for archive.whatisthisimnotgoodwithcomputers.com"""
|
||||||
|
category = "whatisthisimnotgoodwithcomputers"
|
||||||
|
root = "https://archive.whatisthisimnotgoodwithcomputers.com"
|
||||||
|
pattern = [r"(?:https?://)?archive\.whatisthisimnotgoodwithcomputers\.com/"
|
||||||
|
r"([^/]+)/thread/(\d+)"]
|
||||||
|
test = [(("https://archive.whatisthisimnotgoodwithcomputers.com/"
|
||||||
|
"ref/thread/1094/"), {
|
||||||
|
"url": "cf8f6d4b4950767d2131de308ebc96eec05b04f6",
|
||||||
|
})]
|
22
gallery_dl/extractor/yeet.py
Normal file
22
gallery_dl/extractor/yeet.py
Normal file
@ -0,0 +1,22 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
# Copyright 2017 Mike Fährmann
|
||||||
|
#
|
||||||
|
# This program is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU General Public License version 2 as
|
||||||
|
# published by the Free Software Foundation.
|
||||||
|
|
||||||
|
"""Extract images from https://archive.yeet.net/"""
|
||||||
|
|
||||||
|
from . import chan
|
||||||
|
|
||||||
|
|
||||||
|
class YeetThreadExtractor(chan.FoolfuukaThreadExtractor):
|
||||||
|
"""Extractor for images from threads on archive.yeet.net"""
|
||||||
|
category = "yeet"
|
||||||
|
root = "https://archive.yeet.net"
|
||||||
|
pattern = [r"(?:https?://)?archive\.yeet\.net/([^/]+)/thread/(\d+)"]
|
||||||
|
test = [("https://archive.yeet.net/yeet/thread/359/", {
|
||||||
|
"url": "ced64a1aadaafc4f359ab89d9f801050731803f1",
|
||||||
|
})]
|
||||||
|
referer = False
|
@ -12,6 +12,7 @@ CATEGORY_MAP = {
|
|||||||
"2chan" : "Futaba Channel",
|
"2chan" : "Futaba Channel",
|
||||||
"archivedmoe" : "Archived.Moe",
|
"archivedmoe" : "Archived.Moe",
|
||||||
"archiveofsins" : "Archive of Sins",
|
"archiveofsins" : "Archive of Sins",
|
||||||
|
"b4k" : "arch.b4k.co",
|
||||||
"deviantart" : "DeviantArt",
|
"deviantart" : "DeviantArt",
|
||||||
"dokireader" : "Doki Reader",
|
"dokireader" : "Doki Reader",
|
||||||
"dynastyscans" : "Dynasty Reader",
|
"dynastyscans" : "Dynasty Reader",
|
||||||
@ -54,6 +55,7 @@ CATEGORY_MAP = {
|
|||||||
"spectrumnexus" : "Spectrum Nexus",
|
"spectrumnexus" : "Spectrum Nexus",
|
||||||
"thebarchive" : "The /b/ Archive",
|
"thebarchive" : "The /b/ Archive",
|
||||||
"worldthree" : "World Three",
|
"worldthree" : "World Three",
|
||||||
|
"yeet" : "YEET Archive",
|
||||||
"yomanga" : "YoManga",
|
"yomanga" : "YoManga",
|
||||||
"yonkouprod" : "Yonkou Productions",
|
"yonkouprod" : "Yonkou Productions",
|
||||||
}
|
}
|
||||||
@ -85,6 +87,7 @@ AUTH_MAP = {
|
|||||||
|
|
||||||
IGNORE_LIST = (
|
IGNORE_LIST = (
|
||||||
"oauth",
|
"oauth",
|
||||||
|
"whatisthisimnotgoodwithcomputers",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user