diff --git a/docs/supportedsites.rst b/docs/supportedsites.rst index 9c8b410a..4bb124d7 100644 --- a/docs/supportedsites.rst +++ b/docs/supportedsites.rst @@ -86,7 +86,8 @@ Reddit https://www.reddit.com/ individual Images, Subm rule #34 https://rule34.paheal.net/ Posts, Tag-Searches Rule 34 https://rule34.xxx/ Pools, Posts, Tag-Searches Safebooru https://safebooru.org/ Pools, Posts, Tag-Searches -Sankaku Channel https://chan.sankakucomplex.com/ Articles, Pools, Posts, Tag-Searches Optional +Sankaku Channel https://chan.sankakucomplex.com/ Pools, Posts, Tag-Searches Optional +Sankaku Complex https://www.sankakucomplex.com/ Articles Sen Manga https://raw.senmanga.com/ Chapters Sense-Scans http://sensescans.com/reader/ Chapters, Manga Sex.com https://www.sex.com/ Boards, Pins, Search Results diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 0cbf6252..3ee6d3db 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -78,6 +78,7 @@ modules = [ "rule34", "safebooru", "sankaku", + "sankakucomplex", "seiga", "senmanga", "sexcom", diff --git a/gallery_dl/extractor/sankaku.py b/gallery_dl/extractor/sankaku.py index a1e7219d..f67911b3 100644 --- a/gallery_dl/extractor/sankaku.py +++ b/gallery_dl/extractor/sankaku.py @@ -6,7 +6,7 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -"""Extract images from https://chan.sankakucomplex.com/""" +"""Extractors for https://chan.sankakucomplex.com/""" from .common import Extractor, Message, SharedConfigMixin from .. import text, util, exception @@ -297,72 +297,3 @@ class SankakuPostExtractor(SankakuExtractor): def get_posts(self): return (self.post_id,) - - -class SankakuArticleExtractor(Extractor): - """Extractor for articles on www.sankakucomplex.com""" - category = "sankaku" - subcategory = "article" - directory_fmt = ("{category}", "Articles", "{date:%Y-%m-%d} {title}") - filename_fmt = "{filename}.{extension}" - archive_fmt = "a_{date:%Y%m%d}_{filename}" - pattern = (r"(?:https?://)?www\.sankakucomplex\.com" - r"/(\d{4}/\d\d/\d\d)/([^/?]+)") - test = ( - ("https://www.sankakucomplex.com/2019/05/11/twitter-cosplayers", { - "url": "4a9ecc5ae917fbce469280da5b6a482510cae84d", - "keyword": "4ab96f31df9ee95d0dc6eefc2ca4e508c45c8e00", - }), - ("https://www.sankakucomplex.com/2009/12/01/sexy-goddesses-of-2ch", { - "url": "a1e249173fd6c899a8134fcfbd9c925588a63f7c", - "keyword": "a7876de642bf3e68fb4743dcd4d4e8778f2c17ab", - }), - ) - root = "https://www.sankakucomplex.com" - - def __init__(self, match): - Extractor.__init__(self, match) - self.date, self.title = match.groups() - - def items(self): - url = "{}/{}/{}/?pg=X".format(self.root, self.date, self.title) - extr = text.extract_from(self.request(url).text) - data = { - "title" : text.unescape( - extr('"og:title" content="', '"')), - "description": text.unescape( - extr('"og:description" content="', '"')), - "date" : text.parse_datetime( - extr('"og:updated_time" content="', '"')), - } - imgs = self.images(extr) - data["count"] = len(imgs) - data["tags"] = text.split_html(extr('="meta-tags">', ''))[::2] - - yield Message.Directory, data - for img in imgs: - img.update(data) - yield Message.Url, img["url"], img - - def images(self, extr): - num = 0 - imgs = [] - urls = set() - orig = re.compile(r"-\d+x\d+\.") - - extr('