diff --git a/docs/supportedsites.rst b/docs/supportedsites.rst index d973d573..e59087b3 100644 --- a/docs/supportedsites.rst +++ b/docs/supportedsites.rst @@ -98,6 +98,7 @@ WikiArt.org https://www.wikiart.org/ Artists, Artworks World Three http://www.slide.world-three.org/ Chapters, Manga XVideos https://www.xvideos.com/ Images from Users, Galleries Yandere https://yande.re/ Pools, Popular Images, Posts, Tag-Searches +yaplog! https://yaplog.jp/ Images from Users, Posts |yuki-S| https://yuki.la/ Threads Acidimg https://acidimg.cc/ individual Images Imagetwist https://imagetwist.com/ individual Images diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 595c5c4c..5a08ac7d 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -85,8 +85,9 @@ modules = [ "warosu", "weibo", "wikiart", - "yandere", "xvideos", + "yandere", + "yaplog", "yuki", "foolfuuka", "foolslide", diff --git a/gallery_dl/extractor/luscious.py b/gallery_dl/extractor/luscious.py index 4b243a06..66163b0c 100644 --- a/gallery_dl/extractor/luscious.py +++ b/gallery_dl/extractor/luscious.py @@ -67,7 +67,7 @@ class LusciousAlbumExtractor(LusciousBase, GalleryExtractor): }), ("https://luscious.net/albums/virgin-killer-sweater_282582/", { "url": "21cc68a7548f4d71dfd67d8caf96349dde7e791c", - "keyword": "f8e5e7b32a7ff777cae5a89e93d06eb51afe3f48", + "keyword": "c147d8ef90843f68e37ed15e4fe017e62fc97c96", }), ("https://luscious.net/albums/not-found_277035/", { "exception": exception.NotFoundError, diff --git a/gallery_dl/extractor/yaplog.py b/gallery_dl/extractor/yaplog.py new file mode 100644 index 00000000..80f573f5 --- /dev/null +++ b/gallery_dl/extractor/yaplog.py @@ -0,0 +1,103 @@ +# -*- coding: utf-8 -*- + +# Copyright 2019 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for https://yaplog.jp/""" + +from .common import Extractor, Message +from .. import text, util + + +class YaplogExtractor(Extractor): + """Base class for yaplog extractors""" + category = "yaplog" + root = "https://yaplog.jp" + filename_fmt = "{post_id}_{image_id}_{title}.{extension}" + directory_fmt = ("{category}", "{user}") + archive_fmt = "{post_id}_{image_id}" + + def __init__(self, match): + Extractor.__init__(self, match) + self.user = None + + def items(self): + yield Message.Version, 1 + for images, data in self.posts(): + yield Message.Directory, data + for url in images: + iid, _, ext = url.rpartition("/")[2].rpartition(".") + image = { + "image_id" : text.parse_int(iid.partition("_")[0]), + "extension": ext, + } + image.update(data) + yield Message.Url, url, image + + def posts(self): + """Return an iterable with (data, image URLs) tuples""" + + def _extract_post(self, url): + page = self.request(url).text + title, pos = text.extract(page, 'class="title">', '<') + date , pos = text.extract(page, 'class="date">' , '<', pos) + post , pos = text.extract(page, '/archive/' , '"', pos) + url , pos = text.extract(page, 'class="last">= 2", + }) + + def __init__(self, match): + YaplogExtractor.__init__(self, match) + self.user = match.group(1) + + def posts(self): + url = "{}/{}/image/".format(self.root, self.user) + while url: + url, images, data = self._extract_post(url) + yield images, data + + +class YaplogPostExtractor(YaplogExtractor): + """Extractor for images from a single blog post on yaplog.jp""" + subcategory = "post" + pattern = (r"(?:https://)?(?:www\.)?yaplog\.jp" + r"/(\w+)/(?:archive|image)/(\d+)") + test = ("https://yaplog.jp/imamiami0726/image/1299", { + "url": "896cae20fa718735a57e723c48544e830ff31345", + "keyword": "5c700cb6c505d50b6161c9a3559a186d378eabe3", + }) + + def __init__(self, match): + YaplogExtractor.__init__(self, match) + self.user, self.post_id = match.groups() + + def posts(self): + url = "{}/{}/image/{}".format(self.root, self.user, self.post_id) + _, images, data = self._extract_post(url) + return ((images, data),) diff --git a/scripts/supportedsites.py b/scripts/supportedsites.py index 1b05fa09..b8394c71 100755 --- a/scripts/supportedsites.py +++ b/scripts/supportedsites.py @@ -66,6 +66,7 @@ CATEGORY_MAP = { "wikiart" : "WikiArt.org", "worldthree" : "World Three", "xvideos" : "XVideos", + "yaplog" : "yaplog!", "yuki" : "yuki.la 4chan archive", }