NOTE(review): this patch was recovered from a copy whose line breaks had been
collapsed and whose HTML-like substrings had been stripped. The table markup
in docs/supportedsites.md, the HTML marker strings handed to text.extract*()
in jpgfish.py, and the indentation/alignment of all context lines are
reconstructed; TODO confirm them against the live site and the target tree
before applying. The blob hash on the jpgfish.py "index" line predates the
review changes (method docstrings, user-pattern fix, extra URL-only test).

diff --git a/docs/supportedsites.md b/docs/supportedsites.md
index d8cf7aa6..27bb0bbe 100644
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -433,6 +433,12 @@ Consider all sites to be NSFW unless otherwise known.
     <td>Games</td>
     <td></td>
 </tr>
+<tr>
+    <td>JPG Fish</td>
+    <td>https://jpg.fishing/</td>
+    <td>Albums, individual Images, User Profiles</td>
+    <td></td>
+</tr>
 <tr>
     <td>Keenspot</td>
     <td>http://www.keenspot.com/</td>
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index 049732f6..1f77f94a 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -75,6 +75,7 @@ modules = [
     "issuu",
     "itaku",
     "itchio",
+    "jpgfish",
     "kabeuchi",
     "keenspot",
     "kemonoparty",
diff --git a/gallery_dl/extractor/jpgfish.py b/gallery_dl/extractor/jpgfish.py
new file mode 100644
index 00000000..cdcf35cb
--- /dev/null
+++ b/gallery_dl/extractor/jpgfish.py
@@ -0,0 +1,151 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://jpg.fishing/"""
+
+from .common import Extractor, Message
+from .. import text
+
+BASE_PATTERN = r"(?:https?://)?jpg\.(?:fishing|church)"
+
+
+class JpgfishExtractor(Extractor):
+    """Base class for jpgfish extractors"""
+    category = "jpgfish"
+    root = "https://jpg.fishing"
+    directory_fmt = ("{category}", "{user}", "{album}",)
+    archive_fmt = "{id}"
+
+    def _pagination(self, url):
+        """Yield the link of every list item on 'url' and its next pages"""
+        while url:
+            page = self.request(url).text
+
+            for item in text.extract_iter(
+                    page, '<div class="list-item-image ', 'image-container'):
+                yield text.extract(item, '<a href="', '"')[0]
+
+            url = text.extract(
+                page, '<a data-pagination="next" href="', '" ><')[0]
+
+
+class JpgfishImageExtractor(JpgfishExtractor):
+    """Extractor for jpgfish Images"""
+    subcategory = "image"
+    pattern = BASE_PATTERN + r"/img/((?:[^/?#]+\.)?(\w+))"
+    test = (
+        ("https://jpg.fishing/img/funnymeme.LecXGS", {
+            "pattern": r"https://simp3\.jpg\.church/images/funnymeme\.jpg",
+            "content": "098e5e9b17ad634358426e0ffd1c93871474d13c",
+            "keyword": {
+                "album": "",
+                "extension": "jpg",
+                "filename": "funnymeme",
+                "id": "LecXGS",
+                "url": "https://simp3.jpg.church/images/funnymeme.jpg",
+                "user": "exearco",
+            },
+        }),
+        ("https://jpg.church/img/auCruA", {
+            "pattern": r"https://simp2\.jpg\.church/hannahowo_00457\.jpg",
+            "keyword": {"album": "401-500"},
+        }),
+        ("https://jpg.church/img/hannahowo-00424.au64iA"),
+    )
+
+    def __init__(self, match):
+        JpgfishExtractor.__init__(self, match)
+        self.path, self.image_id = match.groups()
+
+    def items(self):
+        """Yield Directory and Url messages for a single image page"""
+        url = "{}/img/{}".format(self.root, self.path)
+        extr = text.extract_from(self.request(url).text)
+
+        image = {
+            "id"   : self.image_id,
+            "url"  : extr('<meta property="og:image" content="', '"'),
+            "album": text.extract(extr(
+                "Added to <a", "/a>"), ">", "<")[0] or "",
+            "user" : extr('username: "', '"'),
+        }
+
+        text.nameext_from_url(image["url"], image)
+        yield Message.Directory, image
+        yield Message.Url, image["url"], image
+
+
+class JpgfishAlbumExtractor(JpgfishExtractor):
+    """Extractor for jpgfish Albums"""
+    subcategory = "album"
+    pattern = BASE_PATTERN + r"/a(?:lbum)?/([^/?#]+)(/sub)?"
+    test = (
+        ("https://jpg.fishing/album/CDilP/?sort=date_desc&page=1", {
+            "count": 2,
+        }),
+        ("https://jpg.church/a/gunggingnsk.N9OOI", {
+            "count": 114,
+        }),
+        ("https://jpg.church/a/101-200.aNJ6A/", {
+            "count": 100,
+        }),
+        ("https://jpg.church/a/hannahowo.aNTdH/sub", {
+            "count": 606,
+        }),
+    )
+
+    def __init__(self, match):
+        JpgfishExtractor.__init__(self, match)
+        self.album, self.sub_albums = match.groups()
+
+    def items(self):
+        """Queue all images of the album or of each of its sub-albums"""
+        url = "{}/a/{}".format(self.root, self.album)
+        data = {"_extractor": JpgfishImageExtractor}
+
+        if self.sub_albums:
+            albums = self._pagination(url + "/sub")
+        else:
+            albums = (url,)
+
+        for album in albums:
+            for image in self._pagination(album):
+                yield Message.Queue, image, data
+
+
+class JpgfishUserExtractor(JpgfishExtractor):
+    """Extractor for jpgfish Users"""
+    subcategory = "user"
+    # exclude only the reserved 'img', 'a', and 'album' path segments,
+    # not every username that happens to start with "img" or "a"
+    pattern = (BASE_PATTERN +
+               r"/(?!(?:img|a(?:lbum)?)(?:[/?#]|$))([^/?#]+)(/albums)?")
+    test = (
+        ("https://jpg.fishing/exearco", {
+            "count": 3,
+        }),
+        ("https://jpg.church/exearco/albums", {
+            "count": 1,
+        }),
+        ("https://jpg.church/albumfan/albums"),
+    )
+
+    def __init__(self, match):
+        JpgfishExtractor.__init__(self, match)
+        self.user, self.albums = match.groups()
+
+    def items(self):
+        """Queue a user's images, or their albums for '/albums' URLs"""
+        url = "{}/{}".format(self.root, self.user)
+
+        if self.albums:
+            url += "/albums"
+            data = {"_extractor": JpgfishAlbumExtractor}
+        else:
+            data = {"_extractor": JpgfishImageExtractor}
+
+        for item_url in self._pagination(url):
+            yield Message.Queue, item_url, data
diff --git a/scripts/supportedsites.py b/scripts/supportedsites.py
index 034a4d3b..80bf975c 100755
--- a/scripts/supportedsites.py
+++ b/scripts/supportedsites.py
@@ -64,6 +64,7 @@ CATEGORY_MAP = {
     "imgur"          : "imgur",
     "joyreactor"     : "JoyReactor",
     "itchio"         : "itch.io",
+    "jpgfish"        : "JPG Fish",
     "kabeuchi"       : "かべうち",
     "kemonoparty"    : "Kemono",
     "lineblog"       : "LINE BLOG",