From b23c822b2329a2c21b13fcc29652295d7af324c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Tue, 22 Oct 2019 21:17:08 +0200 Subject: [PATCH] [luscious] use GraphQL --- docs/supportedsites.rst | 2 +- gallery_dl/extractor/luscious.py | 271 +++++++++++++++---------------- scripts/supportedsites.py | 1 - test/test_results.py | 1 - 4 files changed, 132 insertions(+), 143 deletions(-) diff --git a/docs/supportedsites.rst b/docs/supportedsites.rst index 95570a51..72f12df5 100644 --- a/docs/supportedsites.rst +++ b/docs/supportedsites.rst @@ -64,7 +64,7 @@ Komikcast https://komikcast.com/ Chapters, Manga Konachan https://konachan.com/ Pools, Popular Images, Posts, Tag-Searches LINE BLOG https://www.lineblog.me/ Blogs, Posts livedoor Blog http://blog.livedoor.jp/ Blogs, Posts -Luscious https://luscious.net/ Albums, Search Results Optional +Luscious https://members.luscious.net/ Albums, Search Results Manga Fox https://fanfox.net/ Chapters Manga Here https://www.mangahere.cc/ Chapters, Manga Manga Stream https://readms.net/ Chapters diff --git a/gallery_dl/extractor/luscious.py b/gallery_dl/extractor/luscious.py index 965daa01..9ddca112 100644 --- a/gallery_dl/extractor/luscious.py +++ b/gallery_dl/extractor/luscious.py @@ -6,75 +6,62 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -"""Extractors for https://luscious.net/""" +"""Extractors for https://members.luscious.net/""" -from .common import GalleryExtractor, Extractor, Message +from .common import Extractor, Message from .. import text, exception -from ..cache import cache -class LusciousBase(Extractor): +class LusciousExtractor(Extractor): """Base class for luscious extractors""" category = "luscious" cookiedomain = ".luscious.net" root = "https://members.luscious.net" - def login(self): - """Login and set necessary cookies""" - username, password = self._get_auth_info() - if username: - self._update_cookies(self._login_impl(username, password)) - - @cache(maxage=14*24*3600, keyarg=1) - def _login_impl(self, username, password): - self.log.info("Logging in as %s", username) - url = "https://members.luscious.net/accounts/login/" - headers = {"Referer": "https://members.luscious.net/login/"} + def _graphql(self, op, variables, query): data = { - "login": username, - "password": password, - "remember": "on", - "next": "/", + "id": 1, + "operationName": op, + "query": query, + "variables": variables, } + response = self.request( + "{}/graphql/nobatch/?operationName={}".format(self.root, op), + method="POST", json=data, fatal=False, + ) - response = self.request(url, method="POST", headers=headers, data=data) - if "/accounts/login/" in response.url or not response.history: - raise exception.AuthenticationError() - for cookie in response.history[0].cookies: - if cookie.name.startswith("sessionid_"): - return {cookie.name: cookie.value} - raise exception.AuthenticationError() + if response.status_code >= 400: + self.log.error("GraphQL query failed ('%s %s')", + response.status_code, response.reason) + self.log.debug("Server response: %s", response.text) + raise exception.StopExtraction() - @staticmethod - def _parse_tags(tags): - return [ - text.unescape(tag.replace(":_", ":")) - for tag in text.extract_iter(tags or "", "/tags/", "/") - ] + return response.json()["data"] -class LusciousAlbumExtractor(LusciousBase, GalleryExtractor): +class LusciousAlbumExtractor(LusciousExtractor): """Extractor for image albums from luscious.net""" subcategory = "album" - archive_fmt = "{gallery_id}_{image_id}" + filename_fmt = "{category}_{album[id]}_{num:>03}.{extension}" + directory_fmt = ("{category}", "{album[id]} {album[title]}") + archive_fmt = "{album[id]}_{id}" pattern = (r"(?:https?://)?(?:www\.|members\.)?luscious\.net" - r"/(?:albums|pictures/c/[^/?&#]+/album)/([^/?&#]+_(\d+))") + r"/(?:albums|pictures/c/[^/?&#]+/album)/[^/?&#]+_(\d+)") test = ( ("https://luscious.net/albums/okinami-no-koigokoro_277031/", { "url": "7e4984a271a1072ac6483e4228a045895aff86f3", - "keyword": "07c0b915f2ab1cc3bbf28b76e7950fccee1213f3", + # "keyword": "07c0b915f2ab1cc3bbf28b76e7950fccee1213f3", "content": "b3a747a6464509440bd0ff6d1267e6959f8d6ff3", }), ("https://luscious.net/albums/virgin-killer-sweater_282582/", { "url": "21cc68a7548f4d71dfd67d8caf96349dde7e791c", - "keyword": "e1202078b504adeccd521aa932f456a5a85479a0", + # "keyword": "e1202078b504adeccd521aa932f456a5a85479a0", }), ("https://luscious.net/albums/not-found_277035/", { "exception": exception.NotFoundError, }), ("https://members.luscious.net/albums/login-required_323871/", { - "options": (("username", None),), - "exception": exception.HttpError, + "count": 78, }), ("https://www.luscious.net/albums/okinami_277031/"), ("https://members.luscious.net/albums/okinami_277031/"), @@ -83,126 +70,130 @@ class LusciousAlbumExtractor(LusciousBase, GalleryExtractor): ) def __init__(self, match): - path, self.gallery_id = match.groups() - url = "{}/albums/{}/".format(self.root, path) - GalleryExtractor.__init__(self, match, url) + LusciousExtractor.__init__(self, match) + self.album_id = match.group(1) - def metadata(self, page): - title, pos = text.extract(page, '"og:title" content="', '"') + def items(self): + album = self.metadata() + yield Message.Version, 1 + yield Message.Directory, {"album": album} + for num, image in enumerate(self.images(), 1): + image["num"] = num + image["album"] = album + url = image["url_to_video"] or image["url_to_original"] + yield Message.Url, url, text.nameext_from_url(url, image) - if title is None: - msg = text.extract(page, '
', '
', pos)[0] - if msg: - raise exception.AuthorizationError(msg) - raise exception.NotFoundError("album") - - info , pos = text.extract(page, '
  • ', "", pos) - if info is None: - count, pos = text.extract(page, '>Pages:', '<', pos) - else: - count, pos = text.extract(page, '

    ', ' ', pos) - genre, pos = text.extract(page, '

    Genre:', '

    ', pos) - adnce, pos = text.extract(page, '

    Audience:', '

    ', pos) - tags , pos = text.extract(page, '"tag_list static">', '', pos) - - return { - "gallery_id": text.parse_int(self.gallery_id), - "title" : text.unescape(title or ""), - "count" : text.parse_int(count), - "genre" : text.remove_html(genre), - "audience" : text.remove_html(adnce), - "tags" : self._parse_tags(tags), + def metadata(self): + variables = { + "id": self.album_id, } + query = ( + "query AlbumGet($id: ID!) { album { get(id: $id) { ... on Album { " + "...AlbumStandard } ... on MutationError { errors { code message }" + " } } } } fragment AlbumStandard on Album { __typename id title la" + "bels description created modified like_status number_of_favorites" + " rating status marked_for_deletion marked_for_processing number_o" + "f_pictures number_of_animated_pictures slug is_manga url download" + "_url permissions cover { width height size url } created_by { id " + "name display_name user_title name display_name avatar { url size " + "} url } content { id title url } language { id title url } tags {" + " id category slug text url count } genres { id title slug url } a" + "udiences { id title url url } last_viewed_picture { id position u" + "rl } }" + ) - def images(self, page): - extr = text.extract + data = self._graphql("AlbumGet", variables, query)["album"]["get"] + if "errors" in data: + raise exception.NotFoundError("album") + return data - url = "{}/pictures/album/x_{}/sorted/old/page/1/".format( - self.root, self.gallery_id) - page = self.request(url).text - pos = page.find('