# -*- coding: utf-8 -*- # Copyright 2021 Seonghyeon Cho # Copyright 2022 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. """Extractors for https://comic.naver.com/""" from .common import GalleryExtractor, Extractor, Message from .. import text import re BASE_PATTERN = (r"(?:https?://)?comic\.naver\.com" r"/(webtoon|challenge|bestChallenge)") class NaverwebtoonBase(): """Base class for naver webtoon extractors""" category = "naverwebtoon" root = "https://comic.naver.com" class NaverwebtoonEpisodeExtractor(NaverwebtoonBase, GalleryExtractor): subcategory = "episode" directory_fmt = ("{category}", "{comic}") filename_fmt = "{episode:>03}-{num:>02}.{extension}" archive_fmt = "{title_id}_{episode}_{num}" pattern = BASE_PATTERN + r"/detail(?:\.nhn)?\?([^#]+)" test = ( (("https://comic.naver.com/webtoon/detail" "?titleId=26458&no=1&weekday=tue"), { "url": "47a956ba8c7a837213d5985f50c569fcff986f75", "content": "3806b6e8befbb1920048de9888dfce6220f69a60", "count": 14 }), (("https://comic.naver.com/challenge/detail" "?titleId=765124&no=1"), { "pattern": r"https://image-comic\.pstatic\.net/nas" r"/user_contents_data/challenge_comic/2021/01/19" r"/342586/upload_7149856273586337846\.jpeg", "count": 1, }), (("https://comic.naver.com/bestChallenge/detail.nhn" "?titleId=771467&no=3"), { "pattern": r"https://image-comic\.pstatic\.net/nas" r"/user_contents_data/challenge_comic/2021/04/28" r"/345534/upload_3617293622396203109\.jpeg", "count": 1, }), ) def __init__(self, match): path, query = match.groups() url = "{}/{}/detail?{}".format(self.root, path, query) GalleryExtractor.__init__(self, match, url) query = text.parse_query(query) self.title_id = query.get("titleId") self.episode = query.get("no") def metadata(self, page): extr = text.extract_from(page) return { "title_id": self.title_id, "episode" : self.episode, "title" : extr('property="og:title" content="', '"'), "comic" : extr('
', '
'), "genre" : extr('', ''), "date" : extr('