gallery-dl/gallery_dl/extractor/fallenangels.py

# -*- coding: utf-8 -*-

# Copyright 2017-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

"""Extractors for https://www.fascans.com/"""

from .common import ChapterExtractor, MangaExtractor
from .. import text, util


class FallenangelsChapterExtractor(ChapterExtractor):
    """Extractor for a single manga chapter hosted on fascans.com"""
    category = "fallenangels"
    pattern = (r"(?:https?://)?(manga|truyen)\.fascans\.com"
               r"/manga/([^/?#]+)/([^/?#]+)")
    example = "https://manga.fascans.com/manga/NAME/CHAPTER/"

    def __init__(self, match):
        # 'pattern' captures: subdomain, manga name, chapter identifier
        groups = match.groups()
        self.version, self.manga, self.chapter = groups
        # the chapter URL always points at page '1' of the chapter
        url = "https://{}.fascans.com/manga/{}/{}/1".format(*groups)
        ChapterExtractor.__init__(self, match, url)

    def metadata(self, page):
        """Collect chapter metadata from 'page' and the matched URL parts

        The chapter identifier taken from the URL is split at its first
        '.' into "chapter" and "chapter_minor" (e.g. '.5', or '' when
        there is no minor part). Both values stay strings.
        """
        extr = text.extract_from(page)

        # the 'truyen' subdomain is treated as Vietnamese, anything else
        # as English
        if self.version == "truyen":
            lang = "vi"
        else:
            lang = "en"
        major, dot, minor = self.chapter.partition(".")

        # NOTE: keep these two lookups in this order; 'extr' is created
        # once per page and is presumably position-dependent
        # (see text.extract_from)
        manga = extr('name="description" content="', ' Chapter ')
        title = extr(':  ', ' - Page 1')

        return {
            "manga"   : manga,
            "title"   : title,
            "chapter" : major,
            "chapter_minor": dot + minor,
            "lang"    : lang,
            "language": util.code_to_language(lang),
        }

    @staticmethod
    def images(page):
        """Return a list of (image URL, None) tuples for all pages

        The image list is read from the JSON value assigned to the
        'var pages' JavaScript variable embedded in 'page'.
        """
        pages = util.json_loads(text.extr(page, "var pages = ", ";"))
        return [(entry["page_image"], None) for entry in pages]


class FallenangelsMangaExtractor(MangaExtractor):
    """Extractor for the chapter list of a manga on fascans.com"""
    chapterclass = FallenangelsChapterExtractor
    category = "fallenangels"
    pattern = r"(?:https?://)?((manga|truyen)\.fascans\.com/manga/[^/]+)/?$"
    example = "https://manga.fascans.com/manga/NAME"

    def __init__(self, match):
        # group(2) is the subdomain: 'truyen' maps to "vi", otherwise "en"
        subdomain = match.group(2)
        self.lang = "vi" if subdomain == "truyen" else "en"
        # group(1) is the URL without scheme and trailing slash
        MangaExtractor.__init__(self, match, "https://" + match.group(1))

    def chapters(self, page):
        """Return a list of (chapter URL, metadata dict) tuples

        One entry is produced for every '<li style="...">' element found
        in 'page'; scanning stops at the first lookup that yields an
        empty result.
        """
        extr = text.extract_from(page)
        found = []
        language = util.code_to_language(self.lang)

        while True:
            if not extr('<li style="', '"'):
                break

            # NOTE: the lookups below must stay in this order; 'extr' is
            # presumably position-dependent (see text.extract_from)
            volume = extr('class="volume-', '"')
            chapter_url = extr('href="', '"')
            label = extr('>', '<')
            title = extr('<em>', '</em>')

            # the link text is "<manga name> <chapter number>"; the number
            # is split at its first '.' into major and minor part
            manga, _, number = label.rpartition(" ")
            major, dot, minor = number.partition(".")

            info = {
                "manga"   : manga,
                "title"   : text.unescape(title),
                "volume"  : text.parse_int(volume),
                "chapter" : text.parse_int(major),
                "chapter_minor": dot + minor,
                "lang"    : self.lang,
                "language": language,
            }
            found.append((chapter_url, info))

        return found
[fallenangels] add chapter extractor 2017-02-06 20:05:58 +01:00			`# -- coding: utf-8 --`

remove test results in extractor modules and add generic example URLs 2023-09-11 16:30:55 +02:00			`# Copyright 2017-2023 Mike Fährmann`
[fallenangels] add chapter extractor 2017-02-06 20:05:58 +01:00			`#`
			`# This program is free software; you can redistribute it and/or modify`
			`# it under the terms of the GNU General Public License version 2 as`
			`# published by the Free Software Foundation.`

replace json.loads with direct calls to JSONDecoder.decode 2023-02-07 23:14:53 +01:00			`"""Extractors for https://www.fascans.com/"""`
[fallenangels] add chapter extractor 2017-02-06 20:05:58 +01:00
implement generic manga-chapter extractor 2018-02-03 23:14:32 +01:00			`from .common import ChapterExtractor, MangaExtractor`
[fallenangels] support this site's Vietnamese version - https://truyen.fascans.com/ 2017-05-18 15:22:25 +02:00			`from .. import text, util`
[fallenangels] add chapter extractor 2017-02-06 20:05:58 +01:00

implement generic manga-chapter extractor 2018-02-03 23:14:32 +01:00			`class FallenangelsChapterExtractor(ChapterExtractor):`
remove test results in extractor modules and add generic example URLs 2023-09-11 16:30:55 +02:00			`"""Extractor for manga chapters from fascans.com"""`
[fallenangels] add chapter extractor 2017-02-06 20:05:58 +01:00			`category = "fallenangels"`
simplify extractor constants - single strings for URL patterns - tuples instead of lists for 'directory_fmt' and 'test' - single-tuple tests where applicable 2019-02-08 13:45:40 +01:00			`pattern = (r"(?:https?://)?(manga\|truyen)\.fascans\.com"`
[fallenangels] fix extraction of '.5' chapters 2020-10-23 16:56:08 +02:00			`r"/manga/([^/?#]+)/([^/?#]+)")`
remove test results in extractor modules and add generic example URLs 2023-09-11 16:30:55 +02:00			`example = "https://manga.fascans.com/manga/NAME/CHAPTER/"`
[fallenangels] update to new domain and site-layout 2017-04-09 11:37:21 +02:00
			`def __init__(self, match):`
[fallenangels] fix extraction of '.5' chapters 2020-10-23 16:56:08 +02:00			`self.version, self.manga, self.chapter = match.groups()`
[fallenangels] support this site's Vietnamese version - https://truyen.fascans.com/ 2017-05-18 15:22:25 +02:00			`url = "https://{}.fascans.com/manga/{}/{}/1".format(`
			`self.version, self.manga, self.chapter)`
propagate 'match' to base extractor constructor 2019-02-11 13:31:10 +01:00			`ChapterExtractor.__init__(self, match, url)`
[fallenangels] update to new domain and site-layout 2017-04-09 11:37:21 +02:00
change Chapter and MangaExtractor classes - unify and simplify constructors - rename get_metadata and get_images to just metadata() and images() - rename self.url to chapter_url and manga_url 2019-02-11 18:38:47 +01:00			`def metadata(self, page):`
use 'text.extract_from()' in a few places 2019-04-19 23:02:29 +02:00			`extr = text.extract_from(page)`
[fallenangels] support this site's Vietnamese version - https://truyen.fascans.com/ 2017-05-18 15:22:25 +02:00			`lang = "vi" if self.version == "truyen" else "en"`
[fallenangels] fix extraction of '.5' chapters 2020-10-23 16:56:08 +02:00			`chapter, sep, minor = self.chapter.partition(".")`
use 'text.extract_from()' in a few places 2019-04-19 23:02:29 +02:00			`return {`
			`"manga" : extr('name="description" content="', ' Chapter '),`
			`"title" : extr(': ', ' - Page 1'),`
[fallenangels] fix extraction of '.5' chapters 2020-10-23 16:56:08 +02:00			`"chapter" : chapter,`
			`"chapter_minor": sep + minor,`
use 'text.extract_from()' in a few places 2019-04-19 23:02:29 +02:00			`"lang" : lang,`
[fallenangels] support this site's Vietnamese version - https://truyen.fascans.com/ 2017-05-18 15:22:25 +02:00			`"language": util.code_to_language(lang),`
[fallenangels] update to new domain and site-layout 2017-04-09 11:37:21 +02:00			`}`

			`@staticmethod`
change Chapter and MangaExtractor classes - unify and simplify constructors - rename get_metadata and get_images to just metadata() and images() - rename self.url to chapter_url and manga_url 2019-02-11 18:38:47 +01:00			`def images(page):`
implement generic manga-chapter extractor 2018-02-03 23:14:32 +01:00			`return [`
			`(img["page_image"], None)`
replace json.loads with direct calls to JSONDecoder.decode 2023-02-07 23:14:53 +01:00			`for img in util.json_loads(`
replace 'text.extract()' with 'text.extr()' where possible 2022-11-04 23:39:38 +01:00			`text.extr(page, "var pages = ", ";")`
implement generic manga-chapter extractor 2018-02-03 23:14:32 +01:00			`)`
			`]`
[fallenangels] add manga extractor 2017-05-21 10:36:29 +02:00

			`class FallenangelsMangaExtractor(MangaExtractor):`
			`"""Extractor for manga from fascans.com"""`
add '_extractor' info to manga extractor results 2019-02-13 13:23:36 +01:00			`chapterclass = FallenangelsChapterExtractor`
[fallenangels] add manga extractor 2017-05-21 10:36:29 +02:00			`category = "fallenangels"`
simplify extractor constants - single strings for URL patterns - tuples instead of lists for 'directory_fmt' and 'test' - single-tuple tests where applicable 2019-02-08 13:45:40 +01:00			`pattern = r"(?:https?://)?((manga\|truyen)\.fascans\.com/manga/[^/]+)/?$"`
remove test results in extractor modules and add generic example URLs 2023-09-11 16:30:55 +02:00			`example = "https://manga.fascans.com/manga/NAME"`
[fallenangels] add manga extractor 2017-05-21 10:36:29 +02:00
[fallenangels] extract manga metadata 2017-09-15 20:51:40 +02:00			`def __init__(self, match):`
change Chapter and MangaExtractor classes - unify and simplify constructors - rename get_metadata and get_images to just metadata() and images() - rename self.url to chapter_url and manga_url 2019-02-11 18:38:47 +01:00			`url = "https://" + match.group(1)`
[fallenangels] extract manga metadata 2017-09-15 20:51:40 +02:00			`self.lang = "vi" if match.group(2) == "truyen" else "en"`
change Chapter and MangaExtractor classes - unify and simplify constructors - rename get_metadata and get_images to just metadata() and images() - rename self.url to chapter_url and manga_url 2019-02-11 18:38:47 +01:00			`MangaExtractor.__init__(self, match, url)`
[fallenangels] extract manga metadata 2017-09-15 20:51:40 +02:00
[fallenangels] add manga extractor 2017-05-21 10:36:29 +02:00			`def chapters(self, page):`
use 'text.extract_from()' in a few places 2019-04-19 23:02:29 +02:00			`extr = text.extract_from(page)`
[fallenangels] extract manga metadata 2017-09-15 20:51:40 +02:00			`results = []`
use 'text.extract_from()' in a few places 2019-04-19 23:02:29 +02:00			`language = util.code_to_language(self.lang)`
			`while extr('<li style="', '"'):`
			`vol = extr('class="volume-', '"')`
			`url = extr('href="', '"')`
			`cha = extr('>', '<')`
			`title = extr('<em>', '</em>')`
[fallenangels] extract manga metadata 2017-09-15 20:51:40 +02:00
use 'text.extract_from()' in a few places 2019-04-19 23:02:29 +02:00			`manga, _, chapter = cha.rpartition(" ")`
implement and use 'util.safe_int()' same as Python's 'int()', except it doesn't raise any exceptions and accepts a default value 2017-09-24 15:59:25 +02:00			`chapter, dot, minor = chapter.partition(".")`
[fallenangels] extract manga metadata 2017-09-15 20:51:40 +02:00			`results.append((url, {`
use 'text.extract_from()' in a few places 2019-04-19 23:02:29 +02:00			`"manga" : manga,`
			`"title" : text.unescape(title),`
			`"volume" : text.parse_int(vol),`
			`"chapter" : text.parse_int(chapter),`
implement and use 'util.safe_int()' same as Python's 'int()', except it doesn't raise any exceptions and accepts a default value 2017-09-24 15:59:25 +02:00			`"chapter_minor": dot + minor,`
use 'text.extract_from()' in a few places 2019-04-19 23:02:29 +02:00			`"lang" : self.lang,`
[fallenangels] unescape chapter titles 2018-10-20 18:31:26 +02:00			`"language": language,`
[fallenangels] extract manga metadata 2017-09-15 20:51:40 +02:00			`}))`
use 'text.extract_from()' in a few places 2019-04-19 23:02:29 +02:00			`return results`