mirror of
https://github.com/mikf/gallery-dl.git
synced 2024-11-22 18:53:21 +01:00
[mangadex] add chapter- and manga-extractor
This commit is contained in:
parent
b58449fd88
commit
749fbbfa6c
@ -1,5 +1,7 @@
|
|||||||
# Changelog
|
# Changelog
|
||||||
|
|
||||||
|
## Unreleased
|
||||||
|
|
||||||
## 1.3.0 - 2018-03-02
|
## 1.3.0 - 2018-03-02
|
||||||
- Added `--proxy` to explicitly specify a proxy server ([#76](https://github.com/mikf/gallery-dl/issues/76))
|
- Added `--proxy` to explicitly specify a proxy server ([#76](https://github.com/mikf/gallery-dl/issues/76))
|
||||||
- Added options to customize [archive ID formats](https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst#extractorarchive-format) and [undefined replacement fields](https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst#extractorkeywords-default)
|
- Added options to customize [archive ID formats](https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst#extractorarchive-format) and [undefined replacement fields](https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst#extractorkeywords-default)
|
||||||
|
136
docs/gallery-dl-example.conf
Normal file
136
docs/gallery-dl-example.conf
Normal file
@ -0,0 +1,136 @@
|
|||||||
|
{
|
||||||
|
"base-directory": "/tmp/",
|
||||||
|
"netrc": false,
|
||||||
|
|
||||||
|
"downloader":
|
||||||
|
{
|
||||||
|
"part": true,
|
||||||
|
"part-directory": null,
|
||||||
|
"http":
|
||||||
|
{
|
||||||
|
"rate": null,
|
||||||
|
"retries": 5,
|
||||||
|
"timeout": 30,
|
||||||
|
"verify": true
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"extractor":
|
||||||
|
{
|
||||||
|
"archive": null,
|
||||||
|
"proxy": null,
|
||||||
|
"skip": true,
|
||||||
|
"sleep": 0,
|
||||||
|
|
||||||
|
"pixiv":
|
||||||
|
{
|
||||||
|
"user":
|
||||||
|
{
|
||||||
|
"directory": ["{category}", "{user[id]}"]
|
||||||
|
},
|
||||||
|
"bookmark":
|
||||||
|
{
|
||||||
|
"directory": ["{category}", "my bookmarks"]
|
||||||
|
},
|
||||||
|
"ugoira": true,
|
||||||
|
"username": null,
|
||||||
|
"password": null
|
||||||
|
},
|
||||||
|
"batoto":
|
||||||
|
{
|
||||||
|
"username": null,
|
||||||
|
"password": null
|
||||||
|
},
|
||||||
|
"exhentai":
|
||||||
|
{
|
||||||
|
"wait-min": 3,
|
||||||
|
"wait-max": 6,
|
||||||
|
"original": true,
|
||||||
|
"username": null,
|
||||||
|
"password": null,
|
||||||
|
"cookies": {
|
||||||
|
"igneous": null,
|
||||||
|
"s": null,
|
||||||
|
"yay": "louder"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nijie":
|
||||||
|
{
|
||||||
|
"username": null,
|
||||||
|
"password": null
|
||||||
|
},
|
||||||
|
"sankaku":
|
||||||
|
{
|
||||||
|
"wait-min": 2,
|
||||||
|
"wait-max": 4,
|
||||||
|
"username": null,
|
||||||
|
"password": null
|
||||||
|
},
|
||||||
|
"seiga":
|
||||||
|
{
|
||||||
|
"username": null,
|
||||||
|
"password": null
|
||||||
|
},
|
||||||
|
"gelbooru":
|
||||||
|
{
|
||||||
|
"filename": "{category}_{id:>07}_{md5}.{extension}",
|
||||||
|
"api": true
|
||||||
|
},
|
||||||
|
"reddit":
|
||||||
|
{
|
||||||
|
"refresh-token": null,
|
||||||
|
"comments": 500,
|
||||||
|
"morecomments": false,
|
||||||
|
"date-min": 0,
|
||||||
|
"date-max": 253402210800,
|
||||||
|
"date-format": "%Y-%m-%dT%H:%M:%S",
|
||||||
|
"id-min": "0",
|
||||||
|
"id-max": "ZIK0ZJ",
|
||||||
|
"recursion": 0
|
||||||
|
},
|
||||||
|
"flickr":
|
||||||
|
{
|
||||||
|
"access-token": null,
|
||||||
|
"access-token-secret": null,
|
||||||
|
"metadata": false,
|
||||||
|
"size-max": null
|
||||||
|
},
|
||||||
|
"deviantart":
|
||||||
|
{
|
||||||
|
"refresh-token": null,
|
||||||
|
"flat": true,
|
||||||
|
"mature": true,
|
||||||
|
"original": true
|
||||||
|
},
|
||||||
|
"gfycat":
|
||||||
|
{
|
||||||
|
"format": "mp4"
|
||||||
|
},
|
||||||
|
"imgur":
|
||||||
|
{
|
||||||
|
"mp4": true
|
||||||
|
},
|
||||||
|
"tumblr":
|
||||||
|
{
|
||||||
|
"posts": "photo",
|
||||||
|
"inline": false,
|
||||||
|
"reblogs": true,
|
||||||
|
"external": false
|
||||||
|
},
|
||||||
|
"recursive":
|
||||||
|
{
|
||||||
|
"blacklist": ["directlink", "oauth", "recursive", "test"]
|
||||||
|
},
|
||||||
|
"oauth":
|
||||||
|
{
|
||||||
|
"browser": true
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"output":
|
||||||
|
{
|
||||||
|
"mode": "auto",
|
||||||
|
"shorten": true,
|
||||||
|
"progress": true,
|
||||||
|
"logfile": null,
|
||||||
|
"unsupportedfile": null
|
||||||
|
}
|
||||||
|
}
|
@ -47,6 +47,7 @@ Luscious https://luscious.net/ Albums
|
|||||||
Manga Fox http://fanfox.net/ Chapters
|
Manga Fox http://fanfox.net/ Chapters
|
||||||
Manga Here http://www.mangahere.co/ Chapters, Manga
|
Manga Here http://www.mangahere.co/ Chapters, Manga
|
||||||
Manga Stream https://mangastream.com/ Chapters
|
Manga Stream https://mangastream.com/ Chapters
|
||||||
|
Mangadex https://mangadex.org/ Chapters, Manga
|
||||||
Mangapanda https://www.mangapanda.com/ Chapters, Manga
|
Mangapanda https://www.mangapanda.com/ Chapters, Manga
|
||||||
MangaPark https://mangapark.me/ Chapters, Manga
|
MangaPark https://mangapark.me/ Chapters, Manga
|
||||||
Mangareader https://www.mangareader.net/ Chapters, Manga
|
Mangareader https://www.mangareader.net/ Chapters, Manga
|
||||||
|
@ -51,6 +51,7 @@ modules = [
|
|||||||
"konachan",
|
"konachan",
|
||||||
"loveisover",
|
"loveisover",
|
||||||
"luscious",
|
"luscious",
|
||||||
|
"mangadex",
|
||||||
"mangafox",
|
"mangafox",
|
||||||
"mangahere",
|
"mangahere",
|
||||||
"mangapanda",
|
"mangapanda",
|
||||||
|
148
gallery_dl/extractor/mangadex.py
Normal file
148
gallery_dl/extractor/mangadex.py
Normal file
@ -0,0 +1,148 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
# Copyright 2018 Mike Fährmann
|
||||||
|
#
|
||||||
|
# This program is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU General Public License version 2 as
|
||||||
|
# published by the Free Software Foundation.
|
||||||
|
|
||||||
|
"""Extract manga-chapters and entire manga from https://mangadex.org/"""
|
||||||
|
|
||||||
|
from .common import ChapterExtractor, MangaExtractor
|
||||||
|
from .. import text, util
|
||||||
|
from urllib.parse import urljoin
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
|
||||||
|
|
||||||
|
class MangadexExtractor():
    """Base class for mangadex extractors"""
    # Category name shared by every mangadex extractor.
    category = "mangadex"
    # Site root; subclasses build chapter and image URLs from it.
    root = "https://mangadex.org"
|
||||||
|
|
||||||
|
|
||||||
|
class MangadexChapterExtractor(MangadexExtractor, ChapterExtractor):
    """Extractor for manga-chapters from mangadex.org"""
    pattern = [r"(?:https?://)?(?:www\.)?mangadex\.(?:org|com)/chapter/(\d+)"]
    test = [
        ("https://mangadex.org/chapter/122094", {
            "keyword": "b4c83fe41f125eae745c2e00d29e087cc4eb78df",
            "content": "7ab3bef5caccb62b881f8e6e70359d3c7be8137f",
        }),
        # oneshot
        ("https://mangadex.org/chapter/138086", {
            "count": 64,
            "keyword": "9b1b7292f7dbcf10983fbdc34b8cdceeb47328ee",
        }),
    ]

    def __init__(self, match):
        """Store the numeric chapter ID and build the canonical chapter URL.

        'match' is the regex match object produced by 'pattern';
        group 1 is the chapter ID as a string of digits.
        """
        self.chapter_id = match.group(1)
        url = self.root + "/chapter/" + self.chapter_id
        ChapterExtractor.__init__(self, url)

    def get_metadata(self, page):
        """Return a dict of chapter metadata parsed from the HTML in 'page'.

        The og:title value is expected to look like
        '[Vol. N ]Ch. N[minor] (Manga) - MangaDex' or
        '<free text> (Manga) - MangaDex' (e.g. for oneshots).
        """
        info, pos = text.extract(page, '="og:title" content="', '"')
        manga_id, pos = text.extract(page, '/images/manga/', '.', pos)
        # The next two calls only advance 'pos' to the selected entry
        # of the "jump_group" element; their values are not used.
        _, pos = text.extract(page, ' id="jump_group"', '', pos)
        _, pos = text.extract(page, ' selected ', '', pos)
        # The language title attribute sits shortly *before* the
        # ' selected ' marker, hence the search starts 100 chars earlier.
        language, _ = text.extract(page, " title='", "'", pos-100)
        group, pos = text.extract(page, '>', '<', pos)

        info = text.unescape(info)
        # NOTE(review): 'match' is None if the title lacks a trailing
        # ' (manga name)' part; the group() calls below would then raise
        # AttributeError.
        match = re.match(
            r"(?:(?:Vol\. (\d+) )?Ch\. (\d+)([^ ]*)|(.*)) "
            r"\(([^)]+)\)",
            info)

        # Remove the literal site suffix. str.rstrip() must not be used
        # here: it treats its argument as a *set of characters* and would
        # also strip trailing letters that belong to the title itself.
        suffix = " - MangaDex"
        if info.endswith(suffix):
            chapter_string = info[:-len(suffix)]
        else:
            chapter_string = info

        return {
            "manga": match.group(5),
            "manga_id": util.safe_int(manga_id),
            "volume": util.safe_int(match.group(1)),
            "chapter": util.safe_int(match.group(2)),
            "chapter_minor": match.group(3) or "",
            "chapter_id": util.safe_int(self.chapter_id),
            "chapter_string": chapter_string,
            "group": text.unescape(group),
            "lang": util.language_to_code(language),
            "language": language,
        }

    def get_images(self, page):
        """Return a list of (image-url, None) tuples for all chapter pages.

        The URLs are assembled from the 'dataurl', 'page_array' and
        'server' JavaScript variables embedded in 'page'.
        """
        dataurl, pos = text.extract(page, "var dataurl = '", "'")
        pagelist, pos = text.extract(page, "var page_array = [", "]", pos)
        server, pos = text.extract(page, "var server = '", "'", pos)

        # 'server' may be a relative path; urljoin() resolves it
        # against the site root and leaves absolute URLs untouched.
        base = urljoin(self.root, server + dataurl + "/")

        # Turn the JavaScript array body into valid JSON: switch to
        # double quotes and drop a trailing comma before re-wrapping it.
        filenames = json.loads(
            "[" + pagelist.replace("'", '"').rstrip(",") + "]"
        )
        return [(base + filename, None) for filename in filenames]
|
||||||
|
|
||||||
|
|
||||||
|
class MangadexMangaExtractor(MangadexExtractor, MangaExtractor):
    """Extractor for manga from mangadex.org"""
    pattern = [r"(?:https?://)?(?:www\.)?(mangadex\.(?:org|com)/manga/\d+)"]
    # NOTE(review): the chapter extractor's tests in this module use the
    # key "keyword"; confirm that "keywords" is actually evaluated by the
    # test runner and not silently ignored.
    test = [
        ("https://mangadex.org/manga/2946/souten-no-koumori", {
            "url": "9e77934759828458d0424473922e41f348719472",
            "keywords": {
                "manga": "Souten no Koumori",
                "manga_id": 2946,
                "title": "Oneshot",
                "volume": int,
                "chapter": int,
                "chapter_minor": str,
                "chapter_id": int,
                "group": str,
                "contributor": str,
                "date": str,
                "views": int,
                "lang": str,
                "language": str,
            },
        }),
    ]

    def chapters(self, page):
        """Return a list of (chapter-url, metadata-dict) tuples.

        One entry is produced for every '<tr id=...>' table row in 'page'
        that carries a 'data-chapter-id' attribute.
        """
        results = []
        extr = text.extract

        # og:title looks like '<manga name> (<something>)'; keep only the
        # part in front of the last ' ('.
        manga = text.unescape(extr(
            page, '"og:title" content="', '"')[0].rpartition(" (")[0])
        manga_id = util.safe_int(extr(
            page, '/images/manga/', '.')[0])

        for info in text.extract_iter(page, "<tr id=", "</tr>"):
            chid, pos = extr(info, 'data-chapter-id="', '"')
            if not chid:
                # Not a chapter row; without an ID no chapter URL can be
                # built, so skip it instead of failing on the
                # concatenation below.
                continue
            chapter, pos = extr(info, 'data-chapter-num="', '"', pos)
            volume, pos = extr(info, 'data-volume-num="', '"', pos)
            title, pos = extr(info, 'data-chapter-name="', '"', pos)
            language, pos = extr(info, " title='", "'", pos)
            group, pos = extr(info, "<td>", "</td>", pos)
            user, pos = extr(info, "<td>", "</td>", pos)
            views, pos = extr(info, ">", "<", pos)
            date, pos = extr(info, ' datetime="', '"', pos)

            # '12.5' -> chapter '12', sep '.', minor '5';
            # 'sep + minor' is the empty string for whole-numbered chapters.
            chapter, sep, minor = chapter.partition(".")

            results.append((self.root + "/chapter/" + chid, {
                "manga": manga,
                # already converted to an integer above
                "manga_id": manga_id,
                "title": text.unescape(title),
                "volume": util.safe_int(volume),
                "chapter": util.safe_int(chapter),
                "chapter_minor": sep + minor,
                "chapter_id": util.safe_int(chid),
                "group": text.unescape(text.remove_html(group)),
                "contributor": text.remove_html(user),
                "views": util.safe_int(views),
                "date": date,
                "lang": util.language_to_code(language),
                "language": language,
            }))

        return results
|
@ -155,6 +155,8 @@ def language_to_code(lang, default=None):
|
|||||||
|
|
||||||
CODES = {
|
CODES = {
|
||||||
"ar": "Arabic",
|
"ar": "Arabic",
|
||||||
|
"bg": "Bulgarian",
|
||||||
|
"ca": "Catalan",
|
||||||
"cs": "Czech",
|
"cs": "Czech",
|
||||||
"da": "Danish",
|
"da": "Danish",
|
||||||
"de": "German",
|
"de": "German",
|
||||||
|
@ -6,4 +6,4 @@
|
|||||||
# it under the terms of the GNU General Public License version 2 as
|
# it under the terms of the GNU General Public License version 2 as
|
||||||
# published by the Free Software Foundation.
|
# published by the Free Software Foundation.
|
||||||
|
|
||||||
__version__ = "1.3.0"
|
__version__ = "1.3.1-dev"
|
||||||
|
@ -18,7 +18,9 @@ SKIP = {
|
|||||||
"archivedmoe", "archiveofsins", "thebarchive",
|
"archivedmoe", "archiveofsins", "thebarchive",
|
||||||
|
|
||||||
# temporary issues
|
# temporary issues
|
||||||
|
"imgchili",
|
||||||
"powermanga",
|
"powermanga",
|
||||||
|
"pinterest",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user