mirror of
https://github.com/mikf/gallery-dl.git
synced 2024-11-22 10:42:34 +01:00
[mangadex] add chapter- and manga-extractor
This commit is contained in:
parent
b58449fd88
commit
749fbbfa6c
@ -1,5 +1,7 @@
|
||||
# Changelog
|
||||
|
||||
## Unreleased
|
||||
|
||||
## 1.3.0 - 2018-03-02
|
||||
- Added `--proxy` to explicitly specify a proxy server ([#76](https://github.com/mikf/gallery-dl/issues/76))
|
||||
- Added options to customize [archive ID formats](https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst#extractorarchive-format) and [undefined replacement fields](https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst#extractorkeywords-default)
|
||||
|
136
docs/gallery-dl-example.conf
Normal file
136
docs/gallery-dl-example.conf
Normal file
@ -0,0 +1,136 @@
|
||||
{
|
||||
"base-directory": "/tmp/",
|
||||
"netrc": false,
|
||||
|
||||
"downloader":
|
||||
{
|
||||
"part": true,
|
||||
"part-directory": null,
|
||||
"http":
|
||||
{
|
||||
"rate": null,
|
||||
"retries": 5,
|
||||
"timeout": 30,
|
||||
"verify": true
|
||||
}
|
||||
},
|
||||
"extractor":
|
||||
{
|
||||
"archive": null,
|
||||
"proxy": null,
|
||||
"skip": true,
|
||||
"sleep": 0,
|
||||
|
||||
"pixiv":
|
||||
{
|
||||
"user":
|
||||
{
|
||||
"directory": ["{category}", "{user[id]}"]
|
||||
},
|
||||
"bookmark":
|
||||
{
|
||||
"directory": ["{category}", "my bookmarks"]
|
||||
},
|
||||
"ugoira": true,
|
||||
"username": null,
|
||||
"password": null
|
||||
},
|
||||
"batoto":
|
||||
{
|
||||
"username": null,
|
||||
"password": null
|
||||
},
|
||||
"exhentai":
|
||||
{
|
||||
"wait-min": 3,
|
||||
"wait-max": 6,
|
||||
"original": true,
|
||||
"username": null,
|
||||
"password": null,
|
||||
"cookies": {
|
||||
"igneous": null,
|
||||
"s": null,
|
||||
"yay": "louder"
|
||||
}
|
||||
},
|
||||
"nijie":
|
||||
{
|
||||
"username": null,
|
||||
"password": null
|
||||
},
|
||||
"sankaku":
|
||||
{
|
||||
"wait-min": 2,
|
||||
"wait-max": 4,
|
||||
"username": null,
|
||||
"password": null
|
||||
},
|
||||
"seiga":
|
||||
{
|
||||
"username": null,
|
||||
"password": null
|
||||
},
|
||||
"gelbooru":
|
||||
{
|
||||
"filename": "{category}_{id:>07}_{md5}.{extension}",
|
||||
"api": true
|
||||
},
|
||||
"reddit":
|
||||
{
|
||||
"refresh-token": null,
|
||||
"comments": 500,
|
||||
"morecomments": false,
|
||||
"date-min": 0,
|
||||
"date-max": 253402210800,
|
||||
"date-format": "%Y-%m-%dT%H:%M:%S",
|
||||
"id-min": "0",
|
||||
"id-max": "ZIK0ZJ",
|
||||
"recursion": 0
|
||||
},
|
||||
"flickr":
|
||||
{
|
||||
"access-token": null,
|
||||
"access-token-secret": null,
|
||||
"metadata": false,
|
||||
"size-max": null
|
||||
},
|
||||
"deviantart":
|
||||
{
|
||||
"refresh-token": null,
|
||||
"flat": true,
|
||||
"mature": true,
|
||||
"original": true
|
||||
},
|
||||
"gfycat":
|
||||
{
|
||||
"format": "mp4"
|
||||
},
|
||||
"imgur":
|
||||
{
|
||||
"mp4": true
|
||||
},
|
||||
"tumblr":
|
||||
{
|
||||
"posts": "photo",
|
||||
"inline": false,
|
||||
"reblogs": true,
|
||||
"external": false
|
||||
},
|
||||
"recursive":
|
||||
{
|
||||
"blacklist": ["directlink", "oauth", "recursive", "test"]
|
||||
},
|
||||
"oauth":
|
||||
{
|
||||
"browser": true
|
||||
}
|
||||
},
|
||||
"output":
|
||||
{
|
||||
"mode": "auto",
|
||||
"shorten": true,
|
||||
"progress": true,
|
||||
"logfile": null,
|
||||
"unsupportedfile": null
|
||||
}
|
||||
}
|
@ -47,6 +47,7 @@ Luscious https://luscious.net/ Albums
|
||||
Manga Fox http://fanfox.net/ Chapters
|
||||
Manga Here http://www.mangahere.co/ Chapters, Manga
|
||||
Manga Stream https://mangastream.com/ Chapters
|
||||
Mangadex https://mangadex.org/ Chapters, Manga
|
||||
Mangapanda https://www.mangapanda.com/ Chapters, Manga
|
||||
MangaPark https://mangapark.me/ Chapters, Manga
|
||||
Mangareader https://www.mangareader.net/ Chapters, Manga
|
||||
|
@ -51,6 +51,7 @@ modules = [
|
||||
"konachan",
|
||||
"loveisover",
|
||||
"luscious",
|
||||
"mangadex",
|
||||
"mangafox",
|
||||
"mangahere",
|
||||
"mangapanda",
|
||||
|
148
gallery_dl/extractor/mangadex.py
Normal file
148
gallery_dl/extractor/mangadex.py
Normal file
@ -0,0 +1,148 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2018 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
"""Extract manga-chapters and entire manga from https://mangadex.org/"""
|
||||
|
||||
from .common import ChapterExtractor, MangaExtractor
|
||||
from .. import text, util
|
||||
from urllib.parse import urljoin
|
||||
import json
|
||||
import re
|
||||
|
||||
|
||||
class MangadexExtractor:
    """Mixin holding the constants shared by all mangadex extractors"""
    # Base URL that the concrete extractors prepend to site-relative paths
    root = "https://mangadex.org"
    # Category identifier common to every mangadex extractor
    category = "mangadex"
|
||||
|
||||
|
||||
class MangadexChapterExtractor(MangadexExtractor, ChapterExtractor):
    """Extractor for manga-chapters from mangadex.org"""
    pattern = [r"(?:https?://)?(?:www\.)?mangadex\.(?:org|com)/chapter/(\d+)"]
    test = [
        ("https://mangadex.org/chapter/122094", {
            "keyword": "b4c83fe41f125eae745c2e00d29e087cc4eb78df",
            "content": "7ab3bef5caccb62b881f8e6e70359d3c7be8137f",
        }),
        # oneshot
        ("https://mangadex.org/chapter/138086", {
            "count": 64,
            "keyword": "9b1b7292f7dbcf10983fbdc34b8cdceeb47328ee",
        }),
    ]

    def __init__(self, match):
        """Store the chapter ID captured by 'pattern' and build its URL"""
        self.chapter_id = match.group(1)
        url = self.root + "/chapter/" + self.chapter_id
        ChapterExtractor.__init__(self, url)

    def get_metadata(self, page):
        """Collect chapter metadata from the HTML of a chapter page

        Returns a dict with the keys 'manga', 'manga_id', 'volume',
        'chapter', 'chapter_minor', 'chapter_id', 'chapter_string',
        'group', 'lang' and 'language'.
        """
        info, pos = text.extract(page, '="og:title" content="', '"')
        manga_id, pos = text.extract(page, '/images/manga/', '.', pos)
        # Advance to the selected entry of the "jump_group" element
        _, pos = text.extract(page, ' id="jump_group"', '', pos)
        _, pos = text.extract(page, ' selected ', '', pos)
        # The language title is searched for starting 100 characters
        # before the current position; 'pos' itself is left untouched.
        # NOTE(review): presumably the title attribute sits shortly before
        # the ' selected ' marker -- confirm against the page markup.
        language, _ = text.extract(page, " title='", "'", pos-100)
        group, pos = text.extract(page, '>', '<', pos)

        info = text.unescape(info)
        # Groups: 1 = volume, 2 = chapter number, 3 = chapter suffix,
        # 4 = free-form text used when there is no "Ch. N" part (unused),
        # 5 = manga name inside the parentheses
        match = re.match(
            r"(?:(?:Vol\. (\d+) )?Ch\. (\d+)([^ ]*)|(.*)) "
            r"\(([^)]+)\)",
            info)

        # Remove the site-name suffix as a whole. str.rstrip() would treat
        # its argument as a set of characters and could also eat trailing
        # letters that belong to the actual title.
        suffix = " - MangaDex"
        if info.endswith(suffix):
            chapter_string = info[:-len(suffix)]
        else:
            chapter_string = info

        return {
            "manga": match.group(5),
            "manga_id": util.safe_int(manga_id),
            "volume": util.safe_int(match.group(1)),
            "chapter": util.safe_int(match.group(2)),
            "chapter_minor": match.group(3) or "",
            "chapter_id": util.safe_int(self.chapter_id),
            "chapter_string": chapter_string,
            "group": text.unescape(group),
            "lang": util.language_to_code(language),
            "language": language,
        }

    def get_images(self, page):
        """Return a list of (image-url, None) tuples for a chapter page

        The URLs are assembled from the 'dataurl', 'page_array' and
        'server' JavaScript variables embedded in the page.
        """
        dataurl, pos = text.extract(page, "var dataurl = '", "'")
        pagelist, pos = text.extract(page, "var page_array = [", "]", pos)
        server, pos = text.extract(page, "var server = '", "'", pos)

        # urljoin() keeps an absolute 'server' value as is and resolves a
        # relative one against the site root
        base = urljoin(self.root, server + dataurl + "/")

        # Turn the JavaScript array body into valid JSON: use double
        # quotes and drop a trailing comma
        filenames = json.loads(
            "[" + pagelist.replace("'", '"').rstrip(",") + "]"
        )
        return [(base + name, None) for name in filenames]
|
||||
|
||||
|
||||
class MangadexMangaExtractor(MangadexExtractor, MangaExtractor):
    """Extractor for manga from mangadex.org"""
    pattern = [r"(?:https?://)?(?:www\.)?(mangadex\.(?:org|com)/manga/\d+)"]
    test = [
        ("https://mangadex.org/manga/2946/souten-no-koumori", {
            "url": "9e77934759828458d0424473922e41f348719472",
            "keywords": {
                "manga": "Souten no Koumori",
                "manga_id": 2946,
                "title": "Oneshot",
                "volume": int,
                "chapter": int,
                "chapter_minor": str,
                "chapter_id": int,
                "group": str,
                "contributor": str,
                "date": str,
                "views": int,
                "lang": str,
                "language": str,
            },
        }),
    ]

    def chapters(self, page):
        """Return a list of (chapter-url, metadata-dict) tuples

        One entry is produced for every '<tr id=' ... '</tr>' section
        found in 'page', in the order they appear.
        """
        # Manga-wide values: the title is everything before the last " ("
        # of the og:title content
        og_title = text.extract(page, '"og:title" content="', '"')[0]
        manga = text.unescape(og_title.rpartition(" (")[0])
        manga_id = util.safe_int(
            text.extract(page, '/images/manga/', '.')[0])

        chapter_root = self.root + "/chapter/"
        found = []

        for row in text.extract_iter(page, "<tr id=", "</tr>"):
            # Fields are read left to right; each search resumes where the
            # previous one stopped
            chid, at = text.extract(row, 'data-chapter-id="', '"')
            number, at = text.extract(row, 'data-chapter-num="', '"', at)
            volume, at = text.extract(row, 'data-volume-num="', '"', at)
            title, at = text.extract(row, 'data-chapter-name="', '"', at)
            language, at = text.extract(row, " title='", "'", at)
            group, at = text.extract(row, "<td>", "</td>", at)
            user, at = text.extract(row, "<td>", "</td>", at)
            views, at = text.extract(row, ">", "<", at)
            date, _ = text.extract(row, ' datetime="', '"', at)

            # "12.5" -> major "12", minor ".5"; "12" -> major "12", minor ""
            major, dot, fraction = number.partition(".")

            url = chapter_root + chid
            metadata = {
                "manga": manga,
                # NOTE(review): 'manga_id' was already passed through
                # util.safe_int() above; the second call is kept as is
                "manga_id": util.safe_int(manga_id),
                "title": text.unescape(title),
                "volume": util.safe_int(volume),
                "chapter": util.safe_int(major),
                "chapter_minor": dot + fraction,
                "chapter_id": util.safe_int(chid),
                "group": text.unescape(text.remove_html(group)),
                "contributor": text.remove_html(user),
                "views": util.safe_int(views),
                "date": date,
                "lang": util.language_to_code(language),
                "language": language,
            }
            found.append((url, metadata))

        return found
|
@ -155,6 +155,8 @@ def language_to_code(lang, default=None):
|
||||
|
||||
CODES = {
|
||||
"ar": "Arabic",
|
||||
"bg": "Bulgarian",
|
||||
"ca": "Catalan",
|
||||
"cs": "Czech",
|
||||
"da": "Danish",
|
||||
"de": "German",
|
||||
|
@ -6,4 +6,4 @@
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
__version__ = "1.3.0"
|
||||
__version__ = "1.3.1-dev"
|
||||
|
@ -18,7 +18,9 @@ SKIP = {
|
||||
"archivedmoe", "archiveofsins", "thebarchive",
|
||||
|
||||
# temporary issues
|
||||
"imgchili",
|
||||
"powermanga",
|
||||
"pinterest",
|
||||
}
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user