mirror of
https://github.com/mikf/gallery-dl.git
synced 2024-11-25 04:02:32 +01:00
[comicfury] add support
This commit is contained in:
parent
bca9a1a1e5
commit
cd7cb8c505
@ -181,6 +181,12 @@ Consider all listed sites to potentially be NSFW.
|
|||||||
<td>Tag Searches</td>
|
<td>Tag Searches</td>
|
||||||
<td></td>
|
<td></td>
|
||||||
</tr>
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>Comicfury</td>
|
||||||
|
<td>https://comicfury.com</td>
|
||||||
|
<td>Comic Issues, Comics</td>
|
||||||
|
<td></td>
|
||||||
|
</tr>
|
||||||
<tr>
|
<tr>
|
||||||
<td>Coomer</td>
|
<td>Coomer</td>
|
||||||
<td>https://coomer.su/</td>
|
<td>https://coomer.su/</td>
|
||||||
|
@ -39,6 +39,7 @@ modules = [
|
|||||||
"cien",
|
"cien",
|
||||||
"civitai",
|
"civitai",
|
||||||
"cohost",
|
"cohost",
|
||||||
|
"comicfury",
|
||||||
"comicvine",
|
"comicvine",
|
||||||
"cyberdrop",
|
"cyberdrop",
|
||||||
"danbooru",
|
"danbooru",
|
||||||
|
147
gallery_dl/extractor/comicfury.py
Normal file
147
gallery_dl/extractor/comicfury.py
Normal file
@ -0,0 +1,147 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
# This program is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU General Public License version 2 as
|
||||||
|
# published by the Free Software Foundation.
|
||||||
|
|
||||||
|
"""Extractors for https://comicfury.com"""
|
||||||
|
|
||||||
|
import re
|
||||||
|
import itertools
|
||||||
|
from .common import Extractor, Message
|
||||||
|
from .. import text
|
||||||
|
|
||||||
|
|
||||||
|
# Regex fragment for hosts under ComicFury's alternative domains.
# Its single capture group is the subdomain label, which the extractors
# in this module use as the comic identifier.
CF_DOMAINS = r"([\w-]+)\.(?:" + "|".join((
    r"thecomicseries\.com",
    r"the-comic\.org",
    r"thecomicstrip\.org",
    r"webcomic\.ws",
    r"cfw\.me",
)) + ")"
|
||||||
|
|
||||||
|
|
||||||
|
class ComicfuryExtractor(Extractor):
    """Base class for ComicFury extractors"""
    category = "comicfury"
    directory_fmt = ("{category}", "{comic}")
    filename_fmt = "{category}_{comic}_{id}_{num:>02}.{extension}"
    archive_fmt = "{filename}"
    root = "https://comicfury.com"
    cookies_domain = "comicfury.com"

    def _init(self):
        # Bound 'search' method of a pattern whose group 1 is the markup
        # inside an 'is--image-segments' container
        self._search_segments = re.compile(
            r'\n *<div class="is--image-segments">\n'
            r'([\s\S]+?)\n *</div>\n').search

    def request(self, url, **kwargs):
        """Fetch 'url' and confirm the content warning form if it is shown"""
        resp = Extractor.request(self, url, **kwargs)
        if '<div class="nhead">Content Warning</div>' in resp.text:
            token = self.session.cookies.get(
                "token", domain=self.cookies_domain)
            # Override 'method' and 'data' inside kwargs instead of passing
            # them next to **kwargs, which would raise a TypeError whenever
            # the caller supplied one of them.
            kwargs["method"] = "POST"
            kwargs["data"] = {
                "proceed": "View Webcomic",
                "token": token,
            }
            resp = Extractor.request(self, url, **kwargs)
        return resp

    def _parse_page(self, page):
        """Yield messages for every comic page found in the HTML 'page'

        For each '<div class="is--comic-page" ...>' element, one
        Message.Directory is yielded, followed by one Message.Url per image
        ('count' of them). Missing markup results in empty / zero metadata
        values or no messages at all instead of an exception.
        """
        comic_name, pos = text.extract(
            page, '<h2 class="webcomic-title-content-inner">', '</h2>')
        relative_id, pos = text.extract(page, 'Comic #', ':', pos)
        comic, pos = text.extract(
            page, '<a href="/comicprofile.php?url=', '"', pos)

        # 'Comic #N' is only read once; later comic pages on the same HTML
        # page are numbered by incrementing this value
        relative_id = text.parse_int(relative_id)
        comic_name = text.unescape(comic_name or "")

        while True:
            issue_id, pos = text.extract(
                page, '<div class="is--comic-page" id="comic-', '"', pos)
            if not issue_id:
                break
            chapter_id, pos = text.extract(
                page, ' data-chapter-id="', '"', pos)
            chapter_name, pos = text.extract(
                page, ' data-chapter-name="', '"', pos)
            # skip over the 'style' attribute of the title <div>
            pos = text.extract(
                page, '<div class="is--title" style="', '"', pos)[1]
            title, pos = text.extract(page, '>', '</div>', pos)

            segments = self._search_segments(page, pos)
            if segments is None:
                # no image container after this point: emit an empty issue
                # rather than failing on 'None.end()'
                urls = []
            else:
                pos = segments.end(0)
                urls = list(text.extract_iter(
                    segments.group(1), '<img src="', '"'))

            data = {
                "comic_name"  : comic_name,
                "comic"       : comic,
                "relative_id" : relative_id,
                "id"          : text.parse_int(issue_id),
                "chapter_id"  : text.parse_int(chapter_id),
                "chapter_name": text.unescape(chapter_name or ""),
                "title"       : text.unescape(title or ""),
                "count"       : len(urls),
            }
            yield Message.Directory, data
            # 'data' is reused for all files of this comic page;
            # 'num' is overwritten with the 1-based image index each time
            for data["num"], url in enumerate(urls, 1):
                url = text.unescape(url)
                yield Message.Url, url, text.nameext_from_url(url, data)

            relative_id += 1
|
||||||
|
|
||||||
|
|
||||||
|
class ComicfuryIssueExtractor(ComicfuryExtractor):
    """Extractor for a single issue URL"""
    subcategory = "issue"
    # group 1/2: comic and issue from a 'comicfury.com/read/...' URL
    # group 3/4: comic (subdomain) and issue from a CF_DOMAINS URL
    pattern = (r"(?:https?://)?(?:comicfury\.com/read/([\w-]+)(?:/comics?/"
               r"(first|last|\d+)?)?|" + CF_DOMAINS + r"/comics/"
               r"(first|1|pl/\d+)?)(?:[?#].*)?$")
    example = "https://comicfury.com/read/URL/comics/1234"

    def __init__(self, match):
        ComicfuryExtractor.__init__(self, match)
        self.comic = match.group(1) or match.group(3)
        if match.group(1) is not None:
            # comicfury.com URL: use the issue part as is (may be empty)
            self.id = match.group(2) or ""
        else:
            # subdomain URL: translate its issue part into the form used
            # by 'comicfury.com/read/<comic>/comics/<id>'
            issue = match.group(4)
            if issue in ("first", "1"):
                self.id = "first"
            elif not issue:
                self.id = "last"
            else:
                # strip the leading 'pl/'
                self.id = issue[3:]

    def items(self):
        """Yield the messages of the first comic page at the issue URL"""
        url = self.root + "/read/" + self.comic + "/comics/" + self.id
        page = self.request(url).text
        messages = self._parse_page(page)

        # A bare next() raising StopIteration inside a generator is turned
        # into a RuntimeError (PEP 479); stop cleanly when nothing was found.
        first = next(messages, None)
        if first is None:
            return
        yield first

        # The parser may continue with further comic pages from the same
        # HTML page; only forward the files announced by the first
        # Directory message's 'count'.
        yield from itertools.islice(messages, first[1]["count"])
|
||||||
|
|
||||||
|
|
||||||
|
class ComicfuryComicExtractor(ComicfuryExtractor):
    """Extractor for all issues of a comic"""
    subcategory = "comic"
    # group 1: comic from a 'comicprofile.php?url=' URL
    # group 2: comic (subdomain) from a CF_DOMAINS URL
    pattern = (r"(?:https?://)?(?:comicfury\.com/comicprofile\.php"
               r"\?url=([\w-]+)|" + CF_DOMAINS + r")/?(?:[?#].*)?$")
    example = "https://comicfury.com/comicprofile.php?url=URL"

    def __init__(self, match):
        ComicfuryExtractor.__init__(self, match)
        slug = match.group(1)
        if not slug:
            slug = match.group(2)
        self.comic = slug

    def items(self):
        """Start at the first issue and follow the 'next page' links"""
        page_url = "".join((self.root, "/read/", self.comic, "/comics/first"))
        while True:
            html = self.request(page_url).text
            yield from self._parse_page(html)

            # the link to the following page lives in its own container;
            # without such a link there is nothing left to fetch
            container = text.extr(
                html, '<div class="final-next-page-link-container">', '</div>')
            href = text.extr(
                container, '<a href="', '" class="final-next-page-link">')
            if not href:
                return
            page_url = text.urljoin(page_url, text.unescape(href))
|
102
test/results/comicfury.py
Normal file
102
test/results/comicfury.py
Normal file
@ -0,0 +1,102 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
# This program is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU General Public License version 2 as
|
||||||
|
# published by the Free Software Foundation.
|
||||||
|
|
||||||
|
from gallery_dl.extractor import comicfury
|
||||||
|
|
||||||
|
|
||||||
|
__tests__ = (
    # issue on a 'thecomicseries.com' subdomain, addressed by 'pl/<id>'
    {
        "#url"     : "https://rain.thecomicseries.com/comics/pl/73003",
        "#category": ("", "comicfury", "issue"),
        "#class"   : comicfury.ComicfuryIssueExtractor,
        "#count"   : 1,
        "#urls"    : "https://img.comicfury.com/comics/c8f813e19a0aae0f2a0b57a6b36ceec52058036413.png",

        "comic_name"  : "Rain",
        "comic"       : "rain",
        "relative_id" : 6,
        "id"          : 73003,
        "chapter_id"  : 2770,
        "chapter_name": "Ch 1: The New Girl",
        "title"       : "Chapter 1 - The New Girl",
    },

    # 'first' issue on a 'the-comic.org' subdomain
    {
        "#url"     : "https://grinders.the-comic.org/comics/first",
        "#category": ("", "comicfury", "issue"),
        "#class"   : comicfury.ComicfuryIssueExtractor,
        "#count"   : 1,
        "#urls"    : "https://img.comicfury.com/comics/184/43571a1579840219f1635377961.png",

        "comic_name"  : "Grinder$",
        "comic"       : "grinders",
        "relative_id" : 1,
        "id"          : 1137093,
        "chapter_id"  : 48527,
        "chapter_name": "Foam",
        "title"       : "Teaser",
    },

    # pattern-only check: '/comics/1' on a 'thecomicstrip.org' subdomain
    {
        "#url"     : "https://belovedchainscomic.thecomicstrip.org/comics/1",
        "#category": ("", "comicfury", "issue"),
        "#class"   : comicfury.ComicfuryIssueExtractor,
    },

    # pattern-only check: bare '/comics/' on a 'webcomic.ws' subdomain
    {
        "#url"     : "https://belovedchainscomic.webcomic.ws/comics/",
        "#category": ("", "comicfury", "issue"),
        "#class"   : comicfury.ComicfuryIssueExtractor,
    },

    # 'last' issue via a comicfury.com '/read/<comic>/comic/' URL
    {
        "#url"     : "https://comicfury.com/read/MKsJekyllAndHyde/comic/last",
        "#category": ("", "comicfury", "issue"),
        "#class"   : comicfury.ComicfuryIssueExtractor,
        "#count"   : 1,
        "#urls"    : "https://img.comicfury.com/comics/222/37111a1634996413b60163f1077624721.png",

        "comic_name"  : "MK's The Strange Case of Dr. Jekyll and Mr. Hyde",
        "comic"       : "MKsJekyllAndHyde",
        "relative_id" : 622,
        "id"          : 1493321,
        "chapter_id"  : 57040,
        "chapter_name": "Epilogue 3",
        "title"       : "THE END",
    },

    # comicfury.com '/read/<comic>' URL without an issue part
    {
        "#url"     : "https://comicfury.com/read/rain-tradfr",
        "#category": ("", "comicfury", "issue"),
        "#class"   : comicfury.ComicfuryIssueExtractor,
        "#count"   : 1,
        "#urls"    : "https://img.comicfury.com/comics/218/49338a1624179795b80143f379314885.jpg",

        "comic_name"  : "Rain, la traduction française",
        "comic"       : "rain-tradfr",
        "relative_id" : 1,
        "id"          : 1381699,
        "chapter_id"  : 56171,
        "chapter_name": "Hors Chapitre",
        "title"       : "RAIN",
    },

    # whole comic via its profile URL, limited to the first 6 results
    {
        "#url"         : "https://comicfury.com/comicprofile.php?url=lanternsofarcadia",
        "#category"    : ("", "comicfury", "comic"),
        "#class"       : comicfury.ComicfuryComicExtractor,
        "#range"       : "1-6",
        "#sha1_url"    : "d4080dcb41f5c019e1ceb450a624041208ccdcb8",
        "#sha1_content": "0c1937e4d177ce55afbfe30ab9376700c6cf619f",
    },

    # pattern-only check: whole comic via a 'cfw.me' subdomain
    {
        "#url"     : "https://bloomer-layout.cfw.me",
        "#category": ("", "comicfury", "comic"),
        "#class"   : comicfury.ComicfuryComicExtractor,
    },

)
|
Loading…
Reference in New Issue
Block a user