mirror of
https://github.com/mikf/gallery-dl.git
synced 2024-11-22 02:32:33 +01:00
[myhentaigallery] update and fix extraction (#1001)
- extract more metadata - match "/show/" URLs - complete test results - fix missing images for lines starting with " <img" - fix missing comma in supportedsites.py
This commit is contained in:
parent
05b9ac8d37
commit
c43b3894be
@ -81,6 +81,7 @@ MangaPark https://mangapark.net/ Chapters, Manga
|
||||
Mangareader https://www.mangareader.net/ Chapters, Manga
|
||||
Mangoxo https://www.mangoxo.com/ Albums, Channels Optional
|
||||
mastodon.social https://mastodon.social/ Images from Statuses, User Profiles Optional (`OAuth <https://github.com/mikf/gallery-dl#oauth>`__)
|
||||
My Hentai Gallery https://myhentaigallery.com/ Galleries
|
||||
Naver https://blog.naver.com/ Blogs, Posts
|
||||
Newgrounds https://www.newgrounds.com/ |newgrounds-C| Optional
|
||||
Ngomik http://ngomik.in/ Chapters
|
||||
|
@ -1,7 +1,5 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2018-2020 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
@ -15,16 +13,30 @@ from .. import text, exception
|
||||
class MyhentaigalleryGalleryExtractor(GalleryExtractor):
|
||||
"""Extractor for image galleries from myhentaigallery.com"""
|
||||
category = "myhentaigallery"
|
||||
directory_fmt = ("{category}", "{gallery_id} [{artist}] {title}")
|
||||
pattern = (r"(?:https?://)?(myhentaigallery\.com"
|
||||
r"/gallery/thumbnails/[0-9]+)")
|
||||
directory_fmt = ("{category}", "{gallery_id} {artist:?[/] /J, }{title}")
|
||||
pattern = (r"(?:https?://)?myhentaigallery\.com"
|
||||
r"/gallery/(?:thumbnails|show)/(\d+)")
|
||||
test = (
|
||||
("https://myhentaigallery.com/gallery/thumbnails/16247"),
|
||||
("https://myhentaigallery.com/gallery/thumbnails/15224"),
|
||||
("https://myhentaigallery.com/gallery/thumbnails/16247", {
|
||||
"pattern": r"https://images.myhentaigrid.com/imagesgallery/images"
|
||||
r"/[^/]+/original/\d+\.jpg",
|
||||
"keyword": {
|
||||
"artist" : list,
|
||||
"count" : 11,
|
||||
"gallery_id": 16247,
|
||||
"group" : list,
|
||||
"parodies" : list,
|
||||
"tags" : ["Giantess"],
|
||||
"title" : "Attack Of The 50ft Woman 1",
|
||||
},
|
||||
}),
|
||||
("https://myhentaigallery.com/gallery/show/16247/1"),
|
||||
)
|
||||
root = "https://myhentaigallery.com"
|
||||
|
||||
def __init__(self, match):
|
||||
url = "https://" + match.group(1)
|
||||
self.gallery_id = match.group(1)
|
||||
url = "{}/gallery/thumbnails/{}".format(self.root, self.gallery_id)
|
||||
GalleryExtractor.__init__(self, match, url)
|
||||
self.session.headers["Referer"] = url
|
||||
|
||||
@ -32,22 +44,22 @@ class MyhentaigalleryGalleryExtractor(GalleryExtractor):
|
||||
extr = text.extract_from(page)
|
||||
split = text.split_html
|
||||
|
||||
image = extr('<div class="comic-cover">\n<a href="', '"')
|
||||
title = extr('<div class="comic-description">\n<h1>', '</h1>')
|
||||
if not title:
|
||||
raise exception.NotFoundError("gallery")
|
||||
data = {
|
||||
|
||||
return {
|
||||
"title" : text.unescape(title),
|
||||
"gallery_id": text.parse_int(image.split("/")[-2]),
|
||||
"gallery_id": text.parse_int(self.gallery_id),
|
||||
"tags" : split(extr('<div>\nCategories:', '</div>')),
|
||||
"artist" : split(extr('<div>\nArtists:' , '</div>')),
|
||||
"group" : split(extr('<div>\nGroups:' , '</div>')),
|
||||
"parodies" : split(extr('<div>\nParodies:' , '</div>')),
|
||||
}
|
||||
artists = split(extr('<div>\nArtists:', '</div>'))
|
||||
data["artist"] = artists[0] if artists else "Unknown"
|
||||
return data
|
||||
|
||||
def images(self, page):
|
||||
extr = text.extract_iter
|
||||
return [
|
||||
(text.unescape(url).replace("/thumbnail/", "/original/"), None)
|
||||
for url in extr(page, 'class="comic-thumb">\n<img src="', '"')
|
||||
(text.unescape(text.extract(url, 'src="', '"')[0]).replace(
|
||||
"/thumbnail/", "/original/"), None)
|
||||
for url in text.extract_iter(page, 'class="comic-thumb"', '</div>')
|
||||
]
|
||||
|
@ -60,7 +60,7 @@ CATEGORY_MAP = {
|
||||
"mangapark" : "MangaPark",
|
||||
"mangastream" : "Manga Stream",
|
||||
"mastodon.social": "mastodon.social",
|
||||
"myhentaigallery": "My Hentai Gallery"
|
||||
"myhentaigallery": "My Hentai Gallery",
|
||||
"myportfolio" : "Adobe Portfolio",
|
||||
"nhentai" : "nhentai",
|
||||
"nijie" : "nijie",
|
||||
|
Loading…
Reference in New Issue
Block a user