1
0
mirror of https://github.com/mikf/gallery-dl.git synced 2024-11-25 04:02:32 +01:00

merge #4832: [tmohentai] add 'gallery' extractor (#4808)

This commit is contained in:
Mike Fährmann 2023-11-21 20:25:49 +01:00
commit ce7c4cb544
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88
5 changed files with 110 additions and 0 deletions

View File

@ -823,6 +823,12 @@ Consider all sites to be NSFW unless otherwise known.
<td>Galleries</td>
<td></td>
</tr>
<tr>
<td>TMOHentai</td>
<td>https://tmohentai.com/</td>
<td>Galleries</td>
<td></td>
</tr>
<tr>
<td>Toyhouse</td>
<td>https://toyhou.se/</td>

View File

@ -147,6 +147,7 @@ modules = [
"tapas",
"tcbscans",
"telegraph",
"tmohentai",
"toyhouse",
"tsumino",
"tumblr",

View File

@ -0,0 +1,48 @@
# -*- coding: utf-8 -*-
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Extractors for https://tmohentai.com/"""
from .common import GalleryExtractor
from .. import text
# Regex prefix shared by this module's URL patterns: the tmohentai.com host,
# optionally preceded by an "http://" or "https://" scheme.
BASE_PATTERN = r"(?:https?://)?tmohentai\.com"
class TmohentaiGalleryExtractor(GalleryExtractor):
    # Gallery extractor for tmohentai.com.
    #
    # 'pattern' accepts both "/contents/<id>" and "/reader/<id>" URLs and
    # captures <id>; the gallery page itself is always requested through
    # the "/contents/<id>" form (see __init__).
    category = "tmohentai"
    root = "http://tmohentai.com"
    directory_fmt = ("{category}", "{title} ({gallery_id})")
    pattern = BASE_PATTERN + r"/(?:contents|reader)/(\w+)"
    example = "https://tmohentai.com/contents/12345a67b89c0"

    def __init__(self, match):
        """Remember the gallery ID captured by 'pattern' and hand the
        gallery's "/contents/" URL to the base-class initializer.
        """
        self.gallery_id = match.group(1)
        contents_url = "{}/contents/{}".format(self.root, self.gallery_id)
        GalleryExtractor.__init__(self, match, contents_url)

    def images(self, page):
        """Build the list of image entries for the gallery.

        The number of images equals the number of occurrences of the
        'lanzador' class marker in 'page'. Image URLs are not read from
        the page; they are generated from the gallery ID and a zero-based
        index padded to three digits (000, 001, ...).

        Returns a list of (url, None) tuples.
        """
        # First pass fills in the gallery ID and turns '{{:>03}}' into the
        # '{:>03}' placeholder that is filled per image below.
        template = (
            "https://imgrojo.tmohentai.com/contents/{}/{{:>03}}.webp"
        ).format(self.gallery_id)
        total = page.count('class="lanzador')

        entries = []
        for index in range(total):
            entries.append((template.format(index), None))
        return entries

    def metadata(self, page):
        """Collect gallery metadata from 'page' and return it as a dict.

        Keys: gallery_id, title, artists, categories, tags, uploader,
        language.
        """
        # NOTE(review): the extraction calls are kept in the exact order of
        # the original dict literal; presumably each call continues from
        # where the previous one stopped, so reordering them is not safe.
        extract = text.extract_from(page)

        title = text.unescape(extract("<h3>", "<").strip())
        artists = text.split_html(
            extract("<label>Artists and Artists Groups</label>", "</ul>"))
        categories = text.split_html(
            extract("<label>Genders</label>", "</ul>"))
        tags = text.split_html(
            extract("<label>Tags</label>", "</ul>"))
        uploader = text.remove_html(
            extract("<label>Uploaded By</label>", "</ul>"))
        language = extract("&nbsp;", "\n")

        return {
            "gallery_id": self.gallery_id,
            "title"     : title,
            "artists"   : artists,
            "categories": categories,
            "tags"      : tags,
            "uploader"  : uploader,
            "language"  : language,
        }

View File

@ -122,6 +122,7 @@ CATEGORY_MAP = {
"tbib" : "The Big ImageBoard",
"tcbscans" : "TCB Scans",
"tco" : "Twitter t.co",
"tmohentai" : "TMOHentai",
"thatpervert" : "ThatPervert",
"thebarchive" : "The /b/ Archive",
"thecollection" : "The /co/llection",

54
test/results/tmohentai.py Normal file
View File

@ -0,0 +1,54 @@
# -*- coding: utf-8 -*-
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
from gallery_dl.extractor import tmohentai
# Expected results for the tmohentai gallery extractor.
# NOTE(review): presumably keys starting with "#" are directives for the test
# runner (input URL, expected extractor class, URL pattern, file count) and
# the remaining keys are expected metadata values — confirm against the
# test harness.
__tests__ = (
{
# Gallery addressed through its "/contents/<id>" URL: checks the generated
# image URLs, the number of files and the extracted metadata.
"#url" : "https://tmohentai.com/contents/653c2aeaa693c",
"#category": ("", "tmohentai", "gallery"),
"#class" : tmohentai.TmohentaiGalleryExtractor,
"#pattern" : r"https://imgrojo\.tmohentai\.com/contents/653c2aeaa693c/\d\d\d\.webp",
"#count" : 46,
"artists" : ["Andoryu"],
"categories": [
"Big Breasts",
"BlowJob",
"Cheating",
"Mature",
"Milf",
"Student",
],
"count" : 46,
"extension" : "webp",
"gallery_id": "653c2aeaa693c",
"language" : "Español",
# Only the type is pinned here, not a specific value.
"num" : int,
"tags" : [
"milf",
"Madre",
"enormes pechos",
"Peluda",
"nakadashi",
"cheating",
"madura",
"sexo a escondidas",
"Ama de casa",
"mamada",
],
"title" : "La Mama de mi Novia es tan Pervertida que no Pude Soportarlo mas",
"uploader" : "NekoCreme Fansub",
},
{
# Same gallery addressed through a "/reader/<id>/..." URL: only the
# extractor class and category are listed for this entry.
"#url" : "https://tmohentai.com/reader/653c2aeaa693c/paginated/1",
"#category": ("", "tmohentai", "gallery"),
"#class" : tmohentai.TmohentaiGalleryExtractor,
},
)