1
0
mirror of https://github.com/mikf/gallery-dl.git synced 2025-01-31 19:51:34 +01:00

[luscious] fix tag extraction

This commit is contained in:
Mike Fährmann 2019-05-13 11:50:21 +02:00
parent a5b060765d
commit aa8e366b90
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88
3 changed files with 12 additions and 10 deletions

View File

@@ -67,8 +67,8 @@ class FallenangelsMangaExtractor(MangaExtractor):
pattern = r"(?:https?://)?((manga|truyen)\.fascans\.com/manga/[^/]+)/?$" pattern = r"(?:https?://)?((manga|truyen)\.fascans\.com/manga/[^/]+)/?$"
test = ( test = (
("http://manga.fascans.com/manga/trinity-seven", { ("http://manga.fascans.com/manga/trinity-seven", {
"url": "92699a250ff7d5adcf4b06e6a45b0c05f3426643", "url": "293057f264de6c438b979bd1c3de4719568db452",
"keyword": "afc785c37da7c48e639d3a596e8e0401482b628f", "keyword": "50e0374dba60734230e4284b5ffdadef5104ae62",
}), }),
("https://truyen.fascans.com/manga/rakudai-kishi-no-eiyuutan", { ("https://truyen.fascans.com/manga/rakudai-kishi-no-eiyuutan", {
"url": "51a731a6b82d5eb7a335fbae6b02d06aeb2ab07b", "url": "51a731a6b82d5eb7a335fbae6b02d06aeb2ab07b",

View File

@@ -49,7 +49,7 @@ class LusciousBase(Extractor):
def _parse_tags(tags): def _parse_tags(tags):
return [ return [
text.unescape(tag.replace(":_", ":")) text.unescape(tag.replace(":_", ":"))
for tag in text.extract_iter(tags or "", "/tagged/+", "/") for tag in text.extract_iter(tags or "", "/tags/", "/")
] ]
@@ -62,12 +62,12 @@ class LusciousAlbumExtractor(LusciousBase, GalleryExtractor):
test = ( test = (
("https://luscious.net/albums/okinami-no-koigokoro_277031/", { ("https://luscious.net/albums/okinami-no-koigokoro_277031/", {
"url": "7e4984a271a1072ac6483e4228a045895aff86f3", "url": "7e4984a271a1072ac6483e4228a045895aff86f3",
"keyword": "f9c34e1a5b0c1f119e9f644c99933ecf7d7dbfd2", "keyword": "c597c132834f4990f90bf5dee5de2a9d4ba263a4",
"content": "b3a747a6464509440bd0ff6d1267e6959f8d6ff3", "content": "b3a747a6464509440bd0ff6d1267e6959f8d6ff3",
}), }),
("https://luscious.net/albums/virgin-killer-sweater_282582/", { ("https://luscious.net/albums/virgin-killer-sweater_282582/", {
"url": "21cc68a7548f4d71dfd67d8caf96349dde7e791c", "url": "21cc68a7548f4d71dfd67d8caf96349dde7e791c",
"keyword": "c147d8ef90843f68e37ed15e4fe017e62fc97c96", "keyword": "e1202078b504adeccd521aa932f456a5a85479a0",
}), }),
("https://luscious.net/albums/not-found_277035/", { ("https://luscious.net/albums/not-found_277035/", {
"exception": exception.NotFoundError, "exception": exception.NotFoundError,
@@ -102,14 +102,16 @@ class LusciousAlbumExtractor(LusciousBase, GalleryExtractor):
else: else:
count, pos = text.extract(page, '<p>', ' ', pos) count, pos = text.extract(page, '<p>', ' ', pos)
genre, pos = text.extract(page, '<p>Genre:', '</p>', pos) genre, pos = text.extract(page, '<p>Genre:', '</p>', pos)
adnce, pos = text.extract(page, '<p>Audience:', '</p>', pos)
tags , pos = text.extract(page, '"tag_list static">', '</ol>', pos) tags , pos = text.extract(page, '"tag_list static">', '</ol>', pos)
return { return {
"gallery_id": text.parse_int(self.gallery_id), "gallery_id": text.parse_int(self.gallery_id),
"title": text.unescape(title or ""), "title" : text.unescape(title or ""),
"count": text.parse_int(count), "count" : text.parse_int(count),
"genre": text.remove_html(genre), "genre" : text.remove_html(genre),
"tags" : self._parse_tags(tags), "audience" : text.remove_html(adnce),
"tags" : self._parse_tags(tags),
} }
def images(self, page): def images(self, page):

View File

@@ -278,7 +278,7 @@ class SankakuPostExtractor(SankakuExtractor):
"options": (("tags", True),), "options": (("tags", True),),
"keyword": { "keyword": {
"tags_artist": "bonocho", "tags_artist": "bonocho",
"tags_copyright": "batman_(series) the_dark_knight", "tags_copyright": "batman_(series) batman the_dark_knight",
"tags_medium": "sketch copyright_name", "tags_medium": "sketch copyright_name",
"tags_studio": "dc_comics", "tags_studio": "dc_comics",
"tags_character": str, "tags_character": str,