1
0
mirror of https://github.com/mikf/gallery-dl.git synced 2024-11-26 04:32:51 +01:00

adjust archive-ids

This commit is contained in:
Mike Fährmann 2018-02-12 23:09:34 +01:00
parent be3ea4425d
commit 179bcdd349
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88
11 changed files with 32 additions and 21 deletions

View File

@@ -70,7 +70,7 @@ class FoolfuukaThreadExtractor(SharedConfigExtractor):
directory_fmt = ["{category}", "{board[shortname]}",
"{thread_num}{title:? - //}"]
filename_fmt = "{media[media]}"
archive_fmt = "{{board[shortname]}}_{num}_{timestamp}"
archive_fmt = "{board[shortname]}_{num}_{timestamp}"
root = ""
referer = True

View File

@@ -172,6 +172,8 @@ class ChapterExtractor(Extractor):
"{volume:?v/ />02}c{chapter:>03}{chapter_minor:?//}{title:?: //}"]
filename_fmt = (
"{manga}_c{chapter:>03}{chapter_minor:?//}_{page:>03}.{extension}")
archive_fmt = (
"{manga}_{chapter}{chapter_minor}_{page}")
def __init__(self, url):
Extractor.__init__(self)

View File

@@ -16,6 +16,7 @@ class DirectlinkExtractor(Extractor):
"""Extractor for direct links to images and other media files"""
category = "directlink"
filename_fmt = "{domain}/{path}"
archive_fmt = "{domain}/{path}"
pattern = [r"https?://(?P<domain>[^/]+)/(?P<path>[^?&#]+\."
r"(?:jpe?g|jpe|png|gif|web[mp]|mp4|mkv|og[gmv]|opus))"
r"(?:\?(?P<query>[^/?#]*))?(?:#(?P<fragment>.*))?$"]

View File

@@ -58,6 +58,7 @@ class Hentai2readMangaExtractor(MangaExtractor):
class Hentai2readChapterExtractor(ChapterExtractor):
"""Extractor for a single manga chapter from hentai2read.com"""
category = "hentai2read"
archive_fmt = "{chapter_id}_{page}"
pattern = [r"(?:https?://)?(?:www\.)?hentai2read\.com/([^/]+)/(\d+)"]
test = [("http://hentai2read.com/amazon_elixir/1/", {
"url": "964b942cf492b3a129d2fe2608abfc475bc99e71",

View File

@@ -120,6 +120,7 @@ class HentaifoundryImageExtractor(Extractor):
subcategory = "image"
directory_fmt = ["{category}", "{artist}"]
filename_fmt = "{category}_{index}_{title}.{extension}"
archive_fmt = "{index}"
pattern = [(r"(?:https?://)?(?:www\.|pictures\.)?hentai-foundry\.com/"
r"(?:pictures/user/([^/]+)/(\d+)"
r"|[^/]/([^/]+)/(\d+))")]

View File

@@ -59,6 +59,7 @@ class HentaihereMangaExtractor(MangaExtractor):
class HentaihereChapterExtractor(ChapterExtractor):
"""Extractor for a single manga chapter from hentaihere.com"""
category = "hentaihere"
archive_fmt = "{chapter_id}_{page}"
pattern = [r"(?:https?://)?(?:www\.)?hentaihere\.com/m/S(\d+)/(\d+)"]
test = [("https://hentaihere.com/m/S13812/1/1/", {
"url": "964b942cf492b3a129d2fe2608abfc475bc99e71",

View File

@@ -16,7 +16,6 @@ import json
class ImgurExtractor(Extractor):
"""Base class for imgur extractors"""
category = "imgur"
archive_fmt = "{hash}"
def __init__(self, match):
Extractor.__init__(self)
@@ -53,6 +52,7 @@ class ImgurImageExtractor(ImgurExtractor):
"""Extractor for individual images from imgur.com"""
subcategory = "image"
filename_fmt = "{category}_{hash}{title:?_//}.{extension}"
archive_fmt = "{hash}"
pattern = [(r"(?:https?://)?(?:m\.|www\.)?imgur\.com/"
r"(?:gallery/)?((?!gallery)[^/?&#]{7})/?"),
(r"(?:https?://)?i\.imgur\.com/([^/?&#.]{5,7})\.")]
@@ -112,6 +112,7 @@ class ImgurAlbumExtractor(ImgurExtractor):
subcategory = "album"
directory_fmt = ["{category}", "{album[hash]}{album[title]:? - //}"]
filename_fmt = "{category}_{album[hash]}_{num:>03}_{hash}.{extension}"
archive_fmt = "{album[hash]}_{hash}"
pattern = [r"(?:https?://)?(?:m\.|www\.)?imgur\.com/"
r"(?:a|gallery)/([^/?&#]{5})/?$"]
test = [

View File

@@ -18,7 +18,7 @@ class KhinsiderSoundtrackExtractor(AsynchronousExtractor):
category = "khinsider"
subcategory = "soundtrack"
directory_fmt = ["{category}", "{album}"]
archive_fmt = "{album}_{name}"
archive_fmt = "{album}_{name}.{extension}"
pattern = [r"(?:https?://)?downloads\.khinsider\.com/"
r"game-soundtracks/album/([^/?&#]+)"]
test = [(("https://downloads.khinsider.com/game-soundtracks/"

View File

@@ -24,6 +24,7 @@ IV = [
class KissmangaBase():
"""Base class for kissmanga extractors"""
category = "kissmanga"
archive_fmt = "{chapter_id}_{page}"
root = "http://kissmanga.com"
def request(self, url):
@@ -69,7 +70,7 @@ class KissmangaMangaExtractor(KissmangaBase, MangaExtractor):
test = [
("http://kissmanga.com/Manga/Dropout", {
"url": "992befdd64e178fe5af67de53f8b510860d968ca",
"keyword": "1d23ea07296e004b33bee17fe2f5cd5177c58680",
"keyword": "32b09711c28b481845acc32e3bb6054cfc90224d",
}),
("http://kissmanga.com/manga/feng-shen-ji", None),
]
@@ -87,8 +88,9 @@ class KissmangaMangaExtractor(KissmangaBase, MangaExtractor):
for item in text.extract_iter(page, '<a href="', ' online">'):
url, _, chapter = item.partition(needle)
data = {
"manga": manga, "id": url.rpartition("=")[2],
"chapter_string": chapter, "lang": "en", "language": "English",
"manga": manga, "chapter_string": chapter,
"chapter_id": util.safe_int(url.rpartition("=")[2]),
"lang": "en", "language": "English",
}
self.parse_chapter_string(data)
results.append((self.root + url, data))
@@ -98,25 +100,26 @@ class KissmangaMangaExtractor(KissmangaBase, MangaExtractor):
class KissmangaChapterExtractor(KissmangaBase, ChapterExtractor):
"""Extractor for manga-chapters from kissmanga.com"""
pattern = [r"(?i)(?:https?://)?(?:www\.)?kissmanga\.com"
r"/Manga/[^/?&#]+/[^/?&#]+\?id=\d+"]
r"/Manga/[^/?&#]+/[^/?&#]+\?id=(\d+)"]
test = [
("http://kissmanga.com/Manga/Dropout/Ch-000---Oneshot-?id=145847", {
"url": "4136bcd1c6cecbca8cc2bc965d54f33ef0a97cc0",
"keyword": "68384c1167858fb4aa475c4596f0a685c45fff36",
"keyword": "4a3a9341d453541de0dbfa24cd6b2e3ed39c0182",
}),
("http://kissmanga.com/Manga/Urban-Tales/a?id=256717", {
"url": "de074848f6c1245204bb9214c12bcc3ecfd65019",
"keyword": "089158338b4cde43b2ff244814effeb13297de33",
"keyword": "ffc11b630da44fe67709ed0473756cf51b90a05c",
}),
("http://kissmanga.com/Manga/Monster/Monster-79?id=7608", {
"count": 23,
"keyword": "558da596e86ca544eb72cf303f3694bbf0b1f2f5",
"keyword": "92669a75e48a8501f3fbfc22b8fd2d3188239212",
}),
("http://kissmanga.com/mAnGa/mOnStEr/Monster-79?id=7608", None),
]
def __init__(self, match):
ChapterExtractor.__init__(self, match.group(0))
self.chapter_id = match.group(1)
self.session.headers["Referer"] = self.root
def get_metadata(self, page):
@@ -125,6 +128,7 @@ class KissmangaChapterExtractor(KissmangaBase, ChapterExtractor):
data = {
"manga": manga.strip(),
"chapter_string": cinfo.strip(),
"chapter_id": util.safe_int(self.chapter_id),
"lang": "en",
"language": "English",
}

View File

@@ -9,7 +9,7 @@
"""Extract comic-issues and entire comics from http://readcomiconline.to/"""
from .common import ChapterExtractor, MangaExtractor
from .. import text, cloudflare
from .. import text, util, cloudflare
import re
@@ -18,7 +18,7 @@ class ReadcomiconlineBase():
category = "readcomiconline"
directory_fmt = ["{category}", "{comic}", "{issue:>03}"]
filename_fmt = "{comic}_{issue:>03}_{page:>03}.{extension}"
archive_fmt = "{comic}_{issue}_{page}"
archive_fmt = "{issue_id}_{page}"
root = "http://readcomiconline.to"
useragent = "Wget/1.19.2 (linux-gnu)"
@@ -33,11 +33,11 @@ class ReadcomiconlineComicExtractor(ReadcomiconlineBase, MangaExtractor):
test = [
("http://readcomiconline.to/Comic/W-i-t-c-h", {
"url": "c5a530538a30b176916e30cbe223a93d83cb2691",
"keyword": "51097f2b65da683160dbea4de128dbec1cbf9357",
"keyword": "3986248e4458fa44a201ec073c3684917f48ee0c",
}),
("http://readcomiconline.to/Comic/Bazooka-Jules", {
"url": "e517dca61dff489f18ca781084f59a9eeb60a6b6",
"keyword": "7d4877d1215650a768097a8626a2f0c6083119a4",
"keyword": "f5ba5246cd787bb750924d9690cb1549199bd516",
}),
]
@@ -55,7 +55,8 @@ class ReadcomiconlineComicExtractor(ReadcomiconlineBase, MangaExtractor):
if issue.startswith('Issue #'):
issue = issue[7:]
results.append((self.root + url, {
"comic": comic, "issue": issue, "id": url.rpartition("=")[2],
"comic": comic, "issue": issue,
"issue_id": util.safe_int(url.rpartition("=")[2]),
"lang": "en", "language": "English",
}))
return results
@@ -65,14 +66,15 @@ class ReadcomiconlineIssueExtractor(ReadcomiconlineBase, ChapterExtractor):
"""Extractor for comic-issues from readcomiconline.to"""
subcategory = "issue"
pattern = [r"(?i)(?:https?://)?(?:www\.)?readcomiconline\.to"
r"/Comic/[^/?&#]+/[^/?&#]+\?id=\d+"]
r"/Comic/[^/?&#]+/[^/?&#]+\?id=(\d+)"]
test = [("http://readcomiconline.to/Comic/W-i-t-c-h/Issue-130?id=22289", {
"url": "a45c77f8fbde66091fe2346d6341f9cf3c6b1bc5",
"keyword": "dee8a8a44659825afe1d69e1d809a48b03e98c68",
"keyword": "c6de1c9c8a307dc4be56783c4ac6f1338ffac6fc",
})]
def __init__(self, match):
ChapterExtractor.__init__(self, match.group(0))
self.issue_id = match.group(1)
self.session.headers["User-Agent"] = self.useragent
def get_metadata(self, page):
@@ -82,6 +84,7 @@ class ReadcomiconlineIssueExtractor(ReadcomiconlineBase, ChapterExtractor):
return {
"comic": comic,
"issue": match.group(1) or match.group(2),
"issue_id": util.safe_int(self.issue_id),
"lang": "en",
"language": "English",
}

View File

@@ -545,10 +545,7 @@ class DownloadArchive():
self.cursor = con.cursor()
self.cursor.execute("CREATE TABLE IF NOT EXISTS archive "
"(entry PRIMARY KEY) WITHOUT ROWID")
self.keygen = (
extractor.category +
(extractor.archive_fmt or extractor.filename_fmt)
).format_map
self.keygen = (extractor.category + extractor.archive_fmt).format_map
self._key = None
def check(self, kwdict):