2021-07-23 02:01:51 +02:00
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
|
2023-09-11 16:30:55 +02:00
|
|
|
# Copyright 2021-2023 Mike Fährmann
|
2021-07-23 02:01:51 +02:00
|
|
|
#
|
|
|
|
# This program is free software; you can redistribute it and/or modify
|
|
|
|
# it under the terms of the GNU General Public License version 2 as
|
|
|
|
# published by the Free Software Foundation.
|
|
|
|
|
|
|
|
"""Extractors for https://comicvine.gamespot.com/"""
|
|
|
|
|
|
|
|
from .booru import BooruExtractor
|
|
|
|
from .. import text
|
|
|
|
import operator
|
|
|
|
|
|
|
|
|
|
|
|
class ComicvineTagExtractor(BooruExtractor):
|
|
|
|
"""Extractor for a gallery on comicvine.gamespot.com"""
|
|
|
|
category = "comicvine"
|
|
|
|
subcategory = "tag"
|
|
|
|
basecategory = ""
|
|
|
|
root = "https://comicvine.gamespot.com"
|
|
|
|
per_page = 1000
|
|
|
|
directory_fmt = ("{category}", "{tag}")
|
|
|
|
filename_fmt = "{filename}.{extension}"
|
|
|
|
archive_fmt = "{id}"
|
|
|
|
pattern = (r"(?:https?://)?comicvine\.gamespot\.com"
|
|
|
|
r"(/([^/?#]+)/(\d+-\d+)/images/.*)")
|
2023-09-14 13:27:03 +02:00
|
|
|
example = "https://comicvine.gamespot.com/TAG/123-45/images/"
|
2021-07-23 02:01:51 +02:00
|
|
|
|
|
|
|
def __init__(self, match):
|
|
|
|
BooruExtractor.__init__(self, match)
|
|
|
|
self.path, self.object_name, self.object_id = match.groups()
|
|
|
|
|
|
|
|
def metadata(self):
|
|
|
|
return {"tag": text.unquote(self.object_name)}
|
|
|
|
|
|
|
|
def posts(self):
|
|
|
|
url = self.root + "/js/image-data.json"
|
|
|
|
params = {
|
|
|
|
"images": text.extract(
|
|
|
|
self.request(self.root + self.path).text,
|
|
|
|
'data-gallery-id="', '"')[0],
|
|
|
|
"start" : self.page_start,
|
|
|
|
"count" : self.per_page,
|
|
|
|
"object": self.object_id,
|
|
|
|
}
|
|
|
|
|
|
|
|
while True:
|
|
|
|
images = self.request(url, params=params).json()["images"]
|
|
|
|
yield from images
|
|
|
|
|
|
|
|
if len(images) < self.per_page:
|
|
|
|
return
|
|
|
|
params["start"] += self.per_page
|
|
|
|
|
|
|
|
def skip(self, num):
|
|
|
|
self.page_start = num
|
|
|
|
return num
|
|
|
|
|
|
|
|
_file_url = operator.itemgetter("original")
|
|
|
|
|
|
|
|
@staticmethod
|
|
|
|
def _prepare(post):
|
|
|
|
post["date"] = text.parse_datetime(
|
|
|
|
post["dateCreated"], "%a, %b %d %Y")
|
|
|
|
post["tags"] = [tag["name"] for tag in post["tags"] if tag["name"]]
|