diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py index 235cb856..60c9beab 100644 --- a/gallery_dl/extractor/common.py +++ b/gallery_dl/extractor/common.py @@ -69,11 +69,11 @@ class SequentialExtractor(Extractor): Extractor.__init__(self) -class AsyncExtractor(Extractor): +class AsynchronousExtractor(Extractor): def __init__(self, config): Extractor.__init__(self) - queue_size = int(config.get("queue-size", 5)) + queue_size = int(config.get("general", "queue-size", fallback=5)) self.__queue = queue.Queue(maxsize=queue_size) self.__thread = threading.Thread(target=self.async_items) # self.__thread = threading.Thread(target=self.async_images, daemon=True) diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py index 35a49522..765ac825 100644 --- a/gallery_dl/extractor/pixiv.py +++ b/gallery_dl/extractor/pixiv.py @@ -1,94 +1,116 @@ -from .common import AsyncExtractor -from ..util import safe_request +# -*- coding: utf-8 -*- + +# Copyright 2014, 2015 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extract images and ugoira from http://www.pixiv.net/""" + +from .common import AsynchronousExtractor +from .common import Message +from .common import safe_request import re import csv import requests -class Extractor(AsyncExtractor): +info = { + "category": "pixiv", + "extractor": "PixivExtractor", + "directory": ["{category}", "{artist-id}"], + "filename": "{category}_{artist-id}_{illust-id}{num}.{extension}", + "pattern": [ + r"(?:https?://)?(?:www\.)?pixiv\.net/member(?:_illust)?\.php\?id=(\d+)", + ], +} + + +class PixivExtractor(AsynchronousExtractor): member_url = "http://www.pixiv.net/member_illust.php" illust_url = "http://www.pixiv.net/member_illust.php?mode=medium" + singl_v1_fmt = ("http://i{thumbnail-url[8]}.pixiv.net/img{directory:>02}" + "/img/{artist-nick}/{illust-id}.{extension}") + manga_v1_fmt = ("http://i{thumbnail-url[8]}.pixiv.net/img{directory:>02}" + "/img/{artist-nick}/{illust-id}{big}_p{index}.{extension}") + + singl_v2_fmt = ("http://i{thumbnail-url[8]}.pixiv.net/img-original/img" + "/{url-date}/{illust-id}_p0.{extension}") + manga_v2_fmt = ("http://i{thumbnail-url[8]}.pixiv.net/img-original/img" + "/{url-date}/{illust-id}_p{index}.{extension}") + def __init__(self, match, config): - AsyncExtractor.__init__(self, config) - self.member_id = match.group(1) - self.category = "pixiv" - self.directory = self.member_id - self.session.cookies.update(config["pixiv-cookies"]) - self.session.headers.update({"Referer": "http://www.pixiv.net/"}) + AsynchronousExtractor.__init__(self, config) + self.config = config + self.artist_id = match.group(1) self.api = PixivAPI(config["pixiv-cookies"]["PHPSESSID"]) - def images(self): - sname_fmt = "pixiv_{1}_{0}.{2}" - mname_fmt = "pixiv_{1}_{0}_p{num:02}.{2}" + def items(self): + yield Message.Version, 1 + yield Message.Headers, {"Referer": "http://www.pixiv.net/"} + yield Message.Cookies, self.config["pixiv-cookies"] + yield Message.Directory, self.get_job_metadata() - singl_v1_fmt = "http://i{6[8]}.pixiv.net/img{4:>02}/img/{24}/{0}.{2}" - manga_v1_fmt = "http://i{6[8]}.pixiv.net/img{4:>02}/img/{24}/{0}{big}_p{num}.{2}" - - singl_v2_fmt = "http://i{6[8]}.pixiv.net/img-original/img/{date}/{0}_p0.{2}" - manga_v2_fmt = "http://i{6[8]}.pixiv.net/img-original/img/{date}/{0}_p{num}.{2}" - - date = "" - big = "" - - for img in self.image_ids(): - data = self.api.request(img) + for illust_id in self.get_illust_ids(): + data = self.api.request(illust_id) # debug # for i, value in enumerate(data): # print("{:02}: {}".format(i, value)) # return # debug end - if "うごイラ" in data[13]: + # if "うごイラ" in data["tags"]: # ugoira / animations - try: - url, framelist = self.parse_ugoira(img) - data[2] = "zip" - yield (url, sname_fmt.format(*data)) - data[2] = "txt" - yield (framelist, sname_fmt.format(*data)) - continue - except: - print("[Warning] failed to get ugoira url; trying fallback") + # url, framelist = self.parse_ugoira(img) + # data[2] = "zip" + # yield (url, sname_fmt.format(*data)) + # data[2] = "txt" + # yield (framelist, sname_fmt.format(*data)) + # continue # images - if img > 46270949: - date = data[6][45:64] - url_s_fmt = singl_v2_fmt - url_m_fmt = manga_v2_fmt + if illust_id > 46270949: + big = "" + url_s_fmt = self.singl_v2_fmt + url_m_fmt = self.manga_v2_fmt else: - big = "_big" if img > 11319935 else "" - url_s_fmt = singl_v1_fmt - url_m_fmt = manga_v1_fmt + big = "_big" if illust_id > 11319935 else "" + url_s_fmt = self.singl_v1_fmt + url_m_fmt = self.manga_v1_fmt - if not data[19]: - yield (url_s_fmt.format(*data, date=date), sname_fmt.format(*data)) + if not data["count"]: + yield Message.Url, url_s_fmt.format(**data), data else: - for i in range(0, int(data[19])): - yield (url_m_fmt.format(*data, num=i, date=date, big=big), - mname_fmt.format(*data, num=i)) + for i in range(0, int(data["count"])): + data["num"] = "_p{:02}".format(i) + yield (Message.Url, + url_m_fmt.format(index=i, big=big, **data), + data.copy()) - def image_ids(self): - """generator -- yield all image ids""" - needle = ' 31: - return next(csv.reader([text])) - -# class FileDict(dict): -# - # def __init__(self, *args): - # super().__init__() - # self.re = re.compile(r"pixiv_\d+_(?P\d+)(?P_p\d+)?\.[a-z]{3}") - # for arg in args: - # self.load_from(arg) -# - # def load_from(self, directory): - # match = self.re.match - # for file in os.listdir(directory): - # m = match(file) - # if m is None: - # continue - # val = True if m.group("extra") else False - # dict.__setitem__(self, m.group("id"), val) -# - # def __getitem__(self, key): - # return dict.get(self, key) + return text