diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py
index 235cb856..60c9beab 100644
--- a/gallery_dl/extractor/common.py
+++ b/gallery_dl/extractor/common.py
@@ -69,11 +69,11 @@ class SequentialExtractor(Extractor):
Extractor.__init__(self)
-class AsyncExtractor(Extractor):
+class AsynchronousExtractor(Extractor):
def __init__(self, config):
Extractor.__init__(self)
- queue_size = int(config.get("queue-size", 5))
+ queue_size = int(config.get("general", "queue-size", fallback=5))
self.__queue = queue.Queue(maxsize=queue_size)
self.__thread = threading.Thread(target=self.async_items)
# self.__thread = threading.Thread(target=self.async_images, daemon=True)
diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py
index 35a49522..765ac825 100644
--- a/gallery_dl/extractor/pixiv.py
+++ b/gallery_dl/extractor/pixiv.py
@@ -1,94 +1,116 @@
-from .common import AsyncExtractor
-from ..util import safe_request
+# -*- coding: utf-8 -*-
+
+# Copyright 2014, 2015 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extract images and ugoira from http://www.pixiv.net/"""
+
+from .common import AsynchronousExtractor
+from .common import Message
+from .common import safe_request
import re
import csv
import requests
-class Extractor(AsyncExtractor):
+info = {
+ "category": "pixiv",
+ "extractor": "PixivExtractor",
+ "directory": ["{category}", "{artist-id}"],
+ "filename": "{category}_{artist-id}_{illust-id}{num}.{extension}",
+ "pattern": [
+ r"(?:https?://)?(?:www\.)?pixiv\.net/member(?:_illust)?\.php\?id=(\d+)",
+ ],
+}
+
+
+class PixivExtractor(AsynchronousExtractor):
member_url = "http://www.pixiv.net/member_illust.php"
illust_url = "http://www.pixiv.net/member_illust.php?mode=medium"
+ singl_v1_fmt = ("http://i{thumbnail-url[8]}.pixiv.net/img{directory:>02}"
+ "/img/{artist-nick}/{illust-id}.{extension}")
+ manga_v1_fmt = ("http://i{thumbnail-url[8]}.pixiv.net/img{directory:>02}"
+ "/img/{artist-nick}/{illust-id}{big}_p{index}.{extension}")
+
+ singl_v2_fmt = ("http://i{thumbnail-url[8]}.pixiv.net/img-original/img"
+ "/{url-date}/{illust-id}_p0.{extension}")
+ manga_v2_fmt = ("http://i{thumbnail-url[8]}.pixiv.net/img-original/img"
+ "/{url-date}/{illust-id}_p{index}.{extension}")
+
def __init__(self, match, config):
- AsyncExtractor.__init__(self, config)
- self.member_id = match.group(1)
- self.category = "pixiv"
- self.directory = self.member_id
- self.session.cookies.update(config["pixiv-cookies"])
- self.session.headers.update({"Referer": "http://www.pixiv.net/"})
+ AsynchronousExtractor.__init__(self, config)
+ self.config = config
+ self.artist_id = match.group(1)
self.api = PixivAPI(config["pixiv-cookies"]["PHPSESSID"])
- def images(self):
- sname_fmt = "pixiv_{1}_{0}.{2}"
- mname_fmt = "pixiv_{1}_{0}_p{num:02}.{2}"
+ def items(self):
+ yield Message.Version, 1
+ yield Message.Headers, {"Referer": "http://www.pixiv.net/"}
+ yield Message.Cookies, self.config["pixiv-cookies"]
+ yield Message.Directory, self.get_job_metadata()
- singl_v1_fmt = "http://i{6[8]}.pixiv.net/img{4:>02}/img/{24}/{0}.{2}"
- manga_v1_fmt = "http://i{6[8]}.pixiv.net/img{4:>02}/img/{24}/{0}{big}_p{num}.{2}"
-
- singl_v2_fmt = "http://i{6[8]}.pixiv.net/img-original/img/{date}/{0}_p0.{2}"
- manga_v2_fmt = "http://i{6[8]}.pixiv.net/img-original/img/{date}/{0}_p{num}.{2}"
-
- date = ""
- big = ""
-
- for img in self.image_ids():
- data = self.api.request(img)
+ for illust_id in self.get_illust_ids():
+ data = self.api.request(illust_id)
# debug
# for i, value in enumerate(data):
# print("{:02}: {}".format(i, value))
# return
# debug end
- if "うごイラ" in data[13]:
+ # if "うごイラ" in data["tags"]:
# ugoira / animations
- try:
- url, framelist = self.parse_ugoira(img)
- data[2] = "zip"
- yield (url, sname_fmt.format(*data))
- data[2] = "txt"
- yield (framelist, sname_fmt.format(*data))
- continue
- except:
- print("[Warning] failed to get ugoira url; trying fallback")
+ # url, framelist = self.parse_ugoira(img)
+ # data[2] = "zip"
+ # yield (url, sname_fmt.format(*data))
+ # data[2] = "txt"
+ # yield (framelist, sname_fmt.format(*data))
+ # continue
# images
- if img > 46270949:
- date = data[6][45:64]
- url_s_fmt = singl_v2_fmt
- url_m_fmt = manga_v2_fmt
+ if illust_id > 46270949:
+ big = ""
+ url_s_fmt = self.singl_v2_fmt
+ url_m_fmt = self.manga_v2_fmt
else:
- big = "_big" if img > 11319935 else ""
- url_s_fmt = singl_v1_fmt
- url_m_fmt = manga_v1_fmt
+ big = "_big" if illust_id > 11319935 else ""
+ url_s_fmt = self.singl_v1_fmt
+ url_m_fmt = self.manga_v1_fmt
- if not data[19]:
- yield (url_s_fmt.format(*data, date=date), sname_fmt.format(*data))
+ if not data["count"]:
+ yield Message.Url, url_s_fmt.format(**data), data
else:
- for i in range(0, int(data[19])):
- yield (url_m_fmt.format(*data, num=i, date=date, big=big),
- mname_fmt.format(*data, num=i))
+ for i in range(0, int(data["count"])):
+ data["num"] = "_p{:02}".format(i)
+ yield (Message.Url,
+ url_m_fmt.format(index=i, big=big, **data),
+ data.copy())
- def image_ids(self):
- """generator -- yield all image ids"""
- needle = ' 31:
- return next(csv.reader([text]))
-
-# class FileDict(dict):
-#
- # def __init__(self, *args):
- # super().__init__()
- # self.re = re.compile(r"pixiv_\d+_(?P\d+)(?P_p\d+)?\.[a-z]{3}")
- # for arg in args:
- # self.load_from(arg)
-#
- # def load_from(self, directory):
- # match = self.re.match
- # for file in os.listdir(directory):
- # m = match(file)
- # if m is None:
- # continue
- # val = True if m.group("extra") else False
- # dict.__setitem__(self, m.group("id"), val)
-#
- # def __getitem__(self, key):
- # return dict.get(self, key)
+ return text