mirror of
https://github.com/mikf/gallery-dl.git
synced 2024-11-22 18:53:21 +01:00
share extractor and downloader sessions
There was never any "good" reason for the strict separation between extractors and downloaders. This change allows for reduced resource usage (probably unnoticeable) and fewer lines of code at the "cost" of tighter coupling.
This commit is contained in:
parent
4414aefe97
commit
58e95a7487
@ -6,10 +6,9 @@
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
"""Downloader module for http:// and https:// urls"""
|
||||
"""Downloader module for http:// and https:// URLs"""
|
||||
|
||||
import time
|
||||
import requests
|
||||
import requests.exceptions as rexcepts
|
||||
import mimetypes
|
||||
import logging
|
||||
@ -24,9 +23,9 @@ class Downloader(BasicDownloader):
|
||||
retries = config.interpolate(("downloader", "http", "retries",), 5)
|
||||
timeout = config.interpolate(("downloader", "http", "timeout",), None)
|
||||
|
||||
def __init__(self, output):
|
||||
def __init__(self, session, output):
|
||||
BasicDownloader.__init__(self)
|
||||
self.session = requests.session()
|
||||
self.session = session
|
||||
self.out = output
|
||||
|
||||
def download_impl(self, url, pathfmt):
|
||||
@ -96,17 +95,3 @@ class Downloader(BasicDownloader):
|
||||
|
||||
# output for unrecoverable errors
|
||||
self.out.error(pathfmt.path, msg, tries, 0)
|
||||
|
||||
def set_headers(self, headers):
|
||||
"""Set headers for http requests"""
|
||||
self.set_dict(self.session.headers, headers)
|
||||
|
||||
def set_cookies(self, cookies):
|
||||
"""Set cookies for http requests"""
|
||||
self.set_dict(self.session.cookies, cookies)
|
||||
|
||||
@staticmethod
|
||||
def set_dict(dest, src):
|
||||
"""Copy the contents of dictionary 'src' to 'dest'"""
|
||||
dest.clear()
|
||||
dest.update(src)
|
||||
|
@ -27,13 +27,13 @@ class BooruExtractor(Extractor):
|
||||
|
||||
def __init__(self):
|
||||
Extractor.__init__(self)
|
||||
self.session.headers.update(self.headers)
|
||||
self.params = {"limit": 50}
|
||||
self.setup()
|
||||
|
||||
def items(self):
|
||||
yield Message.Version, 1
|
||||
yield Message.Directory, self.get_job_metadata()
|
||||
yield Message.Headers, self.headers
|
||||
for data in self.items_impl():
|
||||
try:
|
||||
url = self.get_file_url(data)
|
||||
|
@ -50,9 +50,8 @@ class ExhentaiGalleryExtractor(Extractor):
|
||||
|
||||
def items(self):
|
||||
self.login()
|
||||
self.setup_headers()
|
||||
yield Message.Version, 1
|
||||
yield Message.Headers, self.setup_headers()
|
||||
yield Message.Cookies, self.session.cookies
|
||||
|
||||
url = "{}/g/{}/{}/".format(self.root, self.gid, self.token)
|
||||
response = self.session.get(url)
|
||||
@ -76,14 +75,9 @@ class ExhentaiGalleryExtractor(Extractor):
|
||||
"""Initialize headers"""
|
||||
self.session.headers.update({
|
||||
"User-Agent": "Mozilla/5.0",
|
||||
"Accept": "text/html,application/xhtml+xml,"
|
||||
"application/xml;q=0.9,*/*;q=0.8",
|
||||
"Accept-Language": "en-US,en;q=0.5",
|
||||
"Referer": self.root + "/",
|
||||
})
|
||||
headers = self.session.headers.copy()
|
||||
headers["Accept"] = "image/png,image/*;q=0.8,*/*;q=0.5"
|
||||
return headers
|
||||
|
||||
def get_job_metadata(self, page):
|
||||
"""Collect metadata for extractor-job"""
|
||||
|
@ -27,7 +27,6 @@ class ImgchiliExtractor(Extractor):
|
||||
page = self.request(self.url, encoding="utf-8").text
|
||||
data = self.get_job_metadata(page)
|
||||
yield Message.Version, 1
|
||||
yield Message.Headers, self.session.headers
|
||||
yield Message.Directory, data
|
||||
for url, image in self.get_images(page):
|
||||
data.update(image)
|
||||
|
@ -1,6 +1,6 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2015 Mike Fährmann
|
||||
# Copyright 2015-2017 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
@ -12,6 +12,4 @@ class Message():
|
||||
Version = 1
|
||||
Directory = 2
|
||||
Url = 3
|
||||
Headers = 4
|
||||
Cookies = 5
|
||||
Queue = 6
|
||||
|
@ -31,8 +31,6 @@ class PixivExtractor(Extractor):
|
||||
metadata = self.get_metadata()
|
||||
|
||||
yield Message.Version, 1
|
||||
yield Message.Headers, self.session.headers
|
||||
yield Message.Cookies, self.session.cookies
|
||||
yield Message.Directory, metadata
|
||||
|
||||
for work in self.works():
|
||||
|
@ -1,6 +1,6 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2014, 2015 Mike Fährmann
|
||||
# Copyright 2014-2017 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
@ -35,7 +35,6 @@ class SankakuTagExtractor(AsynchronousExtractor):
|
||||
def items(self):
|
||||
data = self.get_job_metadata()
|
||||
yield Message.Version, 1
|
||||
yield Message.Headers, self.session.headers
|
||||
yield Message.Directory, data
|
||||
for image in self.get_images():
|
||||
image.update(data)
|
||||
|
@ -1,6 +1,6 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2016 Mike Fährmann
|
||||
# Copyright 2016-2017 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
@ -38,7 +38,6 @@ class SenmangaChapterExtractor(Extractor):
|
||||
data = self.get_job_metadata()
|
||||
yield Message.Version, 1
|
||||
yield Message.Directory, data
|
||||
yield Message.Headers, self.session.headers
|
||||
for i in range(int(data["count"])):
|
||||
page = str(i+1)
|
||||
data["page"] = page
|
||||
|
@ -79,12 +79,6 @@ class Job():
|
||||
if self.pred_queue:
|
||||
self.handle_queue(msg[1])
|
||||
|
||||
elif msg[0] == Message.Headers:
|
||||
self.handle_headers(msg[1])
|
||||
|
||||
elif msg[0] == Message.Cookies:
|
||||
self.handle_cookies(msg[1])
|
||||
|
||||
elif msg[0] == Message.Version:
|
||||
if msg[1] != 1:
|
||||
raise "unsupported message-version ({}, {})".format(
|
||||
@ -101,12 +95,6 @@ class Job():
|
||||
def handle_queue(self, url):
|
||||
"""Handle Message.Queue"""
|
||||
|
||||
def handle_headers(self, headers):
|
||||
"""Handle Message.Headers"""
|
||||
|
||||
def handle_cookies(self, cookies):
|
||||
"""Handle Message.Cookies"""
|
||||
|
||||
def update_kwdict(self, kwdict):
|
||||
"""Add 'category' and 'subcategory' keywords"""
|
||||
kwdict["category"] = self.extractor.category
|
||||
@ -145,12 +133,6 @@ class DownloadJob(Job):
|
||||
except exception.NoExtractorError:
|
||||
self._write_unsupported(url)
|
||||
|
||||
def handle_headers(self, headers):
|
||||
self.get_downloader("http:").set_headers(headers)
|
||||
|
||||
def handle_cookies(self, cookies):
|
||||
self.get_downloader("http:").set_cookies(cookies)
|
||||
|
||||
def get_downloader(self, url):
|
||||
"""Return, and possibly construct, a downloader suitable for 'url'"""
|
||||
pos = url.find(":")
|
||||
@ -160,7 +142,7 @@ class DownloadJob(Job):
|
||||
instance = self.downloaders.get(scheme)
|
||||
if instance is None:
|
||||
klass = downloader.find(scheme)
|
||||
instance = klass(self.out)
|
||||
instance = klass(self.extractor.session, self.out)
|
||||
self.downloaders[scheme] = instance
|
||||
return instance
|
||||
|
||||
@ -300,13 +282,10 @@ class DataJob(Job):
|
||||
# collect data
|
||||
try:
|
||||
for msg in self.extractor:
|
||||
if msg[0] in (Message.Headers, Message.Cookies):
|
||||
copy = (msg[0], dict(msg[1]))
|
||||
else:
|
||||
copy = [
|
||||
part.copy() if hasattr(part, "copy") else part
|
||||
for part in msg
|
||||
]
|
||||
copy = [
|
||||
part.copy() if hasattr(part, "copy") else part
|
||||
for part in msg
|
||||
]
|
||||
self.data.append(copy)
|
||||
except Exception as exc:
|
||||
self.data.append((exc.__class__.__name__, str(exc)))
|
||||
|
Loading…
Reference in New Issue
Block a user