mirror of https://github.com/mikf/gallery-dl.git synced 2024-11-22 18:53:21 +01:00

share extractor and downloader sessions

There was never any "good" reason for the strict separation
between extractors and downloaders. This change allows for
reduced resource usage (probably unnoticeable) and fewer lines
of code at the "cost" of tighter coupling.
Mike Fährmann 2017-06-30 19:38:14 +02:00
parent 4414aefe97
commit 58e95a7487
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88
9 changed files with 13 additions and 62 deletions
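
The core idea, as a minimal self-contained sketch (hypothetical class names, not the actual gallery-dl classes): the extractor owns a single requests.Session, and the downloader is constructed with that same object, so headers and cookies set while extracting are automatically sent when downloading.

import requests


class SketchExtractor:
    """Owns the session; anything set here is seen by all later requests."""

    def __init__(self):
        self.session = requests.Session()
        self.session.headers["User-Agent"] = "Mozilla/5.0"


class SketchHttpDownloader:
    """Reuses the extractor's session instead of creating its own."""

    def __init__(self, session, output=None):
        self.session = session
        self.out = output

    def download(self, url, path):
        response = self.session.get(url, stream=True)
        with open(path, "wb") as fp:
            for chunk in response.iter_content(16384):
                fp.write(chunk)


extractor = SketchExtractor()
downloader = SketchHttpDownloader(extractor.session)
# cookies and headers picked up during extraction now travel with every download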

View File

@@ -6,10 +6,9 @@
 # it under the terms of the GNU General Public License version 2 as
 # published by the Free Software Foundation.
 
-"""Downloader module for http:// and https:// urls"""
+"""Downloader module for http:// and https:// URLs"""
 
 import time
-import requests
 import requests.exceptions as rexcepts
 import mimetypes
 import logging
@@ -24,9 +23,9 @@ class Downloader(BasicDownloader):
     retries = config.interpolate(("downloader", "http", "retries",), 5)
     timeout = config.interpolate(("downloader", "http", "timeout",), None)
 
-    def __init__(self, output):
+    def __init__(self, session, output):
         BasicDownloader.__init__(self)
-        self.session = requests.session()
+        self.session = session
         self.out = output
 
     def download_impl(self, url, pathfmt):
@@ -96,17 +95,3 @@ class Downloader(BasicDownloader):
         # output for unrecoverable errors
         self.out.error(pathfmt.path, msg, tries, 0)
 
-    def set_headers(self, headers):
-        """Set headers for http requests"""
-        self.set_dict(self.session.headers, headers)
-
-    def set_cookies(self, cookies):
-        """Set cookies for http requests"""
-        self.set_dict(self.session.cookies, cookies)
-
-    @staticmethod
-    def set_dict(dest, src):
-        """Copy the contents of dictionary 'src' to 'dest'"""
-        dest.clear()
-        dest.update(src)
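
For context on the deleted helpers: set_headers()/set_cookies() only existed to copy state into the downloader's formerly private session. With one shared session there is nothing left to copy, as this small illustration with plain requests (no gallery-dl code) shows:

import requests

session = requests.Session()
extractor_side = session       # the extractor's reference
downloader_side = session      # the downloader's reference, same object

extractor_side.headers["Referer"] = "https://example.org/"
extractor_side.cookies.set("sessionid", "12345")

# the downloader sees the same headers and cookies without any copying
assert downloader_side.headers["Referer"] == "https://example.org/"
assert downloader_side.cookies.get("sessionid") == "12345"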

View File

@@ -27,13 +27,13 @@ class BooruExtractor(Extractor):
     def __init__(self):
         Extractor.__init__(self)
+        self.session.headers.update(self.headers)
         self.params = {"limit": 50}
         self.setup()
 
     def items(self):
         yield Message.Version, 1
         yield Message.Directory, self.get_job_metadata()
-        yield Message.Headers, self.headers
         for data in self.items_impl():
             try:
                 url = self.get_file_url(data)
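
The booru change illustrates the new extractor-side pattern: per-extractor headers are merged into the shared session once, in __init__, so there is no Message.Headers left to yield. A rough sketch with a hypothetical subclass:

import requests


class BooruLikeExtractor:
    headers = {"User-Agent": "Mozilla/5.0", "Accept": "application/json"}

    def __init__(self):
        self.session = requests.Session()
        # one-time merge; every request made through this session,
        # including the eventual file downloads, carries these headers
        self.session.headers.update(self.headers)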

View File

@@ -50,9 +50,8 @@ class ExhentaiGalleryExtractor(Extractor):
     def items(self):
         self.login()
+        self.setup_headers()
         yield Message.Version, 1
-        yield Message.Headers, self.setup_headers()
-        yield Message.Cookies, self.session.cookies
         url = "{}/g/{}/{}/".format(self.root, self.gid, self.token)
         response = self.session.get(url)
@@ -76,14 +75,9 @@ class ExhentaiGalleryExtractor(Extractor):
         """Initialize headers"""
         self.session.headers.update({
             "User-Agent": "Mozilla/5.0",
             "Accept": "text/html,application/xhtml+xml,"
                       "application/xml;q=0.9,*/*;q=0.8",
             "Accept-Language": "en-US,en;q=0.5",
             "Referer": self.root + "/",
         })
-        headers = self.session.headers.copy()
-        headers["Accept"] = "image/png,image/*;q=0.8,*/*;q=0.5"
-        return headers
 
     def get_job_metadata(self, page):
         """Collect metadata for extractor-job"""

View File

@@ -27,7 +27,6 @@ class ImgchiliExtractor(Extractor):
         page = self.request(self.url, encoding="utf-8").text
         data = self.get_job_metadata(page)
         yield Message.Version, 1
-        yield Message.Headers, self.session.headers
         yield Message.Directory, data
         for url, image in self.get_images(page):
             data.update(image)

View File

@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 
-# Copyright 2015 Mike Fährmann
+# Copyright 2015-2017 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
@@ -12,6 +12,4 @@ class Message():
     Version = 1
     Directory = 2
     Url = 3
-    Headers = 4
-    Cookies = 5
     Queue = 6
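
Reconstructed from the hunk above, the message class after this commit keeps the remaining constants and their values, so existing Version/Directory/Url/Queue handling elsewhere is unaffected:

class Message():
    Version = 1
    Directory = 2
    Url = 3
    Queue = 6  # value unchanged; the gap left by Headers/Cookies is kept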

View File

@@ -31,8 +31,6 @@ class PixivExtractor(Extractor):
         metadata = self.get_metadata()
 
         yield Message.Version, 1
-        yield Message.Headers, self.session.headers
-        yield Message.Cookies, self.session.cookies
         yield Message.Directory, metadata
         for work in self.works():

View File

@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 
-# Copyright 2014, 2015 Mike Fährmann
+# Copyright 2014-2017 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
@@ -35,7 +35,6 @@ class SankakuTagExtractor(AsynchronousExtractor):
     def items(self):
         data = self.get_job_metadata()
         yield Message.Version, 1
-        yield Message.Headers, self.session.headers
         yield Message.Directory, data
         for image in self.get_images():
             image.update(data)

View File

@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 
-# Copyright 2016 Mike Fährmann
+# Copyright 2016-2017 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
@@ -38,7 +38,6 @@ class SenmangaChapterExtractor(Extractor):
         data = self.get_job_metadata()
         yield Message.Version, 1
         yield Message.Directory, data
-        yield Message.Headers, self.session.headers
         for i in range(int(data["count"])):
             page = str(i+1)
             data["page"] = page

View File

@@ -79,12 +79,6 @@ class Job():
             if self.pred_queue:
                 self.handle_queue(msg[1])
 
-        elif msg[0] == Message.Headers:
-            self.handle_headers(msg[1])
-
-        elif msg[0] == Message.Cookies:
-            self.handle_cookies(msg[1])
-
         elif msg[0] == Message.Version:
             if msg[1] != 1:
                 raise "unsupported message-version ({}, {})".format(
@@ -101,12 +95,6 @@ class Job():
     def handle_queue(self, url):
         """Handle Message.Queue"""
 
-    def handle_headers(self, headers):
-        """Handle Message.Headers"""
-
-    def handle_cookies(self, cookies):
-        """Handle Message.Cookies"""
-
     def update_kwdict(self, kwdict):
         """Add 'category' and 'subcategory' keywords"""
         kwdict["category"] = self.extractor.category
@@ -145,12 +133,6 @@ class DownloadJob(Job):
         except exception.NoExtractorError:
             self._write_unsupported(url)
 
-    def handle_headers(self, headers):
-        self.get_downloader("http:").set_headers(headers)
-
-    def handle_cookies(self, cookies):
-        self.get_downloader("http:").set_cookies(cookies)
-
     def get_downloader(self, url):
         """Return, and possibly construct, a downloader suitable for 'url'"""
         pos = url.find(":")
@@ -160,7 +142,7 @@
         instance = self.downloaders.get(scheme)
         if instance is None:
             klass = downloader.find(scheme)
-            instance = klass(self.out)
+            instance = klass(self.extractor.session, self.out)
             self.downloaders[scheme] = instance
         return instance
@@ -300,13 +282,10 @@ class DataJob(Job):
         # collect data
         try:
            for msg in self.extractor:
-                if msg[0] in (Message.Headers, Message.Cookies):
-                    copy = (msg[0], dict(msg[1]))
-                else:
-                    copy = [
-                        part.copy() if hasattr(part, "copy") else part
-                        for part in msg
-                    ]
+                copy = [
+                    part.copy() if hasattr(part, "copy") else part
+                    for part in msg
+                ]
                 self.data.append(copy)
         except Exception as exc:
             self.data.append((exc.__class__.__name__, str(exc)))
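
Putting the job-side changes together: downloaders are still cached per URL scheme, but each one is now built with the extractor's session. A simplified, self-contained restatement (hypothetical names, downloader lookup stubbed out; not the exact gallery-dl code):

import requests


class StubHttpDownloader:
    def __init__(self, session, output=None):
        self.session = session
        self.out = output


class SketchDownloadJob:
    def __init__(self, extractor, output=None):
        self.extractor = extractor
        self.out = output
        self.downloaders = {}

    def get_downloader(self, url):
        """Return, and possibly construct, a downloader for 'url'."""
        scheme = url[:url.find(":")]
        instance = self.downloaders.get(scheme)
        if instance is None:
            # stand-in for the real scheme -> downloader-class lookup
            klass = {"http": StubHttpDownloader, "https": StubHttpDownloader}[scheme]
            instance = klass(self.extractor.session, self.out)  # shared session
            self.downloaders[scheme] = instance
        return instance


class MinimalExtractor:
    def __init__(self):
        self.session = requests.Session()


job = SketchDownloadJob(MinimalExtractor())
first = job.get_downloader("http://example.org/a")
second = job.get_downloader("http://example.org/b")
assert first is second and first.session is job.extractor.session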