Mirror of https://github.com/mikf/gallery-dl.git (synced 2024-11-22 02:32:33 +01:00)
add 'extractor.*.user-agent' config option
Commit: e6814aebe2
Parent: 6913eeaa40
@@ -289,6 +289,19 @@ Description Source to read additional cookies from.
 =========== =====
 
 
+extractor.*.user-agent
+----------------------
+=========== =====
+Type        ``string``
+Default     ``"Mozilla/5.0 (X11; Linux x86_64; rv:54.0) Gecko/20100101 Firefox/54.0"``
+Description User-Agent header value to be used for HTTP requests.
+
+            Note that this option has no effect on `pixiv` and
+            `readcomiconline` extractors, as these need specific values to
+            function correctly.
+=========== =====
+
+
 Extractor-specific Options
 ==========================
 
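
For orientation, here is a minimal sketch of how this option would end up in a user's configuration. gallery-dl reads a JSON config file; the layout below (an "extractor" object holding the "user-agent" key) is an assumption based on the usual option naming, written as a Python dict and dumped to JSON rather than taken from this commit.

# Hypothetical example only: where 'extractor.*.user-agent' would live
# in a JSON config file, built as a Python dict and printed as JSON.
import json

config = {
    "extractor": {
        # picked up by extractors unless they pin their own User-Agent
        "user-agent": "Mozilla/5.0 (X11; Linux x86_64; rv:54.0) "
                      "Gecko/20100101 Firefox/54.0",
    }
}

print(json.dumps(config, indent=4))
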
@@ -17,7 +17,6 @@ class ThreedeebooruExtractor(booru.JSONBooruExtractor):
     api_url = "http://behoimi.org/post/index.json"
     headers = {
         "Referer": "http://behoimi.org/post/show/",
-        "User-Agent": "Mozilla/5.0",
         "Accept-Encoding": "identity",
     }
 
@@ -75,7 +75,6 @@ class FoolfuukaThreadExtractor(SharedConfigExtractor):
     def __init__(self, match):
         SharedConfigExtractor.__init__(self)
         self.board, self.thread = match.groups()
-        self.session.headers["User-Agent"] = "Mozilla 5.0"
         if self.referer:
             self.session.headers["Referer"] = self.root
 
@@ -34,6 +34,7 @@ class Extractor():
         self.session = requests.Session()
         self.log = logging.getLogger(self.category)
         self._set_cookies(self.config("cookies"))
+        self._set_headers()
 
     def __iter__(self):
         return self.items()
@@ -96,6 +97,13 @@ class Extractor():
 
         return username, password
 
+    def _set_headers(self):
+        """Set additional headers for the 'session' object"""
+        self.session.headers["Accept-Language"] = "en-US,en;q=0.5"
+        self.session.headers["User-Agent"] = self.config(
+            "user-agent", ("Mozilla/5.0 (X11; Linux x86_64; rv:54.0) "
+                           "Gecko/20100101 Firefox/54.0"))
+
     def _set_cookies(self, cookies):
         """Populate the cookiejar with 'cookies'"""
         if cookies:
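
Taken together with the removals in the extractor modules below, the change establishes a simple precedence: the base Extractor sets a configurable User-Agent (falling back to the Firefox 54 string above), and extractors that must send one fixed value overwrite the session header afterwards, which is why the documentation notes that the option has no effect on pixiv and readcomiconline. The following standalone sketch illustrates that pattern with simplified stand-in classes, not gallery-dl's real ones.

# Simplified stand-ins (not gallery-dl's actual classes) showing how the
# configurable default User-Agent interacts with per-extractor overrides.
import requests

DEFAULT_UA = ("Mozilla/5.0 (X11; Linux x86_64; rv:54.0) "
              "Gecko/20100101 Firefox/54.0")


class BaseExtractor:
    def __init__(self, options=None):
        self.options = options or {}
        self.session = requests.Session()
        self._set_headers()

    def _set_headers(self):
        # a configured value wins; otherwise fall back to the built-in default
        self.session.headers["Accept-Language"] = "en-US,en;q=0.5"
        self.session.headers["User-Agent"] = self.options.get(
            "user-agent", DEFAULT_UA)


class PinnedExtractor(BaseExtractor):
    """An extractor that only works with one specific User-Agent."""

    def __init__(self, options=None):
        BaseExtractor.__init__(self, options)
        # overwrites whatever _set_headers() chose, so the
        # 'user-agent' option has no effect for this extractor
        self.session.headers["User-Agent"] = "Wget/1.19.2 (linux-gnu)"


print(BaseExtractor({"user-agent": "MyAgent/1.0"}).session.headers["User-Agent"])
# -> MyAgent/1.0
print(PinnedExtractor({"user-agent": "MyAgent/1.0"}).session.headers["User-Agent"])
# -> Wget/1.19.2 (linux-gnu)
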
@@ -50,11 +50,7 @@ class ExhentaiGalleryExtractor(Extractor):
         self.wait_max = self.config("wait-max", 6)
         if self.wait_max < self.wait_min:
             self.wait_max = self.wait_min
-        self.session.headers.update({
-            "User-Agent": "Mozilla/5.0",
-            "Accept-Language": "en-US,en;q=0.5",
-            "Referer": self.root + "/",
-        })
+        self.session.headers["Referer"] = self.root + "/"
 
     def items(self):
         self.login()
@@ -20,6 +20,10 @@ class ReadcomiconlineExtractor(kissmanga.KissmangaExtractor):
     filename_fmt = "{comic}_{issue:>03}_{page:>03}.{extension}"
     root = "http://readcomiconline.to"
 
+    def __init__(self, match):
+        kissmanga.KissmangaExtractor.__init__(self, match)
+        self.session.headers["User-Agent"] = "Wget/1.19.2 (linux-gnu)"
+
 
 class ReadcomiconlineComicExtractor(ReadcomiconlineExtractor,
                                     kissmanga.KissmangaMangaExtractor):
@@ -41,9 +41,6 @@ class SankakuTagExtractor(Extractor):
         self.wait_max = self.config("wait-max", 4)
         if self.wait_max < self.wait_min:
             self.wait_max = self.wait_min
-        self.session.headers["User-Agent"] = (
-            "Mozilla/5.0 Gecko/20100101 Firefox/40.0"
-        )
 
     def skip(self, num):
         pages = min(num // 20, 49)
@@ -38,7 +38,6 @@ class SenmangaChapterExtractor(Extractor):
         self.chapter_url = "{}/{}/".format(self.root, part)
         self.img_url = "{}/viewer/{}/".format(self.root, part)
         self.session.headers["Referer"] = self.chapter_url
-        self.session.headers["User-Agent"] = "Mozilla 5.0"
 
     def items(self):
         data = self.get_job_metadata()
@@ -37,10 +37,6 @@ class TwitterTweetExtractor(Extractor):
         self.path, self.user, self.tid = match.groups()
 
     def items(self):
-        self.session.headers["User-Agent"] = (
-            "Mozilla/5.0 (X11; Linux x86_64; rv:48.0) "
-            "Gecko/20100101 Firefox/48.0"
-        )
         page = self.request("https://twitter.com/" + self.path).text
         data = self.get_job_metadata()
         imgs = self.get_image_urls(page)