Mirror of https://github.com/mikf/gallery-dl.git
Synced 2024-11-22 10:42:34 +01:00

Commit f08af03845: Merge branch 'cookies'
@@ -224,7 +224,7 @@ Description The username to use when attempting to log in to another site.
             ``seiga`` modules and optional (but strongly recommended) for
             ``batoto`` and ``exhentai``.
 
-            This value can also be given via the ``-u/--username``
+            This value can also be set via the ``-u/--username``
             command-line option or by using a |.netrc|_ file.
             (see Authentication_)
 =========== =====
@@ -239,6 +239,20 @@ Description The password belonging to the username.
 =========== =====
 
 
+extractor.*.cookies
+-------------------
+=========== =====
+Type        ``string`` or ``object``
+Default     ``null``
+Description Source to read additional cookies from.
+
+            * If this is a ``string``, it specifies the path of a
+              Mozilla/Netscape format cookies.txt file.
+            * If this is an ``object``, its key-value pairs, which should both
+              be ``strings``, will be used as cookie-names and -values.
+=========== =====
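A usage sketch for the ``extractor.*.cookies`` option documented above. Both accepted forms can also be set programmatically, mirroring the ``config.set(("cookies",), ...)`` calls in test/test_cookies.py further down; the file path and cookie values here are hypothetical:

    # Sketch only: the two forms of the "cookies" option, set via gallery_dl.config
    from gallery_dl import config

    # string form: path to a Mozilla/Netscape-format cookies.txt file
    config.set(("cookies",), "/home/user/cookies.txt")

    # object form: cookie names and values given directly
    config.set(("cookies",), {"member_id": "12345", "pass_hash": "0123456789abcdef"})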
 
 
 Extractor-specific Options
 ==========================
 
@@ -6,10 +6,9 @@
 # it under the terms of the GNU General Public License version 2 as
 # published by the Free Software Foundation.
 
-"""Downloader module for http:// and https:// urls"""
+"""Downloader module for http:// and https:// URLs"""
 
 import time
 import requests
 import requests.exceptions as rexcepts
 import mimetypes
 import logging
@@ -24,9 +23,9 @@ class Downloader(BasicDownloader):
     retries = config.interpolate(("downloader", "http", "retries",), 5)
     timeout = config.interpolate(("downloader", "http", "timeout",), None)
 
-    def __init__(self, output):
+    def __init__(self, session, output):
         BasicDownloader.__init__(self)
-        self.session = requests.session()
+        self.session = session
         self.out = output
 
     def download_impl(self, url, pathfmt):
@@ -96,17 +95,3 @@ class Downloader(BasicDownloader):
 
         # output for unrecoverable errors
         self.out.error(pathfmt.path, msg, tries, 0)
-
-    def set_headers(self, headers):
-        """Set headers for http requests"""
-        self.set_dict(self.session.headers, headers)
-
-    def set_cookies(self, cookies):
-        """Set cookies for http requests"""
-        self.set_dict(self.session.cookies, cookies)
-
-    @staticmethod
-    def set_dict(dest, src):
-        """Copy the contents of dictionary 'src' to 'dest'"""
-        dest.clear()
-        dest.update(src)
@@ -19,15 +19,19 @@ class BatotoExtractor():
     category = "batoto"
     scheme = "https"
     root = "https://bato.to"
+    cookienames = ("member_id", "pass_hash")
+    cookiedomain = ".bato.to"
 
     def login(self):
         """Login and set necessary cookies"""
+        if self._check_cookies(self.cookienames):
+            return
         username, password = self.auth_info()
         if username:
             cookies = self._login_impl(username, password)
             for key, value in cookies.items():
                 self.session.cookies.set(
-                    key, value, domain=".bato.to", path="/")
+                    key, value, domain=self.cookiedomain)
 
     @cache(maxage=7*24*60*60, keyarg=1)
     def _login_impl(self, username, password):
@@ -53,7 +57,7 @@ class BatotoExtractor():
             method="POST", params=params, data=data)
         if "Sign In - " in response.text:
             raise exception.AuthenticationError()
-        return {c: response.cookies[c] for c in ("member_id", "pass_hash")}
+        return {c: response.cookies[c] for c in self.cookienames}
 
 
 class BatotoMangaExtractor(BatotoExtractor, MangaExtractor):
@@ -27,13 +27,13 @@ class BooruExtractor(Extractor):
 
     def __init__(self):
         Extractor.__init__(self)
+        self.session.headers.update(self.headers)
         self.params = {"limit": 50}
         self.setup()
 
     def items(self):
         yield Message.Version, 1
         yield Message.Directory, self.get_job_metadata()
-        yield Message.Headers, self.headers
         for data in self.items_impl():
             try:
                 url = self.get_file_url(data)
@@ -9,12 +9,14 @@
 """Common classes and constants used by extractor modules."""
 
 import os
+import re
 import time
 import netrc
 import queue
 import logging
 import requests
 import threading
+import http.cookiejar
 from .message import Message
 from .. import config
@@ -25,11 +27,26 @@ class Extractor():
     subcategory = ""
     directory_fmt = ["{category}"]
     filename_fmt = "{filename}"
+    cookiedomain = ""
 
     def __init__(self):
         self.session = requests.Session()
         self.log = logging.getLogger(self.category)
 
+        cookies = self.config("cookies")
+        if cookies:
+            if isinstance(cookies, dict):
+                setcookie = self.session.cookies.set
+                for name, value in cookies.items():
+                    setcookie(name, value, domain=self.cookiedomain)
+            else:
+                try:
+                    cj = http.cookiejar.MozillaCookieJar()
+                    cj.load(cookies)
+                    self.session.cookies.update(cj)
+                except OSError as exc:
+                    self.log.warning("cookies: %s", exc)
 
     def __iter__(self):
         return self.items()
@@ -67,6 +84,17 @@ class Extractor():
         response.encoding = encoding
         return response
 
+    def _check_cookies(self, cookienames, domain=None):
+        """Return True if all 'cookienames' exist in the current session"""
+        if not domain and self.cookiedomain:
+            domain = self.cookiedomain
+        for name in cookienames:
+            try:
+                self.session.cookies._find(name, domain)
+            except KeyError:
+                return False
+        return True
 
 
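Note that _check_cookies() above relies on CookieJar._find(), a private method of http.cookiejar. For illustration, a rough public-API equivalent that only iterates over the jar might look like this sketch (the standalone has_cookies() helper and the sample cookie values are not part of the commit):

    import requests

    def has_cookies(session, names, domain=""):
        # Sketch: True if every cookie name in 'names' is present in the
        # session's cookie jar, optionally restricted to a single domain.
        found = {c.name for c in session.cookies
                 if not domain or c.domain == domain}
        return all(name in found for name in names)

    session = requests.Session()
    session.cookies.set("member_id", "12345", domain=".bato.to")
    session.cookies.set("pass_hash", "abcdef", domain=".bato.to")
    print(has_cookies(session, ("member_id", "pass_hash"), ".bato.to"))  # True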
 class AsynchronousExtractor(Extractor):
 
@@ -159,6 +187,13 @@ def safe_request(session, url, method="GET", *args, **kwargs):
     return r
 
 
+# Reduce strictness of the expected magic string in cookie jar files.
+# (This allows the use of Wget-generated cookiejar files without modification)
+
+http.cookiejar.MozillaCookieJar.magic_re = re.compile(
+    "#( Netscape)? HTTP Cookie File", re.IGNORECASE)
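To illustrate the relaxation above: the stock MozillaCookieJar only accepts the full Netscape header line, whereas the shorter header written by tools such as Wget is now accepted as well. A minimal sketch of the new behaviour:

    import re

    magic_re = re.compile("#( Netscape)? HTTP Cookie File", re.IGNORECASE)

    print(bool(magic_re.match("# Netscape HTTP Cookie File")))  # True (standard header)
    print(bool(magic_re.match("# HTTP Cookie File")))           # True (Wget-style header)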
 
 
 # The first import of requests happens inside this file.
 # If we are running on Windows and the from requests expected certificate file
 # is missing (which happens in a standalone executable from py2exe), the
@@ -36,6 +36,8 @@ class ExhentaiGalleryExtractor(Extractor):
         }),
     ]
     root = "https://exhentai.org"
+    cookienames = ("ipb_member_id", "ipb_pass_hash")
+    cookiedomain = ".exhentai.org"
 
     def __init__(self, match):
         Extractor.__init__(self)
|
||||
|
||||
def items(self):
|
||||
self.login()
|
||||
self.setup_headers()
|
||||
yield Message.Version, 1
|
||||
yield Message.Headers, self.setup_headers()
|
||||
yield Message.Cookies, self.session.cookies
|
||||
|
||||
url = "{}/g/{}/{}/".format(self.root, self.gid, self.token)
|
||||
response = self.session.get(url)
|
||||
@@ -76,14 +77,9 @@ class ExhentaiGalleryExtractor(Extractor):
         """Initialize headers"""
         self.session.headers.update({
             "User-Agent": "Mozilla/5.0",
             "Accept": "text/html,application/xhtml+xml,"
                       "application/xml;q=0.9,*/*;q=0.8",
             "Accept-Language": "en-US,en;q=0.5",
             "Referer": self.root + "/",
         })
-        headers = self.session.headers.copy()
-        headers["Accept"] = "image/png,image/*;q=0.8,*/*;q=0.5"
-        return headers
 
     def get_job_metadata(self, page):
         """Collect metadata for extractor-job"""
@@ -182,6 +178,8 @@ class ExhentaiGalleryExtractor(Extractor):
 
     def login(self):
         """Login and set necessary cookies"""
+        if self._check_cookies(self.cookienames):
+            return
         username, password = self.auth_info()
         if not username:
             self.log.info("no username given; using e-hentai.org")
@@ -191,21 +189,12 @@ class ExhentaiGalleryExtractor(Extractor):
         cookies = self._login_impl(username, password)
         for key, value in cookies.items():
             self.session.cookies.set(
-                key, value, domain=".exhentai.org", path="/")
+                key, value, domain=self.cookiedomain)
 
     @cache(maxage=90*24*60*60, keyarg=1)
     def _login_impl(self, username, password):
         """Actual login implementation"""
         self.log.info("Logging in as %s", username)
-        cnames = ["ipb_member_id", "ipb_pass_hash"]
-
-        try:
-            cookies = self.config("cookies")
-            if isinstance(cookies, dict) and all(c in cookies for c in cnames):
-                return cookies
-        except TypeError:
-            pass
-
         url = "https://forums.e-hentai.org/index.php?act=Login&CODE=01"
         params = {
             "CookieDate": "1",
@@ -221,4 +210,4 @@ class ExhentaiGalleryExtractor(Extractor):
 
         if "You are now logged in as:" not in response.text:
             raise exception.AuthenticationError()
-        return {c: response.cookies[c] for c in cnames}
+        return {c: response.cookies[c] for c in self.cookienames}
@@ -27,7 +27,6 @@ class ImgchiliExtractor(Extractor):
         page = self.request(self.url, encoding="utf-8").text
         data = self.get_job_metadata(page)
         yield Message.Version, 1
-        yield Message.Headers, self.session.headers
         yield Message.Directory, data
         for url, image in self.get_images(page):
             data.update(image)
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 
-# Copyright 2015 Mike Fährmann
+# Copyright 2015-2017 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
@@ -12,6 +12,4 @@ class Message():
     Version = 1
     Directory = 2
     Url = 3
-    Headers = 4
-    Cookies = 5
     Queue = 6
@@ -18,6 +18,7 @@ class NijieExtractor(AsynchronousExtractor):
     category = "nijie"
     directory_fmt = ["{category}", "{artist-id}"]
     filename_fmt = "{category}_{artist-id}_{image-id}_p{index:>02}.{extension}"
+    cookiedomain = "nijie.info"
     popup_url = "https://nijie.info/view_popup.php?id="
 
     def __init__(self):
@@ -62,6 +63,8 @@ class NijieExtractor(AsynchronousExtractor):
 
     def login(self):
         """Login and obtain session cookie"""
+        if self._check_cookies(("nemail", "nlogin")):
+            return
         username, password = self.auth_info()
         self.session.cookies = self._login_impl(username, password)
@@ -32,8 +32,6 @@ class PixivExtractor(Extractor):
         metadata = self.get_metadata()
 
         yield Message.Version, 1
-        yield Message.Headers, self.session.headers
-        yield Message.Cookies, self.session.cookies
         yield Message.Directory, metadata
 
         for work in self.works():
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 
-# Copyright 2014, 2015 Mike Fährmann
+# Copyright 2014-2017 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
@@ -35,7 +35,6 @@ class SankakuTagExtractor(AsynchronousExtractor):
     def items(self):
         data = self.get_job_metadata()
         yield Message.Version, 1
-        yield Message.Headers, self.session.headers
         yield Message.Directory, data
         for image in self.get_images():
             image.update(data)
@@ -17,6 +17,7 @@ from xml.etree import ElementTree
 class SeigaExtractor(Extractor):
     """Base class for seiga extractors"""
     category = "seiga"
+    cookiedomain = ".nicovideo.jp"
 
     def items(self):
         self.login()
@@ -47,6 +48,8 @@ class SeigaExtractor(Extractor):
 
     def login(self):
         """Login and set necessary cookies"""
+        if self._check_cookies(("user_session",)):
+            return
         username, password = self.auth_info()
         self.session.cookies = self._login_impl(username, password)
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 
-# Copyright 2016 Mike Fährmann
+# Copyright 2016-2017 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
@@ -38,7 +38,6 @@ class SenmangaChapterExtractor(Extractor):
         data = self.get_job_metadata()
         yield Message.Version, 1
         yield Message.Directory, data
-        yield Message.Headers, self.session.headers
         for i in range(int(data["count"])):
             page = str(i+1)
             data["page"] = page
@@ -79,12 +79,6 @@ class Job():
                 if self.pred_queue:
                     self.handle_queue(msg[1])
 
-            elif msg[0] == Message.Headers:
-                self.handle_headers(msg[1])
-
-            elif msg[0] == Message.Cookies:
-                self.handle_cookies(msg[1])
-
             elif msg[0] == Message.Version:
                 if msg[1] != 1:
                     raise "unsupported message-version ({}, {})".format(
@@ -101,12 +95,6 @@ class Job():
     def handle_queue(self, url):
         """Handle Message.Queue"""
 
-    def handle_headers(self, headers):
-        """Handle Message.Headers"""
-
-    def handle_cookies(self, cookies):
-        """Handle Message.Cookies"""
-
     def update_kwdict(self, kwdict):
         """Add 'category' and 'subcategory' keywords"""
         kwdict["category"] = self.extractor.category
|
||||
except exception.NoExtractorError:
|
||||
self._write_unsupported(url)
|
||||
|
||||
def handle_headers(self, headers):
|
||||
self.get_downloader("http:").set_headers(headers)
|
||||
|
||||
def handle_cookies(self, cookies):
|
||||
self.get_downloader("http:").set_cookies(cookies)
|
||||
|
||||
def get_downloader(self, url):
|
||||
"""Return, and possibly construct, a downloader suitable for 'url'"""
|
||||
pos = url.find(":")
|
||||
@ -160,7 +142,7 @@ class DownloadJob(Job):
|
||||
instance = self.downloaders.get(scheme)
|
||||
if instance is None:
|
||||
klass = downloader.find(scheme)
|
||||
instance = klass(self.out)
|
||||
instance = klass(self.extractor.session, self.out)
|
||||
self.downloaders[scheme] = instance
|
||||
return instance
|
||||
|
||||
@@ -300,13 +282,10 @@ class DataJob(Job):
         # collect data
         try:
             for msg in self.extractor:
-                if msg[0] in (Message.Headers, Message.Cookies):
-                    copy = (msg[0], dict(msg[1]))
-                else:
-                    copy = [
-                        part.copy() if hasattr(part, "copy") else part
-                        for part in msg
-                    ]
+                copy = [
+                    part.copy() if hasattr(part, "copy") else part
+                    for part in msg
+                ]
                 self.data.append(copy)
         except Exception as exc:
             self.data.append((exc.__class__.__name__, str(exc)))
@@ -125,6 +125,11 @@ def build_parser():
         metavar="SECONDS", action=ConfigAction, dest="timeout", type=float,
         help="Timeout for HTTP connections (default: no timeout)",
     )
+    parser.add_argument(
+        "--cookies",
+        metavar="FILE", action=ConfigAction, dest="cookies",
+        help="File to load additional cookies from",
+    )
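With the option above in place, a typical invocation might look like ``gallery-dl --cookies ~/cookies.txt <gallery-url>`` (path and URL are placeholders). Since the argument uses ConfigAction with dest="cookies", it presumably sets the same ``cookies`` configuration value as the ``extractor.*.cookies`` option documented earlier.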
     parser.add_argument(
         "-c", "--config",
         metavar="CFG", dest="cfgfiles", action="append",
@@ -242,7 +242,7 @@ class OAuthSession():
         self.session = session
         self.consumer_secret = consumer_secret
         self.token_secret = token_secret or ""
-        self.params = session.params
+        self.params = {}
         self.params["oauth_consumer_key"] = consumer_key
         self.params["oauth_token"] = token
         self.params["oauth_signature_method"] = "HMAC-SHA1"
test/test_cookies.py (new file, 123 lines)
@@ -0,0 +1,123 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

# Copyright 2017 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

import unittest
from unittest import mock

import logging
import tempfile
import http.cookiejar
from os.path import join

import gallery_dl.config as config
import gallery_dl.extractor as extractor
from gallery_dl.extractor.message import Message

CKEY = ("cookies",)


class TestCookiejar(unittest.TestCase):

    @classmethod
    def setUpClass(cls):
        cls.path = tempfile.TemporaryDirectory()

        cls.cookiefile = join(cls.path.name, "cookies.txt")
        with open(cls.cookiefile, "w") as file:
            file.write("""# HTTP Cookie File
.example.org\tTRUE\t/\tFALSE\t253402210800\tNAME\tVALUE
""")

        cls.invalid_cookiefile = join(cls.path.name, "invalid.txt")
        with open(cls.invalid_cookiefile, "w") as file:
            file.write("""# asd
.example.org\tTRUE\t/\tFALSE\t253402210800\tNAME\tVALUE
""")

    @classmethod
    def tearDownClass(cls):
        cls.path.cleanup()

    def test_cookiefile(self):
        config.set(CKEY, self.cookiefile)

        cookies = extractor.find("test:").session.cookies
        self.assertEqual(len(cookies), 1)

        cookie = next(iter(cookies))
        self.assertEqual(cookie.domain, ".example.org")
        self.assertEqual(cookie.path, "/")
        self.assertEqual(cookie.name, "NAME")
        self.assertEqual(cookie.value, "VALUE")

    def test_invalid_cookiefile(self):
        self._test_warning(self.invalid_cookiefile, http.cookiejar.LoadError)

    def test_invalid_filename(self):
        self._test_warning(join(self.path.name, "nothing"), FileNotFoundError)

    def _test_warning(self, filename, exc):
        config.set(CKEY, filename)
        log = logging.getLogger("test")
        with mock.patch.object(log, "warning") as mock_warning:
            cookies = extractor.find("test:").session.cookies
        self.assertEqual(len(cookies), 0)
        mock_warning.assert_called_once()
        self.assertEqual(mock_warning.call_args[0][0], "cookies: %s")
        self.assertIsInstance(mock_warning.call_args[0][1], exc)


class TestCookiedict(unittest.TestCase):

    def setUp(self):
        self.cdict = {"NAME1": "VALUE1", "NAME2": "VALUE2"}
        config.set(CKEY, self.cdict)

    def test_dict(self):
        cookies = extractor.find("test:").session.cookies
        self.assertEqual(len(cookies), len(self.cdict))
        self.assertEqual(sorted(cookies.keys()), sorted(self.cdict.keys()))
        self.assertEqual(sorted(cookies.values()), sorted(self.cdict.values()))

    def test_domain(self):
        for category in ["batoto", "exhentai", "nijie", "seiga"]:
            extr = _get_extractor(category)
            cookies = extr.session.cookies
            for key in self.cdict.keys():
                self.assertTrue(key in cookies)
            for c in cookies:
                self.assertEqual(c.domain, extr.cookiedomain)


class TestCookieLogin(unittest.TestCase):

    def test_cookie_login(self):
        extr_cookies = {
            "batoto": ("member_id", "pass_hash"),
            "exhentai": ("ipb_member_id", "ipb_pass_hash"),
            "nijie": ("nemail", "nlogin"),
            "seiga": ("user_session",),
        }
        for category, cookienames in extr_cookies.items():
            cookies = {name: "value" for name in cookienames}
            config.set(CKEY, cookies)
            extr = _get_extractor(category)
            with mock.patch.object(extr, "_login_impl") as mock_login:
                extr.login()
                mock_login.assert_not_called()


def _get_extractor(category):
    for msg in extractor.find("test:" + category):
        if msg[0] == Message.Queue:
            return extractor.find(msg[1])


if __name__ == "__main__":
    unittest.main()