1
0
mirror of https://github.com/mikf/gallery-dl.git synced 2024-11-22 10:42:34 +01:00

Merge branch 'cookies'

This commit is contained in:
Mike Fährmann 2017-07-25 14:04:53 +02:00
commit f08af03845
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88
17 changed files with 210 additions and 77 deletions

View File

@ -224,7 +224,7 @@ Description The username to use when attempting to log in to another site.
``seiga`` modules and optional (but strongly recommended) for
``batoto`` and ``exhentai``.
This value can also be given via the ``-u/--username``
This value can also be set via the ``-u/--username``
command-line option or by using a |.netrc|_ file.
(see Authentication_)
=========== =====
@ -239,6 +239,20 @@ Description The password belonging to the username.
=========== =====
extractor.*.cookies
-------------------
=========== =====
Type ``string`` or ``object``
Default ``null``
Description Source to read additional cookies from.
* If this is a ``string``, it specifies the path of a
Mozilla/Netscape format cookies.txt file.
* If this is an ``object``, its key-value pairs, which should both
be ``strings``, will be used as cookie-names and -values.
=========== =====
Extractor-specific Options
==========================

View File

@ -6,10 +6,9 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Downloader module for http:// and https:// urls"""
"""Downloader module for http:// and https:// URLs"""
import time
import requests
import requests.exceptions as rexcepts
import mimetypes
import logging
@ -24,9 +23,9 @@ class Downloader(BasicDownloader):
retries = config.interpolate(("downloader", "http", "retries",), 5)
timeout = config.interpolate(("downloader", "http", "timeout",), None)
def __init__(self, output):
def __init__(self, session, output):
BasicDownloader.__init__(self)
self.session = requests.session()
self.session = session
self.out = output
def download_impl(self, url, pathfmt):
@ -96,17 +95,3 @@ class Downloader(BasicDownloader):
# output for unrecoverable errors
self.out.error(pathfmt.path, msg, tries, 0)
def set_headers(self, headers):
"""Set headers for http requests"""
self.set_dict(self.session.headers, headers)
def set_cookies(self, cookies):
"""Set cookies for http requests"""
self.set_dict(self.session.cookies, cookies)
@staticmethod
def set_dict(dest, src):
"""Copy the contents of dictionary 'src' to 'dest'"""
dest.clear()
dest.update(src)

View File

@ -19,15 +19,19 @@ class BatotoExtractor():
category = "batoto"
scheme = "https"
root = "https://bato.to"
cookienames = ("member_id", "pass_hash")
cookiedomain = ".bato.to"
def login(self):
"""Login and set necessary cookies"""
if self._check_cookies(self.cookienames):
return
username, password = self.auth_info()
if username:
cookies = self._login_impl(username, password)
for key, value in cookies.items():
self.session.cookies.set(
key, value, domain=".bato.to", path="/")
key, value, domain=self.cookiedomain)
@cache(maxage=7*24*60*60, keyarg=1)
def _login_impl(self, username, password):
@ -53,7 +57,7 @@ class BatotoExtractor():
method="POST", params=params, data=data)
if "Sign In - " in response.text:
raise exception.AuthenticationError()
return {c: response.cookies[c] for c in ("member_id", "pass_hash")}
return {c: response.cookies[c] for c in self.cookienames}
class BatotoMangaExtractor(BatotoExtractor, MangaExtractor):

View File

@ -27,13 +27,13 @@ class BooruExtractor(Extractor):
def __init__(self):
Extractor.__init__(self)
self.session.headers.update(self.headers)
self.params = {"limit": 50}
self.setup()
def items(self):
yield Message.Version, 1
yield Message.Directory, self.get_job_metadata()
yield Message.Headers, self.headers
for data in self.items_impl():
try:
url = self.get_file_url(data)

View File

@ -9,12 +9,14 @@
"""Common classes and constants used by extractor modules."""
import os
import re
import time
import netrc
import queue
import logging
import requests
import threading
import http.cookiejar
from .message import Message
from .. import config
@ -25,11 +27,26 @@ class Extractor():
subcategory = ""
directory_fmt = ["{category}"]
filename_fmt = "{filename}"
cookiedomain = ""
def __init__(self):
self.session = requests.Session()
self.log = logging.getLogger(self.category)
cookies = self.config("cookies")
if cookies:
if isinstance(cookies, dict):
setcookie = self.session.cookies.set
for name, value in cookies.items():
setcookie(name, value, domain=self.cookiedomain)
else:
try:
cj = http.cookiejar.MozillaCookieJar()
cj.load(cookies)
self.session.cookies.update(cj)
except OSError as exc:
self.log.warning("cookies: %s", exc)
def __iter__(self):
return self.items()
@ -67,6 +84,17 @@ class Extractor():
response.encoding = encoding
return response
def _check_cookies(self, cookienames, domain=None):
"""Return True if all 'cookienames' exist in the current session"""
if not domain and self.cookiedomain:
domain = self.cookiedomain
for name in cookienames:
try:
self.session.cookies._find(name, domain)
except KeyError:
return False
return True
class AsynchronousExtractor(Extractor):
@ -159,6 +187,13 @@ def safe_request(session, url, method="GET", *args, **kwargs):
return r
# Reduce strictness of the expected magic string in cookie jar files.
# (This allows the use of Wget-generated cookiejar files without modification)
http.cookiejar.MozillaCookieJar.magic_re = re.compile(
"#( Netscape)? HTTP Cookie File", re.IGNORECASE)
# The first import of requests happens inside this file.
# If we are running on Windows and the certificate file expected by requests
# is missing (which happens in a standalone executable from py2exe), the

View File

@ -36,6 +36,8 @@ class ExhentaiGalleryExtractor(Extractor):
}),
]
root = "https://exhentai.org"
cookienames = ("ipb_member_id", "ipb_pass_hash")
cookiedomain = ".exhentai.org"
def __init__(self, match):
Extractor.__init__(self)
@ -50,9 +52,8 @@ class ExhentaiGalleryExtractor(Extractor):
def items(self):
self.login()
self.setup_headers()
yield Message.Version, 1
yield Message.Headers, self.setup_headers()
yield Message.Cookies, self.session.cookies
url = "{}/g/{}/{}/".format(self.root, self.gid, self.token)
response = self.session.get(url)
@ -76,14 +77,9 @@ class ExhentaiGalleryExtractor(Extractor):
"""Initialize headers"""
self.session.headers.update({
"User-Agent": "Mozilla/5.0",
"Accept": "text/html,application/xhtml+xml,"
"application/xml;q=0.9,*/*;q=0.8",
"Accept-Language": "en-US,en;q=0.5",
"Referer": self.root + "/",
})
headers = self.session.headers.copy()
headers["Accept"] = "image/png,image/*;q=0.8,*/*;q=0.5"
return headers
def get_job_metadata(self, page):
"""Collect metadata for extractor-job"""
@ -182,6 +178,8 @@ class ExhentaiGalleryExtractor(Extractor):
def login(self):
"""Login and set necessary cookies"""
if self._check_cookies(self.cookienames):
return
username, password = self.auth_info()
if not username:
self.log.info("no username given; using e-hentai.org")
@ -191,21 +189,12 @@ class ExhentaiGalleryExtractor(Extractor):
cookies = self._login_impl(username, password)
for key, value in cookies.items():
self.session.cookies.set(
key, value, domain=".exhentai.org", path="/")
key, value, domain=self.cookiedomain)
@cache(maxage=90*24*60*60, keyarg=1)
def _login_impl(self, username, password):
"""Actual login implementation"""
self.log.info("Logging in as %s", username)
cnames = ["ipb_member_id", "ipb_pass_hash"]
try:
cookies = self.config("cookies")
if isinstance(cookies, dict) and all(c in cookies for c in cnames):
return cookies
except TypeError:
pass
url = "https://forums.e-hentai.org/index.php?act=Login&CODE=01"
params = {
"CookieDate": "1",
@ -221,4 +210,4 @@ class ExhentaiGalleryExtractor(Extractor):
if "You are now logged in as:" not in response.text:
raise exception.AuthenticationError()
return {c: response.cookies[c] for c in cnames}
return {c: response.cookies[c] for c in self.cookienames}

View File

@ -27,7 +27,6 @@ class ImgchiliExtractor(Extractor):
page = self.request(self.url, encoding="utf-8").text
data = self.get_job_metadata(page)
yield Message.Version, 1
yield Message.Headers, self.session.headers
yield Message.Directory, data
for url, image in self.get_images(page):
data.update(image)

View File

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2015 Mike Fährmann
# Copyright 2015-2017 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -12,6 +12,4 @@ class Message():
Version = 1
Directory = 2
Url = 3
Headers = 4
Cookies = 5
Queue = 6

View File

@ -18,6 +18,7 @@ class NijieExtractor(AsynchronousExtractor):
category = "nijie"
directory_fmt = ["{category}", "{artist-id}"]
filename_fmt = "{category}_{artist-id}_{image-id}_p{index:>02}.{extension}"
cookiedomain = "nijie.info"
popup_url = "https://nijie.info/view_popup.php?id="
def __init__(self):
@ -62,6 +63,8 @@ class NijieExtractor(AsynchronousExtractor):
def login(self):
"""Login and obtain session cookie"""
if self._check_cookies(("nemail", "nlogin")):
return
username, password = self.auth_info()
self.session.cookies = self._login_impl(username, password)

View File

@ -32,8 +32,6 @@ class PixivExtractor(Extractor):
metadata = self.get_metadata()
yield Message.Version, 1
yield Message.Headers, self.session.headers
yield Message.Cookies, self.session.cookies
yield Message.Directory, metadata
for work in self.works():

View File

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2014, 2015 Mike Fährmann
# Copyright 2014-2017 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -35,7 +35,6 @@ class SankakuTagExtractor(AsynchronousExtractor):
def items(self):
data = self.get_job_metadata()
yield Message.Version, 1
yield Message.Headers, self.session.headers
yield Message.Directory, data
for image in self.get_images():
image.update(data)

View File

@ -17,6 +17,7 @@ from xml.etree import ElementTree
class SeigaExtractor(Extractor):
"""Base class for seiga extractors"""
category = "seiga"
cookiedomain = ".nicovideo.jp"
def items(self):
self.login()
@ -47,6 +48,8 @@ class SeigaExtractor(Extractor):
def login(self):
"""Login and set necessary cookies"""
if self._check_cookies(("user_session",)):
return
username, password = self.auth_info()
self.session.cookies = self._login_impl(username, password)

View File

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2016 Mike Fährmann
# Copyright 2016-2017 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -38,7 +38,6 @@ class SenmangaChapterExtractor(Extractor):
data = self.get_job_metadata()
yield Message.Version, 1
yield Message.Directory, data
yield Message.Headers, self.session.headers
for i in range(int(data["count"])):
page = str(i+1)
data["page"] = page

View File

@ -79,12 +79,6 @@ class Job():
if self.pred_queue:
self.handle_queue(msg[1])
elif msg[0] == Message.Headers:
self.handle_headers(msg[1])
elif msg[0] == Message.Cookies:
self.handle_cookies(msg[1])
elif msg[0] == Message.Version:
if msg[1] != 1:
raise "unsupported message-version ({}, {})".format(
@ -101,12 +95,6 @@ class Job():
def handle_queue(self, url):
"""Handle Message.Queue"""
def handle_headers(self, headers):
"""Handle Message.Headers"""
def handle_cookies(self, cookies):
"""Handle Message.Cookies"""
def update_kwdict(self, kwdict):
"""Add 'category' and 'subcategory' keywords"""
kwdict["category"] = self.extractor.category
@ -145,12 +133,6 @@ class DownloadJob(Job):
except exception.NoExtractorError:
self._write_unsupported(url)
def handle_headers(self, headers):
self.get_downloader("http:").set_headers(headers)
def handle_cookies(self, cookies):
self.get_downloader("http:").set_cookies(cookies)
def get_downloader(self, url):
"""Return, and possibly construct, a downloader suitable for 'url'"""
pos = url.find(":")
@ -160,7 +142,7 @@ class DownloadJob(Job):
instance = self.downloaders.get(scheme)
if instance is None:
klass = downloader.find(scheme)
instance = klass(self.out)
instance = klass(self.extractor.session, self.out)
self.downloaders[scheme] = instance
return instance
@ -300,13 +282,10 @@ class DataJob(Job):
# collect data
try:
for msg in self.extractor:
if msg[0] in (Message.Headers, Message.Cookies):
copy = (msg[0], dict(msg[1]))
else:
copy = [
part.copy() if hasattr(part, "copy") else part
for part in msg
]
copy = [
part.copy() if hasattr(part, "copy") else part
for part in msg
]
self.data.append(copy)
except Exception as exc:
self.data.append((exc.__class__.__name__, str(exc)))

View File

@ -125,6 +125,11 @@ def build_parser():
metavar="SECONDS", action=ConfigAction, dest="timeout", type=float,
help="Timeout for HTTP connections (defaut: no timeout)",
)
parser.add_argument(
"--cookies",
metavar="FILE", action=ConfigAction, dest="cookies",
help="File to load additional cookies from",
)
parser.add_argument(
"-c", "--config",
metavar="CFG", dest="cfgfiles", action="append",

View File

@ -242,7 +242,7 @@ class OAuthSession():
self.session = session
self.consumer_secret = consumer_secret
self.token_secret = token_secret or ""
self.params = session.params
self.params = {}
self.params["oauth_consumer_key"] = consumer_key
self.params["oauth_token"] = token
self.params["oauth_signature_method"] = "HMAC-SHA1"

123
test/test_cookies.py Normal file
View File

@ -0,0 +1,123 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Copyright 2017 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
import unittest
from unittest import mock
import logging
import tempfile
import http.cookiejar
from os.path import join
import gallery_dl.config as config
import gallery_dl.extractor as extractor
from gallery_dl.extractor.message import Message
CKEY = ("cookies",)
class TestCookiejar(unittest.TestCase):
    """Tests for loading cookies from a cookies.txt file."""

    @classmethod
    def setUpClass(cls):
        """Create a temporary directory holding two cookie files.

        'cookies.txt' is expected to load successfully; 'invalid.txt'
        contains the same cookie line but starts with "# asd" and is
        expected to be rejected (see test_invalid_cookiefile).
        """
        cls.path = tempfile.TemporaryDirectory()

        cls.cookiefile = join(cls.path.name, "cookies.txt")
        with open(cls.cookiefile, "w") as file:
            file.write("""# HTTP Cookie File
.example.org\tTRUE\t/\tFALSE\t253402210800\tNAME\tVALUE
""")

        cls.invalid_cookiefile = join(cls.path.name, "invalid.txt")
        with open(cls.invalid_cookiefile, "w") as file:
            file.write("""# asd
.example.org\tTRUE\t/\tFALSE\t253402210800\tNAME\tVALUE
""")

    @classmethod
    def tearDownClass(cls):
        """Remove the temporary directory and the files inside it."""
        cls.path.cleanup()

    def test_cookiefile(self):
        """A valid cookie file yields exactly the one cookie it contains."""
        config.set(CKEY, self.cookiefile)

        cookies = extractor.find("test:").session.cookies
        self.assertEqual(len(cookies), 1)

        cookie = next(iter(cookies))
        self.assertEqual(cookie.domain, ".example.org")
        self.assertEqual(cookie.path, "/")
        self.assertEqual(cookie.name, "NAME")
        self.assertEqual(cookie.value, "VALUE")

    def test_invalid_cookiefile(self):
        """A file with an unrecognized header logs a LoadError warning."""
        self._test_warning(self.invalid_cookiefile, http.cookiejar.LoadError)

    def test_invalid_filename(self):
        """A nonexistent path logs a FileNotFoundError warning."""
        self._test_warning(join(self.path.name, "nothing"), FileNotFoundError)

    def _test_warning(self, filename, exc):
        """Check that loading 'filename' adds no cookies and warns once.

        The single warning must use the format string "cookies: %s" and
        carry an exception instance of type 'exc' as its argument.
        """
        config.set(CKEY, filename)
        log = logging.getLogger("test")
        with mock.patch.object(log, "warning") as mock_warning:
            cookies = extractor.find("test:").session.cookies
            self.assertEqual(len(cookies), 0)
            # Mock.assert_called_once() is only available on Python 3.6+;
            # comparing call_count works on every version.
            self.assertEqual(mock_warning.call_count, 1)
            self.assertEqual(mock_warning.call_args[0][0], "cookies: %s")
            self.assertIsInstance(mock_warning.call_args[0][1], exc)
class TestCookiedict(unittest.TestCase):
    """Tests for cookies configured as a name-value mapping."""

    def setUp(self):
        """Register a two-entry cookie dict under the 'cookies' option."""
        self.cdict = {"NAME1": "VALUE1", "NAME2": "VALUE2"}
        config.set(CKEY, self.cdict)

    def test_dict(self):
        """The session holds exactly the configured names and values."""
        expected = self.cdict
        jar = extractor.find("test:").session.cookies

        self.assertEqual(len(jar), len(expected))
        self.assertEqual(sorted(jar.keys()), sorted(expected.keys()))
        self.assertEqual(sorted(jar.values()), sorted(expected.values()))

    def test_domain(self):
        """Configured cookies are bound to each extractor's cookiedomain."""
        for category in ["batoto", "exhentai", "nijie", "seiga"]:
            extr = _get_extractor(category)
            jar = extr.session.cookies

            # every configured name must be present in the session ...
            for name in self.cdict:
                self.assertTrue(name in jar)
            # ... and every cookie must carry the extractor's domain
            for cookie in jar:
                self.assertEqual(cookie.domain, extr.cookiedomain)
class TestCookieLogin(unittest.TestCase):
    """Tests that preset login cookies make an actual login unnecessary."""

    def test_cookie_login(self):
        """login() must not call _login_impl() when all cookies are set."""
        login_cookies = {
            "batoto": ("member_id", "pass_hash"),
            "exhentai": ("ipb_member_id", "ipb_pass_hash"),
            "nijie": ("nemail", "nlogin"),
            "seiga": ("user_session",),
        }
        for category in login_cookies:
            # give every required cookie name the same placeholder value
            preset = dict.fromkeys(login_cookies[category], "value")
            config.set(CKEY, preset)

            extr = _get_extractor(category)
            with mock.patch.object(extr, "_login_impl") as mock_login:
                extr.login()
                mock_login.assert_not_called()
def _get_extractor(category):
    """Return the extractor for the first URL queued by "test:<category>".

    Iterates over the messages of the "test:" extractor for 'category'
    and resolves the URL of the first Message.Queue entry.
    Returns None if no such message is produced.
    """
    messages = extractor.find("test:" + category)
    queued = (msg for msg in messages if msg[0] == Message.Queue)
    first = next(queued, None)
    if first is None:
        return None
    return extractor.find(first[1])
# Run the test suite when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()