1
0
mirror of https://github.com/mikf/gallery-dl.git synced 2024-11-22 18:53:21 +01:00

Merge branch 'cookies'

This commit is contained in:
Mike Fährmann 2017-07-25 14:04:53 +02:00
commit f08af03845
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88
17 changed files with 210 additions and 77 deletions

View File

@ -224,7 +224,7 @@ Description The username to use when attempting to log in to another site.
``seiga`` modules and optional (but strongly recommended) for ``seiga`` modules and optional (but strongly recommended) for
``batoto`` and ``exhentai``. ``batoto`` and ``exhentai``.
This value can also be given via the ``-u/--username`` This value can also be set via the ``-u/--username``
command-line option or by using a |.netrc|_ file. command-line option or by using a |.netrc|_ file.
(see Authentication_) (see Authentication_)
=========== ===== =========== =====
@ -239,6 +239,20 @@ Description The password belonging to the username.
=========== ===== =========== =====
extractor.*.cookies
-------------------
=========== =====
Type ``string`` or ``object``
Default ``null``
Description Source to read additional cookies from.
* If this is a ``string``, it specifies the path of a
Mozilla/Netscape format cookies.txt file.
* If this is an ``object``, its key-value pairs, which should both
be ``strings``, will be used as cookie-names and -values.
=========== =====
Extractor-specific Options Extractor-specific Options
========================== ==========================

View File

@ -6,10 +6,9 @@
# it under the terms of the GNU General Public License version 2 as # it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation. # published by the Free Software Foundation.
"""Downloader module for http:// and https:// urls""" """Downloader module for http:// and https:// URLs"""
import time import time
import requests
import requests.exceptions as rexcepts import requests.exceptions as rexcepts
import mimetypes import mimetypes
import logging import logging
@ -24,9 +23,9 @@ class Downloader(BasicDownloader):
retries = config.interpolate(("downloader", "http", "retries",), 5) retries = config.interpolate(("downloader", "http", "retries",), 5)
timeout = config.interpolate(("downloader", "http", "timeout",), None) timeout = config.interpolate(("downloader", "http", "timeout",), None)
def __init__(self, output): def __init__(self, session, output):
BasicDownloader.__init__(self) BasicDownloader.__init__(self)
self.session = requests.session() self.session = session
self.out = output self.out = output
def download_impl(self, url, pathfmt): def download_impl(self, url, pathfmt):
@ -96,17 +95,3 @@ class Downloader(BasicDownloader):
# output for unrecoverable errors # output for unrecoverable errors
self.out.error(pathfmt.path, msg, tries, 0) self.out.error(pathfmt.path, msg, tries, 0)
def set_headers(self, headers):
"""Set headers for http requests"""
self.set_dict(self.session.headers, headers)
def set_cookies(self, cookies):
"""Set cookies for http requests"""
self.set_dict(self.session.cookies, cookies)
@staticmethod
def set_dict(dest, src):
"""Copy the contents of dictionary 'src' to 'dest'"""
dest.clear()
dest.update(src)

View File

@ -19,15 +19,19 @@ class BatotoExtractor():
category = "batoto" category = "batoto"
scheme = "https" scheme = "https"
root = "https://bato.to" root = "https://bato.to"
cookienames = ("member_id", "pass_hash")
cookiedomain = ".bato.to"
def login(self): def login(self):
"""Login and set necessary cookies""" """Login and set necessary cookies"""
if self._check_cookies(self.cookienames):
return
username, password = self.auth_info() username, password = self.auth_info()
if username: if username:
cookies = self._login_impl(username, password) cookies = self._login_impl(username, password)
for key, value in cookies.items(): for key, value in cookies.items():
self.session.cookies.set( self.session.cookies.set(
key, value, domain=".bato.to", path="/") key, value, domain=self.cookiedomain)
@cache(maxage=7*24*60*60, keyarg=1) @cache(maxage=7*24*60*60, keyarg=1)
def _login_impl(self, username, password): def _login_impl(self, username, password):
@ -53,7 +57,7 @@ class BatotoExtractor():
method="POST", params=params, data=data) method="POST", params=params, data=data)
if "Sign In - " in response.text: if "Sign In - " in response.text:
raise exception.AuthenticationError() raise exception.AuthenticationError()
return {c: response.cookies[c] for c in ("member_id", "pass_hash")} return {c: response.cookies[c] for c in self.cookienames}
class BatotoMangaExtractor(BatotoExtractor, MangaExtractor): class BatotoMangaExtractor(BatotoExtractor, MangaExtractor):

View File

@ -27,13 +27,13 @@ class BooruExtractor(Extractor):
def __init__(self): def __init__(self):
Extractor.__init__(self) Extractor.__init__(self)
self.session.headers.update(self.headers)
self.params = {"limit": 50} self.params = {"limit": 50}
self.setup() self.setup()
def items(self): def items(self):
yield Message.Version, 1 yield Message.Version, 1
yield Message.Directory, self.get_job_metadata() yield Message.Directory, self.get_job_metadata()
yield Message.Headers, self.headers
for data in self.items_impl(): for data in self.items_impl():
try: try:
url = self.get_file_url(data) url = self.get_file_url(data)

View File

@ -9,12 +9,14 @@
"""Common classes and constants used by extractor modules.""" """Common classes and constants used by extractor modules."""
import os import os
import re
import time import time
import netrc import netrc
import queue import queue
import logging import logging
import requests import requests
import threading import threading
import http.cookiejar
from .message import Message from .message import Message
from .. import config from .. import config
@ -25,11 +27,26 @@ class Extractor():
subcategory = "" subcategory = ""
directory_fmt = ["{category}"] directory_fmt = ["{category}"]
filename_fmt = "{filename}" filename_fmt = "{filename}"
cookiedomain = ""
def __init__(self): def __init__(self):
self.session = requests.Session() self.session = requests.Session()
self.log = logging.getLogger(self.category) self.log = logging.getLogger(self.category)
cookies = self.config("cookies")
if cookies:
if isinstance(cookies, dict):
setcookie = self.session.cookies.set
for name, value in cookies.items():
setcookie(name, value, domain=self.cookiedomain)
else:
try:
cj = http.cookiejar.MozillaCookieJar()
cj.load(cookies)
self.session.cookies.update(cj)
except OSError as exc:
self.log.warning("cookies: %s", exc)
def __iter__(self): def __iter__(self):
return self.items() return self.items()
@ -67,6 +84,17 @@ class Extractor():
response.encoding = encoding response.encoding = encoding
return response return response
def _check_cookies(self, cookienames, domain=None):
"""Return True if all 'cookienames' exist in the current session"""
if not domain and self.cookiedomain:
domain = self.cookiedomain
for name in cookienames:
try:
self.session.cookies._find(name, domain)
except KeyError:
return False
return True
class AsynchronousExtractor(Extractor): class AsynchronousExtractor(Extractor):
@ -159,6 +187,13 @@ def safe_request(session, url, method="GET", *args, **kwargs):
return r return r
# Reduce strictness of the expected magic string in cookie jar files.
# (This allows the use of Wget-generated cookiejar files without modification)
http.cookiejar.MozillaCookieJar.magic_re = re.compile(
"#( Netscape)? HTTP Cookie File", re.IGNORECASE)
# The first import of requests happens inside this file. # The first import of requests happens inside this file.
# If we are running on Windows and the certificate file expected by requests # If we are running on Windows and the certificate file expected by requests
# is missing (which happens in a standalone executable from py2exe), the # is missing (which happens in a standalone executable from py2exe), the

View File

@ -36,6 +36,8 @@ class ExhentaiGalleryExtractor(Extractor):
}), }),
] ]
root = "https://exhentai.org" root = "https://exhentai.org"
cookienames = ("ipb_member_id", "ipb_pass_hash")
cookiedomain = ".exhentai.org"
def __init__(self, match): def __init__(self, match):
Extractor.__init__(self) Extractor.__init__(self)
@ -50,9 +52,8 @@ class ExhentaiGalleryExtractor(Extractor):
def items(self): def items(self):
self.login() self.login()
self.setup_headers()
yield Message.Version, 1 yield Message.Version, 1
yield Message.Headers, self.setup_headers()
yield Message.Cookies, self.session.cookies
url = "{}/g/{}/{}/".format(self.root, self.gid, self.token) url = "{}/g/{}/{}/".format(self.root, self.gid, self.token)
response = self.session.get(url) response = self.session.get(url)
@ -76,14 +77,9 @@ class ExhentaiGalleryExtractor(Extractor):
"""Initialize headers""" """Initialize headers"""
self.session.headers.update({ self.session.headers.update({
"User-Agent": "Mozilla/5.0", "User-Agent": "Mozilla/5.0",
"Accept": "text/html,application/xhtml+xml,"
"application/xml;q=0.9,*/*;q=0.8",
"Accept-Language": "en-US,en;q=0.5", "Accept-Language": "en-US,en;q=0.5",
"Referer": self.root + "/", "Referer": self.root + "/",
}) })
headers = self.session.headers.copy()
headers["Accept"] = "image/png,image/*;q=0.8,*/*;q=0.5"
return headers
def get_job_metadata(self, page): def get_job_metadata(self, page):
"""Collect metadata for extractor-job""" """Collect metadata for extractor-job"""
@ -182,6 +178,8 @@ class ExhentaiGalleryExtractor(Extractor):
def login(self): def login(self):
"""Login and set necessary cookies""" """Login and set necessary cookies"""
if self._check_cookies(self.cookienames):
return
username, password = self.auth_info() username, password = self.auth_info()
if not username: if not username:
self.log.info("no username given; using e-hentai.org") self.log.info("no username given; using e-hentai.org")
@ -191,21 +189,12 @@ class ExhentaiGalleryExtractor(Extractor):
cookies = self._login_impl(username, password) cookies = self._login_impl(username, password)
for key, value in cookies.items(): for key, value in cookies.items():
self.session.cookies.set( self.session.cookies.set(
key, value, domain=".exhentai.org", path="/") key, value, domain=self.cookiedomain)
@cache(maxage=90*24*60*60, keyarg=1) @cache(maxage=90*24*60*60, keyarg=1)
def _login_impl(self, username, password): def _login_impl(self, username, password):
"""Actual login implementation""" """Actual login implementation"""
self.log.info("Logging in as %s", username) self.log.info("Logging in as %s", username)
cnames = ["ipb_member_id", "ipb_pass_hash"]
try:
cookies = self.config("cookies")
if isinstance(cookies, dict) and all(c in cookies for c in cnames):
return cookies
except TypeError:
pass
url = "https://forums.e-hentai.org/index.php?act=Login&CODE=01" url = "https://forums.e-hentai.org/index.php?act=Login&CODE=01"
params = { params = {
"CookieDate": "1", "CookieDate": "1",
@ -221,4 +210,4 @@ class ExhentaiGalleryExtractor(Extractor):
if "You are now logged in as:" not in response.text: if "You are now logged in as:" not in response.text:
raise exception.AuthenticationError() raise exception.AuthenticationError()
return {c: response.cookies[c] for c in cnames} return {c: response.cookies[c] for c in self.cookienames}

View File

@ -27,7 +27,6 @@ class ImgchiliExtractor(Extractor):
page = self.request(self.url, encoding="utf-8").text page = self.request(self.url, encoding="utf-8").text
data = self.get_job_metadata(page) data = self.get_job_metadata(page)
yield Message.Version, 1 yield Message.Version, 1
yield Message.Headers, self.session.headers
yield Message.Directory, data yield Message.Directory, data
for url, image in self.get_images(page): for url, image in self.get_images(page):
data.update(image) data.update(image)

View File

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright 2015 Mike Fährmann # Copyright 2015-2017 Mike Fährmann
# #
# This program is free software; you can redistribute it and/or modify # This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as # it under the terms of the GNU General Public License version 2 as
@ -12,6 +12,4 @@ class Message():
Version = 1 Version = 1
Directory = 2 Directory = 2
Url = 3 Url = 3
Headers = 4
Cookies = 5
Queue = 6 Queue = 6

View File

@ -18,6 +18,7 @@ class NijieExtractor(AsynchronousExtractor):
category = "nijie" category = "nijie"
directory_fmt = ["{category}", "{artist-id}"] directory_fmt = ["{category}", "{artist-id}"]
filename_fmt = "{category}_{artist-id}_{image-id}_p{index:>02}.{extension}" filename_fmt = "{category}_{artist-id}_{image-id}_p{index:>02}.{extension}"
cookiedomain = "nijie.info"
popup_url = "https://nijie.info/view_popup.php?id=" popup_url = "https://nijie.info/view_popup.php?id="
def __init__(self): def __init__(self):
@ -62,6 +63,8 @@ class NijieExtractor(AsynchronousExtractor):
def login(self): def login(self):
"""Login and obtain session cookie""" """Login and obtain session cookie"""
if self._check_cookies(("nemail", "nlogin")):
return
username, password = self.auth_info() username, password = self.auth_info()
self.session.cookies = self._login_impl(username, password) self.session.cookies = self._login_impl(username, password)

View File

@ -32,8 +32,6 @@ class PixivExtractor(Extractor):
metadata = self.get_metadata() metadata = self.get_metadata()
yield Message.Version, 1 yield Message.Version, 1
yield Message.Headers, self.session.headers
yield Message.Cookies, self.session.cookies
yield Message.Directory, metadata yield Message.Directory, metadata
for work in self.works(): for work in self.works():

View File

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright 2014, 2015 Mike Fährmann # Copyright 2014-2017 Mike Fährmann
# #
# This program is free software; you can redistribute it and/or modify # This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as # it under the terms of the GNU General Public License version 2 as
@ -35,7 +35,6 @@ class SankakuTagExtractor(AsynchronousExtractor):
def items(self): def items(self):
data = self.get_job_metadata() data = self.get_job_metadata()
yield Message.Version, 1 yield Message.Version, 1
yield Message.Headers, self.session.headers
yield Message.Directory, data yield Message.Directory, data
for image in self.get_images(): for image in self.get_images():
image.update(data) image.update(data)

View File

@ -17,6 +17,7 @@ from xml.etree import ElementTree
class SeigaExtractor(Extractor): class SeigaExtractor(Extractor):
"""Base class for seiga extractors""" """Base class for seiga extractors"""
category = "seiga" category = "seiga"
cookiedomain = ".nicovideo.jp"
def items(self): def items(self):
self.login() self.login()
@ -47,6 +48,8 @@ class SeigaExtractor(Extractor):
def login(self): def login(self):
"""Login and set necessary cookies""" """Login and set necessary cookies"""
if self._check_cookies(("user_session",)):
return
username, password = self.auth_info() username, password = self.auth_info()
self.session.cookies = self._login_impl(username, password) self.session.cookies = self._login_impl(username, password)

View File

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright 2016 Mike Fährmann # Copyright 2016-2017 Mike Fährmann
# #
# This program is free software; you can redistribute it and/or modify # This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as # it under the terms of the GNU General Public License version 2 as
@ -38,7 +38,6 @@ class SenmangaChapterExtractor(Extractor):
data = self.get_job_metadata() data = self.get_job_metadata()
yield Message.Version, 1 yield Message.Version, 1
yield Message.Directory, data yield Message.Directory, data
yield Message.Headers, self.session.headers
for i in range(int(data["count"])): for i in range(int(data["count"])):
page = str(i+1) page = str(i+1)
data["page"] = page data["page"] = page

View File

@ -79,12 +79,6 @@ class Job():
if self.pred_queue: if self.pred_queue:
self.handle_queue(msg[1]) self.handle_queue(msg[1])
elif msg[0] == Message.Headers:
self.handle_headers(msg[1])
elif msg[0] == Message.Cookies:
self.handle_cookies(msg[1])
elif msg[0] == Message.Version: elif msg[0] == Message.Version:
if msg[1] != 1: if msg[1] != 1:
raise "unsupported message-version ({}, {})".format( raise "unsupported message-version ({}, {})".format(
@ -101,12 +95,6 @@ class Job():
def handle_queue(self, url): def handle_queue(self, url):
"""Handle Message.Queue""" """Handle Message.Queue"""
def handle_headers(self, headers):
"""Handle Message.Headers"""
def handle_cookies(self, cookies):
"""Handle Message.Cookies"""
def update_kwdict(self, kwdict): def update_kwdict(self, kwdict):
"""Add 'category' and 'subcategory' keywords""" """Add 'category' and 'subcategory' keywords"""
kwdict["category"] = self.extractor.category kwdict["category"] = self.extractor.category
@ -145,12 +133,6 @@ class DownloadJob(Job):
except exception.NoExtractorError: except exception.NoExtractorError:
self._write_unsupported(url) self._write_unsupported(url)
def handle_headers(self, headers):
self.get_downloader("http:").set_headers(headers)
def handle_cookies(self, cookies):
self.get_downloader("http:").set_cookies(cookies)
def get_downloader(self, url): def get_downloader(self, url):
"""Return, and possibly construct, a downloader suitable for 'url'""" """Return, and possibly construct, a downloader suitable for 'url'"""
pos = url.find(":") pos = url.find(":")
@ -160,7 +142,7 @@ class DownloadJob(Job):
instance = self.downloaders.get(scheme) instance = self.downloaders.get(scheme)
if instance is None: if instance is None:
klass = downloader.find(scheme) klass = downloader.find(scheme)
instance = klass(self.out) instance = klass(self.extractor.session, self.out)
self.downloaders[scheme] = instance self.downloaders[scheme] = instance
return instance return instance
@ -300,13 +282,10 @@ class DataJob(Job):
# collect data # collect data
try: try:
for msg in self.extractor: for msg in self.extractor:
if msg[0] in (Message.Headers, Message.Cookies): copy = [
copy = (msg[0], dict(msg[1])) part.copy() if hasattr(part, "copy") else part
else: for part in msg
copy = [ ]
part.copy() if hasattr(part, "copy") else part
for part in msg
]
self.data.append(copy) self.data.append(copy)
except Exception as exc: except Exception as exc:
self.data.append((exc.__class__.__name__, str(exc))) self.data.append((exc.__class__.__name__, str(exc)))

View File

@ -125,6 +125,11 @@ def build_parser():
metavar="SECONDS", action=ConfigAction, dest="timeout", type=float, metavar="SECONDS", action=ConfigAction, dest="timeout", type=float,
help="Timeout for HTTP connections (defaut: no timeout)", help="Timeout for HTTP connections (defaut: no timeout)",
) )
parser.add_argument(
"--cookies",
metavar="FILE", action=ConfigAction, dest="cookies",
help="File to load additional cookies from",
)
parser.add_argument( parser.add_argument(
"-c", "--config", "-c", "--config",
metavar="CFG", dest="cfgfiles", action="append", metavar="CFG", dest="cfgfiles", action="append",

View File

@ -242,7 +242,7 @@ class OAuthSession():
self.session = session self.session = session
self.consumer_secret = consumer_secret self.consumer_secret = consumer_secret
self.token_secret = token_secret or "" self.token_secret = token_secret or ""
self.params = session.params self.params = {}
self.params["oauth_consumer_key"] = consumer_key self.params["oauth_consumer_key"] = consumer_key
self.params["oauth_token"] = token self.params["oauth_token"] = token
self.params["oauth_signature_method"] = "HMAC-SHA1" self.params["oauth_signature_method"] = "HMAC-SHA1"

123
test/test_cookies.py Normal file
View File

@ -0,0 +1,123 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Copyright 2017 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
import unittest
from unittest import mock
import logging
import tempfile
import http.cookiejar
from os.path import join
import gallery_dl.config as config
import gallery_dl.extractor as extractor
from gallery_dl.extractor.message import Message
CKEY = ("cookies",)
class TestCookiejar(unittest.TestCase):
    """Tests for the 'cookies' config value when it is a file path.

    Each test sets the 'cookies' config value to a path and then inspects
    the session cookies of a freshly created "test:" extractor.
    """

    @classmethod
    def setUpClass(cls):
        """Create a temporary directory holding the cookie files under test.

        'cookiefile' starts with an "# HTTP Cookie File" header line and
        contains a single cookie; 'invalid_cookiefile' has the same cookie
        line but an unrecognized header ("# asd").
        """
        cls.path = tempfile.TemporaryDirectory()

        cls.cookiefile = join(cls.path.name, "cookies.txt")
        with open(cls.cookiefile, "w") as file:
            file.write("""# HTTP Cookie File
.example.org\tTRUE\t/\tFALSE\t253402210800\tNAME\tVALUE
""")

        cls.invalid_cookiefile = join(cls.path.name, "invalid.txt")
        with open(cls.invalid_cookiefile, "w") as file:
            file.write("""# asd
.example.org\tTRUE\t/\tFALSE\t253402210800\tNAME\tVALUE
""")

    @classmethod
    def tearDownClass(cls):
        """Remove the temporary directory and the files inside it."""
        cls.path.cleanup()

    def test_cookiefile(self):
        """A valid cookie file yields exactly the one cookie it contains."""
        config.set(CKEY, self.cookiefile)

        cookies = extractor.find("test:").session.cookies
        self.assertEqual(len(cookies), 1)

        cookie = next(iter(cookies))
        self.assertEqual(cookie.domain, ".example.org")
        self.assertEqual(cookie.path, "/")
        self.assertEqual(cookie.name, "NAME")
        self.assertEqual(cookie.value, "VALUE")

    def test_invalid_cookiefile(self):
        """A file with an unrecognized header produces a LoadError warning."""
        self._test_warning(self.invalid_cookiefile, http.cookiejar.LoadError)

    def test_invalid_filename(self):
        """A nonexistent path produces a FileNotFoundError warning."""
        self._test_warning(join(self.path.name, "nothing"), FileNotFoundError)

    def _test_warning(self, filename, exc):
        """Check that loading 'filename' logs one warning and sets no cookies.

        The warning must be logged on the "test" logger with the format
        string "cookies: %s" and an instance of 'exc' as its argument.
        """
        config.set(CKEY, filename)
        log = logging.getLogger("test")
        with mock.patch.object(log, "warning") as mock_warning:
            cookies = extractor.find("test:").session.cookies
            self.assertEqual(len(cookies), 0)
            # Compare call_count instead of using assert_called_once():
            # that helper only exists since Python 3.6; on 3.5 it raises
            # AttributeError and on older versions it is a plain
            # auto-created mock attribute that never fails.
            self.assertEqual(mock_warning.call_count, 1)
            self.assertEqual(mock_warning.call_args[0][0], "cookies: %s")
            self.assertIsInstance(mock_warning.call_args[0][1], exc)
class TestCookiedict(unittest.TestCase):
    """Tests for the 'cookies' config value when it is a dictionary."""

    def setUp(self):
        """Set the 'cookies' config value to a dict of two name-value pairs."""
        self.cdict = {"NAME1": "VALUE1", "NAME2": "VALUE2"}
        config.set(CKEY, self.cdict)

    def test_dict(self):
        """All configured names and values end up in the session cookies."""
        cookies = extractor.find("test:").session.cookies
        self.assertEqual(len(cookies), len(self.cdict))
        self.assertEqual(sorted(cookies.keys()), sorted(self.cdict))
        self.assertEqual(sorted(cookies.values()), sorted(self.cdict.values()))

    def test_domain(self):
        """Configured cookies are stored under the extractor's cookiedomain."""
        for category in ["batoto", "exhentai", "nijie", "seiga"]:
            extr = _get_extractor(category)
            cookies = extr.session.cookies
            for key in self.cdict:
                # assertIn reports the missing key and the container on
                # failure, unlike assertTrue(key in cookies)
                self.assertIn(key, cookies)
            for c in cookies:
                self.assertEqual(c.domain, extr.cookiedomain)
class TestCookieLogin(unittest.TestCase):
    """Tests that configured login cookies make an actual login unnecessary."""

    def test_cookie_login(self):
        """login() must not call _login_impl() when all cookies are set.

        For every listed category, the 'cookies' config value is set to a
        dict containing each of that category's login cookie names before
        the extractor is created.
        """
        extr_cookies = {
            "batoto": ("member_id", "pass_hash"),
            "exhentai": ("ipb_member_id", "ipb_pass_hash"),
            "nijie": ("nemail", "nlogin"),
            "seiga": ("user_session",),
        }
        for category, cookienames in extr_cookies.items():
            cookies = {name: "value" for name in cookienames}
            config.set(CKEY, cookies)
            extr = _get_extractor(category)
            with mock.patch.object(extr, "_login_impl") as mock_login:
                extr.login()
                # Compare call_count instead of using assert_not_called():
                # that helper only exists since Python 3.5; on older
                # versions it is a plain auto-created mock attribute and
                # the check would pass without verifying anything.
                self.assertEqual(mock_login.call_count, 0)
def _get_extractor(category):
    """Return an extractor for the first URL queued by "test:<category>".

    Iterates over the messages of the extractor found for
    "test:" + category and passes the payload of the first Message.Queue
    entry to extractor.find(). Returns None if no such message is seen.
    """
    test_url = "test:" + category
    for message in extractor.find(test_url):
        if message[0] != Message.Queue:
            continue
        return extractor.find(message[1])
    return None
if __name__ == "__main__":
unittest.main()