gallery-dl/gallery_dl/extractor/exhentai.py

# -*- coding: utf-8 -*-

# Copyright 2014-2017 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

"""Extract images from galleries at https://exhentai.org/"""

from .common import Extractor, Message
from .. import text, util, exception
from ..cache import cache
import time
import random
import requests


class ExhentaiGalleryExtractor(Extractor):
    """Extractor for image galleries from exhentai.org

    Config options read by this extractor (see __init__):
      original -- prefer the original ("fullimg.php") image URL when one
                  is offered (default: True)
      wait-min -- lower bound, in seconds, of the random delay inserted
                  before page/API requests (default: 3)
      wait-max -- upper bound of that delay (default: 6)
    """
    category = "exhentai"
    subcategory = "gallery"
    directory_fmt = ["{category}", "{gallery_id}"]
    filename_fmt = "{gallery_id}_{num:>04}_{image_token}_{name}.{extension}"
    # capture groups: (site prefix, gallery id, gallery token)
    pattern = [r"(?:https?://)?(g\.e-|e-|ex)hentai\.org/g/(\d+)/([\da-f]{10})"]
    test = [
        ("https://exhentai.org/g/960460/4f0e369d82/", {
            "keyword": "173277161e28162dcc755d2e7a88e6cd750f2477",
            "content": "493d759de534355c9f55f8e365565b62411de146",
        }),
        ("https://exhentai.org/g/960461/4f0e369d82/", {
            "exception": exception.NotFoundError,
        }),
        ("http://exhentai.org/g/962698/7f02358e00/", {
            "exception": exception.AuthorizationError,
        }),
    ]
    # base URL for all requests; login() switches it to e-hentai.org
    # when no username is configured
    root = "https://exhentai.org"
    # cookies whose presence lets login() skip the actual login request
    cookienames = ("ipb_member_id", "ipb_pass_hash")
    cookiedomain = ".exhentai.org"

    def __init__(self, match):
        """Initialize extractor state from a 'pattern' regex match"""
        Extractor.__init__(self)
        # keys ("start", "show", "next") scraped by image_from_page()
        # and consumed by images_from_api()
        self.key = {}
        # number of images in the gallery; set in items()
        self.count = 0
        self.version, self.gid, self.token = match.groups()
        self.gid = util.safe_int(self.gid)
        self.original = self.config("original", True)
        self.wait_min = self.config("wait-min", 3)
        self.wait_max = self.config("wait-max", 6)
        # keep the interval well-formed if the user swapped the bounds
        if self.wait_max < self.wait_min:
            self.wait_max = self.wait_min
        self.session.headers["Referer"] = self.root + "/"

    def items(self):
        """Yield the version, directory and url messages for this gallery

        Raises:
            exception.AuthorizationError: the gallery is reported as
                "Gallery Not Available" or a sadpanda image was returned
            exception.NotFoundError: the gallery key/id is not valid
        """
        self.login()
        yield Message.Version, 1

        url = "{}/g/{}/{}/".format(self.root, self.gid, self.token)
        # fatal=False: error responses are inspected below instead of
        # being turned into an exception by self.request()
        response = self.request(url, fatal=False)
        page = response.text

        if response.status_code == 404 and "Gallery Not Available" in page:
            raise exception.AuthorizationError()
        if self._is_sadpanda(response):
            self.log.info("sadpanda.jpg")
            raise exception.AuthorizationError()
        if page.startswith(("Key missing", "Gallery not found")):
            raise exception.NotFoundError("gallery")

        data = self.get_job_metadata(page)
        self.count = data["count"]
        yield Message.Directory, data

        for url, image in self.get_images(page):
            # NOTE: the same dict object is updated and yielded for
            # every image
            data.update(image)
            if "/fullimg.php" in url:
                # the name/extension were derived from the resampled
                # image url; presumably an empty extension lets the
                # downloader pick one from the response -- TODO confirm
                data["extension"] = ""
                self.wait(1.5)
            yield Message.Url, url, data

    def get_job_metadata(self, page):
        """Collect metadata for extractor-job

        Returns a dict with the gallery id and token plus the values
        scraped from the gallery 'page' (title, title_jp, date, language,
        size, size_units, count) and the derived 'lang' entry.
        """
        data = {
            "gallery_id"   : self.gid,
            "gallery_token": self.token,
        }
        # extracted values are stored directly in 'data'
        text.extract_all(page, (
            ("title"     , '<h1 id="gn">', '</h1>'),
            ("title_jp"  , '<h1 id="gj">', '</h1>'),
            ("date"      , '>Posted:</td><td class="gdt2">', '</td>'),
            ("language"  , '>Language:</td><td class="gdt2">', ' '),
            ("size"      , '>File Size:</td><td class="gdt2">', ' '),
            ("size_units", '', '<'),
            ("count"     , '>Length:</td><td class="gdt2">', ' '),
        ), values=data)
        data["lang"] = util.language_to_code(data["language"])
        data["title"] = text.unescape(data["title"])
        data["title_jp"] = text.unescape(data["title_jp"])
        data["count"] = util.safe_int(data["count"])
        return data

    def get_images(self, page):
        """Collect url and metadata for all images in this gallery

        The first image is scraped from its own webpage; this also
        provides the keys needed to request all further images via API.

        Raises:
            exception.NotFoundError: 'page' has no link to a first image
        """
        part = text.extract(page, 'hentai.org/s/', '"')[0]
        if not part:
            # fail with a meaningful error instead of breaking inside
            # the string concatenation below
            raise exception.NotFoundError("image")
        yield self.image_from_page(self.root + "/s/" + part)
        yield from self.images_from_api()

    def image_from_page(self, url):
        """Get image url and data from webpage

        Stores the scraped start-, show- and next-key in self.key and
        returns an (image-url, metadata) tuple for image number 1.
        """
        self.wait()
        page = self.request(url).text
        data = text.extract_all(page, (
            (None      , '<div id="i3"><a onclick="return load_image(', ''),
            ("nextkey" , "'", "'"),
            ("url"     , '<img id="img" src="', '"'),
            ("origurl" , 'hentai.org/fullimg.php', '"'),
            ("startkey", 'var startkey="', '";'),
            ("showkey" , 'var showkey="', '";'),
        ))[0]
        self.key["start"] = data["startkey"]
        self.key["show"] = data["showkey"]
        self.key["next"] = data["nextkey"]

        if self.original and data["origurl"]:
            # 'origurl' only holds what follows "fullimg.php"
            part = text.unescape(data["origurl"])
            url = self.root + "/fullimg.php" + part
        else:
            url = data["url"]

        # filename metadata always comes from the regular image url
        return url, text.nameext_from_url(data["url"], {
            "num": 1,
            "image_token": data["startkey"],
        })

    def images_from_api(self):
        """Get image url and data from api calls

        Yields an (image-url, metadata) tuple for the images 2 up to
        self.count. Requires self.key to be filled by image_from_page().
        """
        api_url = self.root + "/api.php"
        nextkey = self.key["next"]
        request = {
            "method" : "showpage",
            "gid"    : self.gid,
            "imgkey" : nextkey,
            "showkey": self.key["show"],
        }
        for num in range(2, self.count + 1):
            request["page"] = num
            # retry until the request succeeds; every attempt is
            # preceded by the usual delay
            while True:
                self.wait()
                try:
                    page = self.session.post(api_url, json=request).json()
                except requests.exceptions.ConnectionError as exc:
                    # leave a trace of the failure, so that an ongoing
                    # outage is distinguishable from a silent hang
                    self.log.debug("API request failed (%s); retrying", exc)
                else:
                    break
            # the key used for this request identifies the current image
            imgkey = nextkey
            nextkey, pos = text.extract(page["i3"], "'", "'")
            imgurl , pos = text.extract(page["i3"], 'id="img" src="', '"', pos)
            origurl = text.extract(page["i7"], '<a href="', '"')[0]

            if self.original and origurl:
                url = text.unescape(origurl)
            else:
                url = imgurl

            yield url, text.nameext_from_url(imgurl, {
                "num": num,
                "image_token": imgkey
            })
            request["imgkey"] = nextkey

    def wait(self, waittime=None):
        """Wait for a randomly chosen amount of seconds

        Without 'waittime', the delay is chosen from the configured
        [wait_min, wait_max] interval; otherwise from
        [0.66 * waittime, 1.33 * waittime].
        """
        # 'is None' instead of a truthiness test, so that an explicit
        # waittime of 0 is not mistaken for "no value given"
        if waittime is None:
            waittime = random.uniform(self.wait_min, self.wait_max)
        else:
            waittime = random.uniform(waittime * 0.66, waittime * 1.33)
        time.sleep(waittime)

    def login(self):
        """Login and set necessary cookies

        Does nothing if the login cookies are already present. Without
        a username, no login is attempted: the extractor switches to
        e-hentai.org and stops requesting original images.
        """
        if self._check_cookies(self.cookienames):
            return
        username, password = self._get_auth_info()
        if not username:
            self.log.info("no username given; using e-hentai.org")
            self.root = "https://e-hentai.org"
            self.original = False
            return
        cookies = self._login_impl(username, password)
        for key, value in cookies.items():
            self.session.cookies.set(
                key, value, domain=self.cookiedomain)

    # maxage: 90 days in seconds
    # keyarg=1: presumably caches per username -- TODO confirm in ..cache
    @cache(maxage=90*24*60*60, keyarg=1)
    def _login_impl(self, username, password):
        """Actual login implementation

        Returns a dict mapping each name in 'cookienames' to the value
        of the corresponding cookie from the login response.

        Raises:
            exception.AuthenticationError: the response does not contain
                the login confirmation message
        """
        self.log.info("Logging in as %s", username)
        url = "https://forums.e-hentai.org/index.php?act=Login&CODE=01"
        data = {
            "CookieDate": "1",
            "b": "d",
            "bt": "1-1",
            "UserName": username,
            "PassWord": password,
            "ipb_login_submit": "Login!",
        }
        headers = {
            "Referer": "https://e-hentai.org/bounce_login.php?b=d&bt=1-1"
        }
        response = self.request(url, method="POST", data=data, headers=headers)

        if "You are now logged in as:" not in response.text:
            raise exception.AuthenticationError()
        return {c: response.cookies[c] for c in self.cookienames}

    @staticmethod
    def _is_sadpanda(response):
        """Return True if the response object contains a sad panda"""
        # recognized by its headers alone: a Content-Length of "9615"
        # and "sadpanda.jpg" in the Content-Disposition header
        return (
            response.headers.get("Content-Length") == "9615" and
            "sadpanda.jpg" in response.headers.get("Content-Disposition", "")
        )
[exhentai] reenable extractor 2015-10-31 16:50:20 +01:00			`# -*- coding: utf-8 -*-`

add login notifications 2017-03-17 09:42:59 +01:00			`# Copyright 2014-2017 Mike Fährmann`
[exhentai] reenable extractor 2015-10-31 16:50:20 +01:00			`#`
			`# This program is free software; you can redistribute it and/or modify`
			`# it under the terms of the GNU General Public License version 2 as`
			`# published by the Free Software Foundation.`

[exhentai] transition to https 2016-08-30 09:17:40 +02:00			`"""Extract images from galleries at https://exhentai.org/"""`
[exhentai] reenable extractor 2015-10-31 16:50:20 +01:00
			`from .common import Extractor, Message`
implement and use extractor.config() method 2017-04-25 17:12:48 +02:00			`from .. import text, util, exception`
[exhentai] provide username/password auth 2016-07-23 17:55:46 +02:00			`from ..cache import cache`
initial commit 2014-10-12 21:56:44 +02:00			`import time`
			`import random`
[exhentai] retry failed api calls 2016-10-11 13:27:19 +02:00			`import requests`
initial commit 2014-10-12 21:56:44 +02:00
code adjustments according to pep8 nr2 2017-02-01 00:53:19 +01:00
consistent extractor naming scheme + docstrings 2016-09-12 10:20:57 +02:00			`class ExhentaiGalleryExtractor(Extractor):`
update extractor docstrings 2017-06-28 17:39:07 +02:00			`"""Extractor for image galleries from exhentai.org"""`
update all other extractors 2015-11-21 04:26:30 +01:00			`category = "exhentai"`
consistent extractor naming scheme + docstrings 2016-09-12 10:20:57 +02:00			`subcategory = "gallery"`
change keyword names to valid Python identifiers This commit mostly replaces all minus-signs ('-') in keyword names with underscores ('_') to allow them to be used in filter-expressions. For example 'gallery-id' got renamed to 'gallery_id'. (It is theoretically possible to access any variable, regardless of its name, with 'locals()["NAME"]', but that seems a bit too convoluted if just 'NAME' could be enough) 2017-09-10 22:20:47 +02:00			`directory_fmt = ["{category}", "{gallery_id}"]`
			`filename_fmt = "{gallery_id}_{num:>04}_{image_token}_{name}.{extension}"`
[exhentai] accept "e-hentai.org" URLs (#11) 2017-04-04 09:30:35 +02:00			`pattern = [r"(?:https?://)?(g\.e-|e-|ex)hentai\.org/g/(\d+)/([\da-f]{10})"]`
add a few more tests 2016-12-31 00:51:06 +01:00			`test = [`
			`("https://exhentai.org/g/960460/4f0e369d82/", {`
implement and use 'util.safe_int()' same as Python's 'int()', except it doesn't raise any exceptions and accepts a default value 2017-09-24 15:59:25 +02:00			`"keyword": "173277161e28162dcc755d2e7a88e6cd750f2477",`
add a few more tests 2016-12-31 00:51:06 +01:00			`"content": "493d759de534355c9f55f8e365565b62411de146",`
			`}),`
			`("https://exhentai.org/g/960461/4f0e369d82/", {`
			`"exception": exception.NotFoundError,`
			`}),`
			`("http://exhentai.org/g/962698/7f02358e00/", {`
			`"exception": exception.AuthorizationError,`
			`}),`
			`]`
[exhentai] fall back to e-hentai if no username is given 2017-04-28 15:59:56 +02:00			`root = "https://exhentai.org"`
skip login if cookies are present 2017-07-17 10:33:36 +02:00			`cookienames = ("ipb_member_id", "ipb_pass_hash")`
use 'cookiedomain' for cookies set by object-config-values otherwise these cookies would not be picked up by the _check_cookies() method. 2017-07-22 15:43:35 +02:00			`cookiedomain = ".exhentai.org"`
initial commit 2014-10-12 21:56:44 +02:00
[exhentai] reenable extractor 2015-10-31 16:50:20 +01:00			`def __init__(self, match):`
			`Extractor.__init__(self)`
[exhentai] rewrite 2016-09-20 19:01:16 +02:00			`self.key = {}`
[exhentai] use image-count as stop signal 2016-10-12 15:19:31 +02:00			`self.count = 0`
[exhentai] accept "e-hentai.org" URLs (#11) 2017-04-04 09:30:35 +02:00			`self.version, self.gid, self.token = match.groups()`
implement and use 'util.safe_int()' same as Python's 'int()', except it doesn't raise any exceptions and accepts a default value 2017-09-24 15:59:25 +02:00			`self.gid = util.safe_int(self.gid)`
implement and use extractor.config() method 2017-04-25 17:12:48 +02:00			`self.original = self.config("original", True)`
			`self.wait_min = self.config("wait-min", 3)`
			`self.wait_max = self.config("wait-max", 6)`
[exhentai] configurable wait-times 2015-11-19 17:04:54 +01:00			`if self.wait_max < self.wait_min:`
			`self.wait_max = self.wait_min`
add 'extractor.*.user-agent' config option 2017-11-15 13:54:40 +01:00			`self.session.headers["Referer"] = self.root + "/"`
initial commit 2014-10-12 21:56:44 +02:00
[exhentai] reenable extractor 2015-10-31 16:50:20 +01:00			`def items(self):`
[exhentai] rewrite 2016-09-20 19:01:16 +02:00			`self.login()`
[exhentai] reenable extractor 2015-10-31 16:50:20 +01:00			`yield Message.Version, 1`
[exhentai] rewrite 2016-09-20 19:01:16 +02:00
[exhentai] fall back to e-hentai if no username is given 2017-04-28 15:59:56 +02:00			`url = "{}/g/{}/{}/".format(self.root, self.gid, self.token)`
improve 'extractor.request' - add 'fatal' argument - improve internal logic and flow - raise known exception on error - update exception hierarchy 2017-08-05 16:11:46 +02:00			`response = self.request(url, fatal=False)`
[exhentai] update login procedure (#37) This new version behaves pretty much exactly like a browser would and caches all cookies sent to it and not just "ipb_member_id" and "ipb_pass_hash". 2017-08-28 21:03:32 +02:00			`page = response.text`
[exhenai] init headers before login and detect sadpanda - also debug-logs html after failed login - #37 2017-08-25 16:44:11 +02:00
[exhentai] raise proper exception for 'unavailable' galleries 2016-12-22 12:42:41 +01:00			`if response.status_code == 404 and "Gallery Not Available" in page:`
			`raise exception.AuthorizationError()`
[exhentai] update login procedure (#37) This new version behaves pretty much exactly like a browser would and caches all cookies sent to it and not just "ipb_member_id" and "ipb_pass_hash". 2017-08-28 21:03:32 +02:00			`if self._is_sadpanda(response):`
[exhenai] init headers before login and detect sadpanda - also debug-logs html after failed login - #37 2017-08-25 16:44:11 +02:00			`self.log.info("sadpanda.jpg")`
			`raise exception.AuthorizationError()`
[exhentai] fix detection of invalid gallery keys 2017-02-15 03:36:46 +01:00			`if page.startswith(("Key missing", "Gallery not found")):`
[exhentai] transition to https 2016-08-30 09:17:40 +02:00			`raise exception.NotFoundError("gallery")`
[exhenai] init headers before login and detect sadpanda - also debug-logs html after failed login - #37 2017-08-25 16:44:11 +02:00
[exhentai] rewrite 2016-09-20 19:01:16 +02:00			`data = self.get_job_metadata(page)`
implement and use 'util.safe_int()' same as Python's 'int()', except it doesn't raise any exceptions and accepts a default value 2017-09-24 15:59:25 +02:00			`self.count = data["count"]`
[exhentai] reenable extractor 2015-10-31 16:50:20 +01:00			`yield Message.Directory, data`
initial commit 2014-10-12 21:56:44 +02:00
[exhentai] rewrite 2016-09-20 19:01:16 +02:00			`for url, image in self.get_images(page):`
			`data.update(image)`
[exhentai] metadata consistency 2016-09-19 16:13:26 +02:00			`if "/fullimg.php" in url:`
allow extension by Content-Type for exhentai, seiga, senmanga 2016-09-30 16:43:43 +02:00			`data["extension"] = ""`
[exhentai] update login procedure (#37) This new version behaves pretty much exactly like a browser would and caches all cookies sent to it and not just "ipb_member_id" and "ipb_pass_hash". 2017-08-28 21:03:32 +02:00			`self.wait(1.5)`
[exhentai] rewrite 2016-09-20 19:01:16 +02:00			`yield Message.Url, url, data`

[exhentai] reenable extractor 2015-10-31 16:50:20 +01:00			`def get_job_metadata(self, page):`
[exhentai] use text.extract_all 2015-11-03 00:10:30 +01:00			`"""Collect metadata for extractor-job"""`
			`data = {`
change keyword names to valid Python identifiers This commit mostly replaces all minus-signs ('-') in keyword names with underscores ('_') to allow them to be used in filter-expressions. For example 'gallery-id' got renamed to 'gallery_id'. (It is theoretically possible to access any variable, regardless of its name, with 'locals()["NAME"]', but that seems a bit too convoluted if just 'NAME' could be enough) 2017-09-10 22:20:47 +02:00			`"gallery_id" : self.gid,`
			`"gallery_token": self.token,`
[exhentai] use text.extract_all 2015-11-03 00:10:30 +01:00			`}`
[exhentai] rewrite 2016-09-20 19:01:16 +02:00			`text.extract_all(page, (`
			`("title" , '<h1 id="gn">', '</h1>'),`
			`("title_jp" , '<h1 id="gj">', '</h1>'),`
			`("date" , '>Posted:</td><td class="gdt2">', '</td>'),`
			`("language" , '>Language:</td><td class="gdt2">', ' '),`
			`("size" , '>File Size:</td><td class="gdt2">', ' '),`
change keyword names to valid Python identifiers This commit mostly replaces all minus-signs ('-') in keyword names with underscores ('_') to allow them to be used in filter-expressions. For example 'gallery-id' got renamed to 'gallery_id'. (It is theoretically possible to access any variable, regardless of its name, with 'locals()["NAME"]', but that seems a bit too convoluted if just 'NAME' could be enough) 2017-09-10 22:20:47 +02:00			`("size_units", '', '<'),`
[exhentai] rewrite 2016-09-20 19:01:16 +02:00			`("count" , '>Length:</td><td class="gdt2">', ' '),`
[exhentai] use text.extract_all 2015-11-03 00:10:30 +01:00			`), values=data)`
move code into util.py 2017-03-28 13:12:44 +02:00			`data["lang"] = util.language_to_code(data["language"])`
[exhentai] unescape title 2016-08-31 10:20:46 +02:00			`data["title"] = text.unescape(data["title"])`
			`data["title_jp"] = text.unescape(data["title_jp"])`
implement and use 'util.safe_int()' same as Python's 'int()', except it doesn't raise any exceptions and accepts a default value 2017-09-24 15:59:25 +02:00			`data["count"] = util.safe_int(data["count"])`
[exhentai] rewrite 2016-09-20 19:01:16 +02:00			`return data`
initial commit 2014-10-12 21:56:44 +02:00
[exhentai] rewrite 2016-09-20 19:01:16 +02:00			`def get_images(self, page):`
[exhentai] use text.extract_all 2015-11-03 00:10:30 +01:00			`"""Collect url and metadata for all images in this gallery"""`
code adjustments according to pep8 nr2 2017-02-01 00:53:19 +01:00			`part = text.extract(page, 'hentai.org/s/', '"')[0]`
[exhentai] fall back to e-hentai if no username is given 2017-04-28 15:59:56 +02:00			`yield self.image_from_page(self.root + "/s/" + part)`
[exhentai] rewrite 2016-09-20 19:01:16 +02:00			`yield from self.images_from_api()`

			`def image_from_page(self, url):`
			`"""Get image url and data from webpage"""`
[exhentai] configurable wait-times 2015-11-19 17:04:54 +01:00			`self.wait()`
[exhentai] reenable extractor 2015-10-31 16:50:20 +01:00			`page = self.request(url).text`
[exhentai] rewrite 2016-09-20 19:01:16 +02:00			`data = text.extract_all(page, (`
			`(None , '<div id="i3"><a onclick="return load_image(', ''),`
			`("nextkey" , "'", "'"),`
			`("url" , '<img id="img" src="', '"'),`
[exhentai] fall back to e-hentai if no username is given 2017-04-28 15:59:56 +02:00			`("origurl" , 'hentai.org/fullimg.php', '"'),`
[exhentai] rewrite 2016-09-20 19:01:16 +02:00			`("startkey", 'var startkey="', '";'),`
			`("showkey" , 'var showkey="', '";'),`
			`))[0]`
			`self.key["start"] = data["startkey"]`
code adjustments according to pep8 nr2 2017-02-01 00:53:19 +01:00			`self.key["show"] = data["showkey"]`
			`self.key["next"] = data["nextkey"]`

			`if self.original and data["origurl"]:`
			`part = text.unescape(data["origurl"])`
[exhentai] fall back to e-hentai if no username is given 2017-04-28 15:59:56 +02:00			`url = self.root + "/fullimg.php" + part`
code adjustments according to pep8 nr2 2017-02-01 00:53:19 +01:00			`else:`
			`url = data["url"]`

[exhentai] rewrite 2016-09-20 19:01:16 +02:00			`return url, text.nameext_from_url(data["url"], {`
			`"num": 1,`
change keyword names to valid Python identifiers This commit mostly replaces all minus-signs ('-') in keyword names with underscores ('_') to allow them to be used in filter-expressions. For example 'gallery-id' got renamed to 'gallery_id'. (It is theoretically possible to access any variable, regardless of its name, with 'locals()["NAME"]', but that seems a bit too convoluted if just 'NAME' could be enough) 2017-09-10 22:20:47 +02:00			`"image_token": data["startkey"],`
[exhentai] rewrite 2016-09-20 19:01:16 +02:00			`})`
fixed various bugs - forgot "self." before "name_fmt" - image keys where off by one 2014-10-15 16:17:59 +02:00
[exhentai] rewrite 2016-09-20 19:01:16 +02:00			`def images_from_api(self):`
			`"""Get image url and data from api calls"""`
[exhentai] fall back to e-hentai if no username is given 2017-04-28 15:59:56 +02:00			`api_url = self.root + "/api.php"`
code adjustments according to pep8 nr2 2017-02-01 00:53:19 +01:00			`nextkey = self.key["next"]`
initial commit 2014-10-12 21:56:44 +02:00			`request = {`
			`"method" : "showpage",`
implement and use 'util.safe_int()' same as Python's 'int()', except it doesn't raise any exceptions and accepts a default value 2017-09-24 15:59:25 +02:00			`"gid" : self.gid,`
[exhentai] rewrite 2016-09-20 19:01:16 +02:00			`"imgkey" : nextkey,`
			`"showkey": self.key["show"],`
initial commit 2014-10-12 21:56:44 +02:00			`}`
code adjustments according to pep8 nr2 2017-02-01 00:53:19 +01:00			`for request["page"] in range(2, self.count + 1):`
[exhentai] retry failed api calls 2016-10-11 13:27:19 +02:00			`while True:`
			`try:`
[exhentai] use image-count as stop signal 2016-10-12 15:19:31 +02:00			`self.wait()`
[exhentai] fall back to e-hentai if no username is given 2017-04-28 15:59:56 +02:00			`page = self.session.post(api_url, json=request).json()`
[exhentai] retry failed api calls 2016-10-11 13:27:19 +02:00			`break`
[exhentai] use image-count as stop signal 2016-10-12 15:19:31 +02:00			`except requests.exceptions.ConnectionError:`
			`pass`
[exhentai] rewrite 2016-09-20 19:01:16 +02:00			`imgkey = nextkey`
			`nextkey, pos = text.extract(page["i3"], "'", "'")`
code adjustments according to pep8 nr2 2017-02-01 00:53:19 +01:00			`imgurl , pos = text.extract(page["i3"], 'id="img" src="', '"', pos)`
[exhentai] rewrite 2016-09-20 19:01:16 +02:00			`origurl, pos = text.extract(page["i7"], '<a href="', '"')`
code adjustments according to pep8 nr2 2017-02-01 00:53:19 +01:00
			`if self.original and origurl:`
			`url = text.unescape(origurl)`
			`else:`
			`url = imgurl`

[exhentai] rewrite 2016-09-20 19:01:16 +02:00			`yield url, text.nameext_from_url(imgurl, {`
			`"num": request["page"],`
change keyword names to valid Python identifiers This commit mostly replaces all minus-signs ('-') in keyword names with underscores ('_') to allow them to be used in filter-expressions. For example 'gallery-id' got renamed to 'gallery_id'. (It is theoretically possible to access any variable, regardless of its name, with 'locals()["NAME"]', but that seems a bit too convoluted if just 'NAME' could be enough) 2017-09-10 22:20:47 +02:00			`"image_token": imgkey`
[exhentai] rewrite 2016-09-20 19:01:16 +02:00			`})`
			`request["imgkey"] = nextkey`
[exhentai] configurable wait-times 2015-11-19 17:04:54 +01:00
			`def wait(self, waittime=None):`
			`"""Wait for a randomly chosen amount of seconds"""`
			`if not waittime:`
			`waittime = random.uniform(self.wait_min, self.wait_max)`
			`else:`
[exhentai] update login procedure (#37) This new version behaves pretty much exactly like a browser would and caches all cookies sent to it and not just "ipb_member_id" and "ipb_pass_hash". 2017-08-28 21:03:32 +02:00			`waittime = random.uniform(waittime * 0.66, waittime * 1.33)`
[exhentai] configurable wait-times 2015-11-19 17:04:54 +01:00			`time.sleep(waittime)`
[exhentai] provide username/password auth 2016-07-23 17:55:46 +02:00
			`def login(self):`
			`"""Login and set necessary cookies"""`
use 'cookiedomain' for cookies set by object-config-values otherwise these cookies would not be picked up by the _check_cookies() method. 2017-07-22 15:43:35 +02:00			`if self._check_cookies(self.cookienames):`
skip login if cookies are present 2017-07-17 10:33:36 +02:00			`return`
code cleanup and fixes 2017-07-25 14:59:41 +02:00			`username, password = self._get_auth_info()`
[exhentai] fall back to e-hentai if no username is given 2017-04-28 15:59:56 +02:00			`if not username:`
			`self.log.info("no username given; using e-hentai.org")`
			`self.root = "https://e-hentai.org"`
			`self.original = False`
			`return`
adjust login methods to a specific style 2017-01-08 17:33:25 +01:00			`cookies = self._login_impl(username, password)`
[exhentai] provide username/password auth 2016-07-23 17:55:46 +02:00			`for key, value in cookies.items():`
code adjustments according to pep8 nr2 2017-02-01 00:53:19 +01:00			`self.session.cookies.set(`
use 'cookiedomain' for cookies set by object-config-values otherwise these cookies would not be picked up by the _check_cookies() method. 2017-07-22 15:43:35 +02:00			`key, value, domain=self.cookiedomain)`
[exhentai] provide username/password auth 2016-07-23 17:55:46 +02:00
add login notifications 2017-03-17 09:42:59 +01:00			`@cache(maxage=90*24*60*60, keyarg=1)`
adjust login methods to a specific style 2017-01-08 17:33:25 +01:00			`def _login_impl(self, username, password):`
[exhentai] provide username/password auth 2016-07-23 17:55:46 +02:00			`"""Actual login implementation"""`
add login notifications 2017-03-17 09:42:59 +01:00			`self.log.info("Logging in as %s", username)`
[exhentai] provide username/password auth 2016-07-23 17:55:46 +02:00			`url = "https://forums.e-hentai.org/index.php?act=Login&CODE=01"`
improve 'extractor.request' - add 'fatal' argument - improve internal logic and flow - raise known exception on error - update exception hierarchy 2017-08-05 16:11:46 +02:00			`data = {`
[exhentai] provide username/password auth 2016-07-23 17:55:46 +02:00			`"CookieDate": "1",`
			`"b": "d",`
			`"bt": "1-1",`
adjust login methods to a specific style 2017-01-08 17:33:25 +01:00			`"UserName": username,`
			`"PassWord": password,`
[exhentai] provide username/password auth 2016-07-23 17:55:46 +02:00			`"ipb_login_submit": "Login!",`
			`}`
[exhenai] init headers before login and detect sadpanda - also debug-logs html after failed login - #37 2017-08-25 16:44:11 +02:00			`headers = {`
[exhentai] revert login-method to its old version (#37) Additional cookies don't seem to help and have to be manually set anyway. The older method is more likely to succeed, so I'd rather use this one. 2017-08-29 22:10:38 +02:00			`"Referer": "https://e-hentai.org/bounce_login.php?b=d&bt=1-1"`
[exhenai] init headers before login and detect sadpanda - also debug-logs html after failed login - #37 2017-08-25 16:44:11 +02:00			`}`
			`response = self.request(url, method="POST", data=data, headers=headers)`
[exhentai] provide username/password auth 2016-07-23 17:55:46 +02:00
[exhentai] revert login-method to its old version (#37) Additional cookies don't seem to help and have to be manually set anyway. The older method is more likely to succeed, so I'd rather use this one. 2017-08-29 22:10:38 +02:00			`if "You are now logged in as:" not in response.text:`
[exhentai] provide username/password auth 2016-07-23 17:55:46 +02:00			`raise exception.AuthenticationError()`
[exhentai] revert login-method to its old version (#37) Additional cookies don't seem to help and have to be manually set anyway. The older method is more likely to succeed, so I'd rather use this one. 2017-08-29 22:10:38 +02:00			`return {c: response.cookies[c] for c in self.cookienames}`
[exhentai] update login procedure (#37) This new version behaves pretty much exactly like a browser would and caches all cookies sent to it and not just "ipb_member_id" and "ipb_pass_hash". 2017-08-28 21:03:32 +02:00
			`@staticmethod`
			`def _is_sadpanda(response):`
			`"""Return True if the response object contains a sad panda"""`
			`return (`
			`response.headers.get("Content-Length") == "9615" and`
			`"sadpanda.jpg" in response.headers.get("Content-Disposition", "")`
			`)`