gallery-dl/gallery_dl/extractor/common.py

# -*- coding: utf-8 -*-

# Copyright 2014-2018 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

"""Common classes and constants used by extractor modules."""

import os
import re
import time
import netrc
import queue
import logging
import requests
import threading
import http.cookiejar
from .message import Message
from .. import config, text, exception


class Extractor():
    """Base class for all extractors

    Creates a 'requests' session configured from the 'extractor.*'
    options (headers, cookies, proxies) and offers helpers for sending
    HTTP requests, looking up login data and checking for cookies.
    """

    category = ""
    subcategory = ""
    categorytransfer = False
    directory_fmt = ["{category}"]
    filename_fmt = "{name}.{extension}"
    archive_fmt = ""
    cookiedomain = ""

    def __init__(self):
        self.session = requests.Session()
        self.log = logging.getLogger(self.category)
        self._set_headers()
        self._set_cookies()
        self._set_proxies()
        self._retries = self.config("retries", 5)
        self._timeout = self.config("timeout", 30)
        self._verify = self.config("verify", True)

    def __iter__(self):
        return self.items()

    def items(self):
        """Yield the messages produced by this extractor"""
        yield Message.Version, 1

    def skip(self, num):
        """Skip up to 'num' items; return how many were actually skipped"""
        return 0

    def config(self, key, default=None):
        """Return the value of option 'key' for this (sub)category"""
        return config.interpolate(
            ("extractor", self.category, self.subcategory, key), default)

    def request(self, url, method="GET", *,
                encoding=None, expect=(), retries=None, **kwargs):
        """Send an HTTP request and return its response object

        Arguments:
            url: target URL
            method: HTTP method name
            encoding: if set, assigned to 'response.encoding'
            expect: container of status codes outside of 200-399 which
                are accepted as well
            retries: maximum number of retries; None selects the
                'retries' option, 0 disables retrying
            kwargs: forwarded to 'session.request()'; 'timeout' and
                'verify' default to the respective options

        Connection errors, timeouts and responses with a status code of
        429 or >= 500 are retried after sleeping for 2**tries seconds.
        Raises exception.HttpError if no acceptable response was received.
        """
        tries = 0
        # compare against None, so that an explicit 'retries=0' is respected
        if retries is None:
            retries = self._retries
        kwargs.setdefault("timeout", self._timeout)
        kwargs.setdefault("verify", self._verify)
        while True:
            try:
                response = self.session.request(method, url, **kwargs)
            except (requests.ConnectionError, requests.Timeout) as exc:
                # possibly a temporary problem: retry
                msg = exc
            except requests.exceptions.RequestException as exc:
                raise exception.HttpError(exc)
            else:
                code = response.status_code
                if 200 <= code < 400 or code in expect:
                    if encoding:
                        response.encoding = encoding
                    return response

                msg = "{}: {} for url: {}".format(
                    code, response.reason, url)
                # client errors (except 429) are not retried
                if code < 500 and code != 429:
                    break

            if tries >= retries:
                break
            self.log.debug("%s (%d/%d)", msg, tries + 1, retries)
            time.sleep(2 ** tries)
            tries += 1

        raise exception.HttpError(msg)

    def _get_auth_info(self):
        """Return authentication information as (username, password) tuple

        The 'username' and 'password' options take precedence. Without a
        username, and if the global 'netrc' option is enabled, the netrc
        entry for this extractor's category is used instead.
        Both values are None if nothing could be found.
        """
        username = self.config("username")
        password = None

        if username:
            password = self.config("password")
        elif config.get(("netrc",), False):
            try:
                info = netrc.netrc().authenticators(self.category)
                username, _, password = info
            except (OSError, netrc.NetrcParseError) as exc:
                self.log.error("netrc: %s", exc)
            except TypeError:
                # 'info' is not unpackable, i.e. there is no matching entry
                self.log.warning("netrc: No authentication info")

        return username, password

    def _set_headers(self):
        """Set additional headers for the 'session' object"""
        self.session.headers["Accept-Language"] = "en-US,en;q=0.5"
        self.session.headers["User-Agent"] = self.config(
            "user-agent", ("Mozilla/5.0 (X11; Linux x86_64; rv:62.0) "
                           "Gecko/20100101 Firefox/62.0"))

    def _set_cookies(self):
        """Populate the session's cookiejar

        The 'cookies' option is either a dict of name-value pairs, which
        get set for 'cookiedomain', or the path of a cookies.txt file.
        """
        cookies = self.config("cookies")
        if cookies:
            if isinstance(cookies, dict):
                setcookie = self.session.cookies.set
                for name, value in cookies.items():
                    setcookie(name, value, domain=self.cookiedomain)
            else:
                try:
                    cj = http.cookiejar.MozillaCookieJar()
                    cj.load(cookies)
                    self.session.cookies.update(cj)
                except OSError as exc:
                    self.log.warning("cookies: %s", exc)

    def _set_proxies(self):
        """Update the session's proxy map

        The 'proxy' option is either a single proxy URL, used for both
        http and https, or a dict mapping schemes to proxy URLs.
        Proxy strings without a scheme get 'http://' prepended.
        """
        proxies = self.config("proxy")
        if not proxies:
            return
        if isinstance(proxies, str):
            proxies = {"http": proxies, "https": proxies}
        if not isinstance(proxies, dict):
            self.log.warning("invalid proxy specifier: %s", proxies)
            return

        # Build a new dict instead of rewriting the one returned by
        # config(), which might be shared with the configuration itself.
        normalized = {}
        for scheme, proxy in proxies.items():
            # non-string values are passed on unchanged
            if isinstance(proxy, str) and "://" not in proxy:
                proxy = "http://" + proxy.lstrip("/")
            normalized[scheme] = proxy
        self.session.proxies = normalized

    def _check_cookies(self, cookienames, domain=None):
        """Check if all 'cookienames' are in the session's cookiejar"""
        if not domain and self.cookiedomain:
            domain = self.cookiedomain
        for name in cookienames:
            try:
                # a failed lookup is signaled by a KeyError
                self.session.cookies._find(name, domain)
            except KeyError:
                return False
        return True


class AsynchronousExtractor(Extractor):
    """Extractor running its items() method in a separate thread

    Messages are handed over to the iterating thread through a queue,
    whose size is taken from the global 'queue-size' option.
    """

    def __init__(self):
        Extractor.__init__(self)
        maxsize = int(config.get(("queue-size",), 5))
        self.__queue = queue.Queue(maxsize)
        self.__thread = threading.Thread(
            target=self.async_items, daemon=True)

    def __iter__(self):
        """Start the worker thread and yield the messages it produces"""
        messages = self.__queue

        self.__thread.start()
        task = messages.get()
        # None marks the end of the message stream
        while task is not None:
            # exceptions from the worker thread are re-raised here
            if isinstance(task, Exception):
                raise task
            yield task
            messages.task_done()
            task = messages.get()

    def async_items(self):
        """Put all messages from items() into the queue"""
        enqueue = self.__queue.put
        try:
            for message in self.items():
                enqueue(message)
        except Exception as exc:
            enqueue(exc)
        enqueue(None)


class ChapterExtractor(Extractor):
    """Base class for extractors of a single manga chapter

    Subclasses implement get_metadata() and get_images();
    items() turns their results into messages.
    """

    subcategory = "chapter"
    directory_fmt = [
        "{category}", "{manga}",
        "{volume:?v/ />02}c{chapter:>03}{chapter_minor:?//}{title:?: //}"]
    filename_fmt = (
        "{manga}_c{chapter:>03}{chapter_minor:?//}_{page:>03}.{extension}")
    archive_fmt = (
        "{manga}_{chapter}{chapter_minor}_{page}")

    def __init__(self, url):
        Extractor.__init__(self)
        self.url = url

    def items(self):
        """Yield Version, Directory and one Url message per image"""
        page = self.request(self.url).text
        data = self.get_metadata(page)
        imgs = self.get_images(page)

        if "count" not in data:
            # 'imgs' does not necessarily support len()
            try:
                data["count"] = len(imgs)
            except TypeError:
                pass
            images = enumerate(imgs, 1)
        else:
            # number the images from 1 up to the known image count
            images = zip(range(1, data["count"] + 1), imgs)

        yield Message.Version, 1
        yield Message.Directory, data
        for number, image in images:
            data["page"] = number
            url, imgdata = image
            if imgdata:
                data.update(imgdata)
            yield Message.Url, url, text.nameext_from_url(url, data)

    def get_metadata(self, page):
        """Return a dict with general metadata"""

    def get_images(self, page):
        """Return a list of all (image-url, metadata)-tuples"""


class MangaExtractor(Extractor):
    """Base class for extractors listing the chapters of a manga

    Subclasses implement chapters(); items() queues every chapter URL
    it returns.
    """

    subcategory = "manga"
    categorytransfer = True
    scheme = "http"
    root = ""
    reverse = True

    def __init__(self, match, url=None):
        Extractor.__init__(self)
        if not url:
            # build the URL from the first group of the pattern match
            url = self.scheme + "://" + match.group(1)
        self.url = url

    def items(self):
        """Yield a Version message and one Queue message per chapter"""
        page = self.request(self.url).text

        chapter_list = self.chapters(page)
        if self.reverse:
            chapter_list.reverse()

        yield Message.Version, 1
        for chapter_url, metadata in chapter_list:
            yield Message.Queue, chapter_url, metadata

    def chapters(self, page):
        """Return a list of all (chapter-url, metadata)-tuples"""


class SharedConfigExtractor(Extractor):
    """Extractor falling back to the options of a shared base category"""

    basecategory = ""

    def config(self, key, default=None, sentinel=object()):
        """Return the value of option 'key'

        The lookup is done for this extractor's own category first and,
        if nothing was found there, repeated with 'basecategory' in
        place of the category. 'sentinel' is an internal marker used to
        detect a missing value and should not be passed by callers.
        """
        value = Extractor.config(self, key, sentinel)
        if value is sentinel:
            cat, self.category = self.category, self.basecategory
            try:
                value = Extractor.config(self, key, default)
            finally:
                # restore the real category even if the lookup failed
                self.category = cat
        return value


# MozillaCookieJar checks the first line of a cookie file against 'magic_re'.
# Use a more lenient pattern (optional " Netscape", any letter case), so that
# cookiejars generated by Wget can be loaded without modification.

http.cookiejar.MozillaCookieJar.magic_re = re.compile(
    "#( Netscape)? HTTP Cookie File", flags=re.IGNORECASE)


# 'requests' gets imported for the first time inside this file.
# When running on Windows, the certificate file expected by requests can be
# missing (this happens in standalone executables built with py2exe).
# In that case requests.Session.__init__ is monkey patched to always set the
# session's 'verify' attribute to False, which avoids an exception being
# thrown when accessing https:// URLs, and urllib3's InsecureRequestWarning
# gets disabled.

if os.name == "nt":
    import os.path
    import requests.certs
    import requests.packages.urllib3 as ulib3

    if not os.path.isfile(requests.certs.where()):
        # keep a reference to the original initializer before replacing it
        session_init = requests.Session.__init__

        def patched_init(self):
            session_init(self)
            self.verify = False

        requests.Session.__init__ = patched_init
        ulib3.disable_warnings(ulib3.exceptions.InsecureRequestWarning)
update extractor base classes 2015-04-08 01:43:25 +02:00			`# -- coding: utf-8 --`

implement generic manga-chapter extractor 2018-02-03 23:14:32 +01:00			`# Copyright 2014-2018 Mike Fährmann`
update extractor base classes 2015-04-08 01:43:25 +02:00			`#`
			`# This program is free software; you can redistribute it and/or modify`
			`# it under the terms of the GNU General Public License version 2 as`
			`# published by the Free Software Foundation.`

			`"""Common classes and constants used by extractor modules."""`

compatibility fixes to make a standalone exe work 2017-01-23 00:07:36 +01:00			`import os`
ignore case of cookiejar magic strings 2017-07-24 18:33:42 +02:00			`import re`
move code from util.py 2015-04-08 01:46:04 +02:00			`import time`
add .netrc support (#22) Use the '--netrc' cmdline option or set the 'netrc' config option to 'true' to enable the use of .netrc authentication data. The 'machine' names for the .netrc info are the lowercase extractor names (or categories): batoto, exhentai, nijie, pixiv, seiga. 2017-06-24 12:17:26 +02:00			`import netrc`
initial commit 2014-10-12 21:56:44 +02:00			`import queue`
add logger objects to extractors 2017-03-07 23:50:19 +01:00			`import logging`
initial commit 2014-10-12 21:56:44 +02:00			`import requests`
move code from util.py 2015-04-08 01:46:04 +02:00			`import threading`
add '--cookies' command-line option 2017-07-03 15:02:19 +02:00			`import http.cookiejar`
delay 'requests'-import 2015-11-24 19:47:51 +01:00			`from .message import Message`
implement generic manga-chapter extractor 2018-02-03 23:14:32 +01:00			`from .. import config, text, exception`
move code from util.py 2015-04-08 01:46:04 +02:00
trying to understand travis-ci unit test failures - added some debug output via logging module - unit tests work on my machine (tm) 2017-01-12 22:35:42 +01:00
update extractor base classes 2015-04-08 01:43:25 +02:00			`class Extractor():`

initial support for extractor-subcategories 2015-11-30 00:30:02 +01:00			`category = ""`
			`subcategory = ""`
implement (sub)category-transfer between extractors (#41) ImageFap- and all Manga-Extractors will transfer their (sub)category values to other extractors instantiated by them, which will in turn allow those to use options set for their parents. Example: ImagefapGalleryExtractors will use options set under extractor.imagefap.user, if (and only if) they have been instantiated by a ImagefapUserExtractor; and options from extractor.imagefap.gallery otherwise. 2017-09-26 20:50:49 +02:00			`categorytransfer = False`
put common directory- and filename formats in base classes 2017-05-30 12:10:16 +02:00			`directory_fmt = ["{category}"]`
[reddit] add extractor for reddit-hosted images (closes #68) 2018-01-14 18:55:42 +01:00			`filename_fmt = "{name}.{extension}"`
add DownloadArchive class 2018-01-29 22:13:06 +01:00			`archive_fmt = ""`
use 'cookiedomain' for cookies set by object-config-values otherwise these cookies would not be picked up by the _check_cookies() method. 2017-07-22 15:43:35 +02:00			`cookiedomain = ""`
remove unused format-strings 2015-11-29 23:41:43 +01:00
update extractor base classes 2015-04-08 01:43:25 +02:00			`def __init__(self):`
			`self.session = requests.Session()`
add logger objects to extractors 2017-03-07 23:50:19 +01:00			`self.log = logging.getLogger(self.category)`
add 'extractor.*.user-agent' config option 2017-11-15 13:54:40 +01:00			`self._set_headers()`
add explicit proxy support (#76) - '--proxy' as command-line argument - 'extractor.*.proxy' as config option 2018-02-19 18:24:56 +01:00			`self._set_cookies()`
			`self._set_proxies()`
adjust value resolution for retries/timeout/verify options This change introduces 'extractor..retries/timeout/verify' options as a general way to set these values for all HTTP requests. 'downloader.http.retries/timeout/verify' is a way to override these options for file downloads only and will fall back to 'extractor..…* values if they haven't been explicitly set. Also: downloader classes now take an extractor object as first argument instead of a requests.session. 2018-10-06 19:59:19 +02:00			`self._retries = self.config("retries", 5)`
			`self._timeout = self.config("timeout", 30)`
			`self._verify = self.config("verify", True)`
add '--cookies' command-line option 2017-07-03 15:02:19 +02:00
initial commit 2014-10-12 21:56:44 +02:00			`def __iter__(self):`
update extractor base classes 2015-04-08 01:43:25 +02:00			`return self.items()`

			`def items(self):`
			`yield Message.Version, 1`
initial commit 2014-10-12 21:56:44 +02:00
implement basic way to tell extractors to skip ahead 2017-03-03 17:26:50 +01:00			`def skip(self, num):`
			`return 0`

implement and use extractor.config() method 2017-04-25 17:12:48 +02:00			`def config(self, key, default=None):`
			`return config.interpolate(`
			`("extractor", self.category, self.subcategory, key), default)`

use 'retries' and 'timeout' options for regular HTTP requests 2018-08-02 14:59:37 +02:00			`def request(self, url, method="GET", *,`
			`encoding=None, expect=(), retries=None, **kwargs):`
			`tries = 0`
			`retries = retries or self._retries`
			`kwargs.setdefault("timeout", self._timeout)`
use 'verify' option for regular HTTP requests 2018-10-06 16:38:43 +02:00			`kwargs.setdefault("verify", self._verify)`
improve 'extractor.request' - add 'fatal' argument - improve internal logic and flow - raise known exception on error - update exception hierarchy 2017-08-05 16:11:46 +02:00			`while True:`
			`try:`
use 'retries' and 'timeout' options for regular HTTP requests 2018-08-02 14:59:37 +02:00			`response = self.session.request(method, url, **kwargs)`
improve extractor.request() - better retry behavior - exponential back-off - removed 'allow_empty' argument 2018-04-23 18:45:59 +02:00			`except (requests.ConnectionError, requests.Timeout) as exc:`
improve Extractor.request()'s retry behavior 2017-11-12 21:18:47 +01:00			`msg = exc`
improve 'extractor.request' - add 'fatal' argument - improve internal logic and flow - raise known exception on error - update exception hierarchy 2017-08-05 16:11:46 +02:00			`except requests.exceptions.RequestException as exc:`
improve extractor.request() - better retry behavior - exponential back-off - removed 'allow_empty' argument 2018-04-23 18:45:59 +02:00			`raise exception.HttpError(exc)`
[common] explicitly handle HTTP status code 429 2017-10-13 22:43:38 +02:00			`else:`
improve extractor.request() Replace the 'fatal' parameter with 'expect', which is a list/range of HTTP status codes >= 400 that should also be accepted. 2018-06-17 21:49:13 +02:00			`code = response.status_code`
			`if 200 <= code < 400 or code in expect:`
improve extractor.request() - better retry behavior - exponential back-off - removed 'allow_empty' argument 2018-04-23 18:45:59 +02:00			`if encoding:`
			`response.encoding = encoding`
			`return response`

adjust message for status_code based exceptions from: 5xx HTTP Error: Reason to : 5xx: Reason The "HTTP Error" part was in there to emulate Request's error messages from response.raise_for_status(), but it reads a lot better without. 2018-10-18 15:09:49 +02:00			`msg = "{}: {} for url: {}".format(`
improve extractor.request() Replace the 'fatal' parameter with 'expect', which is a list/range of HTTP status codes >= 400 that should also be accepted. 2018-06-17 21:49:13 +02:00			`code, response.reason, url)`
			`if code < 500 and code != 429:`
improve extractor.request() - better retry behavior - exponential back-off - removed 'allow_empty' argument 2018-04-23 18:45:59 +02:00			`break`

use 'retries' and 'timeout' options for regular HTTP requests 2018-08-02 14:59:37 +02:00			`if tries >= retries:`
improve extractor.request() - better retry behavior - exponential back-off - removed 'allow_empty' argument 2018-04-23 18:45:59 +02:00			`break`
use 'retries' and 'timeout' options for regular HTTP requests 2018-08-02 14:59:37 +02:00			`self.log.debug("%s (%d/%d)", msg, tries + 1, retries)`
improve extractor.request() - better retry behavior - exponential back-off - removed 'allow_empty' argument 2018-04-23 18:45:59 +02:00			`time.sleep(2 ** tries)`
use 'retries' and 'timeout' options for regular HTTP requests 2018-08-02 14:59:37 +02:00			`tries += 1`
improve extractor.request() - better retry behavior - exponential back-off - removed 'allow_empty' argument 2018-04-23 18:45:59 +02:00
			`raise exception.HttpError(msg)`
code cleanup and fixes 2017-07-25 14:59:41 +02:00
			`def _get_auth_info(self):`
add .netrc support (#22) Use the '--netrc' cmdline option or set the 'netrc' config option to 'true' to enable the use of .netrc authentication data. The 'machine' names for the .netrc info are the lowercase extractor names (or categories): batoto, exhentai, nijie, pixiv, seiga. 2017-06-24 12:17:26 +02:00			`"""Return authentication information as (username, password) tuple"""`
			`username = self.config("username")`
			`password = None`

			`if username:`
			`password = self.config("password")`
			`elif config.get(("netrc",), False):`
			`try:`
			`info = netrc.netrc().authenticators(self.category)`
			`username, _, password = info`
			`except (OSError, netrc.NetrcParseError) as exc:`
			`self.log.error("netrc: %s", exc)`
			`except TypeError:`
			`self.log.warning("netrc: No authentication info")`

			`return username, password`

add 'extractor.*.user-agent' config option 2017-11-15 13:54:40 +01:00			`def _set_headers(self):`
			`"""Set additional headers for the 'session' object"""`
			`self.session.headers["Accept-Language"] = "en-US,en;q=0.5"`
			`self.session.headers["User-Agent"] = self.config(`
update default user-agent string (closes #122) 2018-11-11 10:07:10 +01:00			`"user-agent", ("Mozilla/5.0 (X11; Linux x86_64; rv:62.0) "`
			`"Gecko/20100101 Firefox/62.0"))`
add 'extractor.*.user-agent' config option 2017-11-15 13:54:40 +01:00
add explicit proxy support (#76) - '--proxy' as command-line argument - 'extractor.*.proxy' as config option 2018-02-19 18:24:56 +01:00			`def _set_cookies(self):`
			`"""Populate the session's cookiejar"""`
			`cookies = self.config("cookies")`
code cleanup and fixes 2017-07-25 14:59:41 +02:00			`if cookies:`
			`if isinstance(cookies, dict):`
			`setcookie = self.session.cookies.set`
			`for name, value in cookies.items():`
			`setcookie(name, value, domain=self.cookiedomain)`
			`else:`
			`try:`
			`cj = http.cookiejar.MozillaCookieJar()`
			`cj.load(cookies)`
			`self.session.cookies.update(cj)`
			`except OSError as exc:`
			`self.log.warning("cookies: %s", exc)`
initial commit 2014-10-12 21:56:44 +02:00
add explicit proxy support (#76) - '--proxy' as command-line argument - 'extractor.*.proxy' as config option 2018-02-19 18:24:56 +01:00			`def _set_proxies(self):`
			`"""Update the session's proxy map"""`
			`proxies = self.config("proxy")`
			`if proxies:`
			`if isinstance(proxies, str):`
			`proxies = {"http": proxies, "https": proxies}`
			`if isinstance(proxies, dict):`
			`for scheme, proxy in proxies.items():`
			`if "://" not in proxy:`
			`proxies[scheme] = "http://" + proxy.lstrip("/")`
			`self.session.proxies = proxies`
			`else:`
			`self.log.warning("invalid proxy specifier: %s", proxies)`

skip login if cookies are present 2017-07-17 10:33:36 +02:00			`def _check_cookies(self, cookienames, domain=None):`
code cleanup and fixes 2017-07-25 14:59:41 +02:00			`"""Check if all 'cookienames' are in the session's cookiejar"""`
use 'cookiedomain' for cookies set by object-config-values otherwise these cookies would not be picked up by the _check_cookies() method. 2017-07-22 15:43:35 +02:00			`if not domain and self.cookiedomain:`
			`domain = self.cookiedomain`
skip login if cookies are present 2017-07-17 10:33:36 +02:00			`for name in cookienames:`
			`try:`
			`self.session.cookies._find(name, domain)`
			`except KeyError:`
			`return False`
			`return True`

update extractor base classes 2015-04-08 01:43:25 +02:00
[pixiv] update to new extractor interface 2015-04-10 15:29:09 +02:00			`class AsynchronousExtractor(Extractor):`
initial commit 2014-10-12 21:56:44 +02:00
rewrite extractors to use config-module 2015-10-05 15:35:48 +02:00			`def __init__(self):`
update extractor base classes 2015-04-08 01:43:25 +02:00			`Extractor.__init__(self)`
smaller code and text changes 2017-09-30 18:52:23 +02:00			`queue_size = int(config.get(("queue-size",), 5))`
			`self.__queue = queue.Queue(queue_size)`
better support for KeyboardInterrupt exceptions 2015-04-10 17:31:49 +02:00			`self.__thread = threading.Thread(target=self.async_items, daemon=True)`
initial commit 2014-10-12 21:56:44 +02:00
			`def __iter__(self):`
update extractor base classes 2015-04-08 01:43:25 +02:00			`get = self.__queue.get`
initial commit 2014-10-12 21:56:44 +02:00			`done = self.__queue.task_done`

			`self.__thread.start()`
			`while True:`
			`task = get()`
			`if task is None:`
			`return`
re-raise async exceptions in main thread 2016-07-24 22:16:59 +02:00			`if isinstance(task, Exception):`
			`raise task`
initial commit 2014-10-12 21:56:44 +02:00			`yield task`
			`done()`

update extractor base classes 2015-04-08 01:43:25 +02:00			`def async_items(self):`
initial commit 2014-10-12 21:56:44 +02:00			`put = self.__queue.put`
			`try:`
update extractor base classes 2015-04-08 01:43:25 +02:00			`for task in self.items():`
initial commit 2014-10-12 21:56:44 +02:00			`put(task)`
smaller code and text changes 2017-09-30 18:52:23 +02:00			`except Exception as exc:`
			`put(exc)`
initial commit 2014-10-12 21:56:44 +02:00			`put(None)`
move code from util.py 2015-04-08 01:46:04 +02:00

implement generic manga-chapter extractor 2018-02-03 23:14:32 +01:00			`class ChapterExtractor(Extractor):`

			`subcategory = "chapter"`
			`directory_fmt = [`
			`"{category}", "{manga}",`
use generic chapter-extractor in more modules 2018-02-07 11:22:47 +01:00			`"{volume:?v/ />02}c{chapter:>03}{chapter_minor:?//}{title:?: //}"]`
implement generic manga-chapter extractor 2018-02-03 23:14:32 +01:00			`filename_fmt = (`
use generic chapter-extractor in more modules 2018-02-07 11:22:47 +01:00			`"{manga}_c{chapter:>03}{chapter_minor:?//}_{page:>03}.{extension}")`
adjust archive-ids 2018-02-12 23:09:34 +01:00			`archive_fmt = (`
			`"{manga}_{chapter}{chapter_minor}_{page}")`
implement generic manga-chapter extractor 2018-02-03 23:14:32 +01:00
			`def __init__(self, url):`
			`Extractor.__init__(self)`
			`self.url = url`

			`def items(self):`
			`page = self.request(self.url).text`
			`data = self.get_metadata(page)`
			`imgs = self.get_images(page)`

			`if "count" in data:`
use generic chapter-extractor in more modules 2018-02-07 11:22:47 +01:00			`images = zip(`
			`range(1, data["count"]+1),`
			`imgs`
			`)`
implement generic manga-chapter extractor 2018-02-03 23:14:32 +01:00			`else:`
			`try:`
			`data["count"] = len(imgs)`
			`except TypeError:`
			`pass`
			`images = enumerate(imgs, 1)`

			`yield Message.Version, 1`
			`yield Message.Directory, data`
			`for data["page"], (url, imgdata) in images:`
			`if imgdata:`
			`data.update(imgdata)`
			`yield Message.Url, url, text.nameext_from_url(url, data)`

			`def get_metadata(self, page):`
			`"""Return a dict with general metadata"""`

			`def get_images(self, page):`
			`"""Return a list of all (image-url, metadata)-tuples"""`


simplify code by using a MangaExtractor base class 2017-05-20 11:27:43 +02:00			`class MangaExtractor(Extractor):`

			`subcategory = "manga"`
implement (sub)category-transfer between extractors (#41) ImageFap- and all Manga-Extractors will transfer their (sub)category values to other extractors instantiated by them, which will in turn allow those to use options set for their parents. Example: ImagefapGalleryExtractors will use options set under extractor.imagefap.user, if (and only if) they have been instantiated by a ImagefapUserExtractor; and options from extractor.imagefap.gallery otherwise. 2017-09-26 20:50:49 +02:00			`categorytransfer = True`
simplify code by using a MangaExtractor base class 2017-05-20 11:27:43 +02:00			`scheme = "http"`
			`root = ""`
			`reverse = True`

			`def __init__(self, match, url=None):`
			`Extractor.__init__(self)`
			`self.url = url or self.scheme + "://" + match.group(1)`

			`def items(self):`
			`page = self.request(self.url).text`

			`chapters = self.chapters(page)`
			`if self.reverse:`
			`chapters.reverse()`

			`yield Message.Version, 1`
simplify MangaExtractor class 2017-09-24 16:03:29 +02:00			`for chapter, data in chapters:`
			`yield Message.Queue, chapter, data`
simplify code by using a MangaExtractor base class 2017-05-20 11:27:43 +02:00
			`def chapters(self, page):`
implement generic manga-chapter extractor 2018-02-03 23:14:32 +01:00			`"""Return a list of all (chapter-url, metadata)-tuples"""`
simplify code by using a MangaExtractor base class 2017-05-20 11:27:43 +02:00

add common config category for boorus and foolslide 2017-08-29 22:42:48 +02:00			`class SharedConfigExtractor(Extractor):`

			`basecategory = ""`

			`def config(self, key, default=None, sentinel=object()):`
			`value = Extractor.config(self, key, sentinel)`
			`if value is sentinel:`
			`cat, self.category = self.category, self.basecategory`
			`value = Extractor.config(self, key, default)`
			`self.category = cat`
			`return value`


improve 'extractor.request' - add 'fatal' argument - improve internal logic and flow - raise known exception on error - update exception hierarchy 2017-08-05 16:11:46 +02:00			`# Reduce strictness of the expected magic string in cookiejar files.`
			`# (This allows the use of Wget-generated cookiejars without modification)`
ignore case of cookiejar magic strings 2017-07-24 18:33:42 +02:00
			`http.cookiejar.MozillaCookieJar.magic_re = re.compile(`
			`"#( Netscape)? HTTP Cookie File", re.IGNORECASE)`


compatibility fixes to make a standalone exe work 2017-01-23 00:07:36 +01:00			`# The first import of requests happens inside this file.`
			`# If we are running on Windows and the from requests expected certificate file`
			`# is missing (which happens in a standalone executable from py2exe), the`
			`# requests.Session object gets monkey patched to always set its 'verify'`
			`# attribute to False to avoid an exception being thrown when attempting to`
			`# access https:// URLs.`

			`if os.name == "nt":`
			`import os.path`
			`import requests.certs`
disable urllib3s InsecureConnectionWarning 2017-02-11 21:01:31 +01:00			`import requests.packages.urllib3 as ulib3`
compatibility fixes to make a standalone exe work 2017-01-23 00:07:36 +01:00			`if not os.path.isfile(requests.certs.where()):`
			`def patched_init(self):`
			`session_init(self)`
			`self.verify = False`
			`session_init = requests.Session.__init__`
			`requests.Session.__init__ = patched_init`
disable urllib3s InsecureConnectionWarning 2017-02-11 21:01:31 +01:00			`ulib3.disable_warnings(ulib3.exceptions.InsecureRequestWarning)`