2015-04-05 17:15:27 +02:00
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
|
2017-01-30 19:40:15 +01:00
|
|
|
# Copyright 2014-2017 Mike Fährmann
|
2015-04-05 17:15:27 +02:00
|
|
|
#
|
|
|
|
# This program is free software; you can redistribute it and/or modify
|
|
|
|
# it under the terms of the GNU General Public License version 2 as
|
|
|
|
# published by the Free Software Foundation.
|
|
|
|
|
2016-10-04 14:33:50 +02:00
|
|
|
from __future__ import unicode_literals, print_function
|
|
|
|
|
2017-01-30 19:40:15 +01:00
|
|
|
__author__ = "Mike Fährmann"
|
|
|
|
__copyright__ = "Copyright 2014-2017 Mike Fährmann"
|
|
|
|
__license__ = "GPLv2"
|
2014-10-12 21:56:44 +02:00
|
|
|
__maintainer__ = "Mike Fährmann"
|
2017-01-30 19:40:15 +01:00
|
|
|
__email__ = "mike_faehrmann@web.de"
|
2014-10-12 21:56:44 +02:00
|
|
|
|
2016-08-06 13:40:49 +02:00
|
|
|
import sys
|
2016-10-04 14:33:50 +02:00
|
|
|
|
|
|
|
# Abort early on interpreters older than Python 3.3
# (0x3030000 encodes version 3.3 in sys.hexversion's layout).
if sys.hexversion < 0x3030000:
    print("Python 3.3+ required", file=sys.stderr)
    sys.exit(1)
|
|
|
|
|
2014-10-12 21:56:44 +02:00
|
|
|
import argparse
|
2017-03-07 23:50:19 +01:00
|
|
|
import logging
|
2016-07-21 13:13:53 +02:00
|
|
|
import json
|
2016-08-05 10:25:31 +02:00
|
|
|
from . import config, extractor, job, exception
|
2016-10-08 11:37:47 +02:00
|
|
|
from .version import __version__
|
2014-10-12 21:56:44 +02:00
|
|
|
|
2017-01-30 19:40:15 +01:00
|
|
|
|
2015-11-14 15:31:07 +01:00
|
|
|
def build_cmdline_parser():
    """Build and return the ArgumentParser for the command line interface.

    The parser defines all supported options (output control, credentials,
    input sources, configuration overrides, listing commands) plus the
    positional URL arguments.  Callers invoke ``parse_args()`` on the
    returned parser themselves.
    """
    parser = argparse.ArgumentParser(
        description='Download images from various sources')
    parser.add_argument(
        "-g", "--get-urls", dest="list_urls", action="count",
        help="print download urls",
    )
    parser.add_argument(
        "-d", "--dest",
        metavar="DEST",
        help="destination directory",
    )
    parser.add_argument(
        "-u", "--username",
        metavar="USER"
    )
    parser.add_argument(
        "-p", "--password",
        metavar="PASS"
    )
    parser.add_argument(
        "-i", "--input-file",
        metavar="FILE", dest="inputfile",
        help="download URLs found in local FILE",
    )
    parser.add_argument(
        "--images",
        metavar="ITEM-SPEC", dest="images",
        # fixed typo: "seperated" -> "separated"
        help=("specify which images to download through a comma separated list"
              " of indices or index-ranges; "
              "for example '--images -2,4,6-8,10-' will download images with "
              "index 1, 2, 4, 6, 7, 8 and 10 up to the last one")
    )
    parser.add_argument(
        "--chapters",
        metavar="ITEM-SPEC", dest="chapters",
        help=("same as '--images' except for chapters")
    )
    parser.add_argument(
        "-c", "--config",
        metavar="CFG", dest="cfgfiles", action="append",
        help="additional configuration files",
    )
    parser.add_argument(
        "-o", "--option",
        metavar="OPT", action="append", default=[],
        help="additional 'key=value' option values",
    )
    parser.add_argument(
        "--list-extractors", dest="list_extractors", action="store_true",
        help=("print a list of extractor classes "
              "with description and example URL"),
    )
    parser.add_argument(
        "--list-keywords", dest="list_keywords", action="store_true",
        help="print a list of available keywords for the given URLs",
    )
    parser.add_argument(
        "--list-modules", dest="list_modules", action="store_true",
        help="print a list of available modules/supported sites",
    )
    parser.add_argument(
        "--version", action="version", version=__version__,
        help="print program version and exit"
    )
    parser.add_argument(
        "urls",
        nargs="*", metavar="URL",
        help="url to download images from"
    )
    return parser
|
2014-10-12 21:56:44 +02:00
|
|
|
|
2016-07-21 13:13:53 +02:00
|
|
|
|
|
|
|
def parse_option(opt):
    """Apply a single 'key=value' command line option to the configuration.

    The key part may be a dot-separated path ("a.b.c" sets config["a"]["b"]["c"]).
    The value part is interpreted as JSON when possible (numbers, booleans,
    lists, ...) and kept as a plain string otherwise.  Malformed pairs are
    reported on stderr instead of raising.
    """
    try:
        path, raw = opt.split("=", 1)
        try:
            # prefer a typed value when the text is valid JSON
            parsed = json.loads(raw)
        except ValueError:
            parsed = raw  # fall back to the literal string
        config.set(path.split("."), parsed)
    except ValueError:
        print("Invalid 'key=value' pair:", opt, file=sys.stderr)
|
2016-07-21 13:13:53 +02:00
|
|
|
|
2017-01-30 19:40:15 +01:00
|
|
|
|
2017-03-07 23:50:19 +01:00
|
|
|
def initialize_logging():
    """Configure the root logger for console output.

    Installs a "[name][level] message" format at INFO level and re-registers
    the standard level names in lowercase so log lines read e.g. "[info]"
    instead of "[INFO]".
    """
    logging.basicConfig(
        level=logging.INFO,
        format="[%(name)s][%(levelname)s] %(message)s",
    )
    # replace "DEBUG", "INFO", ... with their lowercase spellings
    for lvl in (logging.DEBUG, logging.INFO, logging.WARNING,
                logging.ERROR, logging.CRITICAL):
        logging.addLevelName(lvl, logging.getLevelName(lvl).lower())
|
|
|
|
|
|
|
|
|
2016-12-04 16:11:54 +01:00
|
|
|
def sanatize_input(file):
    """Yield the whitespace-stripped, non-empty lines of *file*.

    *file* may be any iterable of strings (an open file object, a list of
    lines, stdin, ...).  Blank lines and surrounding whitespace are dropped.
    """
    stripped = (raw.strip() for raw in file)
    yield from (entry for entry in stripped if entry)
|
|
|
|
|
2017-01-30 19:40:15 +01:00
|
|
|
|
2014-10-12 21:56:44 +02:00
|
|
|
def main():
    """Program entry point.

    Loads configuration, parses the command line, mirrors simple options
    into the global configuration, and then either prints one of the
    requested listings or runs the selected job type over every URL taken
    from the command line and/or the input file.
    """
    try:
        initialize_logging()
        config.load()
        parser = build_cmdline_parser()
        args = parser.parse_args()

        # extra config files are loaded strictly: a missing file is an error
        if args.cfgfiles:
            config.load(*args.cfgfiles, strict=True)

        # copy simple command line options into the global configuration
        if args.dest:
            config.set(("base-directory",), args.dest)
        if args.username:
            config.set(("username",), args.username)
        if args.password:
            config.set(("password",), args.password)
        if args.images:
            config.set(("images",), args.images)
        if args.chapters:
            config.set(("chapters",), args.chapters)

        for opt in args.option:
            parse_option(opt)

        if args.list_modules:
            for module_name in extractor.modules:
                print(module_name)
        elif args.list_extractors:
            for extr in extractor.extractors():
                print(extr.__name__)
                if extr.__doc__:
                    print(extr.__doc__)
                # the first test entry doubles as an example URL
                if hasattr(extr, "test") and extr.test:
                    print("Example:", extr.test[0][0])
                print()
        else:
            if not args.urls and not args.inputfile:
                parser.error("the following arguments are required: URL")

            # select the job type; --get-urls may be given multiple times
            # to increase the maximum extraction depth
            if args.list_urls:
                jobtype = job.UrlJob
                jobtype.maxdepth = args.list_urls
            elif args.list_keywords:
                jobtype = job.KeywordJob
            else:
                jobtype = job.DownloadJob

            urls = args.urls
            if args.inputfile:
                try:
                    if args.inputfile == "-":
                        file = sys.stdin
                    else:
                        file = open(args.inputfile)
                    import itertools
                    urls = itertools.chain(urls, sanatize_input(file))
                except OSError as e:
                    # fix: report the error on stderr like every other
                    # diagnostic in this module (was: print(e) -> stdout)
                    print(e, file=sys.stderr)

            for url in urls:
                try:
                    jobtype(url).run()
                except exception.NoExtractorError:
                    print("No suitable extractor found for URL '", url, "'",
                          sep="", file=sys.stderr)

    except KeyboardInterrupt:
        print("\nKeyboardInterrupt", file=sys.stderr)
    except BrokenPipeError:
        pass
    except IOError as err:
        # ignore broken-pipe errors (e.g. output piped into 'head'),
        # re-raise everything else
        import errno
        if err.errno != errno.EPIPE:
            raise
|