2015-04-05 17:15:27 +02:00
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
|
2017-01-30 19:40:15 +01:00
|
|
|
# Copyright 2014-2017 Mike Fährmann
|
2015-04-05 17:15:27 +02:00
|
|
|
#
|
|
|
|
# This program is free software; you can redistribute it and/or modify
|
|
|
|
# it under the terms of the GNU General Public License version 2 as
|
|
|
|
# published by the Free Software Foundation.
|
|
|
|
|
2016-10-04 14:33:50 +02:00
|
|
|
from __future__ import unicode_literals, print_function
|
|
|
|
|
2017-01-30 19:40:15 +01:00
|
|
|
__author__ = "Mike Fährmann"
|
|
|
|
__copyright__ = "Copyright 2014-2017 Mike Fährmann"
|
|
|
|
__license__ = "GPLv2"
|
2014-10-12 21:56:44 +02:00
|
|
|
__maintainer__ = "Mike Fährmann"
|
2017-01-30 19:40:15 +01:00
|
|
|
__email__ = "mike_faehrmann@web.de"
|
2014-10-12 21:56:44 +02:00
|
|
|
|
2016-08-06 13:40:49 +02:00
|
|
|
import sys
|
2016-10-04 14:33:50 +02:00
|
|
|
|
|
|
|
# Abort early on interpreters older than Python 3.3
# (0x3030000 encodes version 3.3 in sys.hexversion's layout).
if sys.hexversion < 0x3030000:
    print("Python 3.3+ required", file=sys.stderr)
    sys.exit(1)
|
|
|
|
|
2014-10-12 21:56:44 +02:00
|
|
|
import argparse
|
2017-03-07 23:50:19 +01:00
|
|
|
import logging
|
2016-07-21 13:13:53 +02:00
|
|
|
import json
|
2016-08-05 10:25:31 +02:00
|
|
|
from . import config, extractor, job, exception
|
2016-10-08 11:37:47 +02:00
|
|
|
from .version import __version__
|
2014-10-12 21:56:44 +02:00
|
|
|
|
2017-01-30 19:40:15 +01:00
|
|
|
|
2015-11-14 15:31:07 +01:00
|
|
|
def build_cmdline_parser():
    """Build and return the ArgumentParser for the command line interface.

    The parser defines all supported options (output control, credentials,
    input sources, configuration overrides, listing commands) plus the
    positional URL arguments.  Callers invoke ``parse_args()`` on the
    returned parser themselves.
    """
    parser = argparse.ArgumentParser(
        description='Download images from various sources')
    parser.add_argument(
        "-g", "--get-urls", dest="list_urls", action="count",
        help="print download urls",
    )
    parser.add_argument(
        "-d", "--dest",
        metavar="DEST",
        help="destination directory",
    )
    parser.add_argument(
        "-u", "--username",
        metavar="USER"
    )
    parser.add_argument(
        "-p", "--password",
        metavar="PASS"
    )
    parser.add_argument(
        "-i", "--input-file",
        metavar="FILE", dest="inputfile",
        help="download URLs found in local FILE",
    )
    parser.add_argument(
        "--images",
        metavar="ITEM-SPEC", dest="images",
        # fixed typo: "seperated" -> "separated"
        help=("specify which images to download through a comma separated list"
              " of indices or index-ranges; "
              "for example '--images -2,4,6-8,10-' will download images with "
              "index 1, 2, 4, 6, 7, 8 and 10 up to the last one")
    )
    parser.add_argument(
        "--chapters",
        metavar="ITEM-SPEC", dest="chapters",
        help=("same as '--images' except for chapters")
    )
    parser.add_argument(
        "-c", "--config",
        metavar="CFG", dest="cfgfiles", action="append",
        help="additional configuration files",
    )
    parser.add_argument(
        "-o", "--option",
        metavar="OPT", action="append", default=[],
        help="additional 'key=value' option values",
    )
    parser.add_argument(
        "--list-extractors", dest="list_extractors", action="store_true",
        help=("print a list of extractor classes "
              "with description and example URL"),
    )
    parser.add_argument(
        "--list-keywords", dest="list_keywords", action="store_true",
        help="print a list of available keywords for the given URLs",
    )
    parser.add_argument(
        "--list-modules", dest="list_modules", action="store_true",
        help="print a list of available modules/supported sites",
    )
    parser.add_argument(
        "--version", action="version", version=__version__,
        help="print program version and exit"
    )
    parser.add_argument(
        "urls",
        nargs="*", metavar="URL",
        help="url to download images from"
    )
    return parser
|
2014-10-12 21:56:44 +02:00
|
|
|
|
2016-07-21 13:13:53 +02:00
|
|
|
|
|
|
|
def parse_option(opt):
    """Apply a single 'key=value' command line option to the configuration.

    The key part may be a dot-separated path ("a.b.c" sets config["a"]["b"]["c"]).
    The value part is interpreted as JSON when possible (numbers, booleans,
    lists, ...) and kept as a plain string otherwise.  Malformed pairs are
    reported on stderr instead of raising.
    """
    try:
        path, raw = opt.split("=", 1)
        try:
            # prefer a typed value when the text is valid JSON
            parsed = json.loads(raw)
        except ValueError:
            parsed = raw  # fall back to the literal string
        config.set(path.split("."), parsed)
    except ValueError:
        print("Invalid 'key=value' pair:", opt, file=sys.stderr)
|
2016-07-21 13:13:53 +02:00
|
|
|
|
2017-01-30 19:40:15 +01:00
|
|
|
|
2017-03-07 23:50:19 +01:00
|
|
|
def initialize_logging():
    """Configure the root logger for console output.

    Installs a "[name][level] message" format at INFO level and re-registers
    the standard level names in lowercase so log lines read e.g. "[info]"
    instead of "[INFO]".
    """
    logging.basicConfig(
        level=logging.INFO,
        format="[%(name)s][%(levelname)s] %(message)s",
    )
    # replace "DEBUG", "INFO", ... with their lowercase spellings
    for lvl in (logging.DEBUG, logging.INFO, logging.WARNING,
                logging.ERROR, logging.CRITICAL):
        logging.addLevelName(lvl, logging.getLevelName(lvl).lower())
|
|
|
|
|
|
|
|
|
2016-12-04 16:11:54 +01:00
|
|
|
def sanatize_input(file):
    """Yield the whitespace-stripped, non-empty lines of *file*.

    *file* may be any iterable of strings (an open file object, a list of
    lines, stdin, ...).  Blank lines and surrounding whitespace are dropped.
    """
    stripped = (raw.strip() for raw in file)
    yield from (entry for entry in stripped if entry)
|
|
|
|
|
2017-01-30 19:40:15 +01:00
|
|
|
|
2014-10-12 21:56:44 +02:00
|
|
|
def main():
    """Program entry point.

    Loads configuration, parses the command line, mirrors simple options
    into the global configuration, and then either prints one of the
    requested listings or runs the selected job type over every URL taken
    from the command line and/or the input file.
    """
    try:
        initialize_logging()
        config.load()
        parser = build_cmdline_parser()
        args = parser.parse_args()

        # extra config files are loaded strictly: a missing file is an error
        if args.cfgfiles:
            config.load(*args.cfgfiles, strict=True)

        # copy simple command line options into the global configuration
        if args.dest:
            config.set(("base-directory",), args.dest)
        if args.username:
            config.set(("username",), args.username)
        if args.password:
            config.set(("password",), args.password)
        if args.images:
            config.set(("images",), args.images)
        if args.chapters:
            config.set(("chapters",), args.chapters)

        for opt in args.option:
            parse_option(opt)

        if args.list_modules:
            for module_name in extractor.modules:
                print(module_name)
        elif args.list_extractors:
            for extr in extractor.extractors():
                print(extr.__name__)
                if extr.__doc__:
                    print(extr.__doc__)
                # the first test entry doubles as an example URL
                if hasattr(extr, "test") and extr.test:
                    print("Example:", extr.test[0][0])
                print()
        else:
            if not args.urls and not args.inputfile:
                parser.error("the following arguments are required: URL")

            # select the job type; --get-urls may be given multiple times
            # to increase the maximum extraction depth
            if args.list_urls:
                jobtype = job.UrlJob
                jobtype.maxdepth = args.list_urls
            elif args.list_keywords:
                jobtype = job.KeywordJob
            else:
                jobtype = job.DownloadJob

            urls = args.urls
            if args.inputfile:
                try:
                    if args.inputfile == "-":
                        file = sys.stdin
                    else:
                        file = open(args.inputfile)
                    import itertools
                    urls = itertools.chain(urls, sanatize_input(file))
                except OSError as e:
                    # fix: report the error on stderr like every other
                    # diagnostic in this module (was: print(e) -> stdout)
                    print(e, file=sys.stderr)

            for url in urls:
                try:
                    jobtype(url).run()
                except exception.NoExtractorError:
                    print("No suitable extractor found for URL '", url, "'",
                          sep="", file=sys.stderr)

    except KeyboardInterrupt:
        print("\nKeyboardInterrupt", file=sys.stderr)
    except BrokenPipeError:
        pass
    except IOError as err:
        # ignore broken-pipe errors (e.g. output piped into 'head'),
        # re-raise everything else
        import errno
        if err.errno != errno.EPIPE:
            raise
|