1
0
mirror of https://github.com/mikf/gallery-dl.git synced 2024-11-22 02:32:33 +01:00

move code into util.py

This commit is contained in:
Mike Fährmann 2017-03-28 13:12:44 +02:00
parent e3212dd98f
commit 841fd50242
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88
12 changed files with 164 additions and 185 deletions

View File

@ -9,7 +9,7 @@
"""Extract manga chapters from https://bato.to/"""
from .common import Extractor, AsynchronousExtractor, Message
from .. import text, iso639_1, config, exception
from .. import text, util, config, exception
from ..cache import cache
import re
@ -160,7 +160,7 @@ class BatotoChapterExtractor(BatotoExtractor, AsynchronousExtractor):
"chapter": match.group(3),
"title": match.group(5) or "",
"group": group,
"lang": iso639_1.language_to_code(lang),
"lang": util.language_to_code(lang),
"language": lang,
"count": count,
}

View File

@ -9,7 +9,7 @@
"""Extract images from galleries at https://exhentai.org/"""
from .common import Extractor, Message
from .. import config, text, iso639_1, exception
from .. import config, text, util, exception
from ..cache import cache
import time
import random
@ -103,7 +103,7 @@ class ExhentaiGalleryExtractor(Extractor):
("size-units", '', '<'),
("count" , '>Length:</td><td class="gdt2">', ' '),
), values=data)
data["lang"] = iso639_1.language_to_code(data["language"])
data["lang"] = util.language_to_code(data["language"])
data["title"] = text.unescape(data["title"])
data["title_jp"] = text.unescape(data["title_jp"])
return data

View File

@ -9,7 +9,7 @@
"""Base classes for extractors for FoolSlide based sites"""
from .common import Extractor, Message
from .. import text, iso639_1
from .. import text, util
import json
@ -73,7 +73,7 @@ class FoolslideChapterExtractor(Extractor):
self.data["manga"] = text.unescape(manga)
self.data["title"] = title
self.data["language"] = iso639_1.code_to_language(self.data["lang"])
self.data["language"] = util.code_to_language(self.data["lang"])
self.data["chapter_string"] = chapter
return self.data

View File

@ -9,7 +9,7 @@
"""Extract images from https://hitomi.la/"""
from .common import Extractor, Message
from .. import text, iso639_1
from .. import text, util
import string
@ -68,7 +68,7 @@ class HitomiGalleryExtractor(Extractor):
"artist": string.capwords(artist),
"group": string.capwords(group),
"type": gtype.strip().capitalize(),
"lang": iso639_1.language_to_code(lang),
"lang": util.language_to_code(lang),
"language": lang,
"series": string.capwords(series),
}

View File

@ -120,9 +120,6 @@ class FapatImageExtractor(ImgytImageExtractor):
https = False
class ChronosImageExtractor(ImagehostImageExtractor):
"""Extractor for single images from chronos.to"""
category = "chronos"
@ -292,7 +289,7 @@ class ImgtrexImageExtractor(ImagehostImageExtractor):
"""Extractor for single images from imgtrex.com"""
category = "imgtrex"
pattern = [r"(?:https?://)?((?:www\.)?imgtrex\.com/([^/]+))"]
test = [("http://imgtrex.com/im0ypxq0rke4/test-テスト-&<a>.png", {
test = [("http://imgtrex.com/im0ypxq0rke4/test-&<a>.png", {
"url": "c000618bddda42bd599a590b7972c7396d19d8fe",
"keyword": "58905795a9cd3f17d5ff024fc4d63645795ba23c",
"content": "0c8768055e4e20e7c7259608b67799171b691140",

View File

@ -9,7 +9,7 @@
"""Extract images from https://luscious.net/"""
from .common import Extractor, Message
from .. import text, iso639_1
from .. import text, util
import re
@ -61,7 +61,7 @@ class LusciousAlbumExtractor(Extractor):
("section" , '>', '<'),
("language", '<p>Language:', ' '),
), values={"gallery-id": self.gid})[0]
data["lang"] = iso639_1.language_to_code(data["language"])
data["lang"] = util.language_to_code(data["language"])
data["artist"] = text.extract(data["tags"], "rtist: ", ",")[0] or ""
self.section = data["com"]
del data["com"]

View File

@ -1,54 +0,0 @@
# -*- coding: utf-8 -*-
# Copyright 2015 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Conversion between language names and ISO 639-1 codes"""
def code_to_language(code, default="English"):
    """Return the language name registered for an ISO 639-1 code

    The case of 'code' is ignored. 'default' is returned if the code is
    not a key of the module-level 'codes' table.
    """
    key = code.lower()
    try:
        return codes[key]
    except KeyError:
        return default
def language_to_code(lang, default="en"):
    """Return the ISO 639-1 code registered for a language name

    'lang' is normalized with str.capitalize() before it is compared to
    the names in the module-level 'codes' table. 'default' is returned if
    no name matches.
    """
    wanted = lang.capitalize()
    matches = (code for code, name in codes.items() if name == wanted)
    return next(matches, default)
# Table of supported languages: lower-case two-letter code -> language name.
# code_to_language() looks up the keys of this table, language_to_code()
# scans its values.
codes = {
"ar": "Arabic",
"cs": "Czech",
"da": "Danish",
"de": "German",
"el": "Greek",
"en": "English",
"es": "Spanish",
"fi": "Finnish",
"fr": "French",
"he": "Hebrew",
"hu": "Hungarian",
"id": "Indonesian",
"it": "Italian",
# NOTE(review): ISO 639-1 assigns "ja" to Japanese; "jp" looks like a
# mix-up with the country code - confirm before relying on this entry.
"jp": "Japanese",
"ko": "Korean",
"ms": "Malay",
"nl": "Dutch",
"no": "Norwegian",
"pl": "Polish",
"pt": "Portuguese",
"ro": "Romanian",
"ru": "Russian",
"sv": "Swedish",
"th": "Thai",
"tr": "Turkish",
"vi": "Vietnamese",
"zh": "Chinese",
}

View File

@ -8,7 +8,7 @@
import json
import hashlib
from . import extractor, downloader, config, util, path, output, exception
from . import extractor, downloader, config, util, output, exception
from .extractor.message import Message
@ -105,7 +105,7 @@ class DownloadJob(Job):
def __init__(self, url):
Job.__init__(self, url)
self.pathfmt = path.PathFormat(self.extractor)
self.pathfmt = util.PathFormat(self.extractor)
self.downloaders = {}
self.queue = None
self.out = output.select()

View File

@ -1,83 +0,0 @@
# -*- coding: utf-8 -*-
# Copyright 2016 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
import os
from . import config, text
class PathFormat():
    """Build the target directory and file paths for an extractor"""

    def __init__(self, extractor):
        """Read the filename and directory formats for 'extractor'

        Both formats are looked up with config.interpolate() below
        ("extractor", <category>[, <subcategory>]); the attributes of the
        extractor itself serve as defaults.
        """
        base = ["extractor", extractor.category]
        if extractor.subcategory:
            base.append(extractor.subcategory)
        self.filename_fmt = config.interpolate(
            base + ["filename"], default=extractor.filename_fmt)
        self.directory_fmt = config.interpolate(
            base + ["directory"], default=extractor.directory_fmt)
        self.has_extension = False
        self.keywords = {}
        self.directory = self.realdirectory = ""
        self.path = self.realpath = ""

    def open(self):
        """Open the file at 'realpath' for binary writing and return it"""
        return open(self.realpath, "wb")

    def exists(self):
        """Return True if an extension is set and 'realpath' exists"""
        if not self.has_extension:
            return False
        return os.path.exists(self.realpath)

    def set_directory(self, keywords):
        """Build the directory path from 'keywords' and create it"""
        parts = []
        for fmt in self.directory_fmt:
            parts.append(text.clean_path(fmt.format_map(keywords).strip()))
        self.directory = os.path.join(self.get_base_directory(), *parts)
        self.realdirectory = self.adjust_path(self.directory)
        os.makedirs(self.realdirectory, exist_ok=True)

    def set_keywords(self, keywords):
        """Store the filename keywords

        The full path is built right away if they contain a non-empty
        'extension' value.
        """
        self.keywords = keywords
        if keywords.get("extension"):
            self.has_extension = True
            self.build_path()
        else:
            self.has_extension = False

    def set_extension(self, extension):
        """Store 'extension' in the keywords and rebuild the full path"""
        self.has_extension = True
        self.keywords["extension"] = extension
        self.build_path()

    def build_path(self, sep=os.path.sep):
        """Combine the directory and the formatted filename into paths"""
        name = text.clean_path(self.filename_fmt.format_map(self.keywords))
        self.path = sep.join((self.directory, name))
        self.realpath = sep.join((self.realdirectory, name))

    @staticmethod
    def get_base_directory():
        """Return the configured base directory for downloads

        A non-string config value is joined with os.path.join(); environment
        variables and a leading '~' are expanded afterwards.
        """
        base = config.get(("base-directory",), default=(".", "gallery-dl"))
        if isinstance(base, str):
            joined = base
        else:
            joined = os.path.join(*base)
        return os.path.expanduser(os.path.expandvars(joined))

    @staticmethod
    def adjust_path(path):
        """Enable longer-than-260-character paths on Windows

        On other systems 'path' is returned unchanged.
        """
        if os.name != "nt":
            return path
        return "\\\\?\\" + os.path.abspath(path)

View File

@ -6,10 +6,11 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Utility functions"""
"""Utility functions and classes"""
import os
import sys
from . import exception
from . import config, text, exception
def parse_range(rangespec):
@ -62,6 +63,51 @@ def optimize_range(ranges):
return result
def code_to_language(code, default="English"):
    """Map an ISO 639-1 language code to its actual name

    The lookup is case-insensitive. The non-standard code "jp", which
    earlier versions of this table used for Japanese, is still accepted.
    'default' is returned for unknown codes and for empty or non-string
    values such as None.
    """
    if not code or not isinstance(code, str):
        return default
    code = code.lower()
    return codes.get(_legacy_codes.get(code, code), default)


def language_to_code(lang, default="en"):
    """Map a language name to its ISO 639-1 code

    The lookup is case-insensitive. 'default' is returned for unknown
    names and for empty or non-string values such as None.
    """
    if not lang or not isinstance(lang, str):
        return default
    lang = lang.capitalize()
    for code, language in codes.items():
        if language == lang:
            return code
    return default


# ISO 639-1 code -> language name
codes = {
    "ar": "Arabic",
    "cs": "Czech",
    "da": "Danish",
    "de": "German",
    "el": "Greek",
    "en": "English",
    "es": "Spanish",
    "fi": "Finnish",
    "fr": "French",
    "he": "Hebrew",
    "hu": "Hungarian",
    "id": "Indonesian",
    "it": "Italian",
    "ja": "Japanese",  # ISO 639-1 assigns "ja"; "jp" is not a language code
    "ko": "Korean",
    "ms": "Malay",
    "nl": "Dutch",
    "no": "Norwegian",
    "pl": "Polish",
    "pt": "Portuguese",
    "ro": "Romanian",
    "ru": "Russian",
    "sv": "Swedish",
    "th": "Thai",
    "tr": "Turkish",
    "vi": "Vietnamese",
    "zh": "Chinese",
}

# Non-standard codes that code_to_language() keeps accepting, mapped to
# their ISO 639-1 equivalent
_legacy_codes = {
    "jp": "ja",
}
class RangePredicate():
"""Predicate; is True if the current index is in the given range"""
def __init__(self, rangespec):
@ -82,3 +128,76 @@ class RangePredicate():
if lower <= self.index <= upper:
return True
return False
class PathFormat():
    """Build the target directory and file paths for an extractor"""

    def __init__(self, extractor):
        """Read the filename and directory formats for 'extractor'

        Both formats are looked up with config.interpolate() below
        ("extractor", <category>[, <subcategory>]); the attributes of the
        extractor itself serve as defaults.
        """
        base = ["extractor", extractor.category]
        if extractor.subcategory:
            base.append(extractor.subcategory)
        self.filename_fmt = config.interpolate(
            base + ["filename"], default=extractor.filename_fmt)
        self.directory_fmt = config.interpolate(
            base + ["directory"], default=extractor.directory_fmt)
        self.has_extension = False
        self.keywords = {}
        self.directory = self.realdirectory = ""
        self.path = self.realpath = ""

    def open(self):
        """Open the file at 'realpath' for binary writing and return it"""
        return open(self.realpath, "wb")

    def exists(self):
        """Return True if an extension is set and 'realpath' exists"""
        if not self.has_extension:
            return False
        return os.path.exists(self.realpath)

    def set_directory(self, keywords):
        """Build the directory path from 'keywords' and create it"""
        parts = []
        for fmt in self.directory_fmt:
            parts.append(text.clean_path(fmt.format_map(keywords).strip()))
        self.directory = os.path.join(self.get_base_directory(), *parts)
        self.realdirectory = self.adjust_path(self.directory)
        os.makedirs(self.realdirectory, exist_ok=True)

    def set_keywords(self, keywords):
        """Store the filename keywords

        The full path is built right away if they contain a non-empty
        'extension' value.
        """
        self.keywords = keywords
        if keywords.get("extension"):
            self.has_extension = True
            self.build_path()
        else:
            self.has_extension = False

    def set_extension(self, extension):
        """Store 'extension' in the keywords and rebuild the full path"""
        self.has_extension = True
        self.keywords["extension"] = extension
        self.build_path()

    def build_path(self, sep=os.path.sep):
        """Combine the directory and the formatted filename into paths"""
        name = text.clean_path(self.filename_fmt.format_map(self.keywords))
        self.path = sep.join((self.directory, name))
        self.realpath = sep.join((self.realdirectory, name))

    @staticmethod
    def get_base_directory():
        """Return the configured base directory for downloads

        A non-string config value is joined with os.path.join(); environment
        variables and a leading '~' are expanded afterwards.
        """
        base = config.get(("base-directory",), default=(".", "gallery-dl"))
        if isinstance(base, str):
            joined = base
        else:
            joined = os.path.join(*base)
        return os.path.expanduser(os.path.expandvars(joined))

    @staticmethod
    def adjust_path(path):
        """Enable longer-than-260-character paths on Windows

        On other systems 'path' is returned unchanged.
        """
        if os.name != "nt":
            return path
        return "\\\\?\\" + os.path.abspath(path)

View File

@ -1,30 +0,0 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Copyright 2015 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
import unittest
import gallery_dl.iso639_1 as iso639_1
class TestISO639_1(unittest.TestCase):
    """Tests for the language name <-> ISO 639-1 code conversion"""

    def test_code_to_language(self):
        """Codes are matched case-insensitively, with a fallback value"""
        convert = iso639_1.code_to_language
        expectations = (
            ("en", "English"),
            ("FR", "French"),
            ("xx", "English"),
        )
        for code, name in expectations:
            self.assertEqual(convert(code), name)
        self.assertEqual(convert("xx", default=None), None)

    def test_language_to_code(self):
        """Names are matched case-insensitively, with a fallback value"""
        convert = iso639_1.language_to_code
        expectations = (
            ("English", "en"),
            ("fRENch", "fr"),
            ("xx", "en"),
        )
        for name, code in expectations:
            self.assertEqual(convert(name), code)
        self.assertEqual(convert("xx", default=None), None)


# Run the test suite when this file is executed as a script
if __name__ == '__main__':
    unittest.main()

30
test/test_util.py Normal file
View File

@ -0,0 +1,30 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Copyright 2015 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
import unittest
import gallery_dl.util as util
class TestISO639_1(unittest.TestCase):
    """Tests for the language name <-> ISO 639-1 code conversion"""

    def test_code_to_language(self):
        """Codes are matched case-insensitively, with a fallback value"""
        convert = util.code_to_language
        expectations = (
            ("en", "English"),
            ("FR", "French"),
            ("xx", "English"),
        )
        for code, name in expectations:
            self.assertEqual(convert(code), name)
        self.assertEqual(convert("xx", default=None), None)

    def test_language_to_code(self):
        """Names are matched case-insensitively, with a fallback value"""
        convert = util.language_to_code
        expectations = (
            ("English", "en"),
            ("fRENch", "fr"),
            ("xx", "en"),
        )
        for name, code in expectations:
            self.assertEqual(convert(name), code)
        self.assertEqual(convert("xx", default=None), None)


# Run the test suite when this file is executed as a script
if __name__ == '__main__':
    unittest.main()