1
0
mirror of https://github.com/mikf/gallery-dl.git synced 2024-11-22 10:42:34 +01:00

rename safe_int to parse_int; move parse_* to text module

This commit is contained in:
Mike Fährmann 2018-04-20 14:53:21 +02:00
parent ff643793bd
commit cc36f88586
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88
37 changed files with 210 additions and 189 deletions

View File

@ -12,7 +12,7 @@ import time
import mimetypes import mimetypes
from requests.exceptions import ConnectionError, Timeout from requests.exceptions import ConnectionError, Timeout
from .common import DownloaderBase from .common import DownloaderBase
from .. import util, exception from .. import text, exception
class Downloader(DownloaderBase): class Downloader(DownloaderBase):
@ -28,7 +28,7 @@ class Downloader(DownloaderBase):
self.chunk_size = 16384 self.chunk_size = 16384
if self.rate: if self.rate:
self.rate = util.parse_bytes(self.rate) self.rate = text.parse_bytes(self.rate)
if not self.rate: if not self.rate:
self.log.warning("Invalid rate limit specified") self.log.warning("Invalid rate limit specified")
elif self.rate < self.chunk_size: elif self.rate < self.chunk_size:
@ -61,7 +61,7 @@ class Downloader(DownloaderBase):
else: else:
self.response.raise_for_status() self.response.raise_for_status()
return offset, util.safe_int(size) return offset, text.parse_int(size)
def receive(self, file): def receive(self, file):
if self.rate: if self.rate:

View File

@ -158,7 +158,7 @@ class ArtstationAlbumExtractor(ArtstationExtractor):
def __init__(self, match): def __init__(self, match):
ArtstationExtractor.__init__(self, match) ArtstationExtractor.__init__(self, match)
self.album_id = util.safe_int(match.group(2)) self.album_id = text.parse_int(match.group(2))
def metadata(self): def metadata(self):
userinfo = self.get_user_info(self.user) userinfo = self.get_user_info(self.user)
@ -256,7 +256,7 @@ class ArtstationChallengeExtractor(ArtstationExtractor):
def _id_from_url(url): def _id_from_url(url):
"""Get an image's submission ID from its URL""" """Get an image's submission ID from its URL"""
parts = url.split("/") parts = url.split("/")
return util.safe_int("".join(parts[7:10])) return text.parse_int("".join(parts[7:10]))
class ArtstationSearchExtractor(ArtstationExtractor): class ArtstationSearchExtractor(ArtstationExtractor):

View File

@ -9,7 +9,7 @@
"""Extract images from https://www.deviantart.com/""" """Extract images from https://www.deviantart.com/"""
from .common import Extractor, Message from .common import Extractor, Message
from .. import text, util, exception from .. import text, exception
from ..cache import cache, memcache from ..cache import cache, memcache
import itertools import itertools
import datetime import datetime
@ -62,7 +62,7 @@ class DeviantartExtractor(Extractor):
if "videos" in deviation: if "videos" in deviation:
video = max(deviation["videos"], video = max(deviation["videos"],
key=lambda x: util.safe_int(x["quality"][:-1])) key=lambda x: text.parse_int(x["quality"][:-1]))
yield self.commit(deviation, video) yield self.commit(deviation, video)
if "flash" in deviation: if "flash" in deviation:

View File

@ -9,7 +9,7 @@
"""Extract manga-chapters from https://dynasty-scans.com/""" """Extract manga-chapters from https://dynasty-scans.com/"""
from .common import ChapterExtractor from .common import ChapterExtractor
from .. import text, util from .. import text
import re import re
import json import json
@ -53,7 +53,7 @@ class DynastyscansChapterExtractor(ChapterExtractor):
return { return {
"manga": text.unescape(match.group(1)), "manga": text.unescape(match.group(1)),
"chapter": util.safe_int(match.group(2)), "chapter": text.parse_int(match.group(2)),
"chapter_minor": match.group(3) or "", "chapter_minor": match.group(3) or "",
"title": text.unescape(match.group(4) or ""), "title": text.unescape(match.group(4) or ""),
"author": text.remove_html(author), "author": text.remove_html(author),

View File

@ -120,7 +120,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
self.key = {} self.key = {}
self.count = 0 self.count = 0
self.version, self.gid, self.token = match.groups() self.version, self.gid, self.token = match.groups()
self.gid = util.safe_int(self.gid) self.gid = text.parse_int(self.gid)
def items(self): def items(self):
self.login() self.login()
@ -163,7 +163,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
data["lang"] = util.language_to_code(data["language"]) data["lang"] = util.language_to_code(data["language"])
data["title"] = text.unescape(data["title"]) data["title"] = text.unescape(data["title"])
data["title_jp"] = text.unescape(data["title_jp"]) data["title_jp"] = text.unescape(data["title_jp"])
data["count"] = util.safe_int(data["count"]) data["count"] = text.parse_int(data["count"])
data["gallery_size"] = util.parse_bytes( data["gallery_size"] = util.parse_bytes(
data["gallery_size"].rstrip("Bb")) data["gallery_size"].rstrip("Bb"))
return data return data
@ -245,17 +245,17 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
def _parse_image_info(url): def _parse_image_info(url):
parts = url.split("/")[4].split("-") parts = url.split("/")[4].split("-")
return { return {
"width": util.safe_int(parts[2]), "width": text.parse_int(parts[2]),
"height": util.safe_int(parts[3]), "height": text.parse_int(parts[3]),
"size": util.safe_int(parts[1]), "size": text.parse_int(parts[1]),
} }
@staticmethod @staticmethod
def _parse_original_info(info): def _parse_original_info(info):
parts = info.lstrip().split(" ") parts = info.lstrip().split(" ")
return { return {
"width": util.safe_int(parts[0]), "width": text.parse_int(parts[0]),
"height": util.safe_int(parts[2]), "height": text.parse_int(parts[2]),
"size": util.parse_bytes(parts[3] + parts[4][0]), "size": util.parse_bytes(parts[3] + parts[4][0]),
} }
@ -274,7 +274,7 @@ class ExhentaiSearchExtractor(ExhentaiExtractor):
def __init__(self, match): def __init__(self, match):
ExhentaiExtractor.__init__(self) ExhentaiExtractor.__init__(self)
self.params = text.parse_query(match.group(1) or "") self.params = text.parse_query(match.group(1) or "")
self.params["page"] = util.safe_int(self.params.get("page")) self.params["page"] = text.parse_int(self.params.get("page"))
self.url = self.root self.url = self.root
def items(self): def items(self):
@ -308,7 +308,7 @@ class ExhentaiSearchExtractor(ExhentaiExtractor):
return Message.Queue, url, { return Message.Queue, url, {
"type": gtype, "type": gtype,
"date": date, "date": date,
"gallery_id": util.safe_int(parts[1]), "gallery_id": text.parse_int(parts[1]),
"gallery_token": parts[2], "gallery_token": parts[2],
"title": text.unescape(title), "title": text.unescape(title),
key: last, key: last,

View File

@ -98,8 +98,8 @@ class FallenangelsMangaExtractor(MangaExtractor):
chapter, dot, minor = chapter.partition(".") chapter, dot, minor = chapter.partition(".")
results.append((url, { results.append((url, {
"manga": manga, "title": title, "manga": manga, "title": title,
"volume": util.safe_int(volume), "volume": text.parse_int(volume),
"chapter": util.safe_int(chapter), "chapter": text.parse_int(chapter),
"chapter_minor": dot + minor, "chapter_minor": dot + minor,
"lang": self.lang, "language": language, "lang": self.lang, "language": language,
})) }))

View File

@ -50,8 +50,8 @@ class FoolslideExtractor(SharedConfigExtractor):
lang = info[1].partition("-")[0] lang = info[1].partition("-")[0]
data["lang"] = lang data["lang"] = lang
data["language"] = util.code_to_language(lang) data["language"] = util.code_to_language(lang)
data["volume"] = util.safe_int(info[2]) data["volume"] = text.parse_int(info[2])
data["chapter"] = util.safe_int(info[3]) data["chapter"] = text.parse_int(info[3])
data["chapter_minor"] = "." + info[4] if len(info) >= 5 else "" data["chapter_minor"] = "." + info[4] if len(info) >= 5 else ""
return data return data
@ -75,7 +75,7 @@ class FoolslideChapterExtractor(FoolslideExtractor):
imgs = self.get_images(page) imgs = self.get_images(page)
data["count"] = len(imgs) data["count"] = len(imgs)
data["chapter_id"] = util.safe_int(imgs[0]["chapter_id"]) data["chapter_id"] = text.parse_int(imgs[0]["chapter_id"])
yield Message.Version, 1 yield Message.Version, 1
yield Message.Directory, data yield Message.Directory, data
@ -88,7 +88,7 @@ class FoolslideChapterExtractor(FoolslideExtractor):
except KeyError: except KeyError:
pass pass
for key in ("height", "id", "size", "width"): for key in ("height", "id", "size", "width"):
image[key] = util.safe_int(image[key]) image[key] = text.parse_int(image[key])
data.update(image) data.update(image)
text.nameext_from_url(data["filename"], data) text.nameext_from_url(data["filename"], data)
yield Message.Url, url, data yield Message.Url, url, data

View File

@ -37,7 +37,7 @@ class GelbooruExtractor(SharedConfigExtractor):
if isinstance(post, str): if isinstance(post, str):
post = self.get_post_data(post) post = self.get_post_data(post)
for key in ("id", "width", "height", "score", "change"): for key in ("id", "width", "height", "score", "change"):
post[key] = util.safe_int(post[key]) post[key] = text.parse_int(post[key])
url = post["file_url"] url = post["file_url"]
post.update(data) post.update(data)
yield Message.Url, url, text.nameext_from_url(url, post) yield Message.Url, url, text.nameext_from_url(url, post)
@ -174,7 +174,7 @@ class GelbooruPoolExtractor(GelbooruExtractor):
raise exception.NotFoundError("pool") raise exception.NotFoundError("pool")
return { return {
"pool": util.safe_int(self.pool_id), "pool": text.parse_int(self.pool_id),
"pool_name": text.unescape(name), "pool_name": text.unescape(name),
"count": len(self.posts), "count": len(self.posts),
} }

View File

@ -9,7 +9,7 @@
"""Extract images from http://www.hbrowse.com/""" """Extract images from http://www.hbrowse.com/"""
from .common import ChapterExtractor, MangaExtractor from .common import ChapterExtractor, MangaExtractor
from .. import text, util from .. import text
from urllib.parse import urljoin from urllib.parse import urljoin
import json import json
@ -30,7 +30,7 @@ class HbrowseExtractor():
), values=data) ), values=data)
data["manga"] = text.unescape(data["manga"]) data["manga"] = text.unescape(data["manga"])
data["total"] = util.safe_int(data["total"]) data["total"] = text.parse_int(data["total"])
data["artist"] = text.remove_html(data["artist"]) data["artist"] = text.remove_html(data["artist"])
data["origin"] = text.remove_html(data["origin"]) data["origin"] = text.remove_html(data["origin"])
return data return data
@ -48,7 +48,7 @@ class HbrowseMangaExtractor(HbrowseExtractor, MangaExtractor):
def chapters(self, page): def chapters(self, page):
results = [] results = []
data = self.parse_page(page, { data = self.parse_page(page, {
"manga_id": util.safe_int( "manga_id": text.parse_int(
self.url.rstrip("/").rpartition("/")[2]) self.url.rstrip("/").rpartition("/")[2])
}) })
@ -59,7 +59,7 @@ class HbrowseMangaExtractor(HbrowseExtractor, MangaExtractor):
if not url: if not url:
return results return results
title, pos = text.extract(page, '>View ', '<', pos) title, pos = text.extract(page, '>View ', '<', pos)
data["chapter"] = util.safe_int(url.rpartition("/")[2][1:]) data["chapter"] = text.parse_int(url.rpartition("/")[2][1:])
data["title"] = title data["title"] = title
results.append((urljoin(self.root, url), data.copy())) results.append((urljoin(self.root, url), data.copy()))
@ -84,8 +84,8 @@ class HbrowseChapterExtractor(HbrowseExtractor, ChapterExtractor):
def get_metadata(self, page): def get_metadata(self, page):
return self.parse_page(page, { return self.parse_page(page, {
"manga_id": util.safe_int(self.gid), "manga_id": text.parse_int(self.gid),
"chapter": util.safe_int(self.chapter) "chapter": text.parse_int(self.chapter)
}) })
def get_images(self, page): def get_images(self, page):

View File

@ -9,7 +9,7 @@
"""Extract hentai-manga from https://hentai2read.com/""" """Extract hentai-manga from https://hentai2read.com/"""
from .common import ChapterExtractor, MangaExtractor from .common import ChapterExtractor, MangaExtractor
from .. import text, util from .. import text
import re import re
import json import json
@ -36,7 +36,8 @@ class Hentai2readMangaExtractor(MangaExtractor):
page, '<span itemprop="name">', '</span>') page, '<span itemprop="name">', '</span>')
mtype, pos = text.extract( mtype, pos = text.extract(
page, '<small class="text-danger">[', ']</small>', pos) page, '<small class="text-danger">[', ']</small>', pos)
manga_id = util.safe_int(text.extract(page, 'data-mid="', '"', pos)[0]) manga_id = text.parse_int(text.extract(
page, 'data-mid="', '"', pos)[0])
while True: while True:
chapter_id, pos = text.extract(page, ' data-cid="', '"', pos) chapter_id, pos = text.extract(page, ' data-cid="', '"', pos)
@ -49,8 +50,8 @@ class Hentai2readMangaExtractor(MangaExtractor):
chapter, _, title = text.unescape(chapter).strip().partition(" - ") chapter, _, title = text.unescape(chapter).strip().partition(" - ")
results.append((url, { results.append((url, {
"manga_id": manga_id, "manga": manga, "type": mtype, "manga_id": manga_id, "manga": manga, "type": mtype,
"chapter_id": util.safe_int(chapter_id), "chapter_id": text.parse_int(chapter_id),
"chapter": util.safe_int(chapter), "chapter": text.parse_int(chapter),
"title": title, "lang": "en", "language": "English", "title": title, "lang": "en", "language": "English",
})) }))
@ -78,9 +79,9 @@ class Hentai2readChapterExtractor(ChapterExtractor):
r"(\d+): (.+) . Page 1 ", title) r"(\d+): (.+) . Page 1 ", title)
return { return {
"manga": match.group(1), "manga": match.group(1),
"manga_id": util.safe_int(manga_id), "manga_id": text.parse_int(manga_id),
"chapter": util.safe_int(self.chapter), "chapter": text.parse_int(self.chapter),
"chapter_id": util.safe_int(chapter_id), "chapter_id": text.parse_int(chapter_id),
"type": match.group(2), "type": match.group(2),
"author": match.group(3), "author": match.group(3),
"title": match.group(5), "title": match.group(5),

View File

@ -9,7 +9,7 @@
"""Extract images from https://www.hentai-foundry.com/""" """Extract images from https://www.hentai-foundry.com/"""
from .common import Extractor, Message from .common import Extractor, Message
from .. import text, util, exception from .. import text, exception
class HentaifoundryUserExtractor(Extractor): class HentaifoundryUserExtractor(Extractor):
@ -69,7 +69,7 @@ class HentaifoundryUserExtractor(Extractor):
page = response.text page = response.text
token, pos = text.extract(page, 'hidden" value="', '"') token, pos = text.extract(page, 'hidden" value="', '"')
count, pos = text.extract(page, 'class="active" >Pictures (', ')', pos) count, pos = text.extract(page, 'class="active" >Pictures (', ')', pos)
return {"artist": self.artist, "count": util.safe_int(count)}, token return {"artist": self.artist, "count": text.parse_int(count)}, token
def get_image_metadata(self, url): def get_image_metadata(self, url):
"""Collect metadata for an image""" """Collect metadata for an image"""
@ -79,7 +79,7 @@ class HentaifoundryUserExtractor(Extractor):
page, 'Pictures</a> &raquo; <span>', '<') page, 'Pictures</a> &raquo; <span>', '<')
part, pos = text.extract( part, pos = text.extract(
page, '//pictures.hentai-foundry.com', '"', pos) page, '//pictures.hentai-foundry.com', '"', pos)
data = {"index": util.safe_int(index), "title": text.unescape(title)} data = {"index": text.parse_int(index), "title": text.unescape(title)}
text.nameext_from_url(part, data) text.nameext_from_url(part, data)
return "https://pictures.hentai-foundry.com" + part, data return "https://pictures.hentai-foundry.com" + part, data
@ -161,7 +161,7 @@ class HentaifoundryImageExtractor(Extractor):
url , pos = extr(page, '//pictures.hentai-foundry.com', '"', pos) url , pos = extr(page, '//pictures.hentai-foundry.com', '"', pos)
data = { data = {
"artist": artist, "artist": artist,
"index": util.safe_int(self.index), "index": text.parse_int(self.index),
"title": text.unescape(title), "title": text.unescape(title),
} }
text.nameext_from_url(url, data) text.nameext_from_url(url, data)

View File

@ -9,7 +9,7 @@
"""Extract hentai-manga from https://hentaihere.com/""" """Extract hentai-manga from https://hentaihere.com/"""
from .common import ChapterExtractor, MangaExtractor from .common import ChapterExtractor, MangaExtractor
from .. import text, util from .. import text
import re import re
import json import json
@ -32,7 +32,7 @@ class HentaihereMangaExtractor(MangaExtractor):
def chapters(self, page): def chapters(self, page):
results = [] results = []
manga_id = util.safe_int( manga_id = text.parse_int(
self.url.rstrip("/").rpartition("/")[2][1:]) self.url.rstrip("/").rpartition("/")[2][1:])
manga, pos = text.extract( manga, pos = text.extract(
page, '<span itemprop="name">', '</span>') page, '<span itemprop="name">', '</span>')
@ -50,8 +50,8 @@ class HentaihereMangaExtractor(MangaExtractor):
chapter, _, title = text.unescape(chapter).strip().partition(" - ") chapter, _, title = text.unescape(chapter).strip().partition(" - ")
results.append((url, { results.append((url, {
"manga_id": manga_id, "manga": manga, "type": mtype, "manga_id": manga_id, "manga": manga, "type": mtype,
"chapter_id": util.safe_int(chapter_id), "chapter_id": text.parse_int(chapter_id),
"chapter": util.safe_int(chapter), "chapter": text.parse_int(chapter),
"title": title, "lang": "en", "language": "English", "title": title, "lang": "en", "language": "English",
})) }))
@ -79,9 +79,9 @@ class HentaihereChapterExtractor(ChapterExtractor):
match = re.match(pattern, title) match = re.match(pattern, title)
return { return {
"manga": match.group(1), "manga": match.group(1),
"manga_id": util.safe_int(self.manga_id), "manga_id": text.parse_int(self.manga_id),
"chapter": util.safe_int(self.chapter), "chapter": text.parse_int(self.chapter),
"chapter_id": util.safe_int(chapter_id), "chapter_id": text.parse_int(chapter_id),
"type": match.group(2), "type": match.group(2),
"title": match.group(3), "title": match.group(3),
"author": match.group(4), "author": match.group(4),

View File

@ -30,7 +30,7 @@ class HitomiGalleryExtractor(ChapterExtractor):
] ]
def __init__(self, match): def __init__(self, match):
self.gid = util.safe_int(match.group(1)) self.gid = text.parse_int(match.group(1))
url = "https://hitomi.la/galleries/{}.html".format(self.gid) url = "https://hitomi.la/galleries/{}.html".format(self.gid)
ChapterExtractor.__init__(self, url) ChapterExtractor.__init__(self, url)

View File

@ -9,7 +9,7 @@
"""Extract images from http://imagefap.com/""" """Extract images from http://imagefap.com/"""
from .common import Extractor, Message from .common import Extractor, Message
from .. import text, util from .. import text
import json import json
@ -159,7 +159,7 @@ class ImagefapUserExtractor(ImagefapExtractor):
yield Message.Version, 1 yield Message.Version, 1
for gid, name in self.get_gallery_data(): for gid, name in self.get_gallery_data():
url = "http://www.imagefap.com/gallery/" + gid url = "http://www.imagefap.com/gallery/" + gid
data = {"gallery_id": util.safe_int(gid), "title": name} data = {"gallery_id": text.parse_int(gid), "title": name}
yield Message.Queue, url, data yield Message.Queue, url, data
def get_gallery_data(self): def get_gallery_data(self):

View File

@ -9,7 +9,7 @@
"""Extract manga-chapters and entire manga from http://kissmanga.com/""" """Extract manga-chapters and entire manga from http://kissmanga.com/"""
from .common import ChapterExtractor, MangaExtractor from .common import ChapterExtractor, MangaExtractor
from .. import text, util, cloudflare, aes, exception from .. import text, cloudflare, aes, exception
from ..cache import cache from ..cache import cache
import re import re
import hashlib import hashlib
@ -56,8 +56,8 @@ class KissmangaBase():
), data["chapter_string"]) ), data["chapter_string"])
volume, chapter, minor, title = match.groups() volume, chapter, minor, title = match.groups()
data["volume"] = util.safe_int(volume) data["volume"] = text.parse_int(volume)
data["chapter"] = util.safe_int(chapter) data["chapter"] = text.parse_int(chapter)
data["chapter_minor"] = "." + minor if minor else "" data["chapter_minor"] = "." + minor if minor else ""
data["title"] = title if title and title != "Read Online" else "" data["title"] = title if title and title != "Read Online" else ""
return data return data
@ -89,7 +89,7 @@ class KissmangaMangaExtractor(KissmangaBase, MangaExtractor):
url, _, chapter = item.partition(needle) url, _, chapter = item.partition(needle)
data = { data = {
"manga": manga, "chapter_string": chapter, "manga": manga, "chapter_string": chapter,
"chapter_id": util.safe_int(url.rpartition("=")[2]), "chapter_id": text.parse_int(url.rpartition("=")[2]),
"lang": "en", "language": "English", "lang": "en", "language": "English",
} }
self.parse_chapter_string(data) self.parse_chapter_string(data)
@ -128,7 +128,7 @@ class KissmangaChapterExtractor(KissmangaBase, ChapterExtractor):
data = { data = {
"manga": manga.strip(), "manga": manga.strip(),
"chapter_string": cinfo.strip(), "chapter_string": cinfo.strip(),
"chapter_id": util.safe_int(self.chapter_id), "chapter_id": text.parse_int(self.chapter_id),
"lang": "en", "lang": "en",
"language": "English", "language": "English",
} }

View File

@ -9,7 +9,7 @@
"""Extract manga-chapters and entire manga from https://komikcast.com/""" """Extract manga-chapters and entire manga from https://komikcast.com/"""
from .common import ChapterExtractor, MangaExtractor from .common import ChapterExtractor, MangaExtractor
from .. import text, util, cloudflare from .. import text, cloudflare
import re import re
@ -39,7 +39,7 @@ class KomikcastBase():
data["title"] = title.strip() data["title"] = title.strip()
else: else:
data["title"] = "" data["title"] = ""
data["chapter"] = util.safe_int(chapter) data["chapter"] = text.parse_int(chapter)
data["lang"] = "id" data["lang"] = "id"
data["language"] = "Indonesian" data["language"] = "Indonesian"
@ -75,8 +75,8 @@ class KomikcastChapterExtractor(KomikcastBase, ChapterExtractor):
page, '<div id="readerarea">', '<div class="navig">')[0] page, '<div id="readerarea">', '<div class="navig">')[0]
return [ return [
(url, { (url, {
"width": util.safe_int(width), "width": text.parse_int(width),
"height": util.safe_int(height), "height": text.parse_int(height),
}) })
for url, width, height in re.findall( for url, width, height in re.findall(
r"<img[^>]*? src=[\"']([^\"']+)[\"']" r"<img[^>]*? src=[\"']([^\"']+)[\"']"

View File

@ -65,11 +65,11 @@ class MangadexChapterExtractor(MangadexExtractor, ChapterExtractor):
return { return {
"manga": match.group(5), "manga": match.group(5),
"manga_id": util.safe_int(manga_id), "manga_id": text.parse_int(manga_id),
"volume": util.safe_int(match.group(1)), "volume": text.parse_int(match.group(1)),
"chapter": util.safe_int(match.group(2)), "chapter": text.parse_int(match.group(2)),
"chapter_minor": match.group(3) or "", "chapter_minor": match.group(3) or "",
"chapter_id": util.safe_int(self.chapter_id), "chapter_id": text.parse_int(self.chapter_id),
"chapter_string": info.replace(" - MangaDex", ""), "chapter_string": info.replace(" - MangaDex", ""),
"group": text.unescape(group), "group": text.unescape(group),
"lang": util.language_to_code(language), "lang": util.language_to_code(language),
@ -124,7 +124,7 @@ class MangadexMangaExtractor(MangadexExtractor, MangaExtractor):
manga = text.unescape(extr( manga = text.unescape(extr(
page, '"og:title" content="', '"')[0].rpartition(" (")[0]) page, '"og:title" content="', '"')[0].rpartition(" (")[0])
manga_id = util.safe_int(extr( manga_id = text.parse_int(extr(
page, '/images/manga/', '.')[0]) page, '/images/manga/', '.')[0])
while True: while True:
@ -145,15 +145,15 @@ class MangadexMangaExtractor(MangadexExtractor, MangaExtractor):
results.append((self.root + "/chapter/" + chid, { results.append((self.root + "/chapter/" + chid, {
"manga": manga, "manga": manga,
"manga_id": util.safe_int(manga_id), "manga_id": text.parse_int(manga_id),
"title": text.unescape(title), "title": text.unescape(title),
"volume": util.safe_int(volume), "volume": text.parse_int(volume),
"chapter": util.safe_int(chapter), "chapter": text.parse_int(chapter),
"chapter_minor": sep + minor, "chapter_minor": sep + minor,
"chapter_id": util.safe_int(chid), "chapter_id": text.parse_int(chid),
"group": text.unescape(text.remove_html(group)), "group": text.unescape(text.remove_html(group)),
"contributor": text.remove_html(user), "contributor": text.remove_html(user),
"views": util.safe_int(views), "views": text.parse_int(views),
"date": date, "date": date,
"lang": util.language_to_code(language), "lang": util.language_to_code(language),
"language": language, "language": language,

View File

@ -9,7 +9,7 @@
"""Extract manga-chapters and entire manga from http://fanfox.net/""" """Extract manga-chapters and entire manga from http://fanfox.net/"""
from .common import ChapterExtractor from .common import ChapterExtractor
from .. import text, util, exception from .. import text, exception
import re import re
@ -47,7 +47,7 @@ class MangafoxChapterExtractor(ChapterExtractor):
data["chapter_minor"] = match.group(4) or "" data["chapter_minor"] = match.group(4) or ""
data["manga"] = data["manga"].rpartition(" ")[0] data["manga"] = data["manga"].rpartition(" ")[0]
for key in ("sid", "cid", "count", "volume", "chapter"): for key in ("sid", "cid", "count", "volume", "chapter"):
data[key] = util.safe_int(data[key]) data[key] = text.parse_int(data[key])
return data return data
def get_images(self, page): def get_images(self, page):

View File

@ -9,7 +9,7 @@
"""Extract manga-chapters and entire manga from http://www.mangahere.co/""" """Extract manga-chapters and entire manga from http://www.mangahere.co/"""
from .common import ChapterExtractor, MangaExtractor from .common import ChapterExtractor, MangaExtractor
from .. import text, util from .. import text
from urllib.parse import urljoin from urllib.parse import urljoin
import re import re
@ -53,8 +53,8 @@ class MangahereMangaExtractor(MangaExtractor):
date, pos = text.extract(page, 'class="right">', '</span>', pos) date, pos = text.extract(page, 'class="right">', '</span>', pos)
results.append((urljoin("http:", url), { results.append((urljoin("http:", url), {
"manga": manga, "title": title, "date": date, "manga": manga, "title": title, "date": date,
"volume": util.safe_int(volume.rpartition(" ")[2]), "volume": text.parse_int(volume.rpartition(" ")[2]),
"chapter": util.safe_int(chapter), "chapter": text.parse_int(chapter),
"chapter_minor": dot + minor, "chapter_minor": dot + minor,
"lang": "en", "language": "English", "lang": "en", "language": "English",
})) }))
@ -93,11 +93,11 @@ class MangahereChapterExtractor(ChapterExtractor):
return { return {
"manga": text.unescape(manga), "manga": text.unescape(manga),
# "title": TODO, # "title": TODO,
"volume": util.safe_int(self.volume), "volume": text.parse_int(self.volume),
"chapter": util.safe_int(self.chapter), "chapter": text.parse_int(self.chapter),
"chapter_minor": self.chminor or "", "chapter_minor": self.chminor or "",
"chapter_id": util.safe_int(chid), "chapter_id": text.parse_int(chid),
"count": util.safe_int(count), "count": text.parse_int(count),
"lang": "en", "lang": "en",
"language": "English", "language": "English",
} }

View File

@ -9,7 +9,7 @@
"""Extract manga-chapters and entire manga from https://mangapark.me/""" """Extract manga-chapters and entire manga from https://mangapark.me/"""
from .common import ChapterExtractor, MangaExtractor from .common import ChapterExtractor, MangaExtractor
from .. import text, util from .. import text
from urllib.parse import urljoin from urllib.parse import urljoin
@ -25,12 +25,12 @@ class MangaparkExtractor():
for part in path.split("/")[3:]: for part in path.split("/")[3:]:
key, value = part[0], part[1:] key, value = part[0], part[1:]
if key == "s": if key == "s":
data["version"] = util.safe_int(value) data["version"] = text.parse_int(value)
elif key == "v": elif key == "v":
data["volume"] = util.safe_int(value) data["volume"] = text.parse_int(value)
elif key == "c": elif key == "c":
chapter, dot, minor = value.partition(".") chapter, dot, minor = value.partition(".")
data["chapter"] = util.safe_int(chapter) data["chapter"] = text.parse_int(chapter)
data["chapter_minor"] = dot + minor data["chapter_minor"] = dot + minor
elif key == "e": elif key == "e":
data["chapter_minor"] = "v" + value data["chapter_minor"] = "v" + value
@ -64,7 +64,7 @@ class MangaparkMangaExtractor(MangaparkExtractor, MangaExtractor):
self.parse_chapter_path(path, data) self.parse_chapter_path(path, data)
data["title"] = title[3:].strip() data["title"] = title[3:].strip()
data["date"] = date data["date"] = date
data["count"] = util.safe_int(count) data["count"] = text.parse_int(count)
results.append((self.root + path, data.copy())) results.append((self.root + path, data.copy()))
@ -107,7 +107,7 @@ class MangaparkChapterExtractor(MangaparkExtractor, ChapterExtractor):
data["manga"], _, data["type"] = data["manga"].rpartition(" ") data["manga"], _, data["type"] = data["manga"].rpartition(" ")
data["manga"] = text.unescape(data["manga"]) data["manga"] = text.unescape(data["manga"])
data["title"] = data["title"].partition(": ")[2] data["title"] = data["title"].partition(": ")[2]
data["count"] = util.safe_int(data["count"]) data["count"] = text.parse_int(data["count"])
return data return data
def get_images(self, page): def get_images(self, page):

View File

@ -9,7 +9,7 @@
"""Extract manga-chapters and entire manga from https://www.mangareader.net/""" """Extract manga-chapters and entire manga from https://www.mangareader.net/"""
from .common import ChapterExtractor, MangaExtractor from .common import ChapterExtractor, MangaExtractor
from .. import text, util from .. import text
class MangareaderBase(): class MangareaderBase():
@ -53,7 +53,7 @@ class MangareaderMangaExtractor(MangareaderBase, MangaExtractor):
return results return results
data["title"], pos = text.extract(page, '</a> : ', '</td>', pos) data["title"], pos = text.extract(page, '</a> : ', '</td>', pos)
data["date"] , pos = text.extract(page, '<td>', '</td>', pos) data["date"] , pos = text.extract(page, '<td>', '</td>', pos)
data["chapter"] = util.safe_int(url.rpartition("/")[2]) data["chapter"] = text.parse_int(url.rpartition("/")[2])
results.append((self.root + url, data.copy())) results.append((self.root + url, data.copy()))
@ -79,7 +79,7 @@ class MangareaderChapterExtractor(MangareaderBase, ChapterExtractor):
"""Collect metadata for extractor-job""" """Collect metadata for extractor-job"""
page = self.request(self.root + self.url_title).text page = self.request(self.root + self.url_title).text
data = self.parse_page(page, { data = self.parse_page(page, {
"chapter": util.safe_int(self.chapter), "chapter": text.parse_int(self.chapter),
"lang": "en", "lang": "en",
"language": "English", "language": "English",
}) })
@ -87,7 +87,7 @@ class MangareaderChapterExtractor(MangareaderBase, ChapterExtractor):
('title', ' ' + self.chapter + '</a> : ', '</td>'), ('title', ' ' + self.chapter + '</a> : ', '</td>'),
('date', '<td>', '</td>'), ('date', '<td>', '</td>'),
), page.index('<div id="chapterlist">'), data) ), page.index('<div id="chapterlist">'), data)
data["count"] = util.safe_int(text.extract( data["count"] = text.parse_int(text.extract(
chapter_page, '</select> of ', '<')[0] chapter_page, '</select> of ', '<')[0]
) )
return data return data
@ -118,6 +118,6 @@ class MangareaderChapterExtractor(MangareaderBase, ChapterExtractor):
height, pos = extr(page, ' height="', '"', pos) height, pos = extr(page, ' height="', '"', pos)
image, pos = extr(page, ' src="', '"', pos) image, pos = extr(page, ' src="', '"', pos)
return self.root + url, image, { return self.root + url, image, {
"width": util.safe_int(width), "width": text.parse_int(width),
"height": util.safe_int(height), "height": text.parse_int(height),
} }

View File

@ -9,7 +9,7 @@
"""Extract manga-chapters from https://mangastream.com/""" """Extract manga-chapters from https://mangastream.com/"""
from .common import ChapterExtractor from .common import ChapterExtractor
from .. import text, util from .. import text
from urllib.parse import urljoin from urllib.parse import urljoin
@ -35,9 +35,9 @@ class MangastreamChapterExtractor(ChapterExtractor):
return { return {
"manga": manga, "manga": manga,
"chapter": text.unquote(self.chapter), "chapter": text.unquote(self.chapter),
"chapter_id": util.safe_int(self.ch_id), "chapter_id": text.parse_int(self.ch_id),
"title": title, "title": title,
"count": util.safe_int(count, 1), "count": text.parse_int(count, 1),
"lang": "en", "lang": "en",
"language": "English", "language": "English",
} }

View File

@ -9,7 +9,7 @@
"""Extract images from https://nhentai.net/""" """Extract images from https://nhentai.net/"""
from .common import Extractor, Message from .common import Extractor, Message
from .. import text, util from .. import text
class NHentaiExtractor(Extractor): class NHentaiExtractor(Extractor):
@ -95,7 +95,7 @@ class NhentaiSearchExtractor(NHentaiExtractor):
def _pagination(self, endpoint, params): def _pagination(self, endpoint, params):
"""Pagination over API responses""" """Pagination over API responses"""
url = "{}/api/{}".format(self.root, endpoint) url = "{}/api/{}".format(self.root, endpoint)
params["page"] = util.safe_int(params.get("page"), 1) params["page"] = text.parse_int(params.get("page"), 1)
while True: while True:
data = self.request(url, params=params, fatal=False).json() data = self.request(url, params=params, fatal=False).json()

View File

@ -9,7 +9,7 @@
"""Extract images from https://nijie.info/""" """Extract images from https://nijie.info/"""
from .common import AsynchronousExtractor, Message from .common import AsynchronousExtractor, Message
from .. import text, util, exception from .. import text, exception
from ..cache import cache from ..cache import cache
@ -44,7 +44,7 @@ class NijieExtractor(AsynchronousExtractor):
def get_job_metadata(self): def get_job_metadata(self):
"""Collect metadata for extractor-job""" """Collect metadata for extractor-job"""
return {"user_id": util.safe_int(self.user_id)} return {"user_id": text.parse_int(self.user_id)}
def get_image_ids(self): def get_image_ids(self):
"""Collect all relevant image-ids""" """Collect all relevant image-ids"""
@ -63,8 +63,8 @@ class NijieExtractor(AsynchronousExtractor):
images = list(text.extract_iter(page, '<img src="//pic', '"', pos)) images = list(text.extract_iter(page, '<img src="//pic', '"', pos))
title = title.rpartition("|")[0].strip() title = title.rpartition("|")[0].strip()
image_id = util.safe_int(image_id) image_id = text.parse_int(image_id)
artist_id = util.safe_int(self._userid_from_popup(page)) artist_id = text.parse_int(self._userid_from_popup(page))
for index, url in enumerate(images): for index, url in enumerate(images):
yield "https://pic" + url, text.nameext_from_url(url, { yield "https://pic" + url, text.nameext_from_url(url, {

View File

@ -27,7 +27,7 @@ class PahealExtractor(SharedConfigExtractor):
for data in self.get_posts(): for data in self.get_posts():
url = data["file_url"] url = data["file_url"]
for key in ("id", "width", "height"): for key in ("id", "width", "height"):
data[key] = util.safe_int(data[key]) data[key] = text.parse_int(data[key])
data["tags"] = text.unquote(data["tags"]) data["tags"] = text.unquote(data["tags"])
yield Message.Url, url, text.nameext_from_url(url, data) yield Message.Url, url, text.nameext_from_url(url, data)

View File

@ -9,7 +9,7 @@
"""Extract images from https://www.pinterest.com""" """Extract images from https://www.pinterest.com"""
from .common import Extractor, Message from .common import Extractor, Message
from .. import text, util, exception from .. import text, exception
class PinterestExtractor(Extractor): class PinterestExtractor(Extractor):
@ -27,10 +27,10 @@ class PinterestExtractor(Extractor):
img = pin["image"]["original"] img = pin["image"]["original"]
url = img["url"] url = img["url"]
data = { data = {
"pin_id": util.safe_int(pin["id"]), "pin_id": text.parse_int(pin["id"]),
"note": pin["note"], "note": pin["note"],
"width": util.safe_int(img["width"]), "width": text.parse_int(img["width"]),
"height": util.safe_int(img["height"]), "height": text.parse_int(img["height"]),
} }
return url, text.nameext_from_url(url, data) return url, text.nameext_from_url(url, data)
@ -99,7 +99,7 @@ class PinterestBoardExtractor(PinterestExtractor):
"""Get metadata from a board-object""" """Get metadata from a board-object"""
data = { data = {
"user": self.user, "user": self.user,
"board_id": util.safe_int(board["id"]), "board_id": text.parse_int(board["id"]),
"board": board["name"], "board": board["name"],
"count": board["counts"]["pins"], "count": board["counts"]["pins"],
} }

View File

@ -9,7 +9,7 @@
"""Extract comic-issues and entire comics from http://readcomiconline.to/""" """Extract comic-issues and entire comics from http://readcomiconline.to/"""
from .common import ChapterExtractor, MangaExtractor from .common import ChapterExtractor, MangaExtractor
from .. import text, util, cloudflare from .. import text, cloudflare
import re import re
@ -56,7 +56,7 @@ class ReadcomiconlineComicExtractor(ReadcomiconlineBase, MangaExtractor):
issue = issue[7:] issue = issue[7:]
results.append((self.root + url, { results.append((self.root + url, {
"comic": comic, "issue": issue, "comic": comic, "issue": issue,
"issue_id": util.safe_int(url.rpartition("=")[2]), "issue_id": text.parse_int(url.rpartition("=")[2]),
"lang": "en", "language": "English", "lang": "en", "language": "English",
})) }))
return results return results
@ -84,7 +84,7 @@ class ReadcomiconlineIssueExtractor(ReadcomiconlineBase, ChapterExtractor):
return { return {
"comic": comic, "comic": comic,
"issue": match.group(1) or match.group(2), "issue": match.group(1) or match.group(2),
"issue_id": util.safe_int(self.issue_id), "issue_id": text.parse_int(self.issue_id),
"lang": "en", "lang": "en",
"language": "English", "language": "English",
} }

View File

@ -82,16 +82,16 @@ class SankakuExtractor(SharedConfigExtractor):
file_url = extr(page, '<embed src="', '"', pos)[0] file_url = extr(page, '<embed src="', '"', pos)[0]
return { return {
"id": util.safe_int(post_id), "id": text.parse_int(post_id),
"md5": file_url.rpartition("/")[2].partition(".")[0], "md5": file_url.rpartition("/")[2].partition(".")[0],
"tags": tags, "tags": tags,
"vote_average": float(vavg or 0), "vote_average": float(vavg or 0),
"vote_count": util.safe_int(vcnt), "vote_count": text.parse_int(vcnt),
"created_at": created, "created_at": created,
"rating": (rating or "?")[0].lower(), "rating": (rating or "?")[0].lower(),
"file_url": "https:" + text.unescape(file_url), "file_url": "https:" + text.unescape(file_url),
"width": util.safe_int(width), "width": text.parse_int(width),
"height": util.safe_int(height), "height": text.parse_int(height),
} }
def wait(self): def wait(self):
@ -165,8 +165,8 @@ class SankakuTagExtractor(SankakuExtractor):
SankakuExtractor.__init__(self) SankakuExtractor.__init__(self)
query = text.parse_query(match.group(1)) query = text.parse_query(match.group(1))
self.tags = text.unquote(query.get("tags", "").replace("+", " ")) self.tags = text.unquote(query.get("tags", "").replace("+", " "))
self.start_page = util.safe_int(query.get("page"), 1) self.start_page = text.parse_int(query.get("page"), 1)
self.next = util.safe_int(query.get("next"), 0) self.next = text.parse_int(query.get("next"), 0)
def skip(self, num): def skip(self, num):
if self.next: if self.next:
@ -212,7 +212,7 @@ class SankakuTagExtractor(SankakuExtractor):
yield from ids yield from ids
params["page"] = 2 params["page"] = 2
params["next"] = util.safe_int(ids[-1]) - 1 params["next"] = text.parse_int(ids[-1]) - 1
class SankakuPoolExtractor(SankakuExtractor): class SankakuPoolExtractor(SankakuExtractor):

View File

@ -123,11 +123,11 @@ class SeigaUserExtractor(SeigaExtractor):
return { return {
"user": { "user": {
"id": util.safe_int(self.user_id), "id": text.parse_int(self.user_id),
"name": data["name"], "name": data["name"],
"message": (data["msg"] or "").strip(), "message": (data["msg"] or "").strip(),
}, },
"count": util.safe_int(data["count"]), "count": text.parse_int(data["count"]),
} }
def get_images(self): def get_images(self):
@ -152,7 +152,7 @@ class SeigaUserExtractor(SeigaExtractor):
("clips" , '</span>', '</li>'), ("clips" , '</span>', '</li>'),
))[0] ))[0]
for key in ("image_id", "views", "comments", "clips"): for key in ("image_id", "views", "comments", "clips"):
data[key] = util.safe_int(data[key]) data[key] = text.parse_int(data[key])
yield data yield data
cnt += 1 cnt += 1
@ -188,4 +188,4 @@ class SeigaImageExtractor(SeigaExtractor):
return num return num
def get_images(self): def get_images(self):
return ({}, {"image_id": util.safe_int(self.image_id)}) return ({}, {"image_id": text.parse_int(self.image_id)})

View File

@ -9,7 +9,7 @@
"""Extract manga-chapters from from http://raw.senmanga.com/""" """Extract manga-chapters from from http://raw.senmanga.com/"""
from .common import Extractor, Message from .common import Extractor, Message
from .. import text, util from .. import text
class SenmangaChapterExtractor(Extractor): class SenmangaChapterExtractor(Extractor):
@ -59,7 +59,7 @@ class SenmangaChapterExtractor(Extractor):
return { return {
"manga": text.unescape(manga), "manga": text.unescape(manga),
"chapter_string": chapter.partition(" - Page ")[0], "chapter_string": chapter.partition(" - Page ")[0],
"count": util.safe_int(count), "count": text.parse_int(count),
"lang": "jp", "lang": "jp",
"language": "Japanese", "language": "Japanese",
} }

View File

@ -9,7 +9,7 @@
"""Extract images from https://www.slideshare.net/""" """Extract images from https://www.slideshare.net/"""
from .common import Extractor, Message from .common import Extractor, Message
from .. import text, util from .. import text
class SlideshareExtractor(Extractor): class SlideshareExtractor(Extractor):
@ -78,7 +78,7 @@ class SlideshareExtractor(Extractor):
"presentation": self.presentation, "presentation": self.presentation,
"title": text.unescape(title.strip()), "title": text.unescape(title.strip()),
"description": text.unescape(descr), "description": text.unescape(descr),
"views": util.safe_int(views.replace(",", "")), "views": text.parse_int(views.replace(",", "")),
"published": published, "published": published,
} }

View File

@ -9,7 +9,7 @@
"""Extract manga pages from http://www.thespectrum.net/manga_scans/""" """Extract manga pages from http://www.thespectrum.net/manga_scans/"""
from .common import ChapterExtractor, MangaExtractor from .common import ChapterExtractor, MangaExtractor
from .. import text, util from .. import text
class SpectrumnexusMangaExtractor(MangaExtractor): class SpectrumnexusMangaExtractor(MangaExtractor):
@ -55,15 +55,15 @@ class SpectrumnexusChapterExtractor(ChapterExtractor):
def get_metadata(self, page): def get_metadata(self, page):
data = { data = {
"chapter": util.safe_int(self.chapter), "chapter": text.parse_int(self.chapter),
"chapter_string": self.chapter_string.replace("+", " "), "chapter_string": self.chapter_string.replace("+", " "),
"volume": util.safe_int(self.volume), "volume": text.parse_int(self.volume),
} }
data = text.extract_all(page, ( data = text.extract_all(page, (
('manga', '<title>', ' &#183; SPECTRUM NEXUS </title>'), ('manga', '<title>', ' &#183; SPECTRUM NEXUS </title>'),
('count', '<div class="viewerLabel"> of ', '<'), ('count', '<div class="viewerLabel"> of ', '<'),
), values=data)[0] ), values=data)[0]
data["count"] = util.safe_int(data["count"]) data["count"] = text.parse_int(data["count"])
return data return data
def get_images(self, page): def get_images(self, page):

View File

@ -9,7 +9,7 @@
"""Extract images from https://www.xvideos.com/""" """Extract images from https://www.xvideos.com/"""
from .common import Extractor, Message from .common import Extractor, Message
from .. import text, util, exception from .. import text, exception
import json import json
@ -57,7 +57,7 @@ class XvideosGalleryExtractor(XvideosExtractor):
yield Message.Version, 1 yield Message.Version, 1
yield Message.Directory, data yield Message.Directory, data
for url in imgs: for url in imgs:
data["num"] = util.safe_int(url.rsplit("_", 2)[1]) data["num"] = text.parse_int(url.rsplit("_", 2)[1])
data["extension"] = url.rpartition(".")[2] data["extension"] = url.rpartition(".")[2]
yield Message.Url, url, data yield Message.Url, url, data
@ -73,14 +73,14 @@ class XvideosGalleryExtractor(XvideosExtractor):
return { return {
"user": { "user": {
"id": util.safe_int(data["userid"]), "id": text.parse_int(data["userid"]),
"name": self.user, "name": self.user,
"display": data["display"], "display": data["display"],
"description": text.remove_html(data["descr"]).strip(), "description": text.remove_html(data["descr"]).strip(),
}, },
"tags": text.unescape(data["tags"] or "").strip().split(", "), "tags": text.unescape(data["tags"] or "").strip().split(", "),
"title": text.unescape(data["title"]), "title": text.unescape(data["title"]),
"gallery_id": util.safe_int(self.gid), "gallery_id": text.parse_int(self.gid),
} }
@staticmethod @staticmethod
@ -123,7 +123,7 @@ class XvideosUserExtractor(XvideosExtractor):
del data["galleries"]["0"] del data["galleries"]["0"]
galleries = [ galleries = [
{"gallery_id": util.safe_int(gid), {"gallery_id": text.parse_int(gid),
"title": text.unescape(gdata["title"]), "title": text.unescape(gdata["title"]),
"count": gdata["nb_pics"]} "count": gdata["nb_pics"]}
for gid, gdata in data["galleries"].items() for gid, gdata in data["galleries"].items()

View File

@ -9,8 +9,8 @@
"""Collection of functions that work in strings/text""" """Collection of functions that work in strings/text"""
import re import re
import os.path
import html import html
import os.path
import urllib.parse import urllib.parse
@ -125,6 +125,35 @@ def extract_iter(txt, begin, end, pos=0):
yield value yield value
def parse_bytes(value, default=0, suffixes="bkmgtp"):
    """Convert a bytes-amount ("500k", "2.5M", ...) to int

    'value' must be a string containing a decimal number, optionally
    followed by one case-insensitive unit character. The index of that
    character inside 'suffixes' is the exponent applied to 1024
    ("b" -> 1024**0, "k" -> 1024**1, "m" -> 1024**2, ...).

    Returns the rounded number of bytes. 'default' is returned instead
    if 'value' is empty, is not a string, does not contain a valid
    number, or describes a non-finite amount ("NaN", "inf", "1e999").
    """
    # anything that is not a non-empty string (None, numbers, bytes,
    # containers, ...) can not hold a bytes-amount
    if not value or not isinstance(value, str):
        return default

    last = value[-1].lower()
    if last in suffixes:
        mul = 1024 ** suffixes.index(last)
        value = value[:-1]
    else:
        mul = 1

    try:
        return round(float(value) * mul)
    except (ValueError, OverflowError):
        # ValueError: not a number, or round() applied to NaN
        # OverflowError: round() applied to +/- infinity
        return default
def parse_int(value, default=0):
    """Convert 'value' to int

    Returns int(value), or 'default' if 'value' is falsy (None, "",
    empty containers, and also 0 itself) or can not be converted.
    """
    if not value:
        return default
    try:
        return int(value)
    except (ValueError, TypeError, OverflowError):
        # ValueError: non-numeric string or NaN
        # TypeError: unsupported type (list, dict, ...)
        # OverflowError: infinite float
        return default
def parse_query(qs): def parse_query(qs):
"""Parse a query string into key-value pairs""" """Parse a query string into key-value pairs"""
result = {} result = {}
@ -142,6 +171,7 @@ if os.name == "nt":
else: else:
clean_path = clean_path_posix clean_path = clean_path_posix
urljoin = urllib.parse.urljoin
unquote = urllib.parse.unquote unquote = urllib.parse.unquote
escape = html.escape escape = html.escape

View File

@ -95,22 +95,6 @@ def bdecode(data, alphabet="0123456789"):
return num return num
def parse_bytes(value, suffixes="bkmgtp"):
    """Convert a bytes-amount ("500k", "2.5M", ...) to int

    'value' must be a string containing a decimal number, optionally
    followed by one case-insensitive unit character whose index inside
    'suffixes' is the exponent applied to 1024.

    Returns the rounded number of bytes, or 0 if 'value' is empty, is
    not a string, or does not describe a finite number.
    """
    # guard against "" (IndexError) and non-strings such as None
    # (TypeError) before looking at the last character
    if not value or not isinstance(value, str):
        return 0

    last = value[-1].lower()
    if last in suffixes:
        mul = 1024 ** suffixes.index(last)
        value = value[:-1]
    else:
        mul = 1

    try:
        return round(float(value) * mul)
    except (ValueError, OverflowError):
        # ValueError: not a number, or round() applied to NaN
        # OverflowError: round() applied to +/- infinity
        return 0
def advance(iterable, num): def advance(iterable, num):
""""Advance the iterable by 'num' steps""" """"Advance the iterable by 'num' steps"""
iterator = iter(iterable) iterator = iter(iterable)
@ -135,16 +119,6 @@ def combine_dict(a, b):
return a return a
def safe_int(value, default=0):
    """Safely convert value to integer

    Returns int(value), or 'default' if 'value' is None, an empty
    string, or anything int() is unable to convert.
    """
    if value is None or value == "":
        return default
    try:
        return int(value)
    except (ValueError, TypeError, OverflowError):
        # OverflowError: int() applied to an infinite float
        return default
def expand_path(path): def expand_path(path):
"""Expand environment variables and tildes (~)""" """Expand environment variables and tildes (~)"""
if not path: if not path:
@ -253,7 +227,7 @@ class UniquePredicate():
class FilterPredicate(): class FilterPredicate():
"""Predicate; True if evaluating the given expression returns True""" """Predicate; True if evaluating the given expression returns True"""
globalsdict = { globalsdict = {
"safe_int": safe_int, "parse_int": text.parse_int,
"urlsplit": urllib.parse.urlsplit, "urlsplit": urllib.parse.urlsplit,
"datetime": datetime.datetime, "datetime": datetime.datetime,
"abort": raises(exception.StopExtraction()), "abort": raises(exception.StopExtraction()),

View File

@ -13,6 +13,7 @@ from gallery_dl import text
INVALID = ((), [], {}, None, 1, 2.3) INVALID = ((), [], {}, None, 1, 2.3)
INVALID_ALT = ((), [], {}, None, "")
class TestText(unittest.TestCase): class TestText(unittest.TestCase):
@ -194,6 +195,47 @@ class TestText(unittest.TestCase):
self.assertEqual( self.assertEqual(
g(txt, "[", "]", 6), ["a", "d"]) g(txt, "[", "]", 6), ["a", "d"])
def test_parse_bytes(self, f=text.parse_bytes):
    """Check parse_bytes() with plain, suffixed, fractional and bad input"""
    # plain numbers
    self.assertEqual(f("0"), 0)
    self.assertEqual(f("50"), 50)

    # unit suffixes: the n-th suffix multiplies by 1024**n
    for exponent, suffix in enumerate("kmgtp", 1):
        self.assertEqual(f("50" + suffix), 50 * 1024**exponent)

    # fractions
    fractions = (
        ("123.456", 123),
        ("123.567", 124),
        ("0.5M", round(0.5 * 1024**2)),
    )
    for value, expected in fractions:
        self.assertEqual(f(value), expected)

    # invalid arguments
    for value in INVALID_ALT:
        self.assertEqual(f(value), 0)
    for value in ("NaN", "invalid", " 123 kb "):
        self.assertEqual(f(value), 0)
def test_parse_int(self, f=text.parse_int):
    """Check parse_int() with valid input, bad input and a custom default"""
    # values that convert cleanly
    for value, expected in ((0, 0), ("0", 0), (123, 123), ("123", 123)):
        self.assertEqual(f(value), expected)

    # invalid arguments
    for value in INVALID_ALT:
        self.assertEqual(f(value), 0)
    for value in ("123.456", "zzz", [1, 2, 3], {1: 2, 3: 4}):
        self.assertEqual(f(value), 0)

    # 'default' argument
    fallback = "default"
    for value in INVALID_ALT:
        self.assertEqual(f(value, fallback), fallback)
    self.assertEqual(f("zzz", fallback), fallback)
def test_parse_query(self, f=text.parse_query): def test_parse_query(self, f=text.parse_query):
# standard usage # standard usage
self.assertEqual(f(""), {}) self.assertEqual(f(""), {})

View File

@ -227,22 +227,6 @@ class TestOther(unittest.TestCase):
result = util.bdecode(util.bencode(value, alphabet), alphabet) result = util.bdecode(util.bencode(value, alphabet), alphabet)
self.assertEqual(result, value) self.assertEqual(result, value)
def test_parse_bytes(self):
    """Check util.parse_bytes() with plain, suffixed and invalid input"""
    convert = util.parse_bytes

    # plain number, then each unit suffix as a power of 1024
    self.assertEqual(convert("50"), 50)
    for exponent, suffix in enumerate("kmgtp", 1):
        self.assertEqual(convert("50" + suffix), 50 * 1024**exponent)

    # fractional amounts are rounded to the nearest integer
    fractions = (
        ("123.456", 123),
        ("123.567", 124),
        ("0.5M", round(0.5 * 1024**2)),
    )
    for value, expected in fractions:
        self.assertEqual(convert(value), expected)

    # unparsable strings yield 0
    for value in ("NaN", "invalid", " 123 kb "):
        self.assertEqual(convert(value), 0)
def test_advance(self): def test_advance(self):
items = range(5) items = range(5)
@ -281,16 +265,6 @@ class TestOther(unittest.TestCase):
{1: {2: {3: {4: {"1": "A", "3": "C"}}}}}), {1: {2: {3: {4: {"1": "A", "3": "C"}}}}}),
{1: {2: {3: {4: {"1": "A", "2": "b", "3": "C"}}}}}) {1: {2: {3: {4: {"1": "A", "2": "b", "3": "C"}}}}})
def test_safe_int(self):
    """Check util.safe_int() with and without an explicit default"""
    convert = util.safe_int

    # implicit default of 0
    cases = ((123, 123), ("123", 123), ("zzz", 0), ("", 0), (None, 0))
    for value, expected in cases:
        self.assertEqual(convert(value), expected)

    # explicit default is handed back for unconvertible values
    for value in ("zzz", "", None):
        self.assertEqual(convert(value, "default"), "default")
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()