')[0]
return [
(url, {
- "width": util.safe_int(width),
- "height": util.safe_int(height),
+ "width": text.parse_int(width),
+ "height": text.parse_int(height),
})
for url, width, height in re.findall(
r"
]*? src=[\"']([^\"']+)[\"']"
diff --git a/gallery_dl/extractor/mangadex.py b/gallery_dl/extractor/mangadex.py
index a53183d7..4529a735 100644
--- a/gallery_dl/extractor/mangadex.py
+++ b/gallery_dl/extractor/mangadex.py
@@ -65,11 +65,11 @@ class MangadexChapterExtractor(MangadexExtractor, ChapterExtractor):
return {
"manga": match.group(5),
- "manga_id": util.safe_int(manga_id),
- "volume": util.safe_int(match.group(1)),
- "chapter": util.safe_int(match.group(2)),
+ "manga_id": text.parse_int(manga_id),
+ "volume": text.parse_int(match.group(1)),
+ "chapter": text.parse_int(match.group(2)),
"chapter_minor": match.group(3) or "",
- "chapter_id": util.safe_int(self.chapter_id),
+ "chapter_id": text.parse_int(self.chapter_id),
"chapter_string": info.replace(" - MangaDex", ""),
"group": text.unescape(group),
"lang": util.language_to_code(language),
@@ -124,7 +124,7 @@ class MangadexMangaExtractor(MangadexExtractor, MangaExtractor):
manga = text.unescape(extr(
page, '"og:title" content="', '"')[0].rpartition(" (")[0])
- manga_id = util.safe_int(extr(
+ manga_id = text.parse_int(extr(
page, '/images/manga/', '.')[0])
while True:
@@ -145,15 +145,15 @@ class MangadexMangaExtractor(MangadexExtractor, MangaExtractor):
results.append((self.root + "/chapter/" + chid, {
"manga": manga,
- "manga_id": util.safe_int(manga_id),
+ "manga_id": text.parse_int(manga_id),
"title": text.unescape(title),
- "volume": util.safe_int(volume),
- "chapter": util.safe_int(chapter),
+ "volume": text.parse_int(volume),
+ "chapter": text.parse_int(chapter),
"chapter_minor": sep + minor,
- "chapter_id": util.safe_int(chid),
+ "chapter_id": text.parse_int(chid),
"group": text.unescape(text.remove_html(group)),
"contributor": text.remove_html(user),
- "views": util.safe_int(views),
+ "views": text.parse_int(views),
"date": date,
"lang": util.language_to_code(language),
"language": language,
diff --git a/gallery_dl/extractor/mangafox.py b/gallery_dl/extractor/mangafox.py
index 906c2372..38eefa12 100644
--- a/gallery_dl/extractor/mangafox.py
+++ b/gallery_dl/extractor/mangafox.py
@@ -9,7 +9,7 @@
"""Extract manga-chapters and entire manga from http://fanfox.net/"""
from .common import ChapterExtractor
-from .. import text, util, exception
+from .. import text, exception
import re
@@ -47,7 +47,7 @@ class MangafoxChapterExtractor(ChapterExtractor):
data["chapter_minor"] = match.group(4) or ""
data["manga"] = data["manga"].rpartition(" ")[0]
for key in ("sid", "cid", "count", "volume", "chapter"):
- data[key] = util.safe_int(data[key])
+ data[key] = text.parse_int(data[key])
return data
def get_images(self, page):
diff --git a/gallery_dl/extractor/mangahere.py b/gallery_dl/extractor/mangahere.py
index 31e758f5..b4ade702 100644
--- a/gallery_dl/extractor/mangahere.py
+++ b/gallery_dl/extractor/mangahere.py
@@ -9,7 +9,7 @@
"""Extract manga-chapters and entire manga from http://www.mangahere.co/"""
from .common import ChapterExtractor, MangaExtractor
-from .. import text, util
+from .. import text
from urllib.parse import urljoin
import re
@@ -53,8 +53,8 @@ class MangahereMangaExtractor(MangaExtractor):
date, pos = text.extract(page, 'class="right">', '', pos)
results.append((urljoin("http:", url), {
"manga": manga, "title": title, "date": date,
- "volume": util.safe_int(volume.rpartition(" ")[2]),
- "chapter": util.safe_int(chapter),
+ "volume": text.parse_int(volume.rpartition(" ")[2]),
+ "chapter": text.parse_int(chapter),
"chapter_minor": dot + minor,
"lang": "en", "language": "English",
}))
@@ -93,11 +93,11 @@ class MangahereChapterExtractor(ChapterExtractor):
return {
"manga": text.unescape(manga),
# "title": TODO,
- "volume": util.safe_int(self.volume),
- "chapter": util.safe_int(self.chapter),
+ "volume": text.parse_int(self.volume),
+ "chapter": text.parse_int(self.chapter),
"chapter_minor": self.chminor or "",
- "chapter_id": util.safe_int(chid),
- "count": util.safe_int(count),
+ "chapter_id": text.parse_int(chid),
+ "count": text.parse_int(count),
"lang": "en",
"language": "English",
}
diff --git a/gallery_dl/extractor/mangapark.py b/gallery_dl/extractor/mangapark.py
index 4752d1f5..b12b7d45 100644
--- a/gallery_dl/extractor/mangapark.py
+++ b/gallery_dl/extractor/mangapark.py
@@ -9,7 +9,7 @@
"""Extract manga-chapters and entire manga from https://mangapark.me/"""
from .common import ChapterExtractor, MangaExtractor
-from .. import text, util
+from .. import text
from urllib.parse import urljoin
@@ -25,12 +25,12 @@ class MangaparkExtractor():
for part in path.split("/")[3:]:
key, value = part[0], part[1:]
if key == "s":
- data["version"] = util.safe_int(value)
+ data["version"] = text.parse_int(value)
elif key == "v":
- data["volume"] = util.safe_int(value)
+ data["volume"] = text.parse_int(value)
elif key == "c":
chapter, dot, minor = value.partition(".")
- data["chapter"] = util.safe_int(chapter)
+ data["chapter"] = text.parse_int(chapter)
data["chapter_minor"] = dot + minor
elif key == "e":
data["chapter_minor"] = "v" + value
@@ -64,7 +64,7 @@ class MangaparkMangaExtractor(MangaparkExtractor, MangaExtractor):
self.parse_chapter_path(path, data)
data["title"] = title[3:].strip()
data["date"] = date
- data["count"] = util.safe_int(count)
+ data["count"] = text.parse_int(count)
results.append((self.root + path, data.copy()))
@@ -107,7 +107,7 @@ class MangaparkChapterExtractor(MangaparkExtractor, ChapterExtractor):
data["manga"], _, data["type"] = data["manga"].rpartition(" ")
data["manga"] = text.unescape(data["manga"])
data["title"] = data["title"].partition(": ")[2]
- data["count"] = util.safe_int(data["count"])
+ data["count"] = text.parse_int(data["count"])
return data
def get_images(self, page):
diff --git a/gallery_dl/extractor/mangareader.py b/gallery_dl/extractor/mangareader.py
index 11714a6d..3aabb153 100644
--- a/gallery_dl/extractor/mangareader.py
+++ b/gallery_dl/extractor/mangareader.py
@@ -9,7 +9,7 @@
"""Extract manga-chapters and entire manga from https://www.mangareader.net/"""
from .common import ChapterExtractor, MangaExtractor
-from .. import text, util
+from .. import text
class MangareaderBase():
@@ -53,7 +53,7 @@ class MangareaderMangaExtractor(MangareaderBase, MangaExtractor):
return results
data["title"], pos = text.extract(page, ' : ', '', pos)
data["date"] , pos = text.extract(page, '
', ' | ', pos)
- data["chapter"] = util.safe_int(url.rpartition("/")[2])
+ data["chapter"] = text.parse_int(url.rpartition("/")[2])
results.append((self.root + url, data.copy()))
@@ -79,7 +79,7 @@ class MangareaderChapterExtractor(MangareaderBase, ChapterExtractor):
"""Collect metadata for extractor-job"""
page = self.request(self.root + self.url_title).text
data = self.parse_page(page, {
- "chapter": util.safe_int(self.chapter),
+ "chapter": text.parse_int(self.chapter),
"lang": "en",
"language": "English",
})
@@ -87,7 +87,7 @@ class MangareaderChapterExtractor(MangareaderBase, ChapterExtractor):
('title', ' ' + self.chapter + ' : ', ''),
('date', '
', ' | '),
), page.index('
'), data)
- data["count"] = util.safe_int(text.extract(
+ data["count"] = text.parse_int(text.extract(
chapter_page, ' of ', '<')[0]
)
return data
@@ -118,6 +118,6 @@ class MangareaderChapterExtractor(MangareaderBase, ChapterExtractor):
height, pos = extr(page, ' height="', '"', pos)
image, pos = extr(page, ' src="', '"', pos)
return self.root + url, image, {
- "width": util.safe_int(width),
- "height": util.safe_int(height),
+ "width": text.parse_int(width),
+ "height": text.parse_int(height),
}
diff --git a/gallery_dl/extractor/mangastream.py b/gallery_dl/extractor/mangastream.py
index 51a469a3..70c18f1a 100644
--- a/gallery_dl/extractor/mangastream.py
+++ b/gallery_dl/extractor/mangastream.py
@@ -9,7 +9,7 @@
"""Extract manga-chapters from https://mangastream.com/"""
from .common import ChapterExtractor
-from .. import text, util
+from .. import text
from urllib.parse import urljoin
@@ -35,9 +35,9 @@ class MangastreamChapterExtractor(ChapterExtractor):
return {
"manga": manga,
"chapter": text.unquote(self.chapter),
- "chapter_id": util.safe_int(self.ch_id),
+ "chapter_id": text.parse_int(self.ch_id),
"title": title,
- "count": util.safe_int(count, 1),
+ "count": text.parse_int(count, 1),
"lang": "en",
"language": "English",
}
diff --git a/gallery_dl/extractor/nhentai.py b/gallery_dl/extractor/nhentai.py
index 35816e60..a0c8abc1 100644
--- a/gallery_dl/extractor/nhentai.py
+++ b/gallery_dl/extractor/nhentai.py
@@ -9,7 +9,7 @@
"""Extract images from https://nhentai.net/"""
from .common import Extractor, Message
-from .. import text, util
+from .. import text
class NHentaiExtractor(Extractor):
@@ -95,7 +95,7 @@ class NhentaiSearchExtractor(NHentaiExtractor):
def _pagination(self, endpoint, params):
"""Pagination over API responses"""
url = "{}/api/{}".format(self.root, endpoint)
- params["page"] = util.safe_int(params.get("page"), 1)
+ params["page"] = text.parse_int(params.get("page"), 1)
while True:
data = self.request(url, params=params, fatal=False).json()
diff --git a/gallery_dl/extractor/nijie.py b/gallery_dl/extractor/nijie.py
index 56e1e521..79b7a988 100644
--- a/gallery_dl/extractor/nijie.py
+++ b/gallery_dl/extractor/nijie.py
@@ -9,7 +9,7 @@
"""Extract images from https://nijie.info/"""
from .common import AsynchronousExtractor, Message
-from .. import text, util, exception
+from .. import text, exception
from ..cache import cache
@@ -44,7 +44,7 @@ class NijieExtractor(AsynchronousExtractor):
def get_job_metadata(self):
"""Collect metadata for extractor-job"""
- return {"user_id": util.safe_int(self.user_id)}
+ return {"user_id": text.parse_int(self.user_id)}
def get_image_ids(self):
"""Collect all relevant image-ids"""
@@ -63,8 +63,8 @@ class NijieExtractor(AsynchronousExtractor):
images = list(text.extract_iter(page, '
', ''),
))[0]
for key in ("image_id", "views", "comments", "clips"):
- data[key] = util.safe_int(data[key])
+ data[key] = text.parse_int(data[key])
yield data
cnt += 1
@@ -188,4 +188,4 @@ class SeigaImageExtractor(SeigaExtractor):
return num
def get_images(self):
- return ({}, {"image_id": util.safe_int(self.image_id)})
+ return ({}, {"image_id": text.parse_int(self.image_id)})
diff --git a/gallery_dl/extractor/senmanga.py b/gallery_dl/extractor/senmanga.py
index d46d5e7c..98f096c0 100644
--- a/gallery_dl/extractor/senmanga.py
+++ b/gallery_dl/extractor/senmanga.py
@@ -9,7 +9,7 @@
"""Extract manga-chapters from from http://raw.senmanga.com/"""
from .common import Extractor, Message
-from .. import text, util
+from .. import text
class SenmangaChapterExtractor(Extractor):
@@ -59,7 +59,7 @@ class SenmangaChapterExtractor(Extractor):
return {
"manga": text.unescape(manga),
"chapter_string": chapter.partition(" - Page ")[0],
- "count": util.safe_int(count),
+ "count": text.parse_int(count),
"lang": "jp",
"language": "Japanese",
}
diff --git a/gallery_dl/extractor/slideshare.py b/gallery_dl/extractor/slideshare.py
index 1a680d34..3ab15809 100644
--- a/gallery_dl/extractor/slideshare.py
+++ b/gallery_dl/extractor/slideshare.py
@@ -9,7 +9,7 @@
"""Extract images from https://www.slideshare.net/"""
from .common import Extractor, Message
-from .. import text, util
+from .. import text
class SlideshareExtractor(Extractor):
@@ -78,7 +78,7 @@ class SlideshareExtractor(Extractor):
"presentation": self.presentation,
"title": text.unescape(title.strip()),
"description": text.unescape(descr),
- "views": util.safe_int(views.replace(",", "")),
+ "views": text.parse_int(views.replace(",", "")),
"published": published,
}
diff --git a/gallery_dl/extractor/spectrumnexus.py b/gallery_dl/extractor/spectrumnexus.py
index 8bc2aa1c..12207d7c 100644
--- a/gallery_dl/extractor/spectrumnexus.py
+++ b/gallery_dl/extractor/spectrumnexus.py
@@ -9,7 +9,7 @@
"""Extract manga pages from http://www.thespectrum.net/manga_scans/"""
from .common import ChapterExtractor, MangaExtractor
-from .. import text, util
+from .. import text
class SpectrumnexusMangaExtractor(MangaExtractor):
@@ -55,15 +55,15 @@ class SpectrumnexusChapterExtractor(ChapterExtractor):
def get_metadata(self, page):
data = {
- "chapter": util.safe_int(self.chapter),
+ "chapter": text.parse_int(self.chapter),
"chapter_string": self.chapter_string.replace("+", " "),
- "volume": util.safe_int(self.volume),
+ "volume": text.parse_int(self.volume),
}
data = text.extract_all(page, (
('manga', '
', ' · SPECTRUM NEXUS '),
('count', '
of ', '<'),
), values=data)[0]
- data["count"] = util.safe_int(data["count"])
+ data["count"] = text.parse_int(data["count"])
return data
def get_images(self, page):
diff --git a/gallery_dl/extractor/xvideos.py b/gallery_dl/extractor/xvideos.py
index d3575389..99d936a6 100644
--- a/gallery_dl/extractor/xvideos.py
+++ b/gallery_dl/extractor/xvideos.py
@@ -9,7 +9,7 @@
"""Extract images from https://www.xvideos.com/"""
from .common import Extractor, Message
-from .. import text, util, exception
+from .. import text, exception
import json
@@ -57,7 +57,7 @@ class XvideosGalleryExtractor(XvideosExtractor):
yield Message.Version, 1
yield Message.Directory, data
for url in imgs:
- data["num"] = util.safe_int(url.rsplit("_", 2)[1])
+ data["num"] = text.parse_int(url.rsplit("_", 2)[1])
data["extension"] = url.rpartition(".")[2]
yield Message.Url, url, data
@@ -73,14 +73,14 @@ class XvideosGalleryExtractor(XvideosExtractor):
return {
"user": {
- "id": util.safe_int(data["userid"]),
+ "id": text.parse_int(data["userid"]),
"name": self.user,
"display": data["display"],
"description": text.remove_html(data["descr"]).strip(),
},
"tags": text.unescape(data["tags"] or "").strip().split(", "),
"title": text.unescape(data["title"]),
- "gallery_id": util.safe_int(self.gid),
+ "gallery_id": text.parse_int(self.gid),
}
@staticmethod
@@ -123,7 +123,7 @@ class XvideosUserExtractor(XvideosExtractor):
del data["galleries"]["0"]
galleries = [
- {"gallery_id": util.safe_int(gid),
+ {"gallery_id": text.parse_int(gid),
"title": text.unescape(gdata["title"]),
"count": gdata["nb_pics"]}
for gid, gdata in data["galleries"].items()
diff --git a/gallery_dl/text.py b/gallery_dl/text.py
index 4bed863a..033cf5f3 100644
--- a/gallery_dl/text.py
+++ b/gallery_dl/text.py
@@ -9,8 +9,8 @@
"""Collection of functions that work in strings/text"""
import re
-import os.path
import html
+import os.path
import urllib.parse
@@ -125,6 +125,35 @@ def extract_iter(txt, begin, end, pos=0):
yield value
+def parse_bytes(value, default=0, suffixes="bkmgtp"):
+    """Convert a bytes-amount ("500k", "2.5M", ...) to int
+
+    'value' is expected to be a string containing a number that is
+    optionally followed by one unit character out of 'suffixes'.
+    The unit is matched case-insensitively and the character at
+    index n of 'suffixes' scales the number by 1024**n.
+    The scaled result is rounded to the nearest integer.
+
+    'default' is returned for every input that cannot be converted:
+    empty or non-string values, malformed numbers, NaN and infinity.
+    """
+    try:
+        last = value[-1].lower()
+    except (TypeError, KeyError, IndexError, AttributeError):
+        # not indexable, empty, or the last element is not a string
+        return default
+
+    if last in suffixes:
+        mul = 1024 ** suffixes.index(last)
+        value = value[:-1]
+    else:
+        mul = 1
+
+    try:
+        return round(float(value) * mul)
+    except (ValueError, TypeError, OverflowError):
+        # float() rejects malformed input; round() raises ValueError
+        # for NaN and OverflowError for (overflowed) infinity
+        return default
+
+
+def parse_int(value, default=0):
+    """Convert 'value' to int
+
+    Returns int(value) if that conversion succeeds and 'default'
+    otherwise. None and the empty string are short-circuited without
+    attempting a conversion. Other falsy values such as a literal 0
+    are converted normally, so parse_int(0, 1) is 0 and not 1.
+    """
+    if value is None or value == "":
+        return default
+    try:
+        return int(value)
+    except (ValueError, TypeError, OverflowError):
+        # OverflowError: int() of a float infinity
+        return default
+
+
def parse_query(qs):
"""Parse a query string into key-value pairs"""
result = {}
@@ -142,6 +171,7 @@ if os.name == "nt":
else:
clean_path = clean_path_posix
+urljoin = urllib.parse.urljoin
unquote = urllib.parse.unquote
escape = html.escape
diff --git a/gallery_dl/util.py b/gallery_dl/util.py
index 8341e084..af754f07 100644
--- a/gallery_dl/util.py
+++ b/gallery_dl/util.py
@@ -95,22 +95,6 @@ def bdecode(data, alphabet="0123456789"):
return num
-def parse_bytes(value, suffixes="bkmgtp"):
- """Convert a bytes-amount ("500k", "2.5M", ...) to int"""
- last = value[-1].lower()
-
- if last in suffixes:
- mul = 1024 ** suffixes.index(last)
- value = value[:-1]
- else:
- mul = 1
-
- try:
- return round(float(value) * mul)
- except ValueError:
- return 0
-
-
def advance(iterable, num):
""""Advance the iterable by 'num' steps"""
iterator = iter(iterable)
@@ -135,16 +119,6 @@ def combine_dict(a, b):
return a
-def safe_int(value, default=0):
- """Safely convert value to integer"""
- if value is None or value == "":
- return default
- try:
- return int(value)
- except (ValueError, TypeError):
- return default
-
-
def expand_path(path):
"""Expand environment variables and tildes (~)"""
if not path:
@@ -253,7 +227,7 @@ class UniquePredicate():
class FilterPredicate():
"""Predicate; True if evaluating the given expression returns True"""
globalsdict = {
- "safe_int": safe_int,
+ "parse_int": text.parse_int,
"urlsplit": urllib.parse.urlsplit,
"datetime": datetime.datetime,
"abort": raises(exception.StopExtraction()),
diff --git a/test/test_text.py b/test/test_text.py
index b07dff10..f76fc2a2 100644
--- a/test/test_text.py
+++ b/test/test_text.py
@@ -13,6 +13,7 @@ from gallery_dl import text
INVALID = ((), [], {}, None, 1, 2.3)
+INVALID_ALT = ((), [], {}, None, "")
class TestText(unittest.TestCase):
@@ -194,6 +195,47 @@ class TestText(unittest.TestCase):
self.assertEqual(
g(txt, "[", "]", 6), ["a", "d"])
+    def test_parse_bytes(self, f=text.parse_bytes):
+        """Check parse_bytes() with units, fractions and invalid input"""
+        # plain numbers, then every unit suffix in ascending order
+        self.assertEqual(f("0"), 0)
+        self.assertEqual(f("50"), 50)
+        for exponent, suffix in enumerate("kmgtp", 1):
+            self.assertEqual(f("50" + suffix), 50 * 1024**exponent)
+
+        # fractional amounts get rounded to the nearest integer
+        self.assertEqual(f("123.456"), 123)
+        self.assertEqual(f("123.567"), 124)
+        self.assertEqual(f("0.5M"), round(0.5 * 1024**2))
+
+        # everything that cannot be parsed yields the default of 0
+        for value in INVALID_ALT + ("NaN", "invalid", " 123 kb "):
+            self.assertEqual(f(value), 0)
+
+    def test_parse_int(self, f=text.parse_int):
+        """Check parse_int() with valid, invalid and defaulted input"""
+        # integers and integer strings are converted as-is
+        for value, expected in ((0, 0), ("0", 0), (123, 123), ("123", 123)):
+            self.assertEqual(f(value), expected)
+
+        # invalid arguments fall back to the implicit default of 0
+        rejected = INVALID_ALT + ("123.456", "zzz", [1, 2, 3], {1: 2, 3: 4})
+        for value in rejected:
+            self.assertEqual(f(value), 0)
+
+        # an explicit 'default' argument is handed back unchanged
+        fallback = "default"
+        for value in INVALID_ALT + ("zzz",):
+            self.assertEqual(f(value, fallback), fallback)
+
def test_parse_query(self, f=text.parse_query):
# standard usage
self.assertEqual(f(""), {})
diff --git a/test/test_util.py b/test/test_util.py
index 7c684d89..8333828e 100644
--- a/test/test_util.py
+++ b/test/test_util.py
@@ -227,22 +227,6 @@ class TestOther(unittest.TestCase):
result = util.bdecode(util.bencode(value, alphabet), alphabet)
self.assertEqual(result, value)
- def test_parse_bytes(self):
- self.assertEqual(util.parse_bytes("50"), 50)
- self.assertEqual(util.parse_bytes("50k"), 50 * 1024**1)
- self.assertEqual(util.parse_bytes("50m"), 50 * 1024**2)
- self.assertEqual(util.parse_bytes("50g"), 50 * 1024**3)
- self.assertEqual(util.parse_bytes("50t"), 50 * 1024**4)
- self.assertEqual(util.parse_bytes("50p"), 50 * 1024**5)
-
- self.assertEqual(util.parse_bytes("123.456"), 123)
- self.assertEqual(util.parse_bytes("123.567"), 124)
- self.assertEqual(util.parse_bytes("0.5M"), round(0.5 * 1024**2))
-
- self.assertEqual(util.parse_bytes("NaN"), 0)
- self.assertEqual(util.parse_bytes("invalid"), 0)
- self.assertEqual(util.parse_bytes(" 123 kb "), 0)
-
def test_advance(self):
items = range(5)
@@ -281,16 +265,6 @@ class TestOther(unittest.TestCase):
{1: {2: {3: {4: {"1": "A", "3": "C"}}}}}),
{1: {2: {3: {4: {"1": "A", "2": "b", "3": "C"}}}}})
- def test_safe_int(self):
- self.assertEqual(util.safe_int(123), 123)
- self.assertEqual(util.safe_int("123"), 123)
- self.assertEqual(util.safe_int("zzz"), 0)
- self.assertEqual(util.safe_int(""), 0)
- self.assertEqual(util.safe_int(None), 0)
- self.assertEqual(util.safe_int("zzz", "default"), "default")
- self.assertEqual(util.safe_int("", "default"), "default")
- self.assertEqual(util.safe_int(None, "default"), "default")
-
if __name__ == '__main__':
unittest.main()