1
0
mirror of https://github.com/mikf/gallery-dl.git synced 2024-11-26 12:42:29 +01:00

[download] adjust filename extension on filetype mismatch

(closes #63)
This commit is contained in:
Mike Fährmann 2018-01-17 15:49:46 +01:00
parent 91ed147cef
commit 6174a5c4ef
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88
4 changed files with 44 additions and 7 deletions

View File

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2014-2017 Mike Fährmann
# Copyright 2014-2018 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -85,11 +85,11 @@ class DownloaderBase():
# check response
if not offset:
mode = "wb"
mode = "w+b"
if filesize:
self.log.info("Unable to resume partial download")
else:
mode = "ab"
mode = "r+b"
self.log.info("Resuming download at byte %d", offset)
# set missing filename extension
@ -102,6 +102,9 @@ class DownloaderBase():
self.out.start(pathfmt.path)
self.downloading = True
with pathfmt.open(mode) as file:
if offset:
file.seek(offset)
# download content
try:
self.receive(file)
@ -114,9 +117,15 @@ class DownloaderBase():
msg = "filesize mismatch ({} < {})".format(
file.tell(), size)
continue
# check filename extension
adj_ext = self._check_extension(file, pathfmt)
break
self.downloading = False
if adj_ext:
pathfmt.adjust_extension(adj_ext)
if self.part:
pathfmt.part_move()
self.out.success(pathfmt.path, tries)
@ -139,3 +148,23 @@ class DownloaderBase():
def get_extension(self):
"""Return a filename extension appropriate for the current request

No statements follow this docstring in the visible code, so this
implementation returns None implicitly; presumably concrete downloaders
override it (TODO confirm against the subclasses).
"""
@staticmethod
def _check_extension(file, pathfmt):
    """Compare the leading bytes of 'file' with its filename extension

    Reads the first 8 bytes of 'file' (after seeking to its start) and
    tests them with the FILETYPE_CHECK entry for the extension stored in
    pathfmt.keywords["extension"].

    Returns the first other extension in FILETYPE_CHECK whose check
    accepts the header, or None when the extension has no check, the
    header is shorter than 8 bytes, the header already matches, or no
    other filetype matches.
    """
    current = pathfmt.keywords["extension"]
    matches_current = FILETYPE_CHECK.get(current)
    if matches_current is None:
        # no signature check registered for this extension
        return None

    file.seek(0)
    signature = file.read(8)
    if len(signature) < 8 or matches_current(signature):
        # too little data to judge, or the extension is already correct
        return None

    # first differing filetype whose signature matches, if any
    return next(
        (name for name, matches in FILETYPE_CHECK.items()
         if name != current and matches(signature)),
        None
    )
# Signature checks keyed by filename extension. Each callable receives the
# leading bytes of a file and returns True if they match that filetype.
# Only slices are used, so a header that is too short yields False
# instead of raising IndexError.
FILETYPE_CHECK = {
    "jpg": lambda h: h[0:2] == b"\xff\xd8",
    "png": lambda h: h[0:8] == b"\x89\x50\x4e\x47\x0d\x0a\x1a\x0a",
    # "GIF8" followed by any byte and then "a" (e.g. GIF87a, GIF89a);
    # the byte at offset 4 is intentionally left unchecked, as before
    "gif": lambda h: h[0:4] == b"GIF8" and h[5:6] == b"a",
}

View File

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2015-2017 Mike Fährmann
# Copyright 2015-2018 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -296,6 +296,7 @@ class TestJob(DownloadJob):
self.hash_content = hashlib.sha1()
if content:
self.fileobj = self.HashIO(self.hash_content)
self.get_downloader("http:")._check_extension = lambda a, b: None
def run(self):
for msg in self.extractor:

View File

@ -409,6 +409,13 @@ class PathFormat():
self.keywords["extension"] = extension
self.build_path()
def adjust_extension(self, extension):
    """Switch to a new filename extension and rename the file on disk

    The current 'realpath' is remembered, 'set_extension()' is called
    with the new extension, and the file at the remembered path is then
    moved to the resulting 'realpath'. The move is skipped when
    'partpath' is set.
    """
    # must be captured before set_extension() is called
    previous = self.realpath
    self.set_extension(extension)
    if self.partpath:
        return
    os.replace(previous, self.realpath)
def build_path(self):
"""Use filename-keywords and directory to build a full path"""
try:
@ -446,7 +453,7 @@ class PathFormat():
def part_move(self):
"""Rename .part file to its actual filename"""
try:
os.rename(self.partpath, self.realpath)
os.replace(self.partpath, self.realpath)
return
except OSError:
pass

View File

@ -1,9 +1,9 @@
# -*- coding: utf-8 -*-
# Copyright 2016-2017 Mike Fährmann
# Copyright 2016-2018 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
__version__ = "1.1.2"
__version__ = "1.2.0-dev"