1
0
mirror of https://github.com/mikf/gallery-dl.git synced 2024-11-22 18:53:21 +01:00

get extension from Content-Type header if not provided

This commit is contained in:
Mike Fährmann 2016-09-30 12:32:48 +02:00
parent 8d106a447c
commit 29692c5784
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88
6 changed files with 146 additions and 72 deletions

View File

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2014, 2015 Mike Fährmann
# Copyright 2014-2016 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -15,16 +15,15 @@ class BasicDownloader():
max_tries = 5
def download(self, url, fileobj):
def download(self, url, pathfmt):
"""Download the resource at 'url' and write it to a file-like object"""
try:
return self.download_impl(url, fileobj)
return self.download_impl(url, pathfmt)
except:
# remove file if download failed
try:
fileobj.close()
os.unlink(fileobj.name)
except AttributeError:
os.unlink(pathfmt.realpath)
except (AttributeError, FileNotFoundError):
pass
raise

View File

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2014, 2015 Mike Fährmann
# Copyright 2014-2016 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -10,16 +10,17 @@
import time
import requests
import mimetypes
from .common import BasicDownloader
class Downloader(BasicDownloader):
def __init__(self, printer):
def __init__(self, output):
BasicDownloader.__init__(self)
self.session = requests.session()
self.printer = printer
self.out = output
def download_impl(self, url, file):
def download_impl(self, url, pathfmt):
tries = 0
while True:
# try to connect to remote source
@ -27,7 +28,7 @@ class Downloader(BasicDownloader):
response = self.session.get(url, stream=True, verify=True)
except requests.exceptions.ConnectionError as exptn:
tries += 1
self.printer.error(file, exptn, tries, self.max_tries)
self.out.error(pathfmt.path, exptn, tries, self.max_tries)
time.sleep(1)
if tries == self.max_tries:
raise
@ -36,10 +37,8 @@ class Downloader(BasicDownloader):
# reject error-status-codes
if response.status_code != requests.codes.ok:
tries += 1
self.printer.error(file, 'HTTP status "{} {}"'.format(
self.out.error(pathfmt.path, 'HTTP status "{} {}"'.format(
response.status_code, response.reason), tries, self.max_tries)
if response.status_code == 404:
return self.max_tries
time.sleep(1)
if tries == self.max_tries:
response.raise_for_status()
@ -48,9 +47,22 @@ class Downloader(BasicDownloader):
# everything ok -- proceed to download
break
for data in response.iter_content(16384):
file.write(data)
return tries
if not pathfmt.has_extension:
# set 'extension' keyword from Content-Type header
mtype = response.headers.get("Content-Type", "image/jpeg")
extensions = mimetypes.guess_all_extensions(mtype)
extensions.sort()
pathfmt.set_extension(extensions[-1][1:])
if pathfmt.exists():
self.out.skip(pathfmt.path)
response.close()
return
self.out.start(pathfmt.path)
with pathfmt.open() as file:
for data in response.iter_content(16384):
file.write(data)
self.out.success(pathfmt.path, tries)
def set_headers(self, headers):
"""Set headers for http requests"""
@ -65,4 +77,3 @@ class Downloader(BasicDownloader):
"""Copy the contents of dictionary 'src' to 'dest'"""
dest.clear()
dest.update(src)

View File

@ -1 +0,0 @@
from .http import Downloader

View File

@ -1,20 +1,29 @@
# -*- coding: utf-8 -*-
# Copyright 2014, 2015 Mike Fährmann
# Copyright 2014-2016 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Downloader module for text urls"""
"""Downloader module for text:// urls"""
from .common import BasicDownloader
class Downloader(BasicDownloader):
def __init__(self, *args):
def __init__(self, output):
BasicDownloader.__init__(self)
self.out = output
def download_impl(self, url, file):
file.write(bytes(url[7:], "utf-8"))
return 0
def download_impl(self, url, pathfmt):
if not pathfmt.has_extension:
pathfmt.set_extension("txt")
if pathfmt.exists():
self.out.skip(pathfmt.path)
return
self.out.start(pathfmt.path)
with pathfmt.open() as file:
file.write(bytes(url[7:], "utf-8"))
self.out.success(pathfmt.path, 0)

View File

@ -1,16 +1,14 @@
# -*- coding: utf-8 -*-
# Copyright 2015 Mike Fährmann
# Copyright 2015, 2016 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
import os
import json
import hashlib
import platform
from . import config, extractor, downloader, text, output, exception
from . import extractor, downloader, path, output, exception
from .extractor.message import Message
class Job():
@ -73,19 +71,10 @@ class DownloadJob(Job):
def __init__(self, url):
Job.__init__(self, url)
self.directory = self.get_base_directory()
self.pathfmt = path.PathFormat(self.extractor)
self.downloaders = {}
self.queue = None
self.printer = output.select()
key = ["extractor", self.extractor.category]
if self.extractor.subcategory:
key.append(self.extractor.subcategory)
self.filename_fmt = config.interpolate(
key + ["filename_fmt"], default=self.extractor.filename_fmt
)
self.directory_fmt = config.interpolate(
key + ["directory_fmt"], default=self.extractor.directory_fmt
)
self.out = output.select()
def run(self):
Job.run(self)
@ -98,29 +87,16 @@ class DownloadJob(Job):
def handle_url(self, url, keywords):
"""Download the resource specified in 'url'"""
filename = text.clean_path(self.filename_fmt.format(**keywords))
path = os.path.join(self.directory, filename)
realpath = self.adjust_path(path)
if os.path.exists(realpath):
self.printer.skip(path)
self.pathfmt.set_keywords(keywords)
if self.pathfmt.exists():
self.out.skip(self.pathfmt.path)
return
dlinstance = self.get_downloader(url)
self.printer.start(path)
with open(realpath, "wb") as file:
tries = dlinstance.download(url, file)
self.printer.success(path, tries)
dlinstance.download(url, self.pathfmt)
def handle_directory(self, keywords):
"""Set and create the target directory for downloads"""
segments = [
text.clean_path(segment.format(**keywords).strip())
for segment in self.directory_fmt
]
self.directory = os.path.join(
self.get_base_directory(),
*segments
)
os.makedirs(self.adjust_path(self.directory), exist_ok=True)
self.pathfmt.set_directory(keywords)
def handle_queue(self, url):
"""Add url to work-queue"""
@ -144,23 +120,10 @@ class DownloadJob(Job):
instance = self.downloaders.get(scheme)
if instance is None:
klass = downloader.find(scheme)
instance = klass(self.printer)
instance = klass(self.out)
self.downloaders[scheme] = instance
return instance
@staticmethod
def get_base_directory():
"""Return the base-destination-directory for downloads"""
bdir = config.get(("base-directory",), default=(".", "gallery-dl"))
if not isinstance(bdir, str):
bdir = os.path.join(*bdir)
return os.path.expanduser(os.path.expandvars(bdir))
@staticmethod
def adjust_path(path, longpaths=platform.system() == "Windows"):
"""Enable longer-than-260-character paths on windows"""
return "\\\\?\\" + os.path.abspath(path) if longpaths else path
class KeywordJob(Job):
"""Print available keywords"""
@ -207,6 +170,17 @@ class HashJob(DownloadJob):
def __init__(self, hashobj):
self.hashobj = hashobj
self.path = ""
self.has_extension = True
def __enter__(self):
return self
def __exit__(self, *args):
pass
def open(self):
return self
def write(self, content):
"""Update SHA1 hash"""

82
gallery_dl/path.py Normal file
View File

@ -0,0 +1,82 @@
# -*- coding: utf-8 -*-
# Copyright 2016 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
import os
from . import config, text
class PathFormat():
    """Build and keep track of the target path for downloaded files

    The filename and directory format strings are looked up in the
    configuration under ["extractor", <category>(, <subcategory>)] and
    fall back to the 'filename_fmt' / 'directory_fmt' attributes of the
    given extractor.

    Attributes maintained by this class:
        has_extension -- True once a non-empty 'extension' keyword is known
        keywords      -- keywords used to format the filename
        directory     -- target directory built from 'directory_fmt'
        realdirectory -- 'directory' after being passed to adjust_path()
        path          -- 'directory' + separator + formatted filename
        realpath      -- 'realdirectory' + separator + formatted filename
    """

    def __init__(self, extractor):
        key = ["extractor", extractor.category]
        if extractor.subcategory:
            key.append(extractor.subcategory)
        self.filename_fmt = config.interpolate(
            key + ["filename_fmt"], default=extractor.filename_fmt
        )
        self.directory_fmt = config.interpolate(
            key + ["directory_fmt"], default=extractor.directory_fmt
        )
        self.has_extension = False
        self.keywords = {}
        self.directory = self.realdirectory = ""
        self.path = self.realpath = ""

    def open(self):
        """Open file at 'realpath' and return a corresponding file object"""
        return open(self.realpath, "wb")

    def exists(self):
        """Return True if 'path' is complete and refers to an existing path"""
        # without a known extension the path has not been built yet,
        # so there is nothing meaningful to check on disk
        if self.has_extension:
            return os.path.exists(self.realpath)
        return False

    def set_directory(self, keywords):
        """Build directory path and create it if necessary"""
        segments = [
            text.clean_path(segment.format(**keywords).strip())
            for segment in self.directory_fmt
        ]
        self.directory = os.path.join(
            self.get_base_directory(),
            *segments
        )
        self.realdirectory = self.adjust_path(self.directory)
        os.makedirs(self.realdirectory, exist_ok=True)

    def set_keywords(self, keywords):
        """Set filename keywords

        The keywords are stored as a shallow copy, so a later call to
        set_extension() does not write an 'extension' entry into the
        caller's dictionary (which would make a reused dictionary look
        as if it already provided an extension for the next file).

        'path' and 'realpath' are only rebuilt if 'keywords' contains a
        non-empty 'extension' value; otherwise they keep their previous
        values until set_extension() is called.
        """
        self.keywords = dict(keywords)
        self.has_extension = bool(self.keywords.get("extension"))
        if self.has_extension:
            self.build_path()

    def set_extension(self, extension):
        """Set the 'extension' keyword and rebuild 'path' and 'realpath'"""
        self.has_extension = True
        self.keywords["extension"] = extension
        self.build_path()

    def build_path(self, sep=os.path.sep):
        """Use filename-keywords and directory to build a full path"""
        filename = text.clean_path(self.filename_fmt.format(**self.keywords))
        self.path = self.directory + sep + filename
        self.realpath = self.realdirectory + sep + filename

    @staticmethod
    def get_base_directory():
        """Return the base-destination-directory for downloads"""
        bdir = config.get(("base-directory",), default=(".", "gallery-dl"))
        # the configured value may be a single string or a sequence of
        # path segments
        if not isinstance(bdir, str):
            bdir = os.path.join(*bdir)
        return os.path.expanduser(os.path.expandvars(bdir))

    @staticmethod
    def adjust_path(path):
        """Enable longer-than-260-character paths on windows"""
        # on Windows ('nt') the absolute path gets the '\\?\' prefix;
        # on every other platform 'path' is returned unchanged
        return "\\\\?\\" + os.path.abspath(path) if os.name == "nt" else path