mirror of
https://github.com/mikf/gallery-dl.git
synced 2024-11-22 18:53:21 +01:00
get extension from Content-Type header if not provided
This commit is contained in:
parent
8d106a447c
commit
29692c5784
@ -1,6 +1,6 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2014, 2015 Mike Fährmann
|
||||
# Copyright 2014-2016 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
@ -15,16 +15,15 @@ class BasicDownloader():
|
||||
|
||||
max_tries = 5
|
||||
|
||||
def download(self, url, fileobj):
|
||||
def download(self, url, pathfmt):
|
||||
"""Download the resource at 'url' and write it to a file-like object"""
|
||||
try:
|
||||
return self.download_impl(url, fileobj)
|
||||
return self.download_impl(url, pathfmt)
|
||||
except:
|
||||
# remove file if download failed
|
||||
try:
|
||||
fileobj.close()
|
||||
os.unlink(fileobj.name)
|
||||
except AttributeError:
|
||||
os.unlink(pathfmt.realpath)
|
||||
except (AttributeError, FileNotFoundError):
|
||||
pass
|
||||
raise
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2014, 2015 Mike Fährmann
|
||||
# Copyright 2014-2016 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
@ -10,16 +10,17 @@
|
||||
|
||||
import time
|
||||
import requests
|
||||
import mimetypes
|
||||
from .common import BasicDownloader
|
||||
|
||||
class Downloader(BasicDownloader):
|
||||
|
||||
def __init__(self, printer):
|
||||
def __init__(self, output):
|
||||
BasicDownloader.__init__(self)
|
||||
self.session = requests.session()
|
||||
self.printer = printer
|
||||
self.out = output
|
||||
|
||||
def download_impl(self, url, file):
|
||||
def download_impl(self, url, pathfmt):
|
||||
tries = 0
|
||||
while True:
|
||||
# try to connect to remote source
|
||||
@ -27,7 +28,7 @@ class Downloader(BasicDownloader):
|
||||
response = self.session.get(url, stream=True, verify=True)
|
||||
except requests.exceptions.ConnectionError as exptn:
|
||||
tries += 1
|
||||
self.printer.error(file, exptn, tries, self.max_tries)
|
||||
self.out.error(pathfmt.path, exptn, tries, self.max_tries)
|
||||
time.sleep(1)
|
||||
if tries == self.max_tries:
|
||||
raise
|
||||
@ -36,10 +37,8 @@ class Downloader(BasicDownloader):
|
||||
# reject error-status-codes
|
||||
if response.status_code != requests.codes.ok:
|
||||
tries += 1
|
||||
self.printer.error(file, 'HTTP status "{} {}"'.format(
|
||||
self.out.error(pathfmt.path, 'HTTP status "{} {}"'.format(
|
||||
response.status_code, response.reason), tries, self.max_tries)
|
||||
if response.status_code == 404:
|
||||
return self.max_tries
|
||||
time.sleep(1)
|
||||
if tries == self.max_tries:
|
||||
response.raise_for_status()
|
||||
@ -48,9 +47,22 @@ class Downloader(BasicDownloader):
|
||||
# everything ok -- proceed to download
|
||||
break
|
||||
|
||||
for data in response.iter_content(16384):
|
||||
file.write(data)
|
||||
return tries
|
||||
if not pathfmt.has_extension:
|
||||
# set 'extension' keyword from Content-Type header
|
||||
mtype = response.headers.get("Content-Type", "image/jpeg")
|
||||
extensions = mimetypes.guess_all_extensions(mtype)
|
||||
extensions.sort()
|
||||
pathfmt.set_extension(extensions[-1][1:])
|
||||
if pathfmt.exists():
|
||||
self.out.skip(pathfmt.path)
|
||||
response.close()
|
||||
return
|
||||
|
||||
self.out.start(pathfmt.path)
|
||||
with pathfmt.open() as file:
|
||||
for data in response.iter_content(16384):
|
||||
file.write(data)
|
||||
self.out.success(pathfmt.path, tries)
|
||||
|
||||
def set_headers(self, headers):
|
||||
"""Set headers for http requests"""
|
||||
@ -65,4 +77,3 @@ class Downloader(BasicDownloader):
|
||||
"""Copy the contents of dictionary 'src' to 'dest'"""
|
||||
dest.clear()
|
||||
dest.update(src)
|
||||
|
||||
|
@ -1 +0,0 @@
|
||||
from .http import Downloader
|
@ -1,20 +1,29 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2014, 2015 Mike Fährmann
|
||||
# Copyright 2014-2016 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
"""Downloader module for text urls"""
|
||||
"""Downloader module for text:// urls"""
|
||||
|
||||
from .common import BasicDownloader
|
||||
|
||||
class Downloader(BasicDownloader):
|
||||
|
||||
def __init__(self, *args):
|
||||
def __init__(self, output):
|
||||
BasicDownloader.__init__(self)
|
||||
self.out = output
|
||||
|
||||
def download_impl(self, url, file):
|
||||
file.write(bytes(url[7:], "utf-8"))
|
||||
return 0
|
||||
def download_impl(self, url, pathfmt):
|
||||
if not pathfmt.has_extension:
|
||||
pathfmt.set_extension("txt")
|
||||
if pathfmt.exists():
|
||||
self.out.skip(pathfmt.path)
|
||||
return
|
||||
|
||||
self.out.start(pathfmt.path)
|
||||
with pathfmt.open() as file:
|
||||
file.write(bytes(url[7:], "utf-8"))
|
||||
self.out.success(pathfmt.path, 0)
|
||||
|
@ -1,16 +1,14 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2015 Mike Fährmann
|
||||
# Copyright 2015, 2016 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
import os
|
||||
import json
|
||||
import hashlib
|
||||
import platform
|
||||
from . import config, extractor, downloader, text, output, exception
|
||||
from . import extractor, downloader, path, output, exception
|
||||
from .extractor.message import Message
|
||||
|
||||
class Job():
|
||||
@ -73,19 +71,10 @@ class DownloadJob(Job):
|
||||
|
||||
def __init__(self, url):
|
||||
Job.__init__(self, url)
|
||||
self.directory = self.get_base_directory()
|
||||
self.pathfmt = path.PathFormat(self.extractor)
|
||||
self.downloaders = {}
|
||||
self.queue = None
|
||||
self.printer = output.select()
|
||||
key = ["extractor", self.extractor.category]
|
||||
if self.extractor.subcategory:
|
||||
key.append(self.extractor.subcategory)
|
||||
self.filename_fmt = config.interpolate(
|
||||
key + ["filename_fmt"], default=self.extractor.filename_fmt
|
||||
)
|
||||
self.directory_fmt = config.interpolate(
|
||||
key + ["directory_fmt"], default=self.extractor.directory_fmt
|
||||
)
|
||||
self.out = output.select()
|
||||
|
||||
def run(self):
|
||||
Job.run(self)
|
||||
@ -98,29 +87,16 @@ class DownloadJob(Job):
|
||||
|
||||
def handle_url(self, url, keywords):
|
||||
"""Download the resource specified in 'url'"""
|
||||
filename = text.clean_path(self.filename_fmt.format(**keywords))
|
||||
path = os.path.join(self.directory, filename)
|
||||
realpath = self.adjust_path(path)
|
||||
if os.path.exists(realpath):
|
||||
self.printer.skip(path)
|
||||
self.pathfmt.set_keywords(keywords)
|
||||
if self.pathfmt.exists():
|
||||
self.out.skip(self.pathfmt.path)
|
||||
return
|
||||
dlinstance = self.get_downloader(url)
|
||||
self.printer.start(path)
|
||||
with open(realpath, "wb") as file:
|
||||
tries = dlinstance.download(url, file)
|
||||
self.printer.success(path, tries)
|
||||
dlinstance.download(url, self.pathfmt)
|
||||
|
||||
def handle_directory(self, keywords):
|
||||
"""Set and create the target directory for downloads"""
|
||||
segments = [
|
||||
text.clean_path(segment.format(**keywords).strip())
|
||||
for segment in self.directory_fmt
|
||||
]
|
||||
self.directory = os.path.join(
|
||||
self.get_base_directory(),
|
||||
*segments
|
||||
)
|
||||
os.makedirs(self.adjust_path(self.directory), exist_ok=True)
|
||||
self.pathfmt.set_directory(keywords)
|
||||
|
||||
def handle_queue(self, url):
|
||||
"""Add url to work-queue"""
|
||||
@ -144,23 +120,10 @@ class DownloadJob(Job):
|
||||
instance = self.downloaders.get(scheme)
|
||||
if instance is None:
|
||||
klass = downloader.find(scheme)
|
||||
instance = klass(self.printer)
|
||||
instance = klass(self.out)
|
||||
self.downloaders[scheme] = instance
|
||||
return instance
|
||||
|
||||
@staticmethod
|
||||
def get_base_directory():
|
||||
"""Return the base-destination-directory for downloads"""
|
||||
bdir = config.get(("base-directory",), default=(".", "gallery-dl"))
|
||||
if not isinstance(bdir, str):
|
||||
bdir = os.path.join(*bdir)
|
||||
return os.path.expanduser(os.path.expandvars(bdir))
|
||||
|
||||
@staticmethod
|
||||
def adjust_path(path, longpaths=platform.system() == "Windows"):
|
||||
"""Enable longer-than-260-character paths on windows"""
|
||||
return "\\\\?\\" + os.path.abspath(path) if longpaths else path
|
||||
|
||||
|
||||
class KeywordJob(Job):
|
||||
"""Print available keywords"""
|
||||
@ -207,6 +170,17 @@ class HashJob(DownloadJob):
|
||||
|
||||
def __init__(self, hashobj):
|
||||
self.hashobj = hashobj
|
||||
self.path = ""
|
||||
self.has_extension = True
|
||||
|
||||
def __enter__(self):
|
||||
return self
|
||||
|
||||
def __exit__(self, *args):
|
||||
pass
|
||||
|
||||
def open(self):
|
||||
return self
|
||||
|
||||
def write(self, content):
|
||||
"""Update SHA1 hash"""
|
||||
|
82
gallery_dl/path.py
Normal file
82
gallery_dl/path.py
Normal file
@ -0,0 +1,82 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2016 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
import os
|
||||
from . import config, text
|
||||
|
||||
class PathFormat():
    """Build and keep track of the target directory and file path.

    The format strings are read from the configuration and fall back to
    the extractor's own 'filename_fmt' and 'directory_fmt' attributes.

    'directory' and 'path' hold the plain values, while 'realdirectory'
    and 'realpath' hold the results of adjust_path() and are the ones
    used for actual filesystem access.
    """

    def __init__(self, extractor):
        """Look up the filename- and directory-format for 'extractor'"""
        # config key: extractor -> category [-> subcategory] -> option
        key = ["extractor", extractor.category]
        if extractor.subcategory:
            key.append(extractor.subcategory)
        self.filename_fmt = config.interpolate(
            key + ["filename_fmt"], default=extractor.filename_fmt
        )
        self.directory_fmt = config.interpolate(
            key + ["directory_fmt"], default=extractor.directory_fmt
        )
        self.has_extension = False
        self.keywords = {}
        self.directory = self.realdirectory = ""
        self.path = self.realpath = ""

    def open(self):
        """Open file at 'realpath' and return a corresponding file object"""
        return open(self.realpath, "wb")

    def exists(self):
        """Return True if 'path' is complete and refers to an existing path

        A path without a known extension is never considered complete,
        so this returns False until an extension has been set.
        """
        if self.has_extension:
            return os.path.exists(self.realpath)
        return False

    def set_directory(self, keywords):
        """Build directory path and create it if necessary"""
        segments = [
            text.clean_path(segment.format(**keywords).strip())
            for segment in self.directory_fmt
        ]
        self.directory = os.path.join(
            self.get_base_directory(),
            *segments
        )
        self.realdirectory = self.adjust_path(self.directory)
        os.makedirs(self.realdirectory, exist_ok=True)

    def set_keywords(self, keywords):
        """Set filename keywords

        The path is only built if 'keywords' contains a non-empty
        'extension' value; otherwise it stays empty until
        set_extension() gets called.
        """
        # NOTE: the dict is stored by reference, so set_extension() writes
        # the 'extension' key into the caller's dictionary
        self.keywords = keywords
        self.has_extension = bool(keywords.get("extension"))
        if self.has_extension:
            self.build_path()
        else:
            # The filename is not known yet. Forget the paths of the
            # previous file, so that nothing (error output, cleanup after
            # a failed download) operates on a file that belongs to an
            # earlier, already finished download.
            self.path = self.realpath = ""

    def set_extension(self, extension):
        """Set the 'extension' keyword and (re)build the path"""
        self.has_extension = True
        self.keywords["extension"] = extension
        self.build_path()

    def build_path(self, sep=os.path.sep):
        """Use filename-keywords and directory to build a full path"""
        filename = text.clean_path(self.filename_fmt.format(**self.keywords))
        self.path = self.directory + sep + filename
        self.realpath = self.realdirectory + sep + filename

    @staticmethod
    def get_base_directory():
        """Return the base-destination-directory for downloads"""
        bdir = config.get(("base-directory",), default=(".", "gallery-dl"))
        # a non-string value is treated as a sequence of path segments
        if not isinstance(bdir, str):
            bdir = os.path.join(*bdir)
        return os.path.expanduser(os.path.expandvars(bdir))

    @staticmethod
    def adjust_path(path):
        """Enable longer-than-260-character paths on windows"""
        # the '\\?\' prefix is only added on 'nt'; other systems get the
        # path back unchanged
        return "\\\\?\\" + os.path.abspath(path) if os.name == "nt" else path
|
Loading…
Reference in New Issue
Block a user