mirror of
https://github.com/mikf/gallery-dl.git
synced 2024-11-22 02:32:33 +01:00
implement 'http-metadata' option
or at least attempt to.
This commit is contained in:
parent
b7a83ac726
commit
870e6a48a0
@ -586,6 +586,22 @@ Description
|
||||
to access the current file's filename as ``"[gdl_path.filename}"``.
|
||||
|
||||
|
||||
extractor.*.http-metadata
|
||||
-------------------------
|
||||
Type
|
||||
``string``
|
||||
Default
|
||||
``null``
|
||||
Description
|
||||
Insert an ``object`` containing a file's HTTP headers and
|
||||
``filename``, ``extension``, and ``date`` parsed from them
|
||||
into metadata dictionaries as the given name.
|
||||
|
||||
For example, setting this option to ``"gdl_http"`` would make it possible
|
||||
to access the current file's ``Last-Modified`` header as ``"[gdl_http[Last-Modified]}"``
|
||||
and its parsed form as .``"[gdl_http[date]}"``.
|
||||
|
||||
|
||||
extractor.*.category-transfer
|
||||
-----------------------------
|
||||
Type
|
||||
|
@ -14,6 +14,8 @@ from requests.exceptions import RequestException, ConnectionError, Timeout
|
||||
from .common import DownloaderBase
|
||||
from .. import text, util
|
||||
|
||||
from email.utils import parsedate_tz
|
||||
from datetime import datetime
|
||||
from ssl import SSLError
|
||||
try:
|
||||
from OpenSSL.SSL import Error as OpenSSLError
|
||||
@ -31,6 +33,7 @@ class HttpDownloader(DownloaderBase):
|
||||
|
||||
self.adjust_extension = self.config("adjust-extensions", True)
|
||||
self.chunk_size = self.config("chunk-size", 32768)
|
||||
self.metadata = extractor.config("http-metadata")
|
||||
self.progress = self.config("progress", 3.0)
|
||||
self.headers = self.config("headers")
|
||||
self.minsize = self.config("filesize-min")
|
||||
@ -171,13 +174,6 @@ class HttpDownloader(DownloaderBase):
|
||||
self.log.warning("Invalid response")
|
||||
return False
|
||||
|
||||
# set missing filename extension from MIME type
|
||||
if not pathfmt.extension:
|
||||
pathfmt.set_extension(self._find_extension(response))
|
||||
if pathfmt.exists():
|
||||
pathfmt.temppath = ""
|
||||
return True
|
||||
|
||||
# check file size
|
||||
size = text.parse_int(size, None)
|
||||
if size is not None:
|
||||
@ -192,6 +188,21 @@ class HttpDownloader(DownloaderBase):
|
||||
size, self.maxsize)
|
||||
return False
|
||||
|
||||
# set missing filename extension from MIME type
|
||||
if not pathfmt.extension:
|
||||
pathfmt.set_extension(self._find_extension(response))
|
||||
if pathfmt.exists():
|
||||
pathfmt.temppath = ""
|
||||
return True
|
||||
|
||||
# set metadata from HTTP headers
|
||||
if self.metadata:
|
||||
kwdict[self.metadata] = self._extract_metadata(response)
|
||||
pathfmt.build_path()
|
||||
if pathfmt.exists():
|
||||
pathfmt.temppath = ""
|
||||
return True
|
||||
|
||||
content = response.iter_content(self.chunk_size)
|
||||
|
||||
# check filename extension against file header
|
||||
@ -294,6 +305,22 @@ class HttpDownloader(DownloaderBase):
|
||||
|
||||
t1 = t2
|
||||
|
||||
def _extract_metadata(self, response):
|
||||
headers = response.headers
|
||||
data = dict(headers)
|
||||
|
||||
hcd = headers.get("content-disposition")
|
||||
if hcd:
|
||||
name = text.extr(hcd, 'filename="', '"')
|
||||
if name:
|
||||
text.nameext_from_url(name, data)
|
||||
|
||||
hlm = headers.get("last-modified")
|
||||
if hlm:
|
||||
data["date"] = datetime(*parsedate_tz(hlm)[:6])
|
||||
|
||||
return data
|
||||
|
||||
def _find_extension(self, response):
|
||||
"""Get filename extension from MIME type"""
|
||||
mtype = response.headers.get("Content-Type", "image/jpeg")
|
||||
|
Loading…
Reference in New Issue
Block a user