1
0
mirror of https://github.com/mikf/gallery-dl.git synced 2024-11-22 02:32:33 +01:00

implement 'http-metadata' option

or at least attempt to.
This commit is contained in:
Mike Fährmann 2022-11-05 17:37:43 +01:00
parent b7a83ac726
commit 870e6a48a0
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88
2 changed files with 50 additions and 7 deletions

View File

@ -586,6 +586,22 @@ Description
to access the current file's filename as ``"{gdl_path.filename}"``.
extractor.*.http-metadata
-------------------------
Type
``string``
Default
``null``
Description
Insert an ``object`` containing a file's HTTP headers and
``filename``, ``extension``, and ``date`` parsed from them
into metadata dictionaries as the given name.
For example, setting this option to ``"gdl_http"`` would make it possible
to access the current file's ``Last-Modified`` header as ``"{gdl_http[Last-Modified]}"``
and its parsed form as ``"{gdl_http[date]}"``.
extractor.*.category-transfer
-----------------------------
Type

View File

@ -14,6 +14,8 @@ from requests.exceptions import RequestException, ConnectionError, Timeout
from .common import DownloaderBase
from .. import text, util
from email.utils import parsedate_tz
from datetime import datetime
from ssl import SSLError
try:
from OpenSSL.SSL import Error as OpenSSLError
@ -31,6 +33,7 @@ class HttpDownloader(DownloaderBase):
self.adjust_extension = self.config("adjust-extensions", True)
self.chunk_size = self.config("chunk-size", 32768)
self.metadata = extractor.config("http-metadata")
self.progress = self.config("progress", 3.0)
self.headers = self.config("headers")
self.minsize = self.config("filesize-min")
@ -171,13 +174,6 @@ class HttpDownloader(DownloaderBase):
self.log.warning("Invalid response")
return False
# set missing filename extension from MIME type
if not pathfmt.extension:
pathfmt.set_extension(self._find_extension(response))
if pathfmt.exists():
pathfmt.temppath = ""
return True
# check file size
size = text.parse_int(size, None)
if size is not None:
@ -192,6 +188,21 @@ class HttpDownloader(DownloaderBase):
size, self.maxsize)
return False
# set missing filename extension from MIME type
if not pathfmt.extension:
pathfmt.set_extension(self._find_extension(response))
if pathfmt.exists():
pathfmt.temppath = ""
return True
# set metadata from HTTP headers
if self.metadata:
kwdict[self.metadata] = self._extract_metadata(response)
pathfmt.build_path()
if pathfmt.exists():
pathfmt.temppath = ""
return True
content = response.iter_content(self.chunk_size)
# check filename extension against file header
@ -294,6 +305,22 @@ class HttpDownloader(DownloaderBase):
t1 = t2
def _extract_metadata(self, response):
    """Collect metadata from the HTTP headers of 'response'.

    Returns a new dict holding a copy of all response headers.
    When a 'Content-Disposition' header carries a quoted filename,
    that name is handed to text.nameext_from_url() together with the
    dict (presumably adding 'filename' and 'extension' entries, as the
    'http-metadata' option documentation describes).
    When a 'Last-Modified' header is present and parseable, its value
    is stored under 'date' as a naive datetime built from the
    year..second fields; the timezone offset is not applied.
    """
    headers = response.headers
    data = dict(headers)

    hcd = headers.get("content-disposition")
    if hcd:
        name = text.extr(hcd, 'filename="', '"')
        if name:
            text.nameext_from_url(name, data)

    hlm = headers.get("last-modified")
    if hlm:
        # parsedate_tz() returns None for values it cannot parse;
        # a malformed header must not abort the download
        parsed = parsedate_tz(hlm)
        if parsed:
            try:
                data["date"] = datetime(*parsed[:6])
            except ValueError:
                # out-of-range fields (e.g. day 32): this metadata is
                # best-effort, so leave 'date' unset instead of failing
                pass

    return data
def _find_extension(self, response):
"""Get filename extension from MIME type"""
mtype = response.headers.get("Content-Type", "image/jpeg")