1
0
mirror of https://github.com/mikf/gallery-dl.git synced 2024-11-22 02:32:33 +01:00

[generic] ensure 'path' metadata is always defined

Fixes 'None' directory names that occurred when 'path' was empty and was therefore removed from the metadata.
This commit is contained in:
Mike Fährmann 2024-10-10 14:06:47 +02:00
parent a09cef79c6
commit 73f77a543a
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

View File

@@ -89,30 +89,33 @@ class GenericExtractor(Extractor):
def metadata(self, page):
    """Extract generic webpage metadata, return them in a dict.

    Each field is looked up in 'page' with text.extr() using a fixed
    pair of start/end markers.  Fields whose extracted value is empty
    are left out of the result; the remaining values are passed through
    text.unescape().

    'path' and 'pageurl' are added afterwards, so both keys are always
    present in the returned dict, even when 'path' is an empty string.

    Parameters:
        page: the webpage content to search (presumably the HTML
            source as a string -- confirm against the caller).

    Returns:
        dict mapping metadata names to their values.
    """
    # (metadata key, start marker, end marker) as passed to text.extr()
    fields = (
        ("title"         , "<title>", "</title>"),
        ("description"   , '<meta name="description" content="', '"'),
        ("keywords"      , '<meta name="keywords" content="', '"'),
        ("language"      , '<meta name="language" content="', '"'),
        ("name"          , '<meta itemprop="name" content="', '"'),
        ("copyright"     , '<meta name="copyright" content="', '"'),
        ("og_site"       , '<meta property="og:site" content="', '"'),
        ("og_site_name"  , '<meta property="og:site_name" content="', '"'),
        ("og_title"      , '<meta property="og:title" content="', '"'),
        ("og_description",
         '<meta property="og:description" content="', '"'),
    )

    data = {}
    for key, begin, end in fields:
        value = text.extr(page, begin, end)
        # Drop fields with no (or empty) content instead of storing ""
        if value:
            data[key] = text.unescape(value)

    # Assigned after the empty-value filter on purpose: 'path' must stay
    # defined even when it is "", otherwise it would be missing from the
    # metadata. Both values are stored as-is (not unescaped).
    data["path"] = self.path.replace("/", "")
    data["pageurl"] = self.url
    return data