mirror of
https://github.com/mikf/gallery-dl.git
synced 2024-11-22 02:32:33 +01:00
[generic] ensure 'path' metadata is always defined
Fixes `None` directory names that occurred when 'path' was empty and was therefore removed from the metadata along with the other empty values.
This commit is contained in:
parent
a09cef79c6
commit
73f77a543a
@@ -89,30 +89,33 @@ class GenericExtractor(Extractor):
|
|||||||
|
|
||||||
def metadata(self, page):
|
def metadata(self, page):
|
||||||
"""Extract generic webpage metadata, return them in a dict."""
|
"""Extract generic webpage metadata, return them in a dict."""
|
||||||
data = {}
|
data = {
|
||||||
data['path'] = self.path.replace("/", "")
|
"title" : text.extr(
|
||||||
data['pageurl'] = self.url
|
page, "<title>", "</title>"),
|
||||||
data['title'] = text.extr(page, '<title>', "</title>")
|
"description" : text.extr(
|
||||||
data['description'] = text.extr(
|
page, '<meta name="description" content="', '"'),
|
||||||
page, '<meta name="description" content="', '"')
|
"keywords" : text.extr(
|
||||||
data['keywords'] = text.extr(
|
page, '<meta name="keywords" content="', '"'),
|
||||||
page, '<meta name="keywords" content="', '"')
|
"language" : text.extr(
|
||||||
data['language'] = text.extr(
|
page, '<meta name="language" content="', '"'),
|
||||||
page, '<meta name="language" content="', '"')
|
"name" : text.extr(
|
||||||
data['name'] = text.extr(
|
page, '<meta itemprop="name" content="', '"'),
|
||||||
page, '<meta itemprop="name" content="', '"')
|
"copyright" : text.extr(
|
||||||
data['copyright'] = text.extr(
|
page, '<meta name="copyright" content="', '"'),
|
||||||
page, '<meta name="copyright" content="', '"')
|
"og_site" : text.extr(
|
||||||
data['og_site'] = text.extr(
|
page, '<meta property="og:site" content="', '"'),
|
||||||
page, '<meta property="og:site" content="', '"')
|
"og_site_name" : text.extr(
|
||||||
data['og_site_name'] = text.extr(
|
page, '<meta property="og:site_name" content="', '"'),
|
||||||
page, '<meta property="og:site_name" content="', '"')
|
"og_title" : text.extr(
|
||||||
data['og_title'] = text.extr(
|
page, '<meta property="og:title" content="', '"'),
|
||||||
page, '<meta property="og:title" content="', '"')
|
"og_description": text.extr(
|
||||||
data['og_description'] = text.extr(
|
page, '<meta property="og:description" content="', '"'),
|
||||||
page, '<meta property="og:description" content="', '"')
|
|
||||||
|
|
||||||
data = {k: text.unescape(data[k]) for k in data if data[k] != ""}
|
}
|
||||||
|
|
||||||
|
data = {k: text.unescape(v) for k, v in data.items() if v}
|
||||||
|
data["path"] = self.path.replace("/", "")
|
||||||
|
data["pageurl"] = self.url
|
||||||
|
|
||||||
return data
|
return data
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user