mirror of
https://github.com/mikf/gallery-dl.git
synced 2025-01-31 19:51:34 +01:00
[2chan] fix metadata extraction
This commit is contained in:
parent
173a93454e
commit
71acbdabf4
@@ -68,6 +68,8 @@ class _2chanThreadExtractor(Extractor):
|
|||||||
def parse(self, post):
|
def parse(self, post):
|
||||||
"""Build post-object by extracting data from an HTML post"""
|
"""Build post-object by extracting data from an HTML post"""
|
||||||
data = self._extract_post(post)
|
data = self._extract_post(post)
|
||||||
|
if data["name"]:
|
||||||
|
data["name"] = data["name"].strip()
|
||||||
if '<a href="/' in post:
|
if '<a href="/' in post:
|
||||||
self._extract_image(post, data)
|
self._extract_image(post, data)
|
||||||
data["tim"], _, data["extension"] = data["filename"].partition(".")
|
data["tim"], _, data["extension"] = data["filename"].partition(".")
|
||||||
@@ -78,10 +80,10 @@ class _2chanThreadExtractor(Extractor):
|
|||||||
@staticmethod
|
@staticmethod
|
||||||
def _extract_post(post):
|
def _extract_post(post):
|
||||||
return text.extract_all(post, (
|
return text.extract_all(post, (
|
||||||
("no" , 'name="', '"'),
|
("post", 'class="csb">' , '<'),
|
||||||
("post", '<b>', '</b>'),
|
("name", 'class="cnm">' , '<'),
|
||||||
("name", '<b>', ' </b>'),
|
("now" , 'class="cnw">' , '<'),
|
||||||
("now" , '</font> ', ' '),
|
("no" , 'class="cno">No.', '<'),
|
||||||
(None , '<blockquote', ''),
|
(None , '<blockquote', ''),
|
||||||
("com" , '>', '</blockquote>'),
|
("com" , '>', '</blockquote>'),
|
||||||
))[0]
|
))[0]
|
||||||
|
@@ -93,7 +93,7 @@ class WikiartArtworksExtractor(WikiartExtractor):
|
|||||||
directory_fmt = ("{category}", "Artworks by {group!c}", "{type}")
|
directory_fmt = ("{category}", "Artworks by {group!c}", "{type}")
|
||||||
pattern = BASE_PATTERN + r"/paintings-by-([\w-]+)/([\w-]+)"
|
pattern = BASE_PATTERN + r"/paintings-by-([\w-]+)/([\w-]+)"
|
||||||
test = ("https://www.wikiart.org/en/paintings-by-media/grisaille", {
|
test = ("https://www.wikiart.org/en/paintings-by-media/grisaille", {
|
||||||
"url": "f92d55669fa949491c26a5437527adb14b35b8cc",
|
"url": "228426a9d32b5bba9d659944c6b0ba73883af33f",
|
||||||
})
|
})
|
||||||
|
|
||||||
def __init__(self, match):
|
def __init__(self, match):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user