mirror of
https://github.com/mikf/gallery-dl.git
synced 2025-01-31 11:41:35 +01:00
[2chan] fix metadata extraction
This commit is contained in:
parent
173a93454e
commit
71acbdabf4
@ -68,6 +68,8 @@ class _2chanThreadExtractor(Extractor):
|
||||
def parse(self, post):
|
||||
"""Build post-object by extracting data from an HTML post"""
|
||||
data = self._extract_post(post)
|
||||
if data["name"]:
|
||||
data["name"] = data["name"].strip()
|
||||
if '<a href="/' in post:
|
||||
self._extract_image(post, data)
|
||||
data["tim"], _, data["extension"] = data["filename"].partition(".")
|
||||
@ -78,10 +80,10 @@ class _2chanThreadExtractor(Extractor):
|
||||
@staticmethod
|
||||
def _extract_post(post):
|
||||
return text.extract_all(post, (
|
||||
("no" , 'name="', '"'),
|
||||
("post", '<b>', '</b>'),
|
||||
("name", '<b>', ' </b>'),
|
||||
("now" , '</font> ', ' '),
|
||||
("post", 'class="csb">' , '<'),
|
||||
("name", 'class="cnm">' , '<'),
|
||||
("now" , 'class="cnw">' , '<'),
|
||||
("no" , 'class="cno">No.', '<'),
|
||||
(None , '<blockquote', ''),
|
||||
("com" , '>', '</blockquote>'),
|
||||
))[0]
|
||||
|
@ -93,7 +93,7 @@ class WikiartArtworksExtractor(WikiartExtractor):
|
||||
directory_fmt = ("{category}", "Artworks by {group!c}", "{type}")
|
||||
pattern = BASE_PATTERN + r"/paintings-by-([\w-]+)/([\w-]+)"
|
||||
test = ("https://www.wikiart.org/en/paintings-by-media/grisaille", {
|
||||
"url": "f92d55669fa949491c26a5437527adb14b35b8cc",
|
||||
"url": "228426a9d32b5bba9d659944c6b0ba73883af33f",
|
||||
})
|
||||
|
||||
def __init__(self, match):
|
||||
|
Loading…
x
Reference in New Issue
Block a user