1
0
mirror of https://github.com/mikf/gallery-dl.git synced 2024-11-22 02:32:33 +01:00

[deviantart] fix & improve journal/literature extraction (#6254, #6207)

fetch text from HTML __INITIAL_STATE__,
since the API doesn't reliably work and is unusable for sta.sh journals
This commit is contained in:
Mike Fährmann 2024-10-01 14:31:47 +02:00
parent 8f09e4eb02
commit ed859f05ed
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88
2 changed files with 60 additions and 21 deletions

View File

@ -177,24 +177,7 @@ class DeviantartExtractor(Extractor):
yield self.commit(deviation, deviation["flash"])
if self.commit_journal:
if "excerpt" in deviation:
# journal = self.api.deviation_content(
# deviation["deviationid"])
if not self.eclipse_api:
self.eclipse_api = DeviantartEclipseAPI(self)
content = self.eclipse_api.deviation_extended_fetch(
deviation["index"],
deviation["author"]["username"],
"journal",
)["deviation"]["textContent"]
html = content["html"]["markup"]
if html.startswith("{"):
html = content["excerpt"].replace("\n", "<br />")
journal = {"html": html}
elif "body" in deviation:
journal = {"html": deviation.pop("body")}
else:
journal = None
journal = self._extract_journal(deviation)
if journal:
if self.extra:
deviation["_journal"] = journal["html"]
@ -375,6 +358,33 @@ class DeviantartExtractor(Extractor):
deviation["extension"] = "txt"
return Message.Url, txt, deviation
def _extract_journal(self, deviation):
    """Return the journal/literature text of 'deviation' as {"html": ...}

    For a deviation with an "excerpt", the text is read from the
    '__INITIAL_STATE__' JSON embedded in the deviation's web page.
    A page stored under "_page" is consumed (and removed from
    'deviation') if present; otherwise deviation["url"] is requested.

    For a deviation with a "body", that value is popped and returned.

    Returns None when 'deviation' has neither "excerpt" nor "body".
    """
    if "excerpt" in deviation:
        # self.api.deviation_content(deviation["deviationid"]) is not
        # used here; it was noted to return an empty 'html'
        page = deviation.pop("_page", None)
        if page is None:
            page = self._limited_request(deviation["url"]).text

        # the state is embedded as the argument of a JS 'JSON.parse("...")'
        # call; undo its backslash escaping before parsing it as JSON
        raw = text.extr(
            page, 'window.__INITIAL_STATE__ = JSON.parse("', '");')
        raw = (raw
               .replace("\\\\", "\\")
               .replace("\\'", "'")
               .replace('\\"', '"'))
        state = util.json_loads(raw)

        # use a separate name for the page-state entry so the caller's
        # 'deviation' dict stays accessible under its own name
        entity = state["@@entities"]["deviation"].popitem()[1]
        content = entity["textContent"]

        html = content["html"]["markup"]
        if html.startswith("{"):
            # markup beginning with "{" is not used as HTML;
            # fall back to the excerpt with newlines turned into <br />
            html = content["excerpt"].replace("\n", "<br />")
        return {"html": html}

    if "body" in deviation:
        return {"html": deviation.pop("body")}

    return None
def _extract_content(self, deviation):
content = deviation["content"]
@ -728,6 +738,7 @@ class DeviantartStashExtractor(DeviantartExtractor):
uuid = text.extr(page, '//deviation/', '"')
if uuid:
deviation = self.api.deviation(uuid)
deviation["_page"] = page
deviation["index"] = text.parse_int(text.extr(
page, '\\"deviationId\\":', ','))
yield deviation
@ -939,11 +950,14 @@ class DeviantartDeviationExtractor(DeviantartExtractor):
else:
url = "{}/view/{}/".format(self.root, self.deviation_id)
uuid = text.extr(self._limited_request(url).text,
'"deviationUuid\\":\\"', '\\')
page = self._limited_request(url, notfound="deviation").text
uuid = text.extr(page, '"deviationUuid\\":\\"', '\\')
if not uuid:
raise exception.NotFoundError("deviation")
return (self.api.deviation(uuid),)
deviation = self.api.deviation(uuid)
deviation["_page"] = page
return (deviation,)
class DeviantartScrapsExtractor(DeviantartExtractor):

View File

@ -767,6 +767,16 @@ __tests__ = (
"extension": "swf",
},
{
"#url" : "https://www.deviantart.com/justatest235723/art/video-1103119114",
"#comment" : "video",
"#class" : deviantart.DeviantartDeviationExtractor,
"#pattern" : r"/f/940f2d05-c5eb-4917-8192-7eb6a2d508c6/di8ro5m-e2a5bdf0-daee-4e18-bede-fbfc394d6c65\.mp4\?token=ey",
"filename" : "video_63aebdd4bc0323da460796b9a2ac8522_by_justatest235723-di8ro5m",
"extension": "mp4",
},
{
"#url" : "https://www.deviantart.com/uotapo/art/INANAKI-Memo-590297498",
"#comment" : "sta.sh URLs from description (#302)",
@ -810,6 +820,21 @@ __tests__ = (
"#sha1_url": "8ca1dc8df53d3707c778d08a604f9ad9ddba7469",
},
{
"#url" : "https://www.deviantart.com/stash/09z3557z648",
"#comment" : "sta.sh journal (#6207)",
"#class" : deviantart.DeviantartStashExtractor,
"#pattern" : """text:<!DOCTYPE html>\n""",
},
{
"#url" : "https://www.deviantart.com/starvinglunatic/art/Against-the-world-chapter-1-50968347",
"#comment" : "literature (#6254)",
"#class" : deviantart.DeviantartDeviationExtractor,
"#pattern" : """text:<!DOCTYPE html>\n""",
},
{
"#url" : "https://www.deviantart.com/neotypical/art/985226590",
"#comment" : "subscription locked (#4567)",