1
0
mirror of https://github.com/mikf/gallery-dl.git synced 2024-11-22 10:42:34 +01:00

[weasyl:favorite] update

- use 'self.groups' to access userid and username
- save one request by not doing an explicit username -> userid lookup
- save one request by following the 'Next' link instead of detecting an
  empty page
This commit is contained in:
Mike Fährmann 2024-09-25 20:02:01 +02:00
parent cbecaecc43
commit 9d7f8f892d
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

View File

@ -160,50 +160,44 @@ class WeasylJournalsExtractor(WeasylExtractor):
class WeasylFavoriteExtractor(WeasylExtractor): class WeasylFavoriteExtractor(WeasylExtractor):
subcategory = "favorite" subcategory = "favorite"
directory_fmt = ("{category}", "{user}", "Favorites") directory_fmt = ("{category}", "{user}", "Favorites")
pattern = BASE_PATTERN + r"favorites(?:\?userid=(\d+)|\/([\w~-]+))" pattern = BASE_PATTERN + r"favorites(?:\?userid=(\d+)|/([^/?#]+))"
example = "https://www.weasyl.com/favorites?userid=12345" example = "https://www.weasyl.com/favorites?userid=12345"
def __init__(self, match):
WeasylExtractor.__init__(self, match)
self.userid = match.group(1)
self.username = match.group(2)
def items(self): def items(self):
if self.userid is None and self.username is not None: userid, username = self.groups
new_url = self.root + "/favorites/{}".format(self.username)
page = self.request(new_url).text
self.userid = text.extr(
page,
'<a class="more" href="/favorites?userid=',
'&amp'
)
owner_login = lastid = None owner_login = lastid = None
url = self.root + "/favorites"
if username:
owner_login = username
path = "/favorites/" + username
else:
path = "/favorites"
params = { params = {
"userid" : self.userid, "userid" : userid,
"feature": "submit", "feature": "submit",
} }
while True: while True:
page = self.request(url, params=params).text page = self.request(self.root + path, params=params).text
pos = page.index('id="favorites-content"') pos = page.index('id="favorites-content"')
if not owner_login: if not owner_login:
owner_login = text.extr(page, '<a href="/~', '"') owner_login = text.extr(page, '<a href="/~', '"')
new_posts = False
for submitid in text.extract_iter(page, "/submissions/", "/", pos): for submitid in text.extract_iter(page, "/submissions/", "/", pos):
if submitid == lastid: if submitid == lastid:
continue continue
new_posts = True
lastid = submitid lastid = submitid
submission = self.request_submission(submitid) submission = self.request_submission(submitid)
if self.populate_submission(submission): if self.populate_submission(submission):
submission["user"] = owner_login submission["user"] = owner_login
yield Message.Directory, submission yield Message.Directory, submission
yield Message.Url, submission["url"], submission yield Message.Url, submission["url"], submission
if not new_posts: try:
pos = page.index('">Next (', pos)
except ValueError:
return return
params["nextid"] = submitid path = text.unescape(text.rextract(page, 'href="', '"', pos)[0])
params = None