1
0
mirror of https://github.com/mikf/gallery-dl.git synced 2024-11-22 18:53:21 +01:00

[imagefap] fix and improve folder extraction (#3013)

This commit is contained in:
Mike Fährmann 2022-10-07 22:07:34 +02:00
parent 55fca5fe4b
commit bc9d291c13
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

View File

@ -182,40 +182,49 @@ class ImagefapUserExtractor(ImagefapExtractor):
self.user, self.user_id = match.groups()
def items(self):
for gid, name in self.get_gallery_data():
url = "{}/gallery/{}".format(self.root, gid)
data = {
"gallery_id": text.parse_int(gid),
"title": text.unescape(name),
"_extractor": ImagefapGalleryExtractor,
}
yield Message.Queue, url, data
for folder_id in self.folders():
for gallery_id, name in self.galleries(folder_id):
url = "{}/gallery/{}".format(self.root, gallery_id)
data = {
"gallery_id": text.parse_int(gallery_id),
"title" : text.unescape(name),
"_extractor": ImagefapGalleryExtractor,
}
yield Message.Queue, url, data
def get_gallery_data(self):
"""Yield all gallery_ids of a specific user"""
folders = self.get_gallery_folders()
url = "{}/ajax_usergallery_folder.php".format(self.root)
params = {"userid": self.user_id}
for folder_id in folders:
params["id"] = folder_id
page = self.request(url, params=params).text
pos = 0
while True:
gid, pos = text.extract(page, '<a href="/gallery/', '"', pos)
if not gid:
break
name, pos = text.extract(page, "<b>", "<", pos)
yield gid, name
def get_gallery_folders(self):
"""Create a list of all folder_ids of a specific user"""
def folders(self):
"""Return a list of folder_ids of a specific user"""
if self.user:
url = "{}/profile/{}/galleries".format(self.root, self.user)
else:
url = "{}/usergallery.php?userid={}".format(
self.root, self.user_id)
page = self.request(url).text
self.user_id, pos = text.extract(page, '?userid=', '"')
folders, pos = text.extract(page, ' id="tgl_all" value="', '"', pos)
return folders.split("|")[:-1]
response = self.request(url)
self.user = response.url.split("/")[-2]
folders = text.extract(response.text, ' id="tgl_all" value="', '"')[0]
return folders.rstrip("|").split("|")
def galleries(self, folder_id):
"""Yield gallery_ids of a folder"""
if folder_id == "-1":
url = "{}/profile/{}/galleries?folderid=-1".format(
self.root, self.user)
else:
url = "{}/organizer/{}/".format(self.root, folder_id)
params = {"page": 0}
while True:
extr = text.extract_from(self.request(url, params=params).text)
cnt = 0
while True:
gid = extr('<a href="/gallery/', '"')
if not gid:
break
yield gid, extr("<b>", "<")
cnt += 1
if cnt < 25:
break
params["page"] += 1