mirror of
https://github.com/mikf/gallery-dl.git
synced 2025-01-31 19:51:34 +01:00
[bcy] reduce requests to '/item/detail/<id>' (#613)
The former implementation tried to use the embedded data from '/item/detail/' pages for every post, even when that wasn't necessary. This commit also fixes some issues with posts that are only visible to logged-in users.
This commit is contained in:
parent
f33b13aacf
commit
3fb41c34c8
@ -31,96 +31,103 @@ class BcyExtractor(Extractor):
|
|||||||
iroot = "https://img-bcy-qn.pstatp.com"
|
iroot = "https://img-bcy-qn.pstatp.com"
|
||||||
noop = self.config("noop")
|
noop = self.config("noop")
|
||||||
|
|
||||||
for post_id in self.posts():
|
for post in self.posts():
|
||||||
post = self._parse_post(post_id)
|
if not post["image_list"]:
|
||||||
if not post:
|
|
||||||
continue
|
continue
|
||||||
|
|
||||||
yield Message.Directory, post
|
multi = None
|
||||||
for post["num"], image in enumerate(post["_multi"], 1):
|
tags = post.get("post_tags") or ()
|
||||||
post["id"] = image["mid"]
|
data = {
|
||||||
post["width"] = image["w"]
|
|
||||||
post["height"] = image["h"]
|
|
||||||
|
|
||||||
url = image["path"].partition("~")[0]
|
|
||||||
text.nameext_from_url(url, post)
|
|
||||||
|
|
||||||
if post["extension"]:
|
|
||||||
if not url.startswith(iroot):
|
|
||||||
url = sub(iroot, url)
|
|
||||||
post["filter"] = ""
|
|
||||||
yield Message.Url, url, post
|
|
||||||
|
|
||||||
else:
|
|
||||||
post["filter"] = "watermark"
|
|
||||||
yield Message.Url, image["origin"], post
|
|
||||||
|
|
||||||
if noop:
|
|
||||||
post["extension"] = ""
|
|
||||||
post["filter"] = "noop"
|
|
||||||
yield Message.Url, image["original_path"], post
|
|
||||||
|
|
||||||
def _parse_post(self, post_id):
|
|
||||||
url = "{}/item/detail/{}".format(self.root, post_id)
|
|
||||||
response = self.request(url)
|
|
||||||
if response.status_code >= 400:
|
|
||||||
return None
|
|
||||||
|
|
||||||
data = json.loads(
|
|
||||||
text.extract(response.text, 'JSON.parse("', '");')[0]
|
|
||||||
.replace('\\\\u002F', '/')
|
|
||||||
.replace('\\"', '"')
|
|
||||||
)["detail"]
|
|
||||||
|
|
||||||
post = data["post_data"]
|
|
||||||
if not post["multi"]:
|
|
||||||
return None
|
|
||||||
user = data["detail_user"]
|
|
||||||
|
|
||||||
return {
|
|
||||||
"user": {
|
"user": {
|
||||||
"id" : user["uid"],
|
"id" : post["uid"],
|
||||||
"name" : user["uname"],
|
"name" : post["uname"],
|
||||||
"avatar" : user["avatar"],
|
"avatar" : sub(iroot, post["avatar"].partition("~")[0]),
|
||||||
},
|
},
|
||||||
"post": {
|
"post": {
|
||||||
"id" : text.parse_int(post["item_id"]),
|
"id" : text.parse_int(post["item_id"]),
|
||||||
"tags" : [t["tag_name"] for t in post["post_tags"]],
|
"tags" : [t["tag_name"] for t in tags],
|
||||||
"date" : text.parse_timestamp(post["ctime"]),
|
"date" : text.parse_timestamp(post["ctime"]),
|
||||||
"parody" : text.parse_unicode_escapes(post["work"]),
|
"parody" : post["work"],
|
||||||
"content": post["plain"],
|
"content": post["plain"],
|
||||||
"likes" : post["like_count"],
|
"likes" : post["like_count"],
|
||||||
"shares" : post["share_count"],
|
"shares" : post["share_count"],
|
||||||
"replies": post["reply_count"],
|
"replies": post["reply_count"],
|
||||||
},
|
},
|
||||||
"_multi": post["multi"],
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
yield Message.Directory, data
|
||||||
|
for data["num"], image in enumerate(post["image_list"], 1):
|
||||||
|
data["id"] = image["mid"]
|
||||||
|
data["width"] = image["w"]
|
||||||
|
data["height"] = image["h"]
|
||||||
|
|
||||||
|
url = image["path"].partition("~")[0]
|
||||||
|
text.nameext_from_url(url, data)
|
||||||
|
|
||||||
|
if data["extension"]:
|
||||||
|
if not url.startswith(iroot):
|
||||||
|
url = sub(iroot, url)
|
||||||
|
data["filter"] = ""
|
||||||
|
yield Message.Url, url, data
|
||||||
|
|
||||||
|
else:
|
||||||
|
if not multi:
|
||||||
|
if len(post["multi"]) < len(post["image_list"]):
|
||||||
|
multi = self._data_from_post(post["item_id"])
|
||||||
|
multi = multi["post_data"]["multi"]
|
||||||
|
else:
|
||||||
|
multi = post["multi"]
|
||||||
|
image = multi[data["num"] - 1]
|
||||||
|
|
||||||
|
if image["origin"]:
|
||||||
|
data["filter"] = "watermark"
|
||||||
|
yield Message.Url, image["origin"], data
|
||||||
|
|
||||||
|
if noop:
|
||||||
|
data["extension"] = ""
|
||||||
|
data["filter"] = "noop"
|
||||||
|
yield Message.Url, image["original_path"], data
|
||||||
|
|
||||||
|
def posts(self):
|
||||||
|
"""Returns an iterable with all relevant 'post' objects"""
|
||||||
|
|
||||||
|
def _data_from_post(self, post_id):
|
||||||
|
url = "{}/item/detail/{}".format(self.root, post_id)
|
||||||
|
page = self.request(url).text
|
||||||
|
return json.loads(
|
||||||
|
text.extract(page, 'JSON.parse("', '");')[0]
|
||||||
|
.replace('\\\\u002F', '/')
|
||||||
|
.replace('\\"', '"')
|
||||||
|
)["detail"]
|
||||||
|
|
||||||
|
|
||||||
class BcyUserExtractor(BcyExtractor):
|
class BcyUserExtractor(BcyExtractor):
|
||||||
"""Extractor for user timelines"""
|
"""Extractor for user timelines"""
|
||||||
subcategory = "user"
|
subcategory = "user"
|
||||||
pattern = r"(?:https?://)?bcy\.net/u/(\d+)"
|
pattern = r"(?:https?://)?bcy\.net/u/(\d+)"
|
||||||
test = ("https://bcy.net/u/1933712", {
|
test = (
|
||||||
"pattern": r"https://img-bcy-qn.pstatp.com/\w+/\d+/post/\w+/\w+.jpg",
|
("https://bcy.net/u/1933712", {
|
||||||
|
"pattern": r"https://img-bcy-qn.pstatp.com/\w+/\d+/post/\w+/.+jpg",
|
||||||
"count": ">= 25",
|
"count": ">= 25",
|
||||||
})
|
}),
|
||||||
|
("https://bcy.net/u/109282764041", {
|
||||||
|
"pattern": r"https://p\d-bcy.byteimg.com/img/banciyuan/[0-9a-f]+"
|
||||||
|
r"~tplv-banciyuan-logo-v3:.+\.image",
|
||||||
|
"range": "1-25",
|
||||||
|
"count": 25,
|
||||||
|
}),
|
||||||
|
)
|
||||||
|
|
||||||
def posts(self):
|
def posts(self):
|
||||||
url = self.root + "/apiv3/user/selfPosts"
|
url = self.root + "/apiv3/user/selfPosts"
|
||||||
params = {
|
params = {"uid": self.item_id, "since": None}
|
||||||
"uid": self.item_id,
|
|
||||||
"since": None,
|
|
||||||
# "_signature": None,
|
|
||||||
}
|
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
data = self.request(url, params=params).json()
|
data = self.request(url, params=params).json()
|
||||||
|
|
||||||
item = None
|
item = None
|
||||||
for item in data["data"]["items"]:
|
for item in data["data"]["items"]:
|
||||||
if item["item_detail"]["multi"]:
|
yield item["item_detail"]
|
||||||
yield item["item_detail"]["item_id"]
|
|
||||||
|
|
||||||
if not item:
|
if not item:
|
||||||
return
|
return
|
||||||
@ -131,14 +138,15 @@ class BcyPostExtractor(BcyExtractor):
|
|||||||
"""Extractor for individual posts"""
|
"""Extractor for individual posts"""
|
||||||
subcategory = "post"
|
subcategory = "post"
|
||||||
pattern = r"(?:https?://)?bcy\.net/item/detail/(\d+)"
|
pattern = r"(?:https?://)?bcy\.net/item/detail/(\d+)"
|
||||||
test = ("https://bcy.net/item/detail/6355835481002893070", {
|
test = (
|
||||||
|
("https://bcy.net/item/detail/6355835481002893070", {
|
||||||
"url": "301202375e61fd6e0e2e35de6c3ac9f74885dec3",
|
"url": "301202375e61fd6e0e2e35de6c3ac9f74885dec3",
|
||||||
"count": 1,
|
"count": 1,
|
||||||
"keyword": {
|
"keyword": {
|
||||||
"user": {
|
"user": {
|
||||||
"id" : 1933712,
|
"id" : 1933712,
|
||||||
"name" : "wukloo",
|
"name" : "wukloo",
|
||||||
"avatar" : str,
|
"avatar" : "re:https://img-bcy-qn.pstatp.com/Public/",
|
||||||
},
|
},
|
||||||
"post": {
|
"post": {
|
||||||
"id" : 6355835481002893070,
|
"id" : 6355835481002893070,
|
||||||
@ -157,7 +165,24 @@ class BcyPostExtractor(BcyExtractor):
|
|||||||
"filename": "712e0780b09011e696f973c3d1568337",
|
"filename": "712e0780b09011e696f973c3d1568337",
|
||||||
"extension": "jpg",
|
"extension": "jpg",
|
||||||
},
|
},
|
||||||
})
|
}),
|
||||||
|
# only watermarked images available
|
||||||
|
("https://bcy.net/item/detail/6780546160802143236", {
|
||||||
|
"pattern": r"https://p\d-bcy.byteimg.com/img/banciyuan/[0-9a-f]+"
|
||||||
|
r"~tplv-banciyuan-logo-v3:.+\.image",
|
||||||
|
"count": 8,
|
||||||
|
"keyword": {"filter": "watermark"}
|
||||||
|
}),
|
||||||
|
# only visible to logged in users
|
||||||
|
("https://bcy.net/item/detail/6747523535150783495", {
|
||||||
|
"count": 0,
|
||||||
|
}),
|
||||||
|
)
|
||||||
|
|
||||||
def posts(self):
|
def posts(self):
|
||||||
return (self.item_id,)
|
data = self._data_from_post(self.item_id)
|
||||||
|
post = data["post_data"]
|
||||||
|
post["image_list"] = post["multi"]
|
||||||
|
post["plain"] = text.parse_unicode_escapes(post["plain"])
|
||||||
|
post.update(data["detail_user"])
|
||||||
|
return (post,)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user