1
0
mirror of https://github.com/mikf/gallery-dl.git synced 2024-11-25 04:02:32 +01:00

[vsco] add 'space' extractor (#5202)

This commit is contained in:
Mike Fährmann 2024-02-17 01:51:27 +01:00
parent 770aec922d
commit 7033cc14e9
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88
3 changed files with 63 additions and 6 deletions

View File

@ -940,7 +940,7 @@ Consider all listed sites to potentially be NSFW.
<tr>
<td>VSCO</td>
<td>https://vsco.co/</td>
<td>Collections, individual Images, User Profiles</td>
<td>Collections, individual Images, Spaces, User Profiles</td>
<td></td>
</tr>
<tr>

View File

@ -11,8 +11,8 @@
from .common import Extractor, Message
from .. import text, util
BASE_PATTERN = r"(?:https?://)?(?:www\.)?vsco\.co/([^/]+)"
BASE_PATTERN = r"(?:https?://)?(?:www\.)?vsco\.co"
USER_PATTERN = BASE_PATTERN + r"/([^/?#]+)"
class VscoExtractor(Extractor):
@ -115,7 +115,7 @@ class VscoExtractor(Extractor):
class VscoUserExtractor(VscoExtractor):
"""Extractor for images from a user on vsco.co"""
subcategory = "user"
pattern = BASE_PATTERN + r"(?:/gallery|/images(?:/\d+)?)?/?(?:$|[?#])"
pattern = USER_PATTERN + r"(?:/gallery|/images(?:/\d+)?)?/?(?:$|[?#])"
example = "https://vsco.co/USER/gallery"
def images(self):
@ -139,7 +139,7 @@ class VscoCollectionExtractor(VscoExtractor):
subcategory = "collection"
directory_fmt = ("{category}", "{user}", "collection")
archive_fmt = "c_{user}_{id}"
pattern = BASE_PATTERN + r"/collection/"
pattern = USER_PATTERN + r"/collection/"
example = "https://vsco.co/USER/collection/12345"
def images(self):
@ -159,10 +159,59 @@ class VscoCollectionExtractor(VscoExtractor):
))
class VscoSpaceExtractor(VscoExtractor):
"""Extractor for a vsco.co space"""
subcategory = "space"
directory_fmt = ("{category}", "space", "{user}")
archive_fmt = "s_{user}_{id}"
pattern = BASE_PATTERN + r"/spaces/([^/?#]+)"
example = "https://vsco.co/spaces/a1b2c3d4e5f"
def images(self):
url = "{}/spaces/{}".format(self.root, self.user)
data = self._extract_preload_state(url)
tkn = data["users"]["currentUser"]["tkn"]
sid = self.user
posts = data["entities"]["posts"]
images = data["entities"]["postImages"]
for post in posts.values():
post["image"] = images[post["image"]]
space = data["spaces"]["byId"][sid]
space["postsList"] = [posts[pid] for pid in space["postsList"]]
url = "{}/grpc/spaces/{}/posts".format(self.root, sid)
params = {}
return self._pagination(url, params, tkn, space)
def _pagination(self, url, params, token, data):
headers = {
"Accept" : "application/json",
"Referer" : "{}/spaces/{}".format(self.root, self.user),
"Content-Type" : "application/json",
"Authorization": "Bearer " + token,
}
while True:
for post in data["postsList"]:
post = self._transform_media(post["image"])
post["upload_date"] = post["upload_date"]["sec"] * 1000
yield post
cursor = data["cursor"]
if cursor.get("atEnd"):
return
params["cursor"] = cursor["postcursorcontext"]["postId"]
data = self.request(url, params=params, headers=headers).json()
class VscoImageExtractor(VscoExtractor):
"""Extractor for individual images on vsco.co"""
subcategory = "image"
pattern = BASE_PATTERN + r"/media/([0-9a-fA-F]+)"
pattern = USER_PATTERN + r"/media/([0-9a-fA-F]+)"
example = "https://vsco.co/USER/media/0123456789abcdef"
def __init__(self, match):

View File

@ -38,6 +38,14 @@ __tests__ = (
"#count" : 80,
},
{
"#url" : "https://vsco.co/spaces/6320a3e1e0338d1350b33fea",
"#category": ("", "vsco", "space"),
"#class" : vsco.VscoSpaceExtractor,
"#pattern" : r"https://image(-aws.+)?\.vsco\.co/[0-9a-f/]+/[\w\s-]+\.\w+",
"#count" : range(100, 150),
},
{
"#url" : "https://vsco.co/erenyildiz/media/5d34b93ef632433030707ce2",
"#category": ("", "vsco", "image"),