mirror of
https://github.com/mikf/gallery-dl.git
synced 2024-11-25 12:12:34 +01:00
[weverse] add extractors
This commit is contained in:
parent
8ea75202ed
commit
b1b1cfe123
@ -4536,6 +4536,29 @@ Description
|
||||
Download video files.
|
||||
|
||||
|
||||
extractor.weverse.embeds
|
||||
------------------------
|
||||
Type
|
||||
``bool``
|
||||
Default
|
||||
``true``
|
||||
Description
|
||||
Download YouTube embeds found in ``Media`` posts.
|
||||
|
||||
Note: Setting `extractor.weverse.videos`_ to ``false`` will
|
||||
override this setting.
|
||||
|
||||
|
||||
extractor.weverse.videos
|
||||
------------------------
|
||||
Type
|
||||
``bool``
|
||||
Default
|
||||
``true``
|
||||
Description
|
||||
Download video files.
|
||||
|
||||
|
||||
extractor.ytdl.cmdline-args
|
||||
---------------------------
|
||||
Type
|
||||
|
@ -366,6 +366,14 @@
|
||||
"retweets": true,
|
||||
"videos": true
|
||||
},
|
||||
"weverse":
|
||||
{
|
||||
"username": null,
|
||||
"password": null,
|
||||
"cookies": null,
|
||||
"embeds": true,
|
||||
"videos": true
|
||||
},
|
||||
"ytdl":
|
||||
{
|
||||
"enabled": false,
|
||||
|
@ -997,6 +997,12 @@ Consider all listed sites to potentially be NSFW.
|
||||
<td>Albums, Articles, Feeds, Images from Statuses, User Profiles, Videos</td>
|
||||
<td></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Weverse</td>
|
||||
<td>https://weverse.io/</td>
|
||||
<td>Feed Tab, Artist Tab, Media Files, Media Categories, Media Tabs, Member Profiles, Moments, Posts</td>
|
||||
<td><a href="https://github.com/mikf/gallery-dl#cookies">Cookies</a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>WikiArt.org</td>
|
||||
<td>https://www.wikiart.org/</td>
|
||||
|
@ -178,6 +178,7 @@ modules = [
|
||||
"webmshare",
|
||||
"webtoons",
|
||||
"weibo",
|
||||
"weverse",
|
||||
"wikiart",
|
||||
"wikifeet",
|
||||
"wikimedia",
|
||||
|
640
gallery_dl/extractor/weverse.py
Normal file
640
gallery_dl/extractor/weverse.py
Normal file
@ -0,0 +1,640 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
"""Extractors for https://weverse.io/"""
|
||||
|
||||
from .common import Extractor, Message
|
||||
from .. import text, exception
|
||||
from ..cache import cache
|
||||
import binascii
|
||||
import hashlib
|
||||
import hmac
|
||||
import time
|
||||
import urllib.parse
|
||||
import uuid
|
||||
from collections import OrderedDict
|
||||
from http import HTTPStatus
|
||||
|
||||
BASE_PATTERN = r"^(?:https?://)?(?:m\.)?weverse\.io/([^/?#]+)"
|
||||
MEMBER_ID_PATTERN = r"/([a-f0-9]+)"
|
||||
POST_ID_PATTERN = r"/(\d-\d+)"
|
||||
|
||||
|
||||
class WeverseExtractor(Extractor):
|
||||
"""Base class for weverse extractors"""
|
||||
|
||||
category = "weverse"
|
||||
filename_fmt = "{category}_{id}.{extension}"
|
||||
archive_fmt = "{category}_{post_id}_{id}"
|
||||
cookies_domain = ".weverse.io"
|
||||
cookies_names = ("we2_access_token",)
|
||||
root = "https://weverse.io"
|
||||
|
||||
def __init__(self, match):
|
||||
Extractor.__init__(self, match)
|
||||
self.post_url = match.group(0)
|
||||
self.community_keyword = match.group(1)
|
||||
|
||||
def _init(self):
|
||||
self.embeds = self.config("embeds", True)
|
||||
self.videos = self.config("videos", True)
|
||||
|
||||
def items(self):
|
||||
self.login()
|
||||
self.api = WeverseAPI(self)
|
||||
|
||||
post = self.post()
|
||||
data = self.metadata(post)
|
||||
|
||||
files = []
|
||||
if post["attachment"]:
|
||||
self._extract_post(post, files)
|
||||
elif post["extension"]:
|
||||
if isinstance(self, WeverseMomentExtractor):
|
||||
self._extract_moment(post, files)
|
||||
else:
|
||||
self._extract_media(post["extension"], files)
|
||||
data["count"] = len(files)
|
||||
|
||||
yield Message.Directory, data
|
||||
for file in files:
|
||||
file.update(data)
|
||||
url = file.pop("url")
|
||||
yield Message.Url, url, file
|
||||
|
||||
def _extract_image(self, image):
|
||||
url = image["url"]
|
||||
return {
|
||||
"id": image["photoId"],
|
||||
"url": url,
|
||||
"width": image["width"],
|
||||
"height": image["height"],
|
||||
"extension": text.ext_from_url(url),
|
||||
}
|
||||
|
||||
def _extract_video(self, video):
|
||||
video_id = video["videoId"]
|
||||
if isinstance(self, WeverseMediaExtractor):
|
||||
master_id = (
|
||||
video.get("uploadInfo", {}).get(
|
||||
"videoId") or video["infraVideoId"]
|
||||
)
|
||||
best_video = self.get_best_video(
|
||||
self.api.get_media_video_list(video_id, master_id),
|
||||
)
|
||||
else:
|
||||
best_video = self.get_best_video(
|
||||
self.api.get_post_video_list(video_id))
|
||||
url = best_video["source"]
|
||||
return {
|
||||
"id": video_id,
|
||||
"url": url,
|
||||
"width": best_video["encodingOption"]["width"],
|
||||
"height": best_video["encodingOption"]["height"],
|
||||
"extension": text.ext_from_url(url),
|
||||
}
|
||||
|
||||
def _extract_embed(self, embed):
|
||||
return {
|
||||
"id": embed["youtubeVideoId"],
|
||||
"extension": None,
|
||||
"url": "ytdl:" + embed["videoPath"],
|
||||
}
|
||||
|
||||
def _extract_post(self, post, files):
|
||||
attachments = {}
|
||||
attachments.update(post["attachment"].get("photo", {}))
|
||||
attachments.update(post["attachment"].get("video", {}))
|
||||
|
||||
# the order of attachments in the api response can differ to the order
|
||||
# of attachments on the site
|
||||
attachment_order = list(text.extract_iter(post["body"], 'id="', '"'))
|
||||
for index, attachment_id in enumerate(attachment_order, 1):
|
||||
file = {
|
||||
"num": index,
|
||||
}
|
||||
attachment = attachments[attachment_id]
|
||||
if "photoId" in attachment:
|
||||
file.update(self._extract_image(attachment))
|
||||
else:
|
||||
file.update(self._extract_video(attachment))
|
||||
files.append(file)
|
||||
|
||||
def _extract_moment(self, post, files):
|
||||
moment = next(
|
||||
post["extension"][key]
|
||||
for key in ("moment", "momentW1")
|
||||
if key in post["extension"]
|
||||
)
|
||||
if not moment:
|
||||
return
|
||||
|
||||
file = {
|
||||
"num": 1,
|
||||
}
|
||||
if "photo" in moment:
|
||||
file.update(self._extract_image(moment["photo"]))
|
||||
else:
|
||||
if not self.videos:
|
||||
return
|
||||
file.update(self._extract_video(moment["video"]))
|
||||
|
||||
files.append(file)
|
||||
|
||||
def _extract_media(self, extension, files):
|
||||
if "image" in extension:
|
||||
for index, photo in enumerate(extension["image"]["photos"], 1):
|
||||
file = self._extract_image(photo)
|
||||
file["num"] = index
|
||||
files.append(file)
|
||||
elif "video" in extension:
|
||||
if not self.videos:
|
||||
return
|
||||
file = self._extract_video(extension["video"])
|
||||
files.append(file)
|
||||
else:
|
||||
if not self.embeds or not self.videos:
|
||||
return
|
||||
file = self._extract_embed(extension["youtube"])
|
||||
file["num"] = 1
|
||||
files.append(file)
|
||||
|
||||
def get_best_video(self, videos):
|
||||
return max(
|
||||
videos,
|
||||
key=lambda video: video["encodingOption"]["width"] *
|
||||
video["encodingOption"]["height"],
|
||||
)
|
||||
|
||||
def metadata(self, post):
|
||||
published_at = text.parse_timestamp(post["publishedAt"] / 1000)
|
||||
data = {
|
||||
"date": published_at,
|
||||
"post_url": post.get("shareUrl", self.post_url),
|
||||
"post_id": post["postId"],
|
||||
"post_type": post["postType"],
|
||||
"section_type": post["sectionType"],
|
||||
}
|
||||
|
||||
if "hideFromArtist" in post:
|
||||
data["hide_from_artist"] = post["hideFromArtist"]
|
||||
|
||||
if "membershipOnly" in post:
|
||||
data["membership_only"] = post["membershipOnly"]
|
||||
|
||||
if post.get("tags", []):
|
||||
data["tags"] = post["tags"]
|
||||
|
||||
if "author" in post:
|
||||
author = {
|
||||
"id": post["author"]["memberId"],
|
||||
"name": post["author"]["profileName"],
|
||||
"profile_type": post["author"]["profileType"],
|
||||
}
|
||||
if "artistOfficialProfile" in post["author"]:
|
||||
artist_profile = post["author"]["artistOfficialProfile"]
|
||||
author["name"] = artist_profile["officialName"]
|
||||
data["author"] = author
|
||||
|
||||
if "community" in post:
|
||||
community = {
|
||||
"id": post["community"]["communityId"],
|
||||
"name": post["community"]["communityName"],
|
||||
"artist_code": post["community"]["artistCode"],
|
||||
}
|
||||
data["community"] = community
|
||||
|
||||
extension = post["extension"]
|
||||
media_info = extension.get("mediaInfo", {})
|
||||
if media_info:
|
||||
categories = [
|
||||
{
|
||||
"id": category["id"],
|
||||
"type": category["type"],
|
||||
"title": category["title"],
|
||||
}
|
||||
for category in media_info["categories"]
|
||||
]
|
||||
data.update(
|
||||
{
|
||||
"title": media_info["title"],
|
||||
"media_type": media_info["mediaType"],
|
||||
"categories": categories,
|
||||
},
|
||||
)
|
||||
|
||||
moment = next(
|
||||
(extension[key]
|
||||
for key in ("moment", "momentW1") if key in extension),
|
||||
None,
|
||||
)
|
||||
if moment:
|
||||
expire_at = text.parse_timestamp(moment["expireAt"] / 1000)
|
||||
data["expire_at"] = expire_at
|
||||
|
||||
return data
|
||||
|
||||
def post(self):
|
||||
return {}
|
||||
|
||||
def login(self):
|
||||
if self.cookies_check(self.cookies_names):
|
||||
return
|
||||
|
||||
username, password = self._get_auth_info()
|
||||
if username:
|
||||
self.cookies_update(_login_impl(self, username, password))
|
||||
|
||||
|
||||
class WeversePostExtractor(WeverseExtractor):
|
||||
"""Extractor for a weverse community post"""
|
||||
|
||||
subcategory = "post"
|
||||
directory_fmt = (
|
||||
"{category}", "{community[name]}", "{author[id]}", "{post_id}")
|
||||
pattern = BASE_PATTERN + r"/(?:artist|fanpost)" + POST_ID_PATTERN
|
||||
example = "https://weverse.io/abcdef/artist/1-123456789"
|
||||
|
||||
def __init__(self, match):
|
||||
WeverseExtractor.__init__(self, match)
|
||||
self.post_id = match.group(2)
|
||||
|
||||
def post(self):
|
||||
return self.api.get_post(self.post_id)
|
||||
|
||||
|
||||
class WeverseMemberExtractor(WeverseExtractor):
|
||||
"""Extractor for all posts from a weverse community member"""
|
||||
|
||||
subcategory = "member"
|
||||
pattern = BASE_PATTERN + "/profile" + MEMBER_ID_PATTERN + r"$"
|
||||
example = "https://weverse.io/abcdef/profile/a0b1c2d3e4f5a6b7c8d9e0f1a2b3c4d5" # noqa E501
|
||||
|
||||
def __init__(self, match):
|
||||
WeverseExtractor.__init__(self, match)
|
||||
self.member_id = match.group(2)
|
||||
|
||||
def items(self):
|
||||
self.login()
|
||||
self.api = WeverseAPI(self)
|
||||
|
||||
data = {"_extractor": WeversePostExtractor}
|
||||
posts = self.api.get_member_posts(self.member_id)
|
||||
for post in posts:
|
||||
yield Message.Queue, post["shareUrl"], data
|
||||
|
||||
|
||||
class WeverseFeedExtractor(WeverseExtractor):
|
||||
"""Extractor for a weverse community feed"""
|
||||
|
||||
subcategory = "feed"
|
||||
pattern = BASE_PATTERN + r"/(feed|artist)$"
|
||||
example = "https://weverse.io/abcdef/feed"
|
||||
|
||||
def __init__(self, match):
|
||||
WeverseExtractor.__init__(self, match)
|
||||
self.feed_name = match.group(2)
|
||||
|
||||
def items(self):
|
||||
self.login()
|
||||
self.api = WeverseAPI(self)
|
||||
|
||||
data = {"_extractor": WeversePostExtractor}
|
||||
posts = self.api.get_feed_posts(self.community_keyword, self.feed_name)
|
||||
for post in posts:
|
||||
yield Message.Queue, post["shareUrl"], data
|
||||
|
||||
|
||||
class WeverseMomentExtractor(WeverseExtractor):
|
||||
"""Extractor for a weverse community artist moment"""
|
||||
|
||||
subcategory = "moment"
|
||||
pattern = (BASE_PATTERN + "/moment" +
|
||||
MEMBER_ID_PATTERN + "/post" +
|
||||
POST_ID_PATTERN)
|
||||
example = "https://weverse.io/abcdef/moment/a0b1c2d3e4f5a6b7c8d9e0f1a2b3c4d5/post/1-123456789" # noqa E501
|
||||
|
||||
def __init__(self, match):
|
||||
WeverseExtractor.__init__(self, match)
|
||||
self.post_id = match.group(3)
|
||||
|
||||
def post(self):
|
||||
return self.api.get_post(self.post_id)
|
||||
|
||||
|
||||
class WeverseMomentsExtractor(WeverseExtractor):
|
||||
"""Extractor for all moments from a weverse community artist"""
|
||||
|
||||
subcategory = "moments"
|
||||
pattern = BASE_PATTERN + "/moment" + MEMBER_ID_PATTERN + r"$"
|
||||
example = "https://weverse.io/abcdef/moment/a0b1c2d3e4f5a6b7c8d9e0f1a2b3c4d5" # noqa E501
|
||||
|
||||
def __init__(self, match):
|
||||
WeverseExtractor.__init__(self, match)
|
||||
self.member_id = match.group(2)
|
||||
|
||||
def items(self):
|
||||
self.login()
|
||||
self.api = WeverseAPI(self)
|
||||
|
||||
data = {"_extractor": WeverseMomentExtractor}
|
||||
moments = self.api.get_member_moments(self.member_id)
|
||||
for moment in moments:
|
||||
yield Message.Queue, moment["shareUrl"], data
|
||||
|
||||
|
||||
class WeverseMediaExtractor(WeverseExtractor):
|
||||
"""Extractor for a weverse community media post"""
|
||||
|
||||
subcategory = "media"
|
||||
directory_fmt = ("{category}", "{community[name]}", "media", "{post_id}")
|
||||
pattern = BASE_PATTERN + "/media" + POST_ID_PATTERN
|
||||
example = "https://weverse.io/abcdef/media/1-123456789"
|
||||
|
||||
def __init__(self, match):
|
||||
WeverseExtractor.__init__(self, match)
|
||||
self.post_id = match.group(2)
|
||||
|
||||
def post(self):
|
||||
return self.api.get_post(self.post_id)
|
||||
|
||||
|
||||
class WeverseMediaTabExtractor(WeverseExtractor):
|
||||
"""Extractor for the media tab of a weverse community"""
|
||||
|
||||
subcategory = "media-tab"
|
||||
pattern = BASE_PATTERN + r"/media(?:/(all|membership|new))?$"
|
||||
example = "https://weverse.io/abcdef/media"
|
||||
|
||||
def __init__(self, match):
|
||||
WeverseExtractor.__init__(self, match)
|
||||
self.tab_name = match.group(2) or "all"
|
||||
|
||||
def items(self):
|
||||
self.login()
|
||||
self.api = WeverseAPI(self)
|
||||
|
||||
data = {"_extractor": WeverseMediaExtractor}
|
||||
if self.tab_name == "new":
|
||||
get_media = self.api.get_latest_community_media
|
||||
elif self.tab_name == "membership":
|
||||
get_media = self.api.get_membership_community_media
|
||||
else:
|
||||
get_media = self.api.get_all_community_media
|
||||
medias = get_media(self.community_keyword)
|
||||
for media in medias:
|
||||
yield Message.Queue, media["shareUrl"], data
|
||||
|
||||
|
||||
class WeverseMediaCategoryExtractor(WeverseExtractor):
|
||||
"""Extractor for media by category of a weverse community"""
|
||||
|
||||
subcategory = "media-category"
|
||||
pattern = BASE_PATTERN + r"/media/category/(\d+)"
|
||||
example = "https://weverse.io/abcdef/media/category/1234"
|
||||
|
||||
def __init__(self, match):
|
||||
WeverseExtractor.__init__(self, match)
|
||||
self.media_category = match.group(2)
|
||||
|
||||
def items(self):
|
||||
self.login()
|
||||
self.api = WeverseAPI(self)
|
||||
|
||||
data = {"_extractor": WeverseMediaExtractor}
|
||||
medias = self.api.get_media_by_category_id(self.media_category)
|
||||
for media in medias:
|
||||
yield Message.Queue, media["shareUrl"], data
|
||||
|
||||
|
||||
class WeverseAPI:
|
||||
"""Interface for the Weverse API"""
|
||||
|
||||
BASE_API_URL = "https://global.apis.naver.com"
|
||||
WMD_API_URL = BASE_API_URL + "/weverse/wevweb"
|
||||
VOD_API_URL = BASE_API_URL + "/rmcnmv/rmcnmv"
|
||||
|
||||
APP_ID = "be4d79eb8fc7bd008ee82c8ec4ff6fd4"
|
||||
SECRET = "1b9cb6378d959b45714bec49971ade22e6e24e42"
|
||||
|
||||
def __init__(self, extractor):
|
||||
self.extractor = extractor
|
||||
|
||||
cookies = extractor.cookies
|
||||
token_cookie_name = extractor.cookies_names[0]
|
||||
cookies_domain = extractor.cookies_domain
|
||||
self.access_token = cookies.get(
|
||||
token_cookie_name, domain=cookies_domain)
|
||||
self.headers = (
|
||||
{"Authorization": "Bearer " + self.access_token}
|
||||
if self.access_token
|
||||
else None
|
||||
)
|
||||
|
||||
def _endpoint_with_params(self, endpoint, params):
|
||||
params_delimiter = "?"
|
||||
if "?" in endpoint:
|
||||
params_delimiter = "&"
|
||||
return (endpoint + params_delimiter +
|
||||
urllib.parse.urlencode(query=params))
|
||||
|
||||
def _message_digest(self, endpoint, params, timestamp):
|
||||
key = self.SECRET.encode()
|
||||
url = self._endpoint_with_params(endpoint, params)
|
||||
message = "{}{}".format(url[:255], timestamp).encode()
|
||||
hash_digest = hmac.new(key, message, hashlib.sha1).digest()
|
||||
return binascii.b2a_base64(hash_digest).rstrip().decode()
|
||||
|
||||
def _apply_no_auth(self, endpoint, params):
|
||||
if not endpoint.endswith("/preview"):
|
||||
endpoint += "/preview"
|
||||
params.update({"fieldSet": "postForPreview"})
|
||||
return endpoint, params
|
||||
|
||||
def _is_text_only(self, post):
|
||||
for key in ("attachment", "extension"):
|
||||
if post.get(key, {}):
|
||||
return False
|
||||
if "summary" in post:
|
||||
s = post["summary"]
|
||||
if s.get("videoCount", 0) + s.get("photoCount", 0) > 0:
|
||||
return False
|
||||
return True
|
||||
|
||||
def get_in_key(self, video_id):
|
||||
endpoint = "/video/v1.1/vod/{}/inKey".format(video_id)
|
||||
return self._call_wmd(endpoint, method="POST")["inKey"]
|
||||
|
||||
def get_community_id(self, community_keyword):
|
||||
endpoint = "/community/v1.0/communityIdUrlPathByUrlPathArtistCode"
|
||||
params = {"keyword": community_keyword}
|
||||
return self._call_wmd(endpoint, params)["communityId"]
|
||||
|
||||
def get_post(self, post_id):
|
||||
endpoint = "/post/v1.0/post-{}".format(post_id)
|
||||
params = {"fieldSet": "postV1"}
|
||||
if not self.access_token:
|
||||
endpoint, params = self._apply_no_auth(endpoint, params)
|
||||
return self._call_wmd(endpoint, params)
|
||||
|
||||
def get_media_video_list(self, video_id, master_id):
|
||||
in_key = self.get_in_key(video_id)
|
||||
url = "{}/vod/play/v2.0/{}".format(self.VOD_API_URL, master_id)
|
||||
params = {"key": in_key}
|
||||
res = self._call(url, params=params)
|
||||
return res["videos"]["list"]
|
||||
|
||||
def get_post_video_list(self, video_id):
|
||||
endpoint = "/cvideo/v1.0/cvideo-{}/playInfo".format(video_id)
|
||||
params = {"videoId": video_id}
|
||||
res = self._call_wmd(endpoint, params=params)
|
||||
return res["playInfo"]["videos"]["list"]
|
||||
|
||||
def get_member_posts(self, member_id):
|
||||
endpoint = "/post/v1.0/member-{}/posts".format(member_id)
|
||||
params = {
|
||||
"fieldSet": "postsV1",
|
||||
"filterType": "DEFAULT",
|
||||
"limit": 20,
|
||||
"sortType": "LATEST",
|
||||
}
|
||||
return self._pagination(endpoint, params)
|
||||
|
||||
def get_feed_posts(self, community_keyword, feed_name):
|
||||
community_id = self.get_community_id(community_keyword)
|
||||
endpoint = "/post/v1.0/community-{}/{}TabPosts".format(
|
||||
community_id, feed_name)
|
||||
params = {
|
||||
"fieldSet": "postsV1",
|
||||
"limit": 20,
|
||||
"pagingType": "CURSOR",
|
||||
}
|
||||
return self._pagination(endpoint, params)
|
||||
|
||||
def get_latest_community_media(self, community_keyword):
|
||||
community_id = self.get_community_id(community_keyword)
|
||||
endpoint = "/media/v1.0/community-{}/more".format(community_id)
|
||||
params = {
|
||||
"fieldSet": "postsV1",
|
||||
"filterType": "RECENT",
|
||||
}
|
||||
return self._pagination(endpoint, params)
|
||||
|
||||
def get_membership_community_media(self, community_keyword):
|
||||
community_id = self.get_community_id(community_keyword)
|
||||
endpoint = "/media/v1.0/community-{}/more".format(community_id)
|
||||
params = {
|
||||
"fieldSet": "postsV1",
|
||||
"filterType": "MEMBERSHIP",
|
||||
}
|
||||
return self._pagination(endpoint, params)
|
||||
|
||||
def get_all_community_media(self, community_keyword):
|
||||
community_id = self.get_community_id(community_keyword)
|
||||
endpoint = "/media/v1.0/community-{}/searchAllMedia".format(
|
||||
community_id)
|
||||
params = {
|
||||
"fieldSet": "postsV1",
|
||||
"sortOrder": "DESC",
|
||||
}
|
||||
return self._pagination(endpoint, params)
|
||||
|
||||
def get_media_by_category_id(self, category_id):
|
||||
endpoint = "/media/v1.0/category-{}/mediaPosts".format(category_id)
|
||||
params = {
|
||||
"fieldSet": "postsV1",
|
||||
"sortOrder": "DESC",
|
||||
}
|
||||
return self._pagination(endpoint, params)
|
||||
|
||||
def get_member_moments(self, member_id):
|
||||
endpoint = "/post/v1.0/member-{}/posts".format(member_id)
|
||||
params = {
|
||||
"fieldSet": "postsV1",
|
||||
"filterType": "MOMENT",
|
||||
"limit": 1,
|
||||
}
|
||||
return self._pagination(endpoint, params)
|
||||
|
||||
def _call(self, url, **kwargs):
|
||||
while True:
|
||||
try:
|
||||
return self.extractor.request(url, **kwargs).json()
|
||||
except exception.HttpError as exc:
|
||||
if exc.response.status_code == HTTPStatus.UNAUTHORIZED:
|
||||
raise exception.AuthenticationError() from None
|
||||
if exc.response.status_code == HTTPStatus.FORBIDDEN:
|
||||
raise exception.AuthorizationError(
|
||||
"Post requires membership",
|
||||
) from None
|
||||
if exc.response.status_code == HTTPStatus.NOT_FOUND:
|
||||
raise exception.NotFoundError(
|
||||
self.extractor.subcategory) from None
|
||||
self.extractor.log.debug(exc)
|
||||
return None
|
||||
|
||||
def _call_wmd(self, endpoint, params=None, **kwargs):
|
||||
if params is None:
|
||||
params = {}
|
||||
params.update(
|
||||
{
|
||||
"appId": self.APP_ID,
|
||||
"language": "en",
|
||||
"os": "WEB",
|
||||
"platform": "WEB",
|
||||
"wpf": "pc",
|
||||
},
|
||||
)
|
||||
# the param order is important for the message digest
|
||||
params = OrderedDict(sorted(params.items()))
|
||||
timestamp = int(time.time() * 1000)
|
||||
message_digest = self._message_digest(endpoint, params, timestamp)
|
||||
params.update(
|
||||
{
|
||||
"wmsgpad": timestamp,
|
||||
"wmd": message_digest,
|
||||
},
|
||||
)
|
||||
return self._call(
|
||||
self.WMD_API_URL + endpoint,
|
||||
params=params,
|
||||
headers=self.headers,
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
def _pagination(self, endpoint, params=None):
|
||||
if not self.access_token:
|
||||
raise exception.AuthenticationError()
|
||||
if params is None:
|
||||
params = {}
|
||||
while True:
|
||||
res = self._call_wmd(endpoint, params)
|
||||
for post in res["data"]:
|
||||
if not self._is_text_only(post):
|
||||
yield post
|
||||
np = res.get("paging", {}).get("nextParams", {})
|
||||
if "after" not in np:
|
||||
return
|
||||
params["after"] = np["after"]
|
||||
|
||||
|
||||
@cache(maxage=365 * 24 * 3600, keyarg=1)
|
||||
def _login_impl(extr, username, password):
|
||||
url = "https://accountapi.weverse.io/web/api/v2/auth/token/by-credentials"
|
||||
data = {"email": username, "password": password}
|
||||
headers = {
|
||||
"x-acc-app-secret": "5419526f1c624b38b10787e5c10b2a7a",
|
||||
"x-acc-app-version": "3.3.3",
|
||||
"x-acc-language": "en",
|
||||
"x-acc-service-id": "weverse",
|
||||
"x-acc-trace-id": str(uuid.uuid4()),
|
||||
}
|
||||
extr.log.info("Logging in as %s", username)
|
||||
res = extr.request(url, method="POST", json=data, headers=headers).json()
|
||||
if "accessToken" not in res:
|
||||
extr.log.warning(
|
||||
"Unable to log in as %s, proceeding without auth", username)
|
||||
return {cookie.name: cookie.value for cookie in extr.cookies}
|
@ -323,6 +323,13 @@ SUBCATEGORY_MAP = {
|
||||
"home": "",
|
||||
"newvideo": "",
|
||||
},
|
||||
"weverse": {
|
||||
"feed": "Feed Tab, Artist Tab",
|
||||
"media-category": "Media Categories",
|
||||
"media-tab": "Media Tabs",
|
||||
"member": "Member Profiles",
|
||||
"moments": "",
|
||||
},
|
||||
"wikiart": {
|
||||
"artists": "Artist Listings",
|
||||
},
|
||||
@ -407,6 +414,7 @@ AUTH_MAP = {
|
||||
"vipergirls" : "Supported",
|
||||
"wallhaven" : _APIKEY_WH,
|
||||
"weasyl" : _APIKEY_WY,
|
||||
"weverse" : _COOKIES,
|
||||
"zerochan" : "Supported",
|
||||
}
|
||||
|
||||
|
216
test/results/weverse.py
Normal file
216
test/results/weverse.py
Normal file
@ -0,0 +1,216 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
from gallery_dl.extractor import weverse
|
||||
|
||||
|
||||
IMAGE_URL_PATTERN = r"https://phinf\.wevpstatic\.net/.+\.(?:gif|jpe?g|png|webp)$"
|
||||
VIDEO_URL_PATTERN = r"https://weverse-rmcnmv\.akamaized\.net/.+\.(?:mp4|webm)(?:\?.+)?$"
|
||||
COMBINED_URL_PATTERN = "(?i)" + IMAGE_URL_PATTERN + "|" + VIDEO_URL_PATTERN
|
||||
|
||||
|
||||
__tests__ = (
|
||||
{
|
||||
"#url": "https://weverse.io/lesserafim/artist/4-147791342",
|
||||
"#comment": "post containing both a video and image",
|
||||
"#category": ("", "weverse", "post"),
|
||||
"#class": weverse.WeversePostExtractor,
|
||||
"#pattern": COMBINED_URL_PATTERN,
|
||||
"#count": 2,
|
||||
|
||||
"date": "dt:2024-01-18 06:08:35",
|
||||
"post_url": "https://weverse.io/lesserafim/artist/4-147791342",
|
||||
"post_id": "4-147791342",
|
||||
"post_type": "NORMAL",
|
||||
"section_type": "ARTIST",
|
||||
"author": {
|
||||
"id": "b60d95bc3b71f4d97b28ac1b971cc641",
|
||||
"name": "KAZUHA",
|
||||
"profile_type": "ARTIST",
|
||||
},
|
||||
"community": {
|
||||
"id": 47,
|
||||
"name": "LE SSERAFIM",
|
||||
"artist_code": "LESSERAFIM",
|
||||
},
|
||||
},
|
||||
|
||||
{
|
||||
"#url": "https://weverse.io/lesserafim/artist/4-150863209",
|
||||
"#comment": "text only",
|
||||
"#category": ("", "weverse", "post"),
|
||||
"#class": weverse.WeversePostExtractor,
|
||||
"#count": 0,
|
||||
},
|
||||
|
||||
{
|
||||
"#url": "https://weverse.io/dreamcatcher/artist/3-138146100",
|
||||
"#comment": ("the order of the files returned by the api does not always match the order on the site"
|
||||
"the id of the second file returned by the api is `2-274423384`"
|
||||
"the id of the second file displayed on the site is `3-274413871`"),
|
||||
"#category": ("", "weverse", "post"),
|
||||
"#class": weverse.WeversePostExtractor,
|
||||
"#pattern": COMBINED_URL_PATTERN,
|
||||
"#range": "2",
|
||||
"#count": 1,
|
||||
|
||||
"id": "3-274413871",
|
||||
"num": 2,
|
||||
},
|
||||
|
||||
{
|
||||
"#url": "https://weverse.io/dreamcatcher/fanpost/2-135105553",
|
||||
"#comment": "fan post",
|
||||
"#category": ("", "weverse", "post"),
|
||||
"#class": weverse.WeversePostExtractor,
|
||||
"#pattern": COMBINED_URL_PATTERN,
|
||||
"#count": 1,
|
||||
|
||||
"section_type": "FEED",
|
||||
"author": {
|
||||
"profile_type": "FAN",
|
||||
},
|
||||
},
|
||||
|
||||
{
|
||||
"#url": "https://weverse.io/dreamcatcher/profile/e89820ec1a72d7255120284ca3aeafa5",
|
||||
"#category": ("", "weverse", "member"),
|
||||
"#class": weverse.WeverseMemberExtractor,
|
||||
"#pattern": weverse.WeversePostExtractor.pattern,
|
||||
"#auth": True,
|
||||
},
|
||||
|
||||
{
|
||||
"#url": "https://weverse.io/dreamcatcher/feed",
|
||||
"#comment": "feed tab (fan posts)"
|
||||
"each pagination call returns up to 20 items",
|
||||
"#category": ("", "weverse", "feed"),
|
||||
"#class": weverse.WeverseFeedExtractor,
|
||||
"#pattern": weverse.WeversePostExtractor.pattern,
|
||||
"#auth": True,
|
||||
"#range": "21",
|
||||
},
|
||||
|
||||
{
|
||||
"#url": "https://weverse.io/dreamcatcher/artist",
|
||||
"#comment": "artist tab (artist posts)"
|
||||
"each pagination call returns up to 20 items",
|
||||
"#category": ("", "weverse", "feed"),
|
||||
"#class": weverse.WeverseFeedExtractor,
|
||||
"#pattern": weverse.WeversePostExtractor.pattern,
|
||||
"#auth": True,
|
||||
"#range": "21",
|
||||
},
|
||||
|
||||
{
|
||||
"#url": "https://weverse.io/dreamcatcher/moment/e89820ec1a72d7255120284ca3aeafa5/post/2-111675163",
|
||||
"#comment": "moment",
|
||||
"#category": ("", "weverse", "moment"),
|
||||
"#class": weverse.WeverseMomentExtractor,
|
||||
"#pattern": COMBINED_URL_PATTERN,
|
||||
"#count": 1,
|
||||
|
||||
"width": 1080,
|
||||
"height": 1920,
|
||||
"date": "dt:2023-01-09 06:25:41",
|
||||
"expire_at": "dt:2023-01-10 06:25:41",
|
||||
},
|
||||
|
||||
{
|
||||
"#url": "https://weverse.io/dreamcatcher/moment/785506b50e7890c3b81491f20728ee82/post/2-101327656",
|
||||
"#comment": "momentW1",
|
||||
"#category": ("", "weverse", "moment"),
|
||||
"#class": weverse.WeverseMomentExtractor,
|
||||
"#pattern": COMBINED_URL_PATTERN,
|
||||
"#count": 1,
|
||||
|
||||
"width": 1128,
|
||||
"height": 1504,
|
||||
"date": "dt:2022-07-17 00:24:48",
|
||||
"expire_at": "dt:2022-07-18 00:24:48",
|
||||
},
|
||||
|
||||
{
|
||||
"#url": "https://weverse.io/dreamcatcher/moment/e89820ec1a72d7255120284ca3aeafa5",
|
||||
"#comment": "each pagination call returns 1 item",
|
||||
"#category": ("", "weverse", "moments"),
|
||||
"#class": weverse.WeverseMomentsExtractor,
|
||||
"#pattern": weverse.WeverseMomentExtractor.pattern,
|
||||
"#auth": True,
|
||||
"#range": "2",
|
||||
},
|
||||
|
||||
{
|
||||
"#url": "https://weverse.io/lesserafim/media/0-128617470",
|
||||
"#comment": "image",
|
||||
"#category": ("", "weverse", "media"),
|
||||
"#class": weverse.WeverseMediaExtractor,
|
||||
"#pattern": COMBINED_URL_PATTERN,
|
||||
"#count": 5,
|
||||
|
||||
"media_type": "IMAGE",
|
||||
"categories": [
|
||||
{
|
||||
"id": 1091,
|
||||
"type": "MEDIA",
|
||||
"title": "PHOTOBOOK",
|
||||
},
|
||||
],
|
||||
"community": {
|
||||
"name": "LE SSERAFIM",
|
||||
},
|
||||
},
|
||||
|
||||
{
|
||||
"#url": "https://weverse.io/lesserafim/media/1-128435266",
|
||||
"#comment": "video",
|
||||
"#category": ("", "weverse", "media"),
|
||||
"#class": weverse.WeverseMediaExtractor,
|
||||
"#pattern": COMBINED_URL_PATTERN,
|
||||
"#count": 1,
|
||||
|
||||
"width": 1080,
|
||||
"height": 1920,
|
||||
"media_type": "VOD",
|
||||
"categories": [
|
||||
{
|
||||
"id": 1532,
|
||||
"type": "MEDIA",
|
||||
"title": "Perfect Night",
|
||||
}
|
||||
],
|
||||
},
|
||||
|
||||
{
|
||||
"#url": "https://weverse.io/dreamcatcher/media/1-128875973",
|
||||
"#comment": "embed",
|
||||
"#category": ("", "weverse", "media"),
|
||||
"#class": weverse.WeverseMediaExtractor,
|
||||
|
||||
"post_type": "YOUTUBE",
|
||||
},
|
||||
|
||||
{
|
||||
"#url": "https://weverse.io/dreamcatcher/media",
|
||||
"#comment": "each pagination call returns up to 10 items",
|
||||
"#category": ("", "weverse", "media-tab"),
|
||||
"#class": weverse.WeverseMediaTabExtractor,
|
||||
"#pattern": weverse.WeverseMediaExtractor.pattern,
|
||||
"#auth": True,
|
||||
"#range": "11",
|
||||
},
|
||||
|
||||
{
|
||||
"#url": "https://weverse.io/lesserafim/media/category/494",
|
||||
"#comment": "each pagination call returns up to 10 items",
|
||||
"#category": ("", "weverse", "media-category"),
|
||||
"#class": weverse.WeverseMediaCategoryExtractor,
|
||||
"#pattern": weverse.WeverseMediaExtractor.pattern,
|
||||
"#auth": True,
|
||||
"#range": "11",
|
||||
},
|
||||
|
||||
)
|
Loading…
Reference in New Issue
Block a user