1
0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2024-11-18 08:53:10 +01:00
yt-dlp/yt_dlp/extractor/likee.py
Ha Tien Loi f963b7ab18
[Likee] Add extractor (#3625)
Closes #3603
Authored by: hatienl0i261299
2022-05-04 07:13:52 -07:00

194 lines
7.3 KiB
Python
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import json
from .common import InfoExtractor
from ..utils import (
int_or_none,
js_to_json,
parse_iso8601,
str_or_none,
traverse_obj,
)
class LikeeIE(InfoExtractor):
    """Extractor for single Likee videos.

    Handles both the long form (``/@<channel>/video/<id>``) and the short
    share form (``/v/<id>``) of likee.video URLs.
    """

    IE_NAME = 'likee'
    _VALID_URL = r'(?x)https?://(www\.)?likee\.video/(?:(?P<channel_name>[^/]+)/video/|v/)(?P<id>\w+)'
    _TESTS = [{
        'url': 'https://likee.video/@huynh_hong_quan_/video/7093444807096327263',
        'info_dict': {
            'id': '7093444807096327263',
            'ext': 'mp4',
            'title': '🤴🤴🤴',
            'description': 'md5:9a7ebe816f0e78722ee5ed76f75983b4',
            'thumbnail': r're:^https?://.+\.jpg',
            'uploader': 'Huỳnh Hồng Qu&acirc;n ',
            'play_count': int,
            'download_count': int,
            'artist': 'Huỳnh Hồng Qu&acirc;n ',
            'timestamp': 1651571320,
            'upload_date': '20220503',
            'view_count': int,
            'uploader_id': 'huynh_hong_quan_',
            'duration': 12374,
            'comment_count': int,
            'like_count': int,
        },
    }, {
        'url': 'https://likee.video/@649222262/video/7093167848050058862',
        'info_dict': {
            'id': '7093167848050058862',
            'ext': 'mp4',
            'title': 'likee video #7093167848050058862',
            'description': 'md5:3f971c8c6ee8a216f2b1a9094c5de99f',
            'thumbnail': r're:^https?://.+\.jpg',
            'comment_count': int,
            'like_count': int,
            'uploader': 'Vương Phước Nhi',
            'download_count': int,
            'timestamp': 1651506835,
            'upload_date': '20220502',
            'duration': 60024,
            'play_count': int,
            'artist': 'Vương Phước Nhi',
            'uploader_id': '649222262',
            'view_count': int,
        },
    }, {
        'url': 'https://likee.video/@fernanda_rivasg/video/6932224568407629502',
        'info_dict': {
            'id': '6932224568407629502',
            'ext': 'mp4',
            'title': 'Un trend viejito🔥 #LIKEE #Ferlovers #trend ',
            'description': 'md5:c42b903a72a99d6d8b73e3d1126fbcef',
            'thumbnail': r're:^https?://.+\.jpg',
            'comment_count': int,
            'duration': 9684,
            'uploader_id': 'fernanda_rivasg',
            'view_count': int,
            'play_count': int,
            'artist': 'La Cami La✨',
            'download_count': int,
            'like_count': int,
            'uploader': 'Fernanda Rivas🎶',
            'timestamp': 1614034308,
            'upload_date': '20210222',
        },
    }, {
        'url': 'https://likee.video/v/k6QcOp',
        'info_dict': {
            'id': 'k6QcOp',
            'ext': 'mp4',
            'title': '#AguaChallenge t&uacute; ya lo intentaste?😱🤩',
            'description': 'md5:b0cc462689d4ff2b624daa4dba7640d9',
            'thumbnail': r're:^https?://.+\.jpg',
            'comment_count': int,
            'duration': 18014,
            'play_count': int,
            'view_count': int,
            'timestamp': 1611694774,
            'like_count': int,
            'uploader': 'Fernanda Rivas🎶',
            'uploader_id': 'fernanda_rivasg',
            'download_count': int,
            'artist': 'ʟᴇʀɪᴋ_ɴɪʀɴ♡',
            'upload_date': '20210126',
        },
    }, {
        'url': 'https://www.likee.video/@649222262/video/7093167848050058862',
        'only_matching': True,
    }, {
        'url': 'https://www.likee.video/v/k6QcOp',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for one video from the page's ``window.data`` blob.

        The video page embeds a JavaScript object assigned to ``window.data``;
        it is converted to JSON and all metadata is read from it.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        # Tolerate any amount of whitespace around `=`, same as LikeeUserIE.
        info = self._parse_json(
            self._search_regex(r'window\.data\s*=\s*({.+?});', webpage, 'video info'),
            video_id, transform_source=js_to_json)

        # The URL is read from the top level first, then from `originVideoInfo`.
        video_url = traverse_obj(info, 'video_url', ('originVideoInfo', 'video_url'))

        formats = []
        if video_url:
            dimensions = {
                'height': int_or_none(info.get('video_height')),
                'width': int_or_none(info.get('video_width')),
            }
            formats = [{
                'format_id': 'mp4-with-watermark',
                'url': video_url,
                **dimensions,
            }, {
                # Stripping `_4` from the URL yields the watermark-free variant,
                # which is preferred via the higher `quality`.
                'format_id': 'mp4-without-watermark',
                'url': video_url.replace('_4', ''),
                **dimensions,
                'quality': 1,
            }]
            self._sort_formats(formats)
        else:
            # raise_no_formats() only warns (and returns) when the user asked to
            # ignore missing formats, so the format list must already be safe
            # to use with no URL; previously this path crashed on None.replace().
            self.raise_no_formats('Video was deleted', expected=True)

        return {
            'id': video_id,
            'title': info.get('msgText'),
            'description': info.get('share_desc'),
            'view_count': int_or_none(info.get('video_count')),
            'like_count': int_or_none(info.get('likeCount')),
            'play_count': int_or_none(info.get('play_count')),
            'download_count': int_or_none(info.get('download_count')),
            'comment_count': int_or_none(info.get('comment_count')),
            'uploader': str_or_none(info.get('nick_name')),
            'uploader_id': str_or_none(info.get('likeeId')),
            'artist': str_or_none(traverse_obj(info, ('sound', 'owner_name'))),
            'timestamp': parse_iso8601(info.get('uploadDate')),
            'thumbnail': info.get('coverUrl'),
            # NOTE(review): the values in _TESTS (e.g. 12374) suggest `dur` may
            # be in milliseconds rather than seconds - confirm before rescaling.
            'duration': int_or_none(traverse_obj(info, ('option_data', 'dur'))),
            'formats': formats,
        }
class LikeeUserIE(InfoExtractor):
    """Extractor for a Likee user page, returned as a playlist of the user's videos."""

    IE_NAME = 'likee:user'
    _VALID_URL = r'https?://(www\.)?likee\.video/(?P<id>[^/]+)/?$'
    _TESTS = [{
        'url': 'https://likee.video/@fernanda_rivasg',
        'info_dict': {
            'id': '925638334',
            'title': 'fernanda_rivasg',
        },
        'playlist_mincount': 500,
    }, {
        'url': 'https://likee.video/@may_hmoob',
        'info_dict': {
            'id': '2943949041',
            'title': 'may_hmoob',
        },
        'playlist_mincount': 80,
    }]
    # Number of videos requested per API call.
    _PAGE_SIZE = 50
    _API_GET_USER_VIDEO = 'https://api.like-video.com/likee-activity-flow-micro/videoApi/getUserVideo'

    def _entries(self, user_name, user_id):
        """Yield one URL result per video of the user, paging through the API.

        Pagination is cursor based: each request sends the ``postId`` of the
        last video seen so far as ``lastPostId`` (empty for the first page).
        Iteration ends when a page has no videos or the cursor stops advancing.

        user_name -- URL path segment of the user (as matched by _VALID_URL);
                     used for progress output and to build the video URLs
        user_id   -- value sent as ``uid`` in the API request body
        """
        last_post_id = ''
        while True:
            user_videos = self._download_json(
                self._API_GET_USER_VIDEO, user_name,
                data=json.dumps({
                    'uid': user_id,
                    'count': self._PAGE_SIZE,
                    'lastPostId': last_post_id,
                    'tabType': 0,
                }).encode('utf-8'),
                headers={'content-type': 'application/json'},
                note=f'Get user info with lastPostId #{last_post_id}')
            items = traverse_obj(user_videos, ('data', 'videoList'))
            if not items:
                break

            cursor_before_page = last_post_id
            for item in items:
                # Skip malformed items instead of aborting the whole playlist.
                post_id = traverse_obj(item, 'postId')
                if not post_id:
                    continue
                last_post_id = post_id
                yield self.url_result(f'https://likee.video/{user_name}/video/{last_post_id}')

            # If the cursor did not move, the next request would be identical
            # to this one; stop rather than loop forever.
            if last_post_id == cursor_before_page:
                break

    def _real_extract(self, url):
        """Resolve the user's numeric id from the page and return the playlist.

        The user page embeds a JavaScript object assigned to ``window.data``;
        ``userinfo.uid`` becomes the playlist id and ``userinfo.user_name``
        the playlist title.
        """
        user_name = self._match_id(url)
        webpage = self._download_webpage(url, user_name)
        info = self._parse_json(
            self._search_regex(r'window\.data\s*=\s*({.+?});', webpage, 'user info'),
            user_name, transform_source=js_to_json)
        user_id = traverse_obj(info, ('userinfo', 'uid'))
        return self.playlist_result(
            self._entries(user_name, user_id), user_id,
            traverse_obj(info, ('userinfo', 'user_name')))