From 65ea4cba293d283f1d03b48208fb07e7e2ae35e2 Mon Sep 17 00:00:00 2001 From: HobbyistDev <105957301+HobbyistDev@users.noreply.github.com> Date: Mon, 11 Jul 2022 04:32:12 +0900 Subject: [PATCH] [extractor/mocha] Add extractor (#4213) Closes https://github.com/yt-dlp/yt-dlp/issues/3752 Authored by: HobbyistDev --- yt_dlp/extractor/_extractors.py | 1 + yt_dlp/extractor/mocha.py | 66 +++++++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+) create mode 100644 yt_dlp/extractor/mocha.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 70c5565d9..a7a915fa5 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -948,6 +948,7 @@ ) from .mlssoccer import MLSSoccerIE from .mnet import MnetIE +from .mocha import MochaVideoIE from .moevideo import MoeVideoIE from .mofosex import ( MofosexIE, diff --git a/yt_dlp/extractor/mocha.py b/yt_dlp/extractor/mocha.py new file mode 100644 index 000000000..27d2d9c2c --- /dev/null +++ b/yt_dlp/extractor/mocha.py @@ -0,0 +1,66 @@ +from .common import InfoExtractor +from ..utils import int_or_none, traverse_obj + + +class MochaVideoIE(InfoExtractor): + _VALID_URL = r'https?://video.mocha.com.vn/(?P[\w-]+)' + _TESTS = [{ + 'url': 'http://video.mocha.com.vn/chuyen-meo-gia-su-tu-thong-diep-cuoc-song-v18694039', + 'info_dict': { + 'id': '18694039', + 'title': 'Chuyện mèo giả sư tử | Thông điệp cuộc sống', + 'ext': 'mp4', + 'view_count': int, + 'like_count': int, + 'dislike_count': int, + 'display_id': 'chuyen-meo-gia-su-tu-thong-diep-cuoc-song', + 'thumbnail': 'http://mcvideomd1fr.keeng.net/playnow/images/20220505/ad0a055d-2f69-42ca-b888-4790041fe6bc_640x480.jpg', + 'description': '', + 'duration': 70, + 'timestamp': 1652254203, + 'upload_date': '20220511', + 'comment_count': int, + 'categories': ['Kids'] + } + }] + + def _real_extract(self, url): + video_slug = self._match_valid_url(url).group('video_slug') + json_data = self._download_json( + 'http://apivideo.mocha.com.vn:8081/onMediaBackendBiz/mochavideo/getVideoDetail', + video_slug, query={'url': url, 'token': ''})['data']['videoDetail'] + video_id = str(json_data['id']) + video_urls = (json_data.get('list_resolution') or []) + [json_data.get('original_path')] + + formats, subtitles = [], {} + for video in video_urls: + if isinstance(video, str): + formats.extend([{'url': video, 'ext': 'mp4'}]) + else: + fmts, subs = self._extract_m3u8_formats_and_subtitles( + video.get('video_path'), video_id, ext='mp4') + formats.extend(fmts) + self._merge_subtitles(subs, target=subtitles) + + self._sort_formats(formats) + + return { + 'id': video_id, + 'display_id': json_data.get('slug') or video_slug, + 'title': json_data.get('name'), + 'formats': formats, + 'subtitles': subtitles, + 'description': json_data.get('description'), + 'duration': json_data.get('durationS'), + 'view_count': json_data.get('total_view'), + 'like_count': json_data.get('total_like'), + 'dislike_count': json_data.get('total_unlike'), + 'thumbnail': json_data.get('image_path_thumb'), + 'timestamp': int_or_none(json_data.get('publish_time'), scale=1000), + 'is_live': json_data.get('isLive'), + 'channel': traverse_obj(json_data, ('channels', '0', 'name')), + 'channel_id': traverse_obj(json_data, ('channels', '0', 'id')), + 'channel_follower_count': traverse_obj(json_data, ('channels', '0', 'numfollow')), + 'categories': traverse_obj(json_data, ('categories', ..., 'categoryname')), + 'comment_count': json_data.get('total_comment'), + }