1
0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2024-06-29 00:52:21 +02:00

Compare commits

...

6 Commits

Author SHA1 Message Date
hafeoz
f787db76d4
Merge ff028f2ac4 into f3411af12e 2024-06-25 23:34:39 +00:00
bashonly
ff028f2ac4
kilo_or_none() => _kilo_or_none()
Authored by: bashonly
2024-06-25 18:34:01 -05:00
bashonly
56898e9b99
query_api() => _query_api()
Authored by: bashonly
2024-06-25 18:32:39 -05:00
bashonly
a0cf17f9c6
extract_formats() => _extract_formats()
Authored by: bashonly
2024-06-25 18:31:07 -05:00
bashonly
ddf6ed2851
Fix extract_formats
Authored by: bashonly
2024-06-25 18:26:19 -05:00
megumin
f3411af12e
[ie/matchtv] Fix extractor (#10190)
Authored by: megumintyan
2024-06-25 00:49:09 +02:00
2 changed files with 48 additions and 69 deletions

View File

@@ -1,51 +1,35 @@
import random
from .common import InfoExtractor
from ..utils import xpath_text
class MatchTVIE(InfoExtractor):
_VALID_URL = r'https?://matchtv\.ru(?:/on-air|/?#live-player)'
_VALID_URL = [
r'https?://matchtv\.ru/on-air/?(?:$|[?#])',
r'https?://video\.matchtv\.ru/iframe/channel/106/?(?:$|[?#])',
]
_TESTS = [{
'url': 'http://matchtv.ru/#live-player',
'url': 'http://matchtv.ru/on-air/',
'info_dict': {
'id': 'matchtv-live',
'ext': 'flv',
'ext': 'mp4',
'title': r're:^Матч ТВ - Прямой эфир \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
'is_live': True,
'live_status': 'is_live',
},
'params': {
'skip_download': True,
},
}, {
'url': 'http://matchtv.ru/on-air/',
'url': 'https://video.matchtv.ru/iframe/channel/106',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = 'matchtv-live'
video_url = self._download_json(
'http://player.matchtv.ntvplus.tv/player/smil', video_id,
query={
'ts': '',
'quality': 'SD',
'contentId': '561d2c0df7159b37178b4567',
'sign': '',
'includeHighlights': '0',
'userId': '',
'sessionId': random.randint(1, 1000000000),
'contentType': 'channel',
'timeShift': '0',
'platform': 'portal',
},
headers={
'Referer': 'http://player.matchtv.ntvplus.tv/embed-player/NTVEmbedPlayer.swf',
})['data']['videoUrl']
f4m_url = xpath_text(self._download_xml(video_url, video_id), './to')
formats = self._extract_f4m_formats(f4m_url, video_id)
webpage = self._download_webpage('https://video.matchtv.ru/iframe/channel/106', video_id)
video_url = self._html_search_regex(
r'data-config="config=(https?://[^?"]+)[?"]', webpage, 'video URL').replace('/feed/', '/media/') + '.m3u8'
return {
'id': video_id,
'title': 'Матч ТВ - Прямой эфир',
'is_live': True,
'formats': formats,
'formats': self._extract_m3u8_formats(video_url, video_id, 'mp4', live=True),
}

View File

@@ -22,12 +22,12 @@
class NetEaseMusicBaseIE(InfoExtractor):
_LEVELS = ['standard', 'exhigh', 'lossless', 'hires', 'jyeffect', 'sky', 'jymaster']
_LEVELS = ('standard', 'exhigh', 'lossless', 'hires', 'jyeffect', 'sky', 'jymaster')
_API_BASE = 'http://music.163.com/api/'
_GEO_BYPASS = False
@staticmethod
def kilo_or_none(value):
def _kilo_or_none(value):
return int_or_none(value, scale=1000)
def _create_eapi_cipher(self, api_path, query_body, cookies):
@@ -71,36 +71,31 @@ def _call_player_api(self, song_id, level):
'/song/enhance/player/url/v1', song_id, {'ids': f'[{song_id}]', 'level': level, 'encodeType': 'flac'},
note=f'Downloading song URL info: level {level}')
def extract_formats(self, info):
err = 0
def _extract_formats(self, info):
formats = []
song_id = info['id']
for song_level in self._LEVELS:
for song in traverse_obj(self._call_player_api(song_id, song_level), ('data', lambda _, v: url_or_none(v['url']))):
song_url = song['url']
if self._is_valid_url(song_url, info['id'], 'song'):
formats.append({
'url': song_url,
'format_id': song_level,
'vcodec': 'none',
**traverse_obj(song, {
'ext': ('type', {str}),
'abr': ('br', {self.kilo_or_none}),
'filesize': ('size', {int_or_none}),
}),
})
elif err == 0:
err = traverse_obj(song, ('code', {int})) or 0
for level in self._LEVELS:
song = traverse_obj(self._call_player_api(song_id, level), ('data', 0, {dict})) or {}
if song.get('level') != level:
break # We have already extracted the highest level the user has access to
if not url_or_none(song.get('url')):
continue
formats.append({
'url': song['url'],
'format_id': level,
'vcodec': 'none',
**traverse_obj(song, {
'ext': ('type', {str}),
'abr': ('br', {self._kilo_or_none}),
'filesize': ('size', {int_or_none}),
}),
})
if not formats:
if err != 0 and (err < 200 or err >= 400):
raise ExtractorError(f'No media links found (site code {err})', expected=True)
else:
self.raise_geo_restricted(
'No media links found: probably due to geo restriction.', countries=['CN'])
self.raise_geo_restricted(
'No media links found; possibly due to geo restriction', countries=['CN'])
return formats
def query_api(self, endpoint, video_id, note):
def _query_api(self, endpoint, video_id, note):
result = self._download_json(
f'{self._API_BASE}{endpoint}', video_id, note, headers={'Referer': self._API_BASE})
code = traverse_obj(result, ('code', {int}))
@@ -253,12 +248,12 @@ def _process_lyrics(self, lyrics_info):
def _real_extract(self, url):
song_id = self._match_id(url)
info = self.query_api(
info = self._query_api(
f'song/detail?id={song_id}&ids=%5B{song_id}%5D', song_id, 'Downloading song info')['songs'][0]
formats = self.extract_formats(info)
formats = self._extract_formats(info)
lyrics = self._process_lyrics(self.query_api(
lyrics = self._process_lyrics(self._query_api(
f'song/lyric?id={song_id}&lv=-1&tv=-1', song_id, 'Downloading lyrics data'))
lyric_data = {
'description': traverse_obj(lyrics, (('lyrics_merged', 'lyrics'), 0, 'data'), get_all=False),
@@ -274,9 +269,9 @@ def _real_extract(self, url):
**lyric_data,
**traverse_obj(info, {
'title': ('name', {str}),
'timestamp': ('album', 'publishTime', {self.kilo_or_none}),
'timestamp': ('album', 'publishTime', {self._kilo_or_none}),
'thumbnail': ('album', 'picUrl', {url_or_none}),
'duration': ('duration', {self.kilo_or_none}),
'duration': ('duration', {self._kilo_or_none}),
'album': ('album', 'name', {str}),
'average_rating': ('score', {int_or_none}),
}),
@@ -360,7 +355,7 @@ class NetEaseMusicSingerIE(NetEaseMusicBaseIE):
def _real_extract(self, url):
singer_id = self._match_id(url)
info = self.query_api(
info = self._query_api(
f'artist/{singer_id}?id={singer_id}', singer_id, note='Downloading singer data')
name = join_nonempty(
@@ -428,7 +423,7 @@ def _real_extract(self, url):
'tags': ('tags', ..., {str}),
'uploader': ('creator', 'nickname', {str}),
'uploader_id': ('creator', 'userId', {str_or_none}),
'timestamp': ('updateTime', {self.kilo_or_none}),
'timestamp': ('updateTime', {self._kilo_or_none}),
}))
if traverse_obj(info, ('playlist', 'specialType')) == 10:
metainfo['title'] = f'{metainfo.get("title")} {strftime_or_none(metainfo.get("timestamp"), "%Y-%m-%d")}'
@@ -475,7 +470,7 @@ class NetEaseMusicMvIE(NetEaseMusicBaseIE):
def _real_extract(self, url):
mv_id = self._match_id(url)
info = self.query_api(
info = self._query_api(
f'mv/detail?id={mv_id}&type=mp4', mv_id, 'Downloading mv info')['data']
formats = [
@@ -492,7 +487,7 @@ def _real_extract(self, url):
'creator': ('artistName', {str}),
'upload_date': ('publishTime', {unified_strdate}),
'thumbnail': ('cover', {url_or_none}),
'duration': ('duration', {self.kilo_or_none}),
'duration': ('duration', {self._kilo_or_none}),
'view_count': ('playCount', {int_or_none}),
'like_count': ('likeCount', {int_or_none}),
'comment_count': ('commentCount', {int_or_none}),
@@ -552,7 +547,7 @@ class NetEaseMusicProgramIE(NetEaseMusicBaseIE):
def _real_extract(self, url):
program_id = self._match_id(url)
info = self.query_api(
info = self._query_api(
f'dj/program/detail?id={program_id}', program_id, note='Downloading program info')['program']
metainfo = traverse_obj(info, {
@@ -560,17 +555,17 @@ def _real_extract(self, url):
'description': ('description', {str}),
'creator': ('dj', 'brand', {str}),
'thumbnail': ('coverUrl', {url_or_none}),
'timestamp': ('createTime', {self.kilo_or_none}),
'timestamp': ('createTime', {self._kilo_or_none}),
})
if not self._yes_playlist(
info['songs'] and program_id, info['mainSong']['id'], playlist_label='program', video_label='song'):
formats = self.extract_formats(info['mainSong'])
formats = self._extract_formats(info['mainSong'])
return {
'id': str(info['mainSong']['id']),
'formats': formats,
'duration': traverse_obj(info, ('mainSong', 'duration', {self.kilo_or_none})),
'duration': traverse_obj(info, ('mainSong', 'duration', {self._kilo_or_none})),
**metainfo,
}
@@ -599,7 +594,7 @@ def _real_extract(self, url):
metainfo = {}
entries = []
for offset in itertools.count(start=0, step=self._PAGE_SIZE):
info = self.query_api(
info = self._query_api(
f'dj/program/byradio?asc=false&limit={self._PAGE_SIZE}&radioId={dj_id}&offset={offset}',
dj_id, note=f'Downloading dj programs - {offset}')