1
0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2024-06-29 00:52:21 +02:00

Compare commits

...

7 Commits

Author SHA1 Message Date
A. Sertaç Akkaya
ee0656e07d
Merge 0ffa180960 into f3411af12e 2024-06-25 22:49:24 +08:00
megumin
f3411af12e
[ie/matchtv] Fix extractor (#10190)
Authored by: megumintyan
2024-06-25 00:49:09 +02:00
A. Sertaç Akkaya
0ffa180960 Formatting 2024-05-29 19:40:53 +03:00
A. Sertaç Akkaya
3afcbe99ca Major bugfix: Downloads weren't correctly iterating to requested episode due to misparsed JSON. 2024-05-29 19:34:01 +03:00
bashonly
3eb6eab312
formatting 2024-05-29 02:37:03 +00:00
bashonly
214978e133
Apply suggestions from code review 2024-05-29 02:35:07 +00:00
A. Sertaç Akkaya
398bc159f8 [laracasts] Add extractor 2024-05-29 05:04:35 +03:00
3 changed files with 91 additions and 28 deletions

View File

@ -969,6 +969,10 @@
LA7PodcastEpisodeIE,
LA7PodcastIE,
)
from .laracasts import (
LaracastsIE,
LaracastsPlaylistIE,
)
from .lastfm import (
LastFMIE,
LastFMPlaylistIE,

View File

@ -0,0 +1,75 @@
from .common import InfoExtractor
from .vimeo import VimeoIE
from ..utils import (
    ExtractorError,
    extract_attributes,
    get_element_html_by_id,
)
class LaracastsPlaylistIE(InfoExtractor):
    """Extract a Laracasts series page as a playlist of its episode pages."""

    IE_NAME = 'laracasts:series'
    _VALID_URL = r'https?://(?:www\.)?laracasts\.com/series/(?P<series>[^/?#]+)/?(?:$|[?#])'
    _TESTS = [{
        'url': 'https://laracasts.com/series/30-days-to-learn-laravel-11',
        'info_dict': {
            'title': '30 Days to Learn Laravel',
            # The id is captured by a regex, so it is a string, not an int
            'id': '210',
        },
        'only_matching': True,
    }]

    def _entries(self, series, episode_count):
        """Yield one URL result per episode, numbered 1..episode_count inclusive.

        Each result points at the episode page and is handed to LaracastsIE.
        """
        for episode_number in range(1, episode_count + 1):
            yield self.url_result(
                f'https://laracasts.com/series/{series}/episodes/{episode_number}', LaracastsIE)

    def _real_extract(self, url):
        """Build the playlist result for the series addressed by *url*.

        The episode count is mandatory (without it no entries can be generated);
        the playlist title and id are optional metadata, so a failure to find
        them only produces a warning instead of aborting the extraction.
        """
        display_id = self._match_valid_url(url).group('series')
        webpage = self._download_webpage(url, display_id)

        # Values are matched in their HTML-escaped form (`&quot;` delimiters);
        # presumably this is the same escaped JSON the episode pages carry in
        # the `data-page` attribute -- TODO confirm against a series page.
        episode_count = int(self._search_regex(
            r'episodeCount&quot;:([0-9]+)', webpage, 'episode count'))

        # Allow HTML entities such as `&amp;` inside the title (only `&quot;`
        # terminates it) and let _html_search_regex unescape the result.
        playlist_title = self._html_search_regex(
            r'title&quot;:&quot;((?:[^&]|&(?!quot;))+)&quot;,',
            webpage, 'playlist title', fatal=False)
        playlist_id = self._search_regex(
            r'id&quot;:([0-9]+)', webpage, 'playlist id', fatal=False)

        return self.playlist_result(
            self._entries(display_id, episode_count), playlist_id, playlist_title)
class LaracastsIE(InfoExtractor):
    """Extract a single Laracasts episode by delegating to its Vimeo embed."""

    IE_NAME = 'laracasts'
    # Accept an optional trailing slash, query string or fragment after the
    # episode number, mirroring the series URL pattern.
    _VALID_URL = r'https?://(?:www\.)?laracasts\.com/series/(?P<series>[\w-]+)/episodes/(?P<episode_number>[0-9]+)/?(?:$|[?#])'
    _TESTS = [{
        'url': 'https://laracasts.com/series/30-days-to-learn-laravel-11/episodes/1',
        'md5': 'c8f5e7b02ad0e438ef9280a08c8493dc',
        'info_dict': {
            'id': '922040563',
            'title': '1-Hello-Laravel',
            'uploader': 'Laracasts',
            'uploader_id': 'user20182673',
            'uploader_url': 'https://vimeo.com/user20182673',
            'ext': 'mp4',
            'duration': 519,
            'thumbnail': 'https://i.vimeocdn.com/video/1812897371-64aac3913bc92e99c5a56ff58fa0d4894993ba04bd2e6703d3f2295e998d5548-d_1280',
        },
    }, {
        'url': 'https://laracasts.com/series/30-days-to-learn-laravel-11/episodes/1/?autoplay=true',
        'only_matching': True,
    }]

    def extract_vimeo_id(self, url):
        """Return the Vimeo ID of the episode addressed by *url*.

        The episode page carries a JSON document in the ``data-page`` attribute
        of the element with id ``app``. Its ``props.series.chapters`` list is
        searched for the episode whose ``position`` equals the episode number
        taken from the URL.

        Raises ExtractorError if the page data is missing or no episode with
        the requested position exists.
        """
        series, episode_number = self._match_valid_url(url).group('series', 'episode_number')
        display_id = f'{series}/{episode_number}'
        webpage = self._download_webpage(url, display_id)

        app_element = get_element_html_by_id('app', webpage)
        page_data = extract_attributes(app_element).get('data-page') if app_element else None
        if not page_data:
            # Fail with a readable message instead of a TypeError further down
            raise ExtractorError('Unable to find page data', video_id=display_id)
        app_json = self._parse_json(page_data, display_id)

        wanted_position = int(episode_number)
        for chapter in app_json['props']['series']['chapters']:
            for episode in chapter['episodes']:
                if int(episode['position']) == wanted_position:
                    return episode['vimeoId']

        # Without this the method would return None and the caller would build
        # the bogus embed URL https://player.vimeo.com/video/None
        raise ExtractorError(
            f'Episode {episode_number} not found in series {series!r}', video_id=display_id)

    def _real_extract(self, url):
        """Resolve the episode to its Vimeo player URL and hand it to VimeoIE."""
        video_id = self.extract_vimeo_id(url)
        # The Laracasts referrer is smuggled into the player URL for VimeoIE
        embed_url = VimeoIE._smuggle_referrer(
            f'https://player.vimeo.com/video/{video_id}', 'https://laracasts.com/')
        return self.url_result(embed_url, VimeoIE)

View File

@ -1,51 +1,35 @@
import random
from .common import InfoExtractor
from ..utils import xpath_text
class MatchTVIE(InfoExtractor):
    """Extract the Match TV live channel as an HLS stream.

    The previous revision's SMIL/f4m request path was still interleaved with
    its replacement (duplicate ``_VALID_URL`` assignment, duplicate dict keys,
    a first request whose result was overwritten). Only the effective,
    last-wins values are kept here.
    """

    _VALID_URL = [
        r'https?://matchtv\.ru/on-air/?(?:$|[?#])',
        r'https?://video\.matchtv\.ru/iframe/channel/106/?(?:$|[?#])',
    ]
    _TESTS = [{
        'url': 'http://matchtv.ru/on-air/',
        'info_dict': {
            'id': 'matchtv-live',
            'ext': 'mp4',
            'title': r're:^Матч ТВ - Прямой эфир \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
            'is_live': True,
            'live_status': 'is_live',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://video.matchtv.ru/iframe/channel/106',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the info dict for the live channel.

        *url* is not consulted: whichever pattern matched, the fixed channel
        iframe page is downloaded. The player config URL found in its
        ``data-config`` attribute is turned into a manifest URL by replacing
        ``/feed/`` with ``/media/`` and appending ``.m3u8``.
        """
        video_id = 'matchtv-live'
        webpage = self._download_webpage('https://video.matchtv.ru/iframe/channel/106', video_id)
        config_url = self._html_search_regex(
            r'data-config="config=(https?://[^?"]+)[?"]', webpage, 'video URL')
        video_url = config_url.replace('/feed/', '/media/') + '.m3u8'

        return {
            'id': video_id,
            'title': 'Матч ТВ - Прямой эфир',
            'is_live': True,
            'formats': self._extract_m3u8_formats(video_url, video_id, 'mp4', live=True),
        }