Merge 0ffa180960 into f3411af12e

[ie/matchtv] Fix extractor (#10190)
Authored by: megumintyan
2024-06-29 00:52:21 +02:00 · 2024-06-25 22:49:24 +08:00 · 2024-06-25 00:49:09 +02:00 · 2024-05-29 19:40:53 +03:00 · 2024-05-29 19:34:01 +03:00 · 2024-05-29 02:37:03 +00:00
3 changed files with 91 additions and 28 deletions
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -969,6 +969,10 @@
    LA7PodcastEpisodeIE,
    LA7PodcastIE,
 )
+from .laracasts import (
+    LaracastsIE,
+    LaracastsPlaylistIE,
+)
 from .lastfm import (
    LastFMIE,
    LastFMPlaylistIE,
--- a/yt_dlp/extractor/laracasts.py
+++ b/yt_dlp/extractor/laracasts.py
@@ -0,0 +1,75 @@
+from .common import InfoExtractor
+from .vimeo import VimeoIE
+from ..utils import (
+    ExtractorError,
+    extract_attributes,
+    get_element_html_by_id,
+)
+
+
+class LaracastsPlaylistIE(InfoExtractor):
+    # Turns a Laracasts series landing page into a playlist whose entries are
+    # the individual episode page URLs (each handed over to LaracastsIE).
+    IE_NAME = 'laracasts:series'
+    # Matches only the series page itself: after the series slug an optional
+    # trailing slash is allowed, followed by end of URL, a query or a fragment.
+    _VALID_URL = r'https?://(?:www\.)?laracasts\.com/series/(?P<series>[^/?#]+)/?(?:$|[?#])'
+    _TESTS = [{
+        'url': 'https://laracasts.com/series/30-days-to-learn-laravel-11',
+        'info_dict': {
+            'title': '30 Days to Learn Laravel',
+            # The extractor returns the id as the string captured by the regex
+            # below, so the expected value has to be a string as well.
+            'id': '210',
+        },
+        'only_matching': True,
+    }]
+
+    def _entries(self, series, episode_count):
+        """Yield one url_result per episode page, numbered 1..episode_count."""
+        for current_episode in range(1, episode_count + 1):
+            webpage_url = f'https://laracasts.com/series/{series}/episodes/{current_episode}'
+            yield self.url_result(webpage_url, LaracastsIE)
+
+    def _real_extract(self, url):
+        """Read episode count, title and id from the series page and build the playlist."""
+        mobj = self._match_valid_url(url)
+        display_id = mobj.group('series')
+        webpage = self._download_webpage(url, display_id)
+        # The patterns below target key/value pairs whose quotes appear
+        # HTML-escaped (&quot;) in the raw page source.
+        episode_count = int(self._search_regex(r'episodeCount&quot;:(?P<episode_count>[0-9]+)', webpage, 'episode_count'))
+        playlist_title = self._search_regex(r'title&quot;:&quot;(?P<playlist_title>[^&]+)&quot;,', webpage, 'playlist_title')
+        # NOTE(review): this takes the first numeric `id` found in the page -
+        # confirm that it is always the id of the series.
+        playlist_id = self._search_regex(r'id&quot;:(?P<playlist_id>[0-9]+)', webpage, 'playlist_id')
+        return self.playlist_result(self._entries(display_id, episode_count), playlist_id, playlist_title)
+
+
+class LaracastsIE(InfoExtractor):
+    # Resolves a single Laracasts episode page to the Vimeo player URL of its
+    # video and delegates the actual extraction to VimeoIE.
+    IE_NAME = 'laracasts'
+    # Uses the same URL tail as LaracastsPlaylistIE (optional trailing slash,
+    # then end of URL, query or fragment) instead of a bare `$`, so episode
+    # URLs carrying a query string or fragment are accepted too.
+    _VALID_URL = r'https?://(?:www\.)?laracasts\.com/series/(?P<series>[\w-]+)/episodes/(?P<episode_number>[0-9]+)/?(?:$|[?#])'
+    _TESTS = [{
+        'url': 'https://laracasts.com/series/30-days-to-learn-laravel-11/episodes/1',
+        'md5': 'c8f5e7b02ad0e438ef9280a08c8493dc',
+        'info_dict': {
+            'id': '922040563',
+            'title': '1-Hello-Laravel',
+            'uploader': 'Laracasts',
+            'uploader_id': 'user20182673',
+            'uploader_url': 'https://vimeo.com/user20182673',
+            'ext': 'mp4',
+            'duration': 519,
+            'thumbnail': 'https://i.vimeocdn.com/video/1812897371-64aac3913bc92e99c5a56ff58fa0d4894993ba04bd2e6703d3f2295e998d5548-d_1280',
+        },
+    }]
+
+    def extract_vimeo_id(self, url):
+        """Return the Vimeo id of the episode addressed by `url`.
+
+        Downloads the episode page, parses the JSON stored in the `data-page`
+        attribute of the element with id "app", and looks the episode up by
+        its `position` among the episodes of all chapters of the series.
+
+        Raises ExtractorError if no episode has the requested position.
+        """
+        series, episode_number = self._match_valid_url(url).group('series', 'episode_number')
+        display_id = f'{series}/{episode_number}'
+
+        webpage = self._download_webpage(url, display_id)
+        app_attributes = extract_attributes(get_element_html_by_id('app', webpage))
+        app_json = self._parse_json(app_attributes.get('data-page'), display_id)
+
+        # Convert once instead of on every comparison inside the nested loop
+        wanted_position = int(episode_number)
+        for chapter in app_json['props']['series']['chapters']:
+            for episode in chapter['episodes']:
+                if int(episode['position']) == wanted_position:
+                    return episode['vimeoId']
+
+        # Falling through used to return None implicitly, which the caller then
+        # formatted into a bogus ".../video/None" player URL.
+        raise ExtractorError(
+            f'Unable to find episode {episode_number} of series {series}', video_id=display_id)
+
+    def _real_extract(self, url):
+        """Build the referrer-smuggled Vimeo player URL and hand it to VimeoIE."""
+        video_id = self.extract_vimeo_id(url)
+        embed_url = VimeoIE._smuggle_referrer(f'https://player.vimeo.com/video/{video_id}', 'https://laracasts.com/')
+
+        # Name the target extractor explicitly, as LaracastsPlaylistIE does for its entries
+        return self.url_result(embed_url, VimeoIE)
--- a/yt_dlp/extractor/matchtv.py
+++ b/yt_dlp/extractor/matchtv.py
@@ -1,51 +1,35 @@
-import random
-
 from .common import InfoExtractor
-from ..utils import xpath_text


 class MatchTVIE(InfoExtractor):
-    _VALID_URL = r'https?://matchtv\.ru(?:/on-air|/?#live-player)'
+    _VALID_URL = [
+        r'https?://matchtv\.ru/on-air/?(?:$|[?#])',
+        r'https?://video\.matchtv\.ru/iframe/channel/106/?(?:$|[?#])',
+    ]
    _TESTS = [{
-        'url': 'http://matchtv.ru/#live-player',
+        'url': 'http://matchtv.ru/on-air/',
        'info_dict': {
            'id': 'matchtv-live',
-            'ext': 'flv',
+            'ext': 'mp4',
            'title': r're:^Матч ТВ - Прямой эфир \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
-            'is_live': True,
+            'live_status': 'is_live',
        },
        'params': {
            'skip_download': True,
        },
    }, {
-        'url': 'http://matchtv.ru/on-air/',
+        'url': 'https://video.matchtv.ru/iframe/channel/106',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_id = 'matchtv-live'
-        video_url = self._download_json(
-            'http://player.matchtv.ntvplus.tv/player/smil', video_id,
-            query={
-                'ts': '',
-                'quality': 'SD',
-                'contentId': '561d2c0df7159b37178b4567',
-                'sign': '',
-                'includeHighlights': '0',
-                'userId': '',
-                'sessionId': random.randint(1, 1000000000),
-                'contentType': 'channel',
-                'timeShift': '0',
-                'platform': 'portal',
-            },
-            headers={
-                'Referer': 'http://player.matchtv.ntvplus.tv/embed-player/NTVEmbedPlayer.swf',
-            })['data']['videoUrl']
-        f4m_url = xpath_text(self._download_xml(video_url, video_id), './to')
-        formats = self._extract_f4m_formats(f4m_url, video_id)
+        webpage = self._download_webpage('https://video.matchtv.ru/iframe/channel/106', video_id)
+        video_url = self._html_search_regex(
+            r'data-config="config=(https?://[^?"]+)[?"]', webpage, 'video URL').replace('/feed/', '/media/') + '.m3u8'
        return {
            'id': video_id,
            'title': 'Матч ТВ - Прямой эфир',
            'is_live': True,
-            'formats': formats,
+            'formats': self._extract_m3u8_formats(video_url, video_id, 'mp4', live=True),
        }
Author	SHA1	Message	Date
A. Sertaç Akkaya	ee0656e07d	Merge `0ffa180960` into `f3411af12e`	2024-06-25 22:49:24 +08:00
megumin	f3411af12e	[ie/matchtv] Fix extractor (#10190) Authored by: megumintyan	2024-06-25 00:49:09 +02:00
A. Sertaç Akkaya	0ffa180960	Formatting	2024-05-29 19:40:53 +03:00
A. Sertaç Akkaya	3afcbe99ca	Major bugfix: Downloads weren't correctly iterating to requested episode due to misparsed JSON.	2024-05-29 19:34:01 +03:00
bashonly	3eb6eab312	formatting	2024-05-29 02:37:03 +00:00
bashonly	214978e133	Apply suggestions from code review	2024-05-29 02:35:07 +00:00
A. Sertaç Akkaya	398bc159f8	[laracasts] Add extractor	2024-05-29 05:04:35 +03:00