From 398bc159f8061e7319036e6654942dc06dd92bef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?A=2E=20Serta=C3=A7=20Akkaya?= Date: Wed, 29 May 2024 05:04:35 +0300 Subject: [PATCH 1/5] [laracasts] Add extractor --- yt_dlp/extractor/_extractors.py | 5 +++ yt_dlp/extractor/laracasts.py | 56 +++++++++++++++++++++++++++++++++ 2 files changed, 61 insertions(+) create mode 100644 yt_dlp/extractor/laracasts.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index e9cd38a65..4890810f9 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -969,11 +969,16 @@ LA7PodcastEpisodeIE, LA7PodcastIE, ) +from .laracasts import ( + LaracastsIE, + LaracastsPlaylistIE, +) from .lastfm import ( LastFMIE, LastFMPlaylistIE, LastFMUserIE, ) + from .laxarxames import LaXarxaMesIE from .lbry import ( LBRYIE, diff --git a/yt_dlp/extractor/laracasts.py b/yt_dlp/extractor/laracasts.py new file mode 100644 index 000000000..37a780fe5 --- /dev/null +++ b/yt_dlp/extractor/laracasts.py @@ -0,0 +1,56 @@ +from .common import InfoExtractor +from .vimeo import VimeoIE + + +class LaracastsPlaylistIE(InfoExtractor): + IE_NAME = 'laracasts:series' + _VALID_URL = r'https?://(?:www\.)?laracasts\.com/series/(?P<series>[^/?#]+)' + _TESTS = [{ + 'url': 'https://laracasts.com/series/30-days-to-learn-laravel-11', + 'info_dict': { + 'title': '30 Days to Learn Laravel', + 'id': 210, + }, + 'only_matching': True, + }] + + def _entries(self, series, episode_count): + for current_episode in range(1, episode_count + 1): + webpage_url = f'https://laracasts.com/series/{series}/episodes/{current_episode}' + yield self.url_result(webpage_url, LaracastsIE) + + def _real_extract(self, url): + mobj = self._match_valid_url(url) + display_id = mobj.group('series') + webpage = self._download_webpage(url, display_id) + episode_count = int(self._search_regex(r'episodeCount":(?P<episode_count>[0-9]+)', webpage, 'episode_count')) + playlist_title = self._search_regex(r'title":"(?P<playlist_title>[^&]+)",', webpage, 
'playlist_title') + playlist_id = self._search_regex(r'id":(?P<playlist_id>[0-9]+)', webpage, 'playlist_id') + return self.playlist_result(self._entries(display_id, episode_count), playlist_id, playlist_title) + +class LaracastsIE(InfoExtractor): + IE_NAME = 'laracasts' + _VALID_URL = r'https?://(?:www\.)?laracasts\.com/series/(?P<series>[^/?#]+)/episodes/(?P<episode_number>[0-9]+)' + _TESTS = [{ + 'url': 'https://laracasts.com/series/30-days-to-learn-laravel-11/episodes/1', + 'md5': 'c8f5e7b02ad0e438ef9280a08c8493dc', + 'info_dict': { + 'id': '922040563', + 'title': '1-Hello-Laravel', + 'uploader': 'Laracasts', + 'uploader_id': 'user20182673', + 'uploader_url': 'https://vimeo.com/user20182673', + 'ext': 'mp4', + 'duration': 519, + 'thumbnail': 'https://i.vimeocdn.com/video/1812897371-64aac3913bc92e99c5a56ff58fa0d4894993ba04bd2e6703d3f2295e998d5548-d_1280', + } + }] + + def _real_extract(self, url): + mobj = self._match_valid_url(url) + series, episode_number = mobj.group('series', 'episode_number') + display_id = '%s/%s' % (series, episode_number) + webpage = self._download_webpage(url, display_id) + video_id = self._search_regex(r'vimeoId":"(?P<vimeo_id>[0-9]+)', webpage, 'vimeo_id') + embed_url = VimeoIE._smuggle_referrer(f'https://player.vimeo.com/video/{video_id}', 'https://laracasts.com/') + return self.url_result(embed_url) From 214978e1333a1eb4b79ea56f7eed88c3cd1406a5 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Wed, 29 May 2024 02:35:07 +0000 Subject: [PATCH 2/5] Apply suggestions from code review --- yt_dlp/extractor/_extractors.py | 1 - yt_dlp/extractor/laracasts.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 4890810f9..c397da69c 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -978,7 +978,6 @@ LastFMPlaylistIE, LastFMUserIE, ) - from .laxarxames import LaXarxaMesIE from .lbry import ( LBRYIE, diff --git 
a/yt_dlp/extractor/laracasts.py b/yt_dlp/extractor/laracasts.py index 37a780fe5..ab0160fe6 100644 --- a/yt_dlp/extractor/laracasts.py +++ b/yt_dlp/extractor/laracasts.py @@ -4,7 +4,7 @@ class LaracastsPlaylistIE(InfoExtractor): IE_NAME = 'laracasts:series' - _VALID_URL = r'https?://(?:www\.)?laracasts\.com/series/(?P<series>[^/?#]+)' + _VALID_URL = r'https?://(?:www\.)?laracasts\.com/series/(?P<series>[^/?#]+)/?(?:$|[?#])' _TESTS = [{ 'url': 'https://laracasts.com/series/30-days-to-learn-laravel-11', 'info_dict': { From 3eb6eab31280864974fda3febc255e953b6e96f6 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Wed, 29 May 2024 02:37:03 +0000 Subject: [PATCH 3/5] formatting --- yt_dlp/extractor/laracasts.py | 1 + 1 file changed, 1 insertion(+) diff --git a/yt_dlp/extractor/laracasts.py b/yt_dlp/extractor/laracasts.py index ab0160fe6..adaca42ba 100644 --- a/yt_dlp/extractor/laracasts.py +++ b/yt_dlp/extractor/laracasts.py @@ -28,6 +28,7 @@ def _real_extract(self, url): playlist_id = self._search_regex(r'id":(?P<playlist_id>[0-9]+)', webpage, 'playlist_id') return self.playlist_result(self._entries(display_id, episode_count), playlist_id, playlist_title) + class LaracastsIE(InfoExtractor): IE_NAME = 'laracasts' _VALID_URL = r'https?://(?:www\.)?laracasts\.com/series/(?P<series>[^/?#]+)/episodes/(?P<episode_number>[0-9]+)' From 3afcbe99ca875487bb2de3c4227879b91f66023e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?A=2E=20Serta=C3=A7=20Akkaya?= Date: Wed, 29 May 2024 19:34:01 +0300 Subject: [PATCH 4/5] Major bugfix: Downloads weren't correctly iterating to requested episode due to misparsed JSON. 
--- yt_dlp/extractor/laracasts.py | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/yt_dlp/extractor/laracasts.py b/yt_dlp/extractor/laracasts.py index adaca42ba..f75589ac6 100644 --- a/yt_dlp/extractor/laracasts.py +++ b/yt_dlp/extractor/laracasts.py @@ -1,6 +1,6 @@ from .common import InfoExtractor from .vimeo import VimeoIE - +from ..utils import extract_attributes, get_element_html_by_id class LaracastsPlaylistIE(InfoExtractor): IE_NAME = 'laracasts:series' @@ -31,7 +31,7 @@ def _real_extract(self, url): class LaracastsIE(InfoExtractor): IE_NAME = 'laracasts' - _VALID_URL = r'https?://(?:www\.)?laracasts\.com/series/(?P<series>[^/?#]+)/episodes/(?P<episode_number>[0-9]+)' + _VALID_URL = r'https?://(?:www\.)?laracasts\.com/series/(?P<series>[\w\d-]+)/episodes/(?P<episode_number>[0-9]+)$' _TESTS = [{ 'url': 'https://laracasts.com/series/30-days-to-learn-laravel-11/episodes/1', 'md5': 'c8f5e7b02ad0e438ef9280a08c8493dc', @@ -47,11 +47,25 @@ class LaracastsIE(InfoExtractor): } }] - def _real_extract(self, url): + def extract_vimeo_id(self, url): mobj = self._match_valid_url(url) + series, episode_number = mobj.group('series', 'episode_number') display_id = '%s/%s' % (series, episode_number) + webpage = self._download_webpage(url, display_id) - video_id = self._search_regex(r'vimeoId":"(?P<vimeo_id>[0-9]+)', webpage, 'vimeo_id') + app_element = get_element_html_by_id('app', webpage) + app_attributes = extract_attributes(app_element) + app_json = self._parse_json(app_attributes.get('data-page'), display_id) + series_chapters = app_json['props']['series']['chapters'] + + for chapter in series_chapters: + for episode in chapter['episodes']: + if int(episode['position']) == int(episode_number): + return episode['vimeoId'] + + def _real_extract(self, url): + video_id = self.extract_vimeo_id(url) embed_url = VimeoIE._smuggle_referrer(f'https://player.vimeo.com/video/{video_id}', 'https://laracasts.com/') + return self.url_result(embed_url) From 0ffa18096037df59793dedd0b674722621f6895f Mon 
Sep 17 00:00:00 2001 From: =?UTF-8?q?A=2E=20Serta=C3=A7=20Akkaya?= Date: Wed, 29 May 2024 19:40:53 +0300 Subject: [PATCH 5/5] Formatting --- yt_dlp/extractor/laracasts.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/laracasts.py b/yt_dlp/extractor/laracasts.py index f75589ac6..423058d83 100644 --- a/yt_dlp/extractor/laracasts.py +++ b/yt_dlp/extractor/laracasts.py @@ -1,6 +1,10 @@ from .common import InfoExtractor from .vimeo import VimeoIE -from ..utils import extract_attributes, get_element_html_by_id +from ..utils import ( + extract_attributes, + get_element_html_by_id, +) + class LaracastsPlaylistIE(InfoExtractor): IE_NAME = 'laracasts:series'