diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
index c4d3e812e..4deee572f 100644
--- a/.github/PULL_REQUEST_TEMPLATE.md
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -28,7 +28,6 @@ # PLEASE FOLLOW THE GUIDE BELOW
### Before submitting a *pull request* make sure you have:
- [ ] At least skimmed through [contributing guidelines](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#developer-instructions) including [yt-dlp coding conventions](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#yt-dlp-coding-conventions)
- [ ] [Searched](https://github.com/yt-dlp/yt-dlp/search?q=is%3Apr&type=Issues) the bugtracker for similar pull requests
-- [ ] Checked the code with [flake8](https://pypi.python.org/pypi/flake8) and [ran relevant tests](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#developer-instructions)
### In order to be accepted and merged into yt-dlp each piece of code must be in public domain or released under [Unlicense](http://unlicense.org/). Check all of the following options that apply:
- [ ] I am the original author of this code and I am willing to release it under [Unlicense](http://unlicense.org/)
diff --git a/.github/banner.svg b/.github/banner.svg
index 35dc93eae..ea7f9e306 100644
--- a/.github/banner.svg
+++ b/.github/banner.svg
@@ -1,4 +1,4 @@
-\n*([\d ]+)',
- webpage, 'view_count', default='').replace(' ', '')),
- 'like_count': int_or_none(self._search_regex(
- r'#icon-heart">(\d+)', webpage, 'link_count', default=None)),
- }
-
-
-class JablePlaylistIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?jable\.tv/(?:categories|models|tags)/(?P[\w-]+)'
- _TESTS = [{
- 'url': 'https://jable.tv/models/kaede-karen/',
- 'info_dict': {
- 'id': 'kaede-karen',
- 'title': '楓カレン',
- },
- 'playlist_count': 34,
- }, {
- 'url': 'https://jable.tv/categories/roleplay/',
- 'only_matching': True,
- }, {
- 'url': 'https://jable.tv/tags/girl/',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- playlist_id = self._match_id(url)
- webpage = self._download_webpage(url, playlist_id)
-
- def page_func(page_num):
- return [
- self.url_result(player_url, JableIE)
- for player_url in orderedSet(re.findall(
- r'href="(https://jable.tv/videos/[\w-]+/?)"',
- self._download_webpage(url, playlist_id, query={
- 'mode': 'async',
- 'from': page_num + 1,
- 'function': 'get_block',
- 'block_id': 'list_videos_common_videos_list',
- }, note=f'Downloading page {page_num + 1}')))]
-
- return self.playlist_result(
- InAdvancePagedList(page_func, int_or_none(self._search_regex(
- r'from:(\d+)">[^<]+\s*»', webpage, 'last page number', default=1)), 24),
- playlist_id, self._search_regex(
- r'([^<]+)', webpage, 'playlist title', default=None))
diff --git a/yt_dlp/extractor/jamendo.py b/yt_dlp/extractor/jamendo.py
index a2bbba397..8557a81ad 100644
--- a/yt_dlp/extractor/jamendo.py
+++ b/yt_dlp/extractor/jamendo.py
@@ -1,8 +1,8 @@
import hashlib
import random
-from ..compat import compat_str
from .common import InfoExtractor
+from ..compat import compat_str
from ..utils import (
clean_html,
int_or_none,
diff --git a/yt_dlp/extractor/japandiet.py b/yt_dlp/extractor/japandiet.py
index 6c650568a..19d2b923b 100644
--- a/yt_dlp/extractor/japandiet.py
+++ b/yt_dlp/extractor/japandiet.py
@@ -1,5 +1,6 @@
import re
+from .common import InfoExtractor
from ..utils import (
ExtractorError,
clean_html,
@@ -9,9 +10,8 @@
smuggle_url,
traverse_obj,
try_call,
- unsmuggle_url
+ unsmuggle_url,
)
-from .common import InfoExtractor
def _parse_japanese_date(text):
diff --git a/yt_dlp/extractor/jeuxvideo.py b/yt_dlp/extractor/jeuxvideo.py
index 56ea15cf9..793820600 100644
--- a/yt_dlp/extractor/jeuxvideo.py
+++ b/yt_dlp/extractor/jeuxvideo.py
@@ -2,6 +2,8 @@
class JeuxVideoIE(InfoExtractor):
+ _WORKING = False
+ _ENABLED = None # XXX: pass through to GenericIE
_VALID_URL = r'https?://.*?\.jeuxvideo\.com/.*/(.*?)\.htm'
_TESTS = [{
diff --git a/yt_dlp/extractor/jiocinema.py b/yt_dlp/extractor/jiocinema.py
new file mode 100644
index 000000000..e7186d75c
--- /dev/null
+++ b/yt_dlp/extractor/jiocinema.py
@@ -0,0 +1,403 @@
+import base64
+import itertools
+import json
+import random
+import re
+import string
+import time
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ float_or_none,
+ int_or_none,
+ jwt_decode_hs256,
+ parse_age_limit,
+ try_call,
+ url_or_none,
+)
+from ..utils.traversal import traverse_obj
+
+
+class JioCinemaBaseIE(InfoExtractor):
+ _NETRC_MACHINE = 'jiocinema'
+ _GEO_BYPASS = False
+ _ACCESS_TOKEN = None
+ _REFRESH_TOKEN = None
+ _GUEST_TOKEN = None
+ _USER_ID = None
+ _DEVICE_ID = None
+ _API_HEADERS = {'Origin': 'https://www.jiocinema.com', 'Referer': 'https://www.jiocinema.com/'}
+ _APP_NAME = {'appName': 'RJIL_JioCinema'}
+ _APP_VERSION = {'appVersion': '5.0.0'}
+ _API_SIGNATURES = 'o668nxgzwff'
+ _METADATA_API_BASE = 'https://content-jiovoot.voot.com/psapi'
+ _ACCESS_HINT = 'the `accessToken` from your browser local storage'
+ _LOGIN_HINT = (
+ 'Log in with "-u phone -p " to authenticate with OTP, '
+ f'or use "-u token -p " to log in with {_ACCESS_HINT}. '
+ 'If you have previously logged in with yt-dlp and your session '
+ 'has been cached, you can use "-u device -p "')
+
+ def _cache_token(self, token_type):
+ assert token_type in ('access', 'refresh', 'all')
+ if token_type in ('access', 'all'):
+ self.cache.store(
+ JioCinemaBaseIE._NETRC_MACHINE, f'{JioCinemaBaseIE._DEVICE_ID}-access', JioCinemaBaseIE._ACCESS_TOKEN)
+ if token_type in ('refresh', 'all'):
+ self.cache.store(
+ JioCinemaBaseIE._NETRC_MACHINE, f'{JioCinemaBaseIE._DEVICE_ID}-refresh', JioCinemaBaseIE._REFRESH_TOKEN)
+
+ def _call_api(self, url, video_id, note='Downloading API JSON', headers={}, data={}):
+ return self._download_json(
+ url, video_id, note, data=json.dumps(data, separators=(',', ':')).encode(), headers={
+ 'Content-Type': 'application/json',
+ 'Accept': 'application/json',
+ **self._API_HEADERS,
+ **headers,
+ }, expected_status=(400, 403, 474))
+
+ def _call_auth_api(self, service, endpoint, note, headers={}, data={}):
+ return self._call_api(
+ f'https://auth-jiocinema.voot.com/{service}service/apis/v4/{endpoint}',
+ None, note=note, headers=headers, data=data)
+
+ def _refresh_token(self):
+ if not JioCinemaBaseIE._REFRESH_TOKEN or not JioCinemaBaseIE._DEVICE_ID:
+ raise ExtractorError('User token has expired', expected=True)
+ response = self._call_auth_api(
+ 'token', 'refreshtoken', 'Refreshing token',
+ headers={'accesstoken': self._ACCESS_TOKEN}, data={
+ **self._APP_NAME,
+ 'deviceId': self._DEVICE_ID,
+ 'refreshToken': self._REFRESH_TOKEN,
+ **self._APP_VERSION,
+ })
+ refresh_token = response.get('refreshTokenId')
+ if refresh_token and refresh_token != JioCinemaBaseIE._REFRESH_TOKEN:
+ JioCinemaBaseIE._REFRESH_TOKEN = refresh_token
+ self._cache_token('refresh')
+ JioCinemaBaseIE._ACCESS_TOKEN = response['authToken']
+ self._cache_token('access')
+
+ def _fetch_guest_token(self):
+ JioCinemaBaseIE._DEVICE_ID = ''.join(random.choices(string.digits, k=10))
+ guest_token = self._call_auth_api(
+ 'token', 'guest', 'Downloading guest token', data={
+ **self._APP_NAME,
+ 'deviceType': 'phone',
+ 'os': 'ios',
+ 'deviceId': self._DEVICE_ID,
+ 'freshLaunch': False,
+ 'adId': self._DEVICE_ID,
+ **self._APP_VERSION,
+ })
+ self._GUEST_TOKEN = guest_token['authToken']
+ self._USER_ID = guest_token['userId']
+
+ def _call_login_api(self, endpoint, guest_token, data, note):
+ return self._call_auth_api(
+ 'user', f'loginotp/{endpoint}', note, headers={
+ **self.geo_verification_headers(),
+ 'accesstoken': self._GUEST_TOKEN,
+ **self._APP_NAME,
+ **traverse_obj(guest_token, 'data', {
+ 'deviceType': ('deviceType', {str}),
+ 'os': ('os', {str}),
+ })}, data=data)
+
+ def _is_token_expired(self, token):
+ return (try_call(lambda: jwt_decode_hs256(token)['exp']) or 0) <= int(time.time() - 180)
+
+ def _perform_login(self, username, password):
+ if self._ACCESS_TOKEN and not self._is_token_expired(self._ACCESS_TOKEN):
+ return
+
+ UUID_RE = r'[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12}'
+
+ if username.lower() == 'token':
+ if try_call(lambda: jwt_decode_hs256(password)):
+ JioCinemaBaseIE._ACCESS_TOKEN = password
+ refresh_hint = 'the `refreshToken` UUID from your browser local storage'
+ refresh_token = self._configuration_arg('refresh_token', [''], ie_key=JioCinemaIE)[0]
+ if not refresh_token:
+ self.to_screen(
+ 'To extend the life of your login session, in addition to your access token, '
+ 'you can pass --extractor-args "jiocinema:refresh_token=REFRESH_TOKEN" '
+ f'where REFRESH_TOKEN is {refresh_hint}')
+ elif re.fullmatch(UUID_RE, refresh_token):
+ JioCinemaBaseIE._REFRESH_TOKEN = refresh_token
+ else:
+ self.report_warning(f'Invalid refresh_token value. Use {refresh_hint}')
+ else:
+ raise ExtractorError(
+ f'The password given could not be decoded as a token; use {self._ACCESS_HINT}', expected=True)
+
+ elif username.lower() == 'device' and re.fullmatch(rf'(?:{UUID_RE}|\d+)', password):
+ JioCinemaBaseIE._REFRESH_TOKEN = self.cache.load(JioCinemaBaseIE._NETRC_MACHINE, f'{password}-refresh')
+ JioCinemaBaseIE._ACCESS_TOKEN = self.cache.load(JioCinemaBaseIE._NETRC_MACHINE, f'{password}-access')
+ if not JioCinemaBaseIE._REFRESH_TOKEN or not JioCinemaBaseIE._ACCESS_TOKEN:
+ raise ExtractorError(f'Failed to load cached tokens for device ID "{password}"', expected=True)
+
+ elif username.lower() == 'phone' and re.fullmatch(r'\+?\d+', password):
+ self._fetch_guest_token()
+ guest_token = jwt_decode_hs256(self._GUEST_TOKEN)
+ initial_data = {
+ 'number': base64.b64encode(password.encode()).decode(),
+ **self._APP_VERSION,
+ }
+ response = self._call_login_api('send', guest_token, initial_data, 'Requesting OTP')
+ if not traverse_obj(response, ('OTPInfo', {dict})):
+ raise ExtractorError('There was a problem with the phone number login attempt')
+
+ is_iphone = guest_token.get('os') == 'ios'
+ response = self._call_login_api('verify', guest_token, {
+ 'deviceInfo': {
+ 'consumptionDeviceName': 'iPhone' if is_iphone else 'Android',
+ 'info': {
+ 'platform': {'name': 'iPhone OS' if is_iphone else 'Android'},
+ 'androidId': self._DEVICE_ID,
+ 'type': 'iOS' if is_iphone else 'Android'
+ }
+ },
+ **initial_data,
+ 'otp': self._get_tfa_info('the one-time password sent to your phone')
+ }, 'Submitting OTP')
+ if traverse_obj(response, 'code') == 1043:
+ raise ExtractorError('Wrong OTP', expected=True)
+ JioCinemaBaseIE._REFRESH_TOKEN = response['refreshToken']
+ JioCinemaBaseIE._ACCESS_TOKEN = response['authToken']
+
+ else:
+ raise ExtractorError(self._LOGIN_HINT, expected=True)
+
+ user_token = jwt_decode_hs256(JioCinemaBaseIE._ACCESS_TOKEN)['data']
+ JioCinemaBaseIE._USER_ID = user_token['userId']
+ JioCinemaBaseIE._DEVICE_ID = user_token['deviceId']
+ if JioCinemaBaseIE._REFRESH_TOKEN and username != 'device':
+ self._cache_token('all')
+ if self.get_param('cachedir') is not False:
+ self.to_screen(
+ f'NOTE: For subsequent logins you can use "-u device -p {JioCinemaBaseIE._DEVICE_ID}"')
+ elif not JioCinemaBaseIE._REFRESH_TOKEN:
+ JioCinemaBaseIE._REFRESH_TOKEN = self.cache.load(
+ JioCinemaBaseIE._NETRC_MACHINE, f'{JioCinemaBaseIE._DEVICE_ID}-refresh')
+ if JioCinemaBaseIE._REFRESH_TOKEN:
+ self._cache_token('access')
+ self.to_screen(f'Logging in as device ID "{JioCinemaBaseIE._DEVICE_ID}"')
+ if self._is_token_expired(JioCinemaBaseIE._ACCESS_TOKEN):
+ self._refresh_token()
+
+
+class JioCinemaIE(JioCinemaBaseIE):
+ IE_NAME = 'jiocinema'
+ _VALID_URL = r'https?://(?:www\.)?jiocinema\.com/?(?:movies?/[^/?#]+/|tv-shows/(?:[^/?#]+/){3})(?P\d{3,})'
+ _TESTS = [{
+ 'url': 'https://www.jiocinema.com/tv-shows/agnisakshi-ek-samjhauta/1/pradeep-to-stop-the-wedding/3759931',
+ 'info_dict': {
+ 'id': '3759931',
+ 'ext': 'mp4',
+ 'title': 'Pradeep to stop the wedding?',
+ 'description': 'md5:75f72d1d1a66976633345a3de6d672b1',
+ 'episode': 'Pradeep to stop the wedding?',
+ 'episode_number': 89,
+ 'season': 'Agnisakshi…Ek Samjhauta-S1',
+ 'season_number': 1,
+ 'series': 'Agnisakshi Ek Samjhauta',
+ 'duration': 1238.0,
+ 'thumbnail': r're:https?://.+\.jpg',
+ 'age_limit': 13,
+ 'season_id': '3698031',
+ 'upload_date': '20230606',
+ 'timestamp': 1686009600,
+ 'release_date': '20230607',
+ 'genres': ['Drama'],
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }, {
+ 'url': 'https://www.jiocinema.com/movies/bhediya/3754021/watch',
+ 'info_dict': {
+ 'id': '3754021',
+ 'ext': 'mp4',
+ 'title': 'Bhediya',
+ 'description': 'md5:a6bf2900371ac2fc3f1447401a9f7bb0',
+ 'episode': 'Bhediya',
+ 'duration': 8500.0,
+ 'thumbnail': r're:https?://.+\.jpg',
+ 'age_limit': 13,
+ 'upload_date': '20230525',
+ 'timestamp': 1685026200,
+ 'release_date': '20230524',
+ 'genres': ['Comedy'],
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }]
+
+ def _extract_formats_and_subtitles(self, playback, video_id):
+ m3u8_url = traverse_obj(playback, (
+ 'data', 'playbackUrls', lambda _, v: v['streamtype'] == 'hls', 'url', {url_or_none}, any))
+ if not m3u8_url: # DRM-only content only serves dash urls
+ self.report_drm(video_id)
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, m3u8_id='hls')
+ self._remove_duplicate_formats(formats)
+
+ return {
+ # '/_definst_/smil:vod/' m3u8 manifests claim to have 720p+ formats but max out at 480p
+ 'formats': traverse_obj(formats, (
+ lambda _, v: '/_definst_/smil:vod/' not in v['url'] or v['height'] <= 480)),
+ 'subtitles': subtitles,
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ if not self._ACCESS_TOKEN and self._is_token_expired(self._GUEST_TOKEN):
+ self._fetch_guest_token()
+ elif self._ACCESS_TOKEN and self._is_token_expired(self._ACCESS_TOKEN):
+ self._refresh_token()
+
+ playback = self._call_api(
+ f'https://apis-jiovoot.voot.com/playbackjv/v3/{video_id}', video_id,
+ 'Downloading playback JSON', headers={
+ **self.geo_verification_headers(),
+ 'accesstoken': self._ACCESS_TOKEN or self._GUEST_TOKEN,
+ **self._APP_NAME,
+ 'deviceid': self._DEVICE_ID,
+ 'uniqueid': self._USER_ID,
+ 'x-apisignatures': self._API_SIGNATURES,
+ 'x-platform': 'androidweb',
+ 'x-platform-token': 'web',
+ }, data={
+ '4k': False,
+ 'ageGroup': '18+',
+ 'appVersion': '3.4.0',
+ 'bitrateProfile': 'xhdpi',
+ 'capability': {
+ 'drmCapability': {
+ 'aesSupport': 'yes',
+ 'fairPlayDrmSupport': 'none',
+ 'playreadyDrmSupport': 'none',
+ 'widevineDRMSupport': 'none'
+ },
+ 'frameRateCapability': [{
+ 'frameRateSupport': '30fps',
+ 'videoQuality': '1440p'
+ }]
+ },
+ 'continueWatchingRequired': False,
+ 'dolby': False,
+ 'downloadRequest': False,
+ 'hevc': False,
+ 'kidsSafe': False,
+ 'manufacturer': 'Windows',
+ 'model': 'Windows',
+ 'multiAudioRequired': True,
+ 'osVersion': '10',
+ 'parentalPinValid': True,
+ 'x-apisignatures': self._API_SIGNATURES
+ })
+
+ status_code = traverse_obj(playback, ('code', {int}))
+ if status_code == 474:
+ self.raise_geo_restricted(countries=['IN'])
+ elif status_code == 1008:
+ error_msg = 'This content is only available for premium users'
+ if self._ACCESS_TOKEN:
+ raise ExtractorError(error_msg, expected=True)
+ self.raise_login_required(f'{error_msg}. {self._LOGIN_HINT}', method=None)
+ elif status_code == 400:
+ raise ExtractorError('The requested content is not available', expected=True)
+ elif status_code is not None and status_code != 200:
+ raise ExtractorError(
+ f'JioCinema says: {traverse_obj(playback, ("message", {str})) or status_code}')
+
+ metadata = self._download_json(
+ f'{self._METADATA_API_BASE}/voot/v1/voot-web/content/query/asset-details',
+ video_id, fatal=False, query={
+ 'ids': f'include:{video_id}',
+ 'responseType': 'common',
+ 'devicePlatformType': 'desktop',
+ })
+
+ return {
+ 'id': video_id,
+ 'http_headers': self._API_HEADERS,
+ **self._extract_formats_and_subtitles(playback, video_id),
+ **traverse_obj(playback, ('data', {
+ # fallback metadata
+ 'title': ('name', {str}),
+ 'description': ('fullSynopsis', {str}),
+ 'series': ('show', 'name', {str}, {lambda x: x or None}),
+ 'season': ('tournamentName', {str}, {lambda x: x if x != 'Season 0' else None}),
+ 'season_number': ('episode', 'season', {int_or_none}, {lambda x: x or None}),
+ 'episode': ('fullTitle', {str}),
+ 'episode_number': ('episode', 'episodeNo', {int_or_none}, {lambda x: x or None}),
+ 'age_limit': ('ageNemonic', {parse_age_limit}),
+ 'duration': ('totalDuration', {float_or_none}),
+ 'thumbnail': ('images', {url_or_none}),
+ })),
+ **traverse_obj(metadata, ('result', 0, {
+ 'title': ('fullTitle', {str}),
+ 'description': ('fullSynopsis', {str}),
+ 'series': ('showName', {str}, {lambda x: x or None}),
+ 'season': ('seasonName', {str}, {lambda x: x or None}),
+ 'season_number': ('season', {int_or_none}),
+ 'season_id': ('seasonId', {str}, {lambda x: x or None}),
+ 'episode': ('fullTitle', {str}),
+ 'episode_number': ('episode', {int_or_none}),
+ 'timestamp': ('uploadTime', {int_or_none}),
+ 'release_date': ('telecastDate', {str}),
+ 'age_limit': ('ageNemonic', {parse_age_limit}),
+ 'duration': ('duration', {float_or_none}),
+ 'genres': ('genres', ..., {str}),
+ 'thumbnail': ('seo', 'ogImage', {url_or_none}),
+ })),
+ }
+
+
+class JioCinemaSeriesIE(JioCinemaBaseIE):
+ IE_NAME = 'jiocinema:series'
+ _VALID_URL = r'https?://(?:www\.)?jiocinema\.com/tv-shows/(?P[\w-]+)/(?P\d{3,})'
+ _TESTS = [{
+ 'url': 'https://www.jiocinema.com/tv-shows/naagin/3499917',
+ 'info_dict': {
+ 'id': '3499917',
+ 'title': 'naagin',
+ },
+ 'playlist_mincount': 120,
+ }]
+
+ def _entries(self, series_id):
+ seasons = self._download_json(
+ f'{self._METADATA_API_BASE}/voot/v1/voot-web/content/generic/season-by-show', series_id,
+ 'Downloading series metadata JSON', query={
+ 'sort': 'season:asc',
+ 'id': series_id,
+ 'responseType': 'common',
+ })
+
+ for season_num, season in enumerate(traverse_obj(seasons, ('result', lambda _, v: v['id'])), 1):
+ season_id = season['id']
+ label = season.get('season') or season_num
+ for page_num in itertools.count(1):
+ episodes = traverse_obj(self._download_json(
+ f'{self._METADATA_API_BASE}/voot/v1/voot-web/content/generic/series-wise-episode',
+ season_id, f'Downloading season {label} page {page_num} JSON', query={
+ 'sort': 'episode:asc',
+ 'id': season_id,
+ 'responseType': 'common',
+ 'page': page_num,
+ }), ('result', lambda _, v: v['id'] and url_or_none(v['slug'])))
+ if not episodes:
+ break
+ for episode in episodes:
+ yield self.url_result(
+ episode['slug'], JioCinemaIE, **traverse_obj(episode, {
+ 'video_id': 'id',
+ 'video_title': ('fullTitle', {str}),
+ 'season_number': ('season', {int_or_none}),
+ 'episode_number': ('episode', {int_or_none}),
+ }))
+
+ def _real_extract(self, url):
+ slug, series_id = self._match_valid_url(url).group('slug', 'id')
+ return self.playlist_result(self._entries(series_id), series_id, slug)
diff --git a/yt_dlp/extractor/jiosaavn.py b/yt_dlp/extractor/jiosaavn.py
index a59209835..35fb3fd6b 100644
--- a/yt_dlp/extractor/jiosaavn.py
+++ b/yt_dlp/extractor/jiosaavn.py
@@ -1,89 +1,143 @@
+import functools
+import math
+import re
+
from .common import InfoExtractor
from ..utils import (
+ InAdvancePagedList,
+ clean_html,
int_or_none,
- js_to_json,
+ make_archive_id,
+ smuggle_url,
+ unsmuggle_url,
+ url_basename,
url_or_none,
urlencode_postdata,
- urljoin,
)
from ..utils.traversal import traverse_obj
class JioSaavnBaseIE(InfoExtractor):
- def _extract_initial_data(self, url, audio_id):
- webpage = self._download_webpage(url, audio_id)
- return self._search_json(
- r'window\.__INITIAL_DATA__\s*=', webpage,
- 'init json', audio_id, transform_source=js_to_json)
+ _API_URL = 'https://www.jiosaavn.com/api.php'
+ _VALID_BITRATES = {'16', '32', '64', '128', '320'}
-
-class JioSaavnSongIE(JioSaavnBaseIE):
- _VALID_URL = r'https?://(?:www\.)?(?:jiosaavn\.com/song/[^/?#]+/|saavn\.com/s/song/(?:[^/?#]+/){3})(?P[^/?#]+)'
- _TESTS = [{
- 'url': 'https://www.jiosaavn.com/song/leja-re/OQsEfQFVUXk',
- 'md5': '3b84396d15ed9e083c3106f1fa589c04',
- 'info_dict': {
- 'id': 'OQsEfQFVUXk',
- 'ext': 'mp4',
- 'title': 'Leja Re',
- 'album': 'Leja Re',
- 'thumbnail': 'https://c.saavncdn.com/258/Leja-Re-Hindi-2018-20181124024539-500x500.jpg',
- 'duration': 205,
- 'view_count': int,
- 'release_year': 2018,
- },
- }, {
- 'url': 'https://www.saavn.com/s/song/hindi/Saathiya/O-Humdum-Suniyo-Re/KAMiazoCblU',
- 'only_matching': True,
- }]
-
- _VALID_BITRATES = ('16', '32', '64', '128', '320')
-
- def _real_extract(self, url):
- audio_id = self._match_id(url)
- extract_bitrates = self._configuration_arg('bitrate', ['128', '320'], ie_key='JioSaavn')
- if invalid_bitrates := [br for br in extract_bitrates if br not in self._VALID_BITRATES]:
+ @functools.cached_property
+ def requested_bitrates(self):
+ requested_bitrates = self._configuration_arg('bitrate', ['128', '320'], ie_key='JioSaavn')
+ if invalid_bitrates := set(requested_bitrates) - self._VALID_BITRATES:
raise ValueError(
f'Invalid bitrate(s): {", ".join(invalid_bitrates)}. '
- + f'Valid bitrates are: {", ".join(self._VALID_BITRATES)}')
+ + f'Valid bitrates are: {", ".join(sorted(self._VALID_BITRATES, key=int))}')
+ return requested_bitrates
- song_data = self._extract_initial_data(url, audio_id)['song']['song']
- formats = []
- for bitrate in extract_bitrates:
+ def _extract_formats(self, song_data):
+ for bitrate in self.requested_bitrates:
media_data = self._download_json(
- 'https://www.jiosaavn.com/api.php', audio_id, f'Downloading format info for {bitrate}',
+ self._API_URL, song_data['id'],
+ f'Downloading format info for {bitrate}',
fatal=False, data=urlencode_postdata({
'__call': 'song.generateAuthToken',
'_format': 'json',
'bitrate': bitrate,
'url': song_data['encrypted_media_url'],
}))
- if not media_data.get('auth_url'):
+ if not traverse_obj(media_data, ('auth_url', {url_or_none})):
self.report_warning(f'Unable to extract format info for {bitrate}')
continue
- formats.append({
+ ext = media_data.get('type')
+ yield {
'url': media_data['auth_url'],
- 'ext': media_data.get('type'),
+ 'ext': 'm4a' if ext == 'mp4' else ext,
'format_id': bitrate,
'abr': int(bitrate),
'vcodec': 'none',
+ }
+
+ def _extract_song(self, song_data, url=None):
+ info = traverse_obj(song_data, {
+ 'id': ('id', {str}),
+ 'title': ('song', {clean_html}),
+ 'album': ('album', {clean_html}),
+ 'thumbnail': ('image', {url_or_none}, {lambda x: re.sub(r'-\d+x\d+\.', '-500x500.', x)}),
+ 'duration': ('duration', {int_or_none}),
+ 'view_count': ('play_count', {int_or_none}),
+ 'release_year': ('year', {int_or_none}),
+ 'artists': ('primary_artists', {lambda x: x.split(', ') if x else None}),
+ 'webpage_url': ('perma_url', {url_or_none}),
+ })
+ if webpage_url := info.get('webpage_url') or url:
+ info['display_id'] = url_basename(webpage_url)
+ info['_old_archive_ids'] = [make_archive_id(JioSaavnSongIE, info['display_id'])]
+
+ return info
+
+ def _call_api(self, type_, token, note='API', params={}):
+ return self._download_json(
+ self._API_URL, token, f'Downloading {note} JSON', f'Unable to download {note} JSON',
+ query={
+ '__call': 'webapi.get',
+ '_format': 'json',
+ '_marker': '0',
+ 'ctx': 'web6dot0',
+ 'token': token,
+ 'type': type_,
+ **params,
})
- return {
- 'id': audio_id,
- 'formats': formats,
- **traverse_obj(song_data, {
- 'title': ('title', 'text'),
- 'album': ('album', 'text'),
- 'thumbnail': ('image', 0, {url_or_none}),
- 'duration': ('duration', {int_or_none}),
- 'view_count': ('play_count', {int_or_none}),
- 'release_year': ('year', {int_or_none}),
- }),
- }
+ def _yield_songs(self, playlist_data):
+ for song_data in traverse_obj(playlist_data, ('songs', lambda _, v: v['id'] and v['perma_url'])):
+ song_info = self._extract_song(song_data)
+ url = smuggle_url(song_info['webpage_url'], {
+ 'id': song_data['id'],
+ 'encrypted_media_url': song_data['encrypted_media_url'],
+ })
+ yield self.url_result(url, JioSaavnSongIE, url_transparent=True, **song_info)
+
+
+class JioSaavnSongIE(JioSaavnBaseIE):
+ IE_NAME = 'jiosaavn:song'
+ _VALID_URL = r'https?://(?:www\.)?(?:jiosaavn\.com/song/[^/?#]+/|saavn\.com/s/song/(?:[^/?#]+/){3})(?P[^/?#]+)'
+ _TESTS = [{
+ 'url': 'https://www.jiosaavn.com/song/leja-re/OQsEfQFVUXk',
+ 'md5': '3b84396d15ed9e083c3106f1fa589c04',
+ 'info_dict': {
+ 'id': 'IcoLuefJ',
+ 'display_id': 'OQsEfQFVUXk',
+ 'ext': 'm4a',
+ 'title': 'Leja Re',
+ 'album': 'Leja Re',
+ 'thumbnail': r're:https?://c.saavncdn.com/258/Leja-Re-Hindi-2018-20181124024539-500x500.jpg',
+ 'duration': 205,
+ 'view_count': int,
+ 'release_year': 2018,
+ 'artists': ['Sandesh Shandilya', 'Dhvani Bhanushali', 'Tanishk Bagchi'],
+ '_old_archive_ids': ['jiosaavnsong OQsEfQFVUXk'],
+ },
+ }, {
+ 'url': 'https://www.saavn.com/s/song/hindi/Saathiya/O-Humdum-Suniyo-Re/KAMiazoCblU',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ url, smuggled_data = unsmuggle_url(url)
+ song_data = traverse_obj(smuggled_data, ({
+ 'id': ('id', {str}),
+ 'encrypted_media_url': ('encrypted_media_url', {str}),
+ }))
+
+ if 'id' in song_data and 'encrypted_media_url' in song_data:
+ result = {'id': song_data['id']}
+ else:
+ # only extract metadata if this is not a url_transparent result
+ song_data = self._call_api('song', self._match_id(url))['songs'][0]
+ result = self._extract_song(song_data, url)
+
+ result['formats'] = list(self._extract_formats(song_data))
+ return result
class JioSaavnAlbumIE(JioSaavnBaseIE):
+ IE_NAME = 'jiosaavn:album'
_VALID_URL = r'https?://(?:www\.)?(?:jio)?saavn\.com/album/[^/?#]+/(?P[^/?#]+)'
_TESTS = [{
'url': 'https://www.jiosaavn.com/album/96/buIOjYZDrNA_',
@@ -95,11 +149,46 @@ class JioSaavnAlbumIE(JioSaavnBaseIE):
}]
def _real_extract(self, url):
- album_id = self._match_id(url)
- album_view = self._extract_initial_data(url, album_id)['albumView']
+ display_id = self._match_id(url)
+ album_data = self._call_api('album', display_id)
- return self.playlist_from_matches(
- traverse_obj(album_view, (
- 'modules', lambda _, x: x['key'] == 'list', 'data', ..., 'title', 'action', {str})),
- album_id, traverse_obj(album_view, ('album', 'title', 'text', {str})), ie=JioSaavnSongIE,
- getter=lambda x: urljoin('https://www.jiosaavn.com/', x))
+ return self.playlist_result(
+ self._yield_songs(album_data), display_id, traverse_obj(album_data, ('title', {str})))
+
+
+class JioSaavnPlaylistIE(JioSaavnBaseIE):
+ IE_NAME = 'jiosaavn:playlist'
+ _VALID_URL = r'https?://(?:www\.)?(?:jio)?saavn\.com/s/playlist/(?:[^/?#]+/){2}(?P[^/?#]+)'
+ _TESTS = [{
+ 'url': 'https://www.jiosaavn.com/s/playlist/2279fbe391defa793ad7076929a2f5c9/mood-english/LlJ8ZWT1ibN5084vKHRj2Q__',
+ 'info_dict': {
+ 'id': 'LlJ8ZWT1ibN5084vKHRj2Q__',
+ 'title': 'Mood English',
+ },
+ 'playlist_mincount': 301,
+ }, {
+ 'url': 'https://www.jiosaavn.com/s/playlist/2279fbe391defa793ad7076929a2f5c9/mood-hindi/DVR,pFUOwyXqIp77B1JF,A__',
+ 'info_dict': {
+ 'id': 'DVR,pFUOwyXqIp77B1JF,A__',
+ 'title': 'Mood Hindi',
+ },
+ 'playlist_mincount': 801,
+ }]
+ _PAGE_SIZE = 50
+
+ def _fetch_page(self, token, page):
+ return self._call_api(
+ 'playlist', token, f'playlist page {page}', {'p': page, 'n': self._PAGE_SIZE})
+
+ def _entries(self, token, first_page_data, page):
+ page_data = first_page_data if not page else self._fetch_page(token, page + 1)
+ yield from self._yield_songs(page_data)
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ playlist_data = self._fetch_page(display_id, 1)
+ total_pages = math.ceil(int(playlist_data['list_count']) / self._PAGE_SIZE)
+
+ return self.playlist_result(InAdvancePagedList(
+ functools.partial(self._entries, display_id, playlist_data),
+ total_pages, self._PAGE_SIZE), display_id, traverse_obj(playlist_data, ('listname', {str})))
diff --git a/yt_dlp/extractor/joqrag.py b/yt_dlp/extractor/joqrag.py
index 3bb28af94..7a91d4a23 100644
--- a/yt_dlp/extractor/joqrag.py
+++ b/yt_dlp/extractor/joqrag.py
@@ -1,4 +1,4 @@
-import datetime
+import datetime as dt
import urllib.parse
from .common import InfoExtractor
@@ -50,8 +50,8 @@ def _extract_metadata(self, variable, html):
def _extract_start_timestamp(self, video_id, is_live):
def extract_start_time_from(date_str):
- dt = datetime_from_str(date_str) + datetime.timedelta(hours=9)
- date = dt.strftime('%Y%m%d')
+ dt_ = datetime_from_str(date_str) + dt.timedelta(hours=9)
+ date = dt_.strftime('%Y%m%d')
start_time = self._search_regex(
r']+\bclass="dailyProgram-itemHeaderTime"[^>]*>[\s\d:]+–\s*(\d{1,2}:\d{1,2})',
self._download_webpage(
@@ -60,7 +60,7 @@ def extract_start_time_from(date_str):
errnote=f'Failed to download program list of {date}') or '',
'start time', default=None)
if start_time:
- return unified_timestamp(f'{dt.strftime("%Y/%m/%d")} {start_time} +09:00')
+ return unified_timestamp(f'{dt_.strftime("%Y/%m/%d")} {start_time} +09:00')
return None
start_timestamp = extract_start_time_from('today')
@@ -80,14 +80,14 @@ def _real_extract(self, url):
note='Downloading metadata', errnote='Failed to download metadata')
title = self._extract_metadata('Program_name', metadata)
- if title == '放送休止':
+ if not title or title == '放送休止':
formats = []
live_status = 'is_upcoming'
release_timestamp = self._extract_start_timestamp(video_id, False)
msg = 'This stream is not currently live'
if release_timestamp:
msg += (' and will start at '
- + datetime.datetime.fromtimestamp(release_timestamp).strftime('%Y-%m-%d %H:%M:%S'))
+ + dt.datetime.fromtimestamp(release_timestamp).strftime('%Y-%m-%d %H:%M:%S'))
self.raise_no_formats(msg, expected=True)
else:
m3u8_path = self._search_regex(
diff --git a/yt_dlp/extractor/jove.py b/yt_dlp/extractor/jove.py
index 245fe73d4..8069fea4c 100644
--- a/yt_dlp/extractor/jove.py
+++ b/yt_dlp/extractor/jove.py
@@ -1,8 +1,5 @@
from .common import InfoExtractor
-from ..utils import (
- ExtractorError,
- unified_strdate
-)
+from ..utils import ExtractorError, unified_strdate
class JoveIE(InfoExtractor):
diff --git a/yt_dlp/extractor/jstream.py b/yt_dlp/extractor/jstream.py
index 3e2e62712..00ac7ccca 100644
--- a/yt_dlp/extractor/jstream.py
+++ b/yt_dlp/extractor/jstream.py
@@ -1,6 +1,6 @@
import base64
-import re
import json
+import re
from .common import InfoExtractor
from ..utils import (
diff --git a/yt_dlp/extractor/kakao.py b/yt_dlp/extractor/kakao.py
index 43055e89d..563aa2d72 100644
--- a/yt_dlp/extractor/kakao.py
+++ b/yt_dlp/extractor/kakao.py
@@ -3,8 +3,8 @@
from ..utils import (
ExtractorError,
int_or_none,
- strip_or_none,
str_or_none,
+ strip_or_none,
traverse_obj,
unified_timestamp,
)
diff --git a/yt_dlp/extractor/kaltura.py b/yt_dlp/extractor/kaltura.py
index 95e2deea5..4752d5a55 100644
--- a/yt_dlp/extractor/kaltura.py
+++ b/yt_dlp/extractor/kaltura.py
@@ -4,18 +4,18 @@
from .common import InfoExtractor
from ..compat import (
- compat_urlparse,
compat_parse_qs,
+ compat_urlparse,
)
from ..utils import (
- clean_html,
ExtractorError,
+ clean_html,
format_field,
int_or_none,
- unsmuggle_url,
+ remove_start,
smuggle_url,
traverse_obj,
- remove_start
+ unsmuggle_url,
)
diff --git a/yt_dlp/extractor/kanal2.py b/yt_dlp/extractor/kanal2.py
deleted file mode 100644
index 3c0efe598..000000000
--- a/yt_dlp/extractor/kanal2.py
+++ /dev/null
@@ -1,66 +0,0 @@
-from .common import InfoExtractor
-from ..utils import (
- ExtractorError,
- join_nonempty,
- traverse_obj,
- unified_timestamp,
- update_url_query,
-)
-
-
-class Kanal2IE(InfoExtractor):
- _VALID_URL = r'https?://kanal2\.postimees\.ee/[^?#]+\?([^#]+&)?id=(?P<id>\d+)'
- _TESTS = [{
- 'note': 'Test standard url (#5575)',
- 'url': 'https://kanal2.postimees.ee/pluss/video/?id=40792',
- 'md5': '7ea7b16266ec1798743777df241883dd',
- 'info_dict': {
- 'id': '40792',
- 'ext': 'mp4',
- 'title': 'Aedniku aabits / Osa 53 (05.08.2016 20:00)',
- 'thumbnail': r're:https?://.*\.jpg$',
- 'description': 'md5:53cabf3c5d73150d594747f727431248',
- 'upload_date': '20160805',
- 'timestamp': 1470420000,
- },
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- playlist = self._download_json(
- f'https://kanal2.postimees.ee/player/playlist/{video_id}',
- video_id, query={'type': 'episodes'},
- headers={'X-Requested-With': 'XMLHttpRequest'})
-
- return {
- 'id': video_id,
- 'title': join_nonempty(*traverse_obj(playlist, ('info', ('title', 'subtitle'))), delim=' / '),
- 'description': traverse_obj(playlist, ('info', 'description')),
- 'thumbnail': traverse_obj(playlist, ('data', 'image')),
- 'formats': self.get_formats(playlist, video_id),
- 'timestamp': unified_timestamp(self._search_regex(
- r'\((\d{2}\.\d{2}\.\d{4}\s\d{2}:\d{2})\)$',
- traverse_obj(playlist, ('info', 'subtitle')), 'timestamp', default='') + ' +0200'),
- }
-
- def get_formats(self, playlist, video_id):
- path = traverse_obj(playlist, ('data', 'path'))
- if not path:
- raise ExtractorError('Path value not found in playlist JSON response')
- session = self._download_json(
- 'https://sts.postimees.ee/session/register',
- video_id, note='Creating session', errnote='Error creating session',
- headers={
- 'X-Original-URI': path,
- 'Accept': 'application/json',
- })
- if session.get('reason') != 'OK' or not session.get('session'):
- reason = session.get('reason', 'unknown error')
- raise ExtractorError(f'Unable to obtain session: {reason}')
-
- formats = []
- for stream in traverse_obj(playlist, ('data', 'streams', ..., 'file')):
- formats.extend(self._extract_m3u8_formats(
- update_url_query(stream, {'s': session['session']}), video_id, 'mp4'))
-
- return formats
diff --git a/yt_dlp/extractor/kankanews.py b/yt_dlp/extractor/kankanews.py
index 46e239bd6..3d74c745c 100644
--- a/yt_dlp/extractor/kankanews.py
+++ b/yt_dlp/extractor/kankanews.py
@@ -1,13 +1,14 @@
-import time
+import hashlib
import random
import string
-import hashlib
+import time
import urllib.parse
from .common import InfoExtractor
class KankaNewsIE(InfoExtractor):
+ _WORKING = False
 _VALID_URL = r'https?://(?:www\.)?kankanews\.com/a/\d+\-\d+\-\d+/(?P<id>\d+)\.shtml'
_TESTS = [{
'url': 'https://www.kankanews.com/a/2022-11-08/00310276054.shtml?appid=1088227',
diff --git a/yt_dlp/extractor/karrierevideos.py b/yt_dlp/extractor/karrierevideos.py
deleted file mode 100644
index 28d4841aa..000000000
--- a/yt_dlp/extractor/karrierevideos.py
+++ /dev/null
@@ -1,96 +0,0 @@
-from .common import InfoExtractor
-from ..compat import compat_urlparse
-from ..utils import (
- fix_xml_ampersands,
- float_or_none,
- xpath_with_ns,
- xpath_text,
-)
-
-
-class KarriereVideosIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?karrierevideos\.at(?:/[^/]+)+/(?P<id>[^/]+)'
- _TESTS = [{
- 'url': 'http://www.karrierevideos.at/berufsvideos/mittlere-hoehere-schulen/altenpflegerin',
- 'info_dict': {
- 'id': '32c91',
- 'ext': 'flv',
- 'title': 'AltenpflegerIn',
- 'description': 'md5:dbadd1259fde2159a9b28667cb664ae2',
- 'thumbnail': r're:^http://.*\.png',
- },
- 'params': {
- # rtmp download
- 'skip_download': True,
- }
- }, {
- # broken ampersands
- 'url': 'http://www.karrierevideos.at/orientierung/vaeterkarenz-und-neue-chancen-fuer-muetter-baby-was-nun',
- 'info_dict': {
- 'id': '5sniu',
- 'ext': 'flv',
- 'title': 'Väterkarenz und neue Chancen für Mütter - "Baby - was nun?"',
- 'description': 'md5:97092c6ad1fd7d38e9d6a5fdeb2bcc33',
- 'thumbnail': r're:^http://.*\.png',
- },
- 'params': {
- # rtmp download
- 'skip_download': True,
- }
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(url, video_id)
-
- title = (self._html_search_meta('title', webpage, default=None)
- or self._search_regex(r'<h1 class="title">([^<]+)</h1>', webpage, 'video title'))
-
- video_id = self._search_regex(
- r'/config/video/(.+?)\.xml', webpage, 'video id')
- # Server returns malformed headers
- # Force Accept-Encoding: * to prevent gzipped results
- playlist = self._download_xml(
- 'http://www.karrierevideos.at/player-playlist.xml.php?p=%s' % video_id,
- video_id, transform_source=fix_xml_ampersands,
- headers={'Accept-Encoding': '*'})
-
- NS_MAP = {
- 'jwplayer': 'http://developer.longtailvideo.com/trac/wiki/FlashFormats'
- }
-
- def ns(path):
- return xpath_with_ns(path, NS_MAP)
-
- item = playlist.find('./tracklist/item')
- video_file = xpath_text(
- item, ns('./jwplayer:file'), 'video url', fatal=True)
- streamer = xpath_text(
- item, ns('./jwplayer:streamer'), 'streamer', fatal=True)
-
- uploader = xpath_text(
- item, ns('./jwplayer:author'), 'uploader')
- duration = float_or_none(
- xpath_text(item, ns('./jwplayer:duration'), 'duration'))
-
- description = self._html_search_regex(
- r'(?s)<div class="leadtext">(.+?)</div>',
- webpage, 'description')
-
- thumbnail = self._html_search_meta(
- 'thumbnail', webpage, 'thumbnail')
- if thumbnail:
- thumbnail = compat_urlparse.urljoin(url, thumbnail)
-
- return {
- 'id': video_id,
- 'url': streamer.replace('rtmpt', 'rtmp'),
- 'play_path': 'mp4:%s' % video_file,
- 'ext': 'flv',
- 'title': title,
- 'description': description,
- 'thumbnail': thumbnail,
- 'uploader': uploader,
- 'duration': duration,
- }
diff --git a/yt_dlp/extractor/kelbyone.py b/yt_dlp/extractor/kelbyone.py
index 2ca9ad426..bba527e29 100644
--- a/yt_dlp/extractor/kelbyone.py
+++ b/yt_dlp/extractor/kelbyone.py
@@ -3,6 +3,7 @@
class KelbyOneIE(InfoExtractor):
+ _WORKING = False
 _VALID_URL = r'https?://members\.kelbyone\.com/course/(?P<id>[^$&?#/]+)'
_TESTS = [{
diff --git a/yt_dlp/extractor/kick.py b/yt_dlp/extractor/kick.py
index d12437242..889548f52 100644
--- a/yt_dlp/extractor/kick.py
+++ b/yt_dlp/extractor/kick.py
@@ -13,7 +13,8 @@
class KickBaseIE(InfoExtractor):
def _real_initialize(self):
- self._request_webpage(HEADRequest('https://kick.com/'), None, 'Setting up session', fatal=False)
+ self._request_webpage(
+ HEADRequest('https://kick.com/'), None, 'Setting up session', fatal=False, impersonate=True)
xsrf_token = self._get_cookies('https://kick.com/').get('XSRF-TOKEN')
if not xsrf_token:
self.write_debug('kick.com did not set XSRF-TOKEN cookie')
@@ -25,7 +26,7 @@ def _real_initialize(self):
def _call_api(self, path, display_id, note='Downloading API JSON', headers={}, **kwargs):
return self._download_json(
f'https://kick.com/api/v1/{path}', display_id, note=note,
- headers=merge_dicts(headers, self._API_HEADERS), **kwargs)
+ headers=merge_dicts(headers, self._API_HEADERS), impersonate=True, **kwargs)
class KickIE(KickBaseIE):
@@ -82,26 +83,27 @@ def _real_extract(self, url):
class KickVODIE(KickBaseIE):
 _VALID_URL = r'https?://(?:www\.)?kick\.com/video/(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})'
_TESTS = [{
- 'url': 'https://kick.com/video/54244b5e-050a-4df4-a013-b2433dafbe35',
- 'md5': '73691206a6a49db25c5aa1588e6538fc',
+ 'url': 'https://kick.com/video/58bac65b-e641-4476-a7ba-3707a35e60e3',
+ 'md5': '3870f94153e40e7121a6e46c068b70cb',
'info_dict': {
- 'id': '54244b5e-050a-4df4-a013-b2433dafbe35',
+ 'id': '58bac65b-e641-4476-a7ba-3707a35e60e3',
'ext': 'mp4',
- 'title': 'Making 710-carBoosting. Kinda No Pixel inspired. !guilded - !links',
- 'description': 'md5:a0d3546bf7955d0a8252ffe0fd6f518f',
- 'channel': 'kmack710',
- 'channel_id': '16278',
- 'uploader': 'Kmack710',
- 'uploader_id': '16412',
- 'upload_date': '20221206',
- 'timestamp': 1670318289,
- 'duration': 40104.0,
+ 'title': '🤠REBIRTH IS BACK!!!!🤠!stake CODE JAREDFPS 🤠',
+ 'description': 'md5:02b0c46f9b4197fb545ab09dddb85b1d',
+ 'channel': 'jaredfps',
+ 'channel_id': '26608',
+ 'uploader': 'JaredFPS',
+ 'uploader_id': '26799',
+ 'upload_date': '20240402',
+ 'timestamp': 1712097108,
+ 'duration': 33859.0,
'thumbnail': r're:^https?://.*\.jpg',
- 'categories': ['Grand Theft Auto V'],
+ 'categories': ['Call of Duty: Warzone'],
},
'params': {
'skip_download': 'm3u8',
},
+ 'expected_warnings': [r'impersonation'],
}]
def _real_extract(self, url):
diff --git a/yt_dlp/extractor/kommunetv.py b/yt_dlp/extractor/kommunetv.py
index a30905b57..432816cd8 100644
--- a/yt_dlp/extractor/kommunetv.py
+++ b/yt_dlp/extractor/kommunetv.py
@@ -3,7 +3,7 @@
class KommunetvIE(InfoExtractor):
- _VALID_URL = r'https://\w+\.kommunetv\.no/archive/(?P<id>\w+)'
+ _VALID_URL = r'https?://\w+\.kommunetv\.no/archive/(?P<id>\w+)'
_TEST = {
'url': 'https://oslo.kommunetv.no/archive/921',
'md5': '5f102be308ee759be1e12b63d5da4bbc',
diff --git a/yt_dlp/extractor/konserthusetplay.py b/yt_dlp/extractor/konserthusetplay.py
deleted file mode 100644
index 10767f1b6..000000000
--- a/yt_dlp/extractor/konserthusetplay.py
+++ /dev/null
@@ -1,119 +0,0 @@
-from .common import InfoExtractor
-from ..utils import (
- determine_ext,
- float_or_none,
- int_or_none,
- url_or_none,
-)
-
-
-class KonserthusetPlayIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?(?:konserthusetplay|rspoplay)\.se/\?.*\bm=(?P<id>[^&]+)'
- _TESTS = [{
- 'url': 'http://www.konserthusetplay.se/?m=CKDDnlCY-dhWAAqiMERd-A',
- 'md5': 'e3fd47bf44e864bd23c08e487abe1967',
- 'info_dict': {
- 'id': 'CKDDnlCY-dhWAAqiMERd-A',
- 'ext': 'mp4',
- 'title': 'Orkesterns instrument: Valthornen',
- 'description': 'md5:f10e1f0030202020396a4d712d2fa827',
- 'thumbnail': 're:^https?://.*$',
- 'duration': 398.76,
- },
- }, {
- 'url': 'http://rspoplay.se/?m=elWuEH34SMKvaO4wO_cHBw',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(url, video_id)
-
- e = self._search_regex(
- r'https?://csp\.picsearch\.com/rest\?.*\be=(.+?)[&"\']', webpage, 'e')
-
- rest = self._download_json(
- 'http://csp.picsearch.com/rest?e=%s&containerId=mediaplayer&i=object' % e,
- video_id, transform_source=lambda s: s[s.index('{'):s.rindex('}') + 1])
-
- media = rest['media']
- player_config = media['playerconfig']
- playlist = player_config['playlist']
-
- source = next(f for f in playlist if f.get('bitrates') or f.get('provider'))
-
- FORMAT_ID_REGEX = r'_([^_]+)_h264m\.mp4'
-
- formats = []
-
- m3u8_url = source.get('url')
- if m3u8_url and determine_ext(m3u8_url) == 'm3u8':
- formats.extend(self._extract_m3u8_formats(
- m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
- m3u8_id='hls', fatal=False))
-
- fallback_url = source.get('fallbackUrl')
- fallback_format_id = None
- if fallback_url:
- fallback_format_id = self._search_regex(
- FORMAT_ID_REGEX, fallback_url, 'format id', default=None)
-
- connection_url = (player_config.get('rtmp', {}).get(
- 'netConnectionUrl') or player_config.get(
- 'plugins', {}).get('bwcheck', {}).get('netConnectionUrl'))
- if connection_url:
- for f in source['bitrates']:
- video_url = f.get('url')
- if not video_url:
- continue
- format_id = self._search_regex(
- FORMAT_ID_REGEX, video_url, 'format id', default=None)
- f_common = {
- 'vbr': int_or_none(f.get('bitrate')),
- 'width': int_or_none(f.get('width')),
- 'height': int_or_none(f.get('height')),
- }
- f = f_common.copy()
- f.update({
- 'url': connection_url,
- 'play_path': video_url,
- 'format_id': 'rtmp-%s' % format_id if format_id else 'rtmp',
- 'ext': 'flv',
- })
- formats.append(f)
- if format_id and format_id == fallback_format_id:
- f = f_common.copy()
- f.update({
- 'url': fallback_url,
- 'format_id': 'http-%s' % format_id if format_id else 'http',
- })
- formats.append(f)
-
- if not formats and fallback_url:
- formats.append({
- 'url': fallback_url,
- })
-
- title = player_config.get('title') or media['title']
- description = player_config.get('mediaInfo', {}).get('description')
- thumbnail = media.get('image')
- duration = float_or_none(media.get('duration'), 1000)
-
- subtitles = {}
- captions = source.get('captionsAvailableLanguages')
- if isinstance(captions, dict):
- for lang, subtitle_url in captions.items():
- subtitle_url = url_or_none(subtitle_url)
- if lang != 'none' and subtitle_url:
- subtitles.setdefault(lang, []).append({'url': subtitle_url})
-
- return {
- 'id': video_id,
- 'title': title,
- 'description': description,
- 'thumbnail': thumbnail,
- 'duration': duration,
- 'formats': formats,
- 'subtitles': subtitles,
- }
diff --git a/yt_dlp/extractor/koo.py b/yt_dlp/extractor/koo.py
index 9cfec5eb9..c78a7b9ca 100644
--- a/yt_dlp/extractor/koo.py
+++ b/yt_dlp/extractor/koo.py
@@ -6,6 +6,7 @@
class KooIE(InfoExtractor):
+ _WORKING = False
 _VALID_URL = r'https?://(?:www\.)?kooapp\.com/koo/[^/]+/(?P<id>[^/$?]+)'
_TESTS = [{ # Test for video in the comments
'url': 'https://www.kooapp.com/koo/ytdlpTestAccount/946c4189-bc2d-4524-b95b-43f641e2adde',
diff --git a/yt_dlp/extractor/krasview.py b/yt_dlp/extractor/krasview.py
index 4323aa429..0febf759b 100644
--- a/yt_dlp/extractor/krasview.py
+++ b/yt_dlp/extractor/krasview.py
@@ -8,6 +8,7 @@
class KrasViewIE(InfoExtractor):
+ _WORKING = False
IE_DESC = 'Красвью'
 _VALID_URL = r'https?://krasview\.ru/(?:video|embed)/(?P<id>\d+)'
diff --git a/yt_dlp/extractor/kusi.py b/yt_dlp/extractor/kusi.py
deleted file mode 100644
index a23ad8945..000000000
--- a/yt_dlp/extractor/kusi.py
+++ /dev/null
@@ -1,83 +0,0 @@
-import random
-import urllib.parse
-
-from .common import InfoExtractor
-from ..utils import (
- float_or_none,
- int_or_none,
- timeconvert,
- update_url_query,
- xpath_text,
-)
-
-
-class KUSIIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?kusi\.com/(?P<path>story/.+|video\?clipId=(?P<clipId>\d+))'
- _TESTS = [{
- 'url': 'http://www.kusi.com/story/32849881/turko-files-refused-to-help-it-aint-right',
- 'md5': '4e76ce8e53660ce9697d06c0ba6fc47d',
- 'info_dict': {
- 'id': '12689020',
- 'ext': 'mp4',
- 'title': "Turko Files: Refused to Help, It Ain't Right!",
- 'duration': 223.586,
- 'upload_date': '20160826',
- 'timestamp': 1472233118,
- 'thumbnail': r're:^https?://.*\.jpg$'
- },
- }, {
- 'url': 'http://kusi.com/video?clipId=12203019',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- mobj = self._match_valid_url(url)
- clip_id = mobj.group('clipId')
- video_id = clip_id or mobj.group('path')
-
- webpage = self._download_webpage(url, video_id)
-
- if clip_id is None:
- video_id = clip_id = self._html_search_regex(
- r'"clipId"\s*,\s*"(\d+)"', webpage, 'clip id')
-
- affiliate_id = self._search_regex(
- r'affiliateId\s*:\s*\'([^\']+)\'', webpage, 'affiliate id')
-
- # See __Packages/worldnow/model/GalleryModel.as of WNGallery.swf
- xml_url = update_url_query('http://www.kusi.com/build.asp', {
- 'buildtype': 'buildfeaturexmlrequest',
- 'featureType': 'Clip',
- 'featureid': clip_id,
- 'affiliateno': affiliate_id,
- 'clientgroupid': '1',
- 'rnd': int(round(random.random() * 1000000)),
- })
-
- doc = self._download_xml(xml_url, video_id)
-
- video_title = xpath_text(doc, 'HEADLINE', fatal=True)
- duration = float_or_none(xpath_text(doc, 'DURATION'), scale=1000)
- description = xpath_text(doc, 'ABSTRACT')
- thumbnail = xpath_text(doc, './THUMBNAILIMAGE/FILENAME')
- creation_time = timeconvert(xpath_text(doc, 'rfc822creationdate'))
-
- quality_options = doc.find('{http://search.yahoo.com/mrss/}group').findall('{http://search.yahoo.com/mrss/}content')
- formats = []
- for quality in quality_options:
- formats.append({
- 'url': urllib.parse.unquote_plus(quality.attrib['url']),
- 'height': int_or_none(quality.attrib.get('height')),
- 'width': int_or_none(quality.attrib.get('width')),
- 'vbr': float_or_none(quality.attrib.get('bitratebits'), scale=1000),
- })
-
- return {
- 'id': video_id,
- 'title': video_title,
- 'description': description,
- 'duration': duration,
- 'formats': formats,
- 'thumbnail': thumbnail,
- 'timestamp': creation_time,
- }
diff --git a/yt_dlp/extractor/kuwo.py b/yt_dlp/extractor/kuwo.py
index e8a061a10..b77667160 100644
--- a/yt_dlp/extractor/kuwo.py
+++ b/yt_dlp/extractor/kuwo.py
@@ -3,10 +3,10 @@
from .common import InfoExtractor
from ..compat import compat_urlparse
from ..utils import (
- get_element_by_id,
- clean_html,
ExtractorError,
InAdvancePagedList,
+ clean_html,
+ get_element_by_id,
remove_start,
)
@@ -54,6 +54,7 @@ def _get_formats(self, song_id, tolerate_ip_deny=False):
class KuwoIE(KuwoBaseIE):
+ _WORKING = False
IE_NAME = 'kuwo:song'
IE_DESC = '酷我音乐'
 _VALID_URL = r'https?://(?:www\.)?kuwo\.cn/yinyue/(?P<id>\d+)'
@@ -133,6 +134,7 @@ def _real_extract(self, url):
class KuwoAlbumIE(InfoExtractor):
+ _WORKING = False
IE_NAME = 'kuwo:album'
IE_DESC = '酷我音乐 - 专辑'
 _VALID_URL = r'https?://(?:www\.)?kuwo\.cn/album/(?P<id>\d+?)/'
@@ -169,6 +171,7 @@ def _real_extract(self, url):
class KuwoChartIE(InfoExtractor):
+ _WORKING = False
IE_NAME = 'kuwo:chart'
IE_DESC = '酷我音乐 - 排行榜'
 _VALID_URL = r'https?://yinyue\.kuwo\.cn/billboard_(?P<id>[^.]+).htm'
@@ -194,6 +197,7 @@ def _real_extract(self, url):
class KuwoSingerIE(InfoExtractor):
+ _WORKING = False
IE_NAME = 'kuwo:singer'
IE_DESC = '酷我音乐 - 歌手'
 _VALID_URL = r'https?://(?:www\.)?kuwo\.cn/mingxing/(?P<id>[^/]+)'
@@ -251,6 +255,7 @@ def page_func(page_num):
class KuwoCategoryIE(InfoExtractor):
+ _WORKING = False
IE_NAME = 'kuwo:category'
IE_DESC = '酷我音乐 - 分类'
 _VALID_URL = r'https?://yinyue\.kuwo\.cn/yy/cinfo_(?P<id>\d+?).htm'
@@ -290,6 +295,7 @@ def _real_extract(self, url):
class KuwoMvIE(KuwoBaseIE):
+ _WORKING = False
IE_NAME = 'kuwo:mv'
IE_DESC = '酷我音乐 - MV'
 _VALID_URL = r'https?://(?:www\.)?kuwo\.cn/mv/(?P<id>\d+?)/'
diff --git a/yt_dlp/extractor/lbry.py b/yt_dlp/extractor/lbry.py
index cc37c41e8..dcb44d07f 100644
--- a/yt_dlp/extractor/lbry.py
+++ b/yt_dlp/extractor/lbry.py
@@ -231,7 +231,6 @@ class LBRYIE(LBRYBaseIE):
'release_timestamp': int,
'release_date': str,
'tags': list,
- 'duration': None,
'channel': 'RT',
'channel_id': 'fdd11cb3ab75f95efb7b3bc2d726aa13ac915b66',
'channel_url': 'https://odysee.com/@RT:fdd11cb3ab75f95efb7b3bc2d726aa13ac915b66',
diff --git a/yt_dlp/extractor/lci.py b/yt_dlp/extractor/lci.py
index e7d2f8a24..708cb548d 100644
--- a/yt_dlp/extractor/lci.py
+++ b/yt_dlp/extractor/lci.py
@@ -1,9 +1,25 @@
from .common import InfoExtractor
+from .wat import WatIE
+from ..utils import ExtractorError, int_or_none
+from ..utils.traversal import traverse_obj
class LCIIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?(?:lci|tf1info)\.fr/[^/]+/[\w-]+-(?P<id>\d+)\.html'
+ _VALID_URL = r'https?://(?:www\.)?(?:lci|tf1info)\.fr/(?:[^/?#]+/)+[\w-]+-(?P<id>\d+)\.html'
_TESTS = [{
+ 'url': 'https://www.tf1info.fr/replay-lci/videos/video-24h-pujadas-du-vendredi-24-mai-6708-2300831.html',
+ 'info_dict': {
+ 'id': '14113788',
+ 'ext': 'mp4',
+ 'title': '24H Pujadas du vendredi 24 mai 2024',
+ 'thumbnail': 'https://photos.tf1.fr/1280/720/24h-pujadas-du-24-mai-2024-55bf2d-0@1x.jpg',
+ 'upload_date': '20240524',
+ 'duration': 6158,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
'url': 'https://www.tf1info.fr/politique/election-presidentielle-2022-second-tour-j-2-marine-le-pen-et-emmanuel-macron-en-interview-de-lci-vendredi-soir-2217486.html',
'info_dict': {
'id': '13875948',
@@ -24,5 +40,10 @@ class LCIIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
- wat_id = self._search_regex(r'watId["\']?\s*:\s*["\']?(\d+)', webpage, 'wat id')
- return self.url_result('wat:' + wat_id, 'Wat', wat_id)
+ next_data = self._search_nextjs_data(webpage, video_id)
+ wat_id = traverse_obj(next_data, (
+ 'props', 'pageProps', 'page', 'tms', 'videos', {dict.keys}, ..., {int_or_none}, any))
+ if wat_id is None:
+ raise ExtractorError('Could not find wat_id')
+
+ return self.url_result(f'wat:{wat_id}', WatIE, str(wat_id))
diff --git a/yt_dlp/extractor/lcp.py b/yt_dlp/extractor/lcp.py
index 9846319e0..62874195f 100644
--- a/yt_dlp/extractor/lcp.py
+++ b/yt_dlp/extractor/lcp.py
@@ -1,5 +1,5 @@
-from .common import InfoExtractor
from .arkena import ArkenaIE
+from .common import InfoExtractor
class LcpPlayIE(ArkenaIE): # XXX: Do not subclass from concrete IE
diff --git a/yt_dlp/extractor/lecture2go.py b/yt_dlp/extractor/lecture2go.py
index 3a9b30a3c..1a3ada1e5 100644
--- a/yt_dlp/extractor/lecture2go.py
+++ b/yt_dlp/extractor/lecture2go.py
@@ -4,12 +4,13 @@
from ..utils import (
determine_ext,
determine_protocol,
- parse_duration,
int_or_none,
+ parse_duration,
)
class Lecture2GoIE(InfoExtractor):
+ _WORKING = False
 _VALID_URL = r'https?://lecture2go\.uni-hamburg\.de/veranstaltungen/-/v/(?P<id>\d+)'
_TEST = {
'url': 'https://lecture2go.uni-hamburg.de/veranstaltungen/-/v/17473',
diff --git a/yt_dlp/extractor/lecturio.py b/yt_dlp/extractor/lecturio.py
index 795012541..90f0268d7 100644
--- a/yt_dlp/extractor/lecturio.py
+++ b/yt_dlp/extractor/lecturio.py
@@ -2,9 +2,9 @@
from .common import InfoExtractor
from ..utils import (
+ ExtractorError,
clean_html,
determine_ext,
- ExtractorError,
float_or_none,
int_or_none,
str_or_none,
@@ -172,7 +172,7 @@ def _real_extract(self, url):
class LecturioCourseIE(LecturioBaseIE):
- _VALID_URL = r'https://app\.lecturio\.com/(?:[^/]+/(?P<nt>[^/?#&]+)\.course|(?:#/)?course/c/(?P<id>\d+))'
+ _VALID_URL = r'https?://app\.lecturio\.com/(?:[^/]+/(?P<nt>[^/?#&]+)\.course|(?:#/)?course/c/(?P<id>\d+))'
_TESTS = [{
'url': 'https://app.lecturio.com/medical-courses/microbiology-introduction.course#/',
'info_dict': {
@@ -209,7 +209,7 @@ def _real_extract(self, url):
class LecturioDeCourseIE(LecturioBaseIE):
- _VALID_URL = r'https://(?:www\.)?lecturio\.de/[^/]+/(?P<id>[^/?#&]+)\.kurs'
+ _VALID_URL = r'https?://(?:www\.)?lecturio\.de/[^/]+/(?P<id>[^/?#&]+)\.kurs'
_TEST = {
'url': 'https://www.lecturio.de/jura/grundrechte.kurs',
'only_matching': True,
diff --git a/yt_dlp/extractor/leeco.py b/yt_dlp/extractor/leeco.py
index 85033b8f8..a113b3d0d 100644
--- a/yt_dlp/extractor/leeco.py
+++ b/yt_dlp/extractor/leeco.py
@@ -1,4 +1,4 @@
-import datetime
+import datetime as dt
import hashlib
import re
import time
@@ -11,9 +11,9 @@
compat_urllib_parse_urlencode,
)
from ..utils import (
+ ExtractorError,
determine_ext,
encode_data_uri,
- ExtractorError,
int_or_none,
orderedSet,
parse_iso8601,
@@ -185,7 +185,7 @@ def get_flash_urls(media_url, format_id):
publish_time = parse_iso8601(self._html_search_regex(
r'发布时间 ([^<>]+) ', page, 'publish time', default=None),
- delimiter=' ', timezone=datetime.timedelta(hours=8))
+ delimiter=' ', timezone=dt.timedelta(hours=8))
description = self._html_search_meta('description', page, fatal=False)
return {
diff --git a/yt_dlp/extractor/lenta.py b/yt_dlp/extractor/lenta.py
index 10aac984e..fe01bda1c 100644
--- a/yt_dlp/extractor/lenta.py
+++ b/yt_dlp/extractor/lenta.py
@@ -2,6 +2,7 @@
class LentaIE(InfoExtractor):
+ _WORKING = False
 _VALID_URL = r'https?://(?:www\.)?lenta\.ru/[^/]+/\d+/\d+/\d+/(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'https://lenta.ru/news/2018/03/22/savshenko_go/',
diff --git a/yt_dlp/extractor/libraryofcongress.py b/yt_dlp/extractor/libraryofcongress.py
index b76ca0908..297993939 100644
--- a/yt_dlp/extractor/libraryofcongress.py
+++ b/yt_dlp/extractor/libraryofcongress.py
@@ -1,7 +1,6 @@
import re
from .common import InfoExtractor
-
from ..utils import (
determine_ext,
float_or_none,
diff --git a/yt_dlp/extractor/lifenews.py b/yt_dlp/extractor/lifenews.py
index 919cfcb37..ea150a58b 100644
--- a/yt_dlp/extractor/lifenews.py
+++ b/yt_dlp/extractor/lifenews.py
@@ -6,8 +6,8 @@
compat_urlparse,
)
from ..utils import (
- determine_ext,
ExtractorError,
+ determine_ext,
int_or_none,
parse_iso8601,
remove_end,
diff --git a/yt_dlp/extractor/likee.py b/yt_dlp/extractor/likee.py
index 74ee2bea9..324463136 100644
--- a/yt_dlp/extractor/likee.py
+++ b/yt_dlp/extractor/likee.py
@@ -22,8 +22,6 @@ class LikeeIE(InfoExtractor):
'description': 'md5:9a7ebe816f0e78722ee5ed76f75983b4',
'thumbnail': r're:^https?://.+\.jpg',
'uploader': 'Huỳnh Hồng Quân ',
- 'play_count': int,
- 'download_count': int,
'artist': 'Huỳnh Hồng Quân ',
'timestamp': 1651571320,
'upload_date': '20220503',
@@ -44,11 +42,9 @@ class LikeeIE(InfoExtractor):
'comment_count': int,
'like_count': int,
'uploader': 'Vương Phước Nhi',
- 'download_count': int,
'timestamp': 1651506835,
'upload_date': '20220502',
'duration': 60024,
- 'play_count': int,
'artist': 'Vương Phước Nhi',
'uploader_id': '649222262',
'view_count': int,
@@ -65,9 +61,7 @@ class LikeeIE(InfoExtractor):
'duration': 9684,
'uploader_id': 'fernanda_rivasg',
'view_count': int,
- 'play_count': int,
'artist': 'La Cami La✨',
- 'download_count': int,
'like_count': int,
'uploader': 'Fernanda Rivas🎶',
'timestamp': 1614034308,
@@ -83,13 +77,11 @@ class LikeeIE(InfoExtractor):
'thumbnail': r're:^https?://.+\.jpg',
'comment_count': int,
'duration': 18014,
- 'play_count': int,
'view_count': int,
'timestamp': 1611694774,
'like_count': int,
'uploader': 'Fernanda Rivas🎶',
'uploader_id': 'fernanda_rivasg',
- 'download_count': int,
'artist': 'ʟᴇʀɪᴋ_ᴜɴɪᴄᴏʀɴ♡︎',
'upload_date': '20210126',
},
@@ -128,8 +120,6 @@ def _real_extract(self, url):
'description': info.get('share_desc'),
'view_count': int_or_none(info.get('video_count')),
'like_count': int_or_none(info.get('likeCount')),
- 'play_count': int_or_none(info.get('play_count')),
- 'download_count': int_or_none(info.get('download_count')),
'comment_count': int_or_none(info.get('comment_count')),
'uploader': str_or_none(info.get('nick_name')),
'uploader_id': str_or_none(info.get('likeeId')),
diff --git a/yt_dlp/extractor/limelight.py b/yt_dlp/extractor/limelight.py
index 4e50f106f..1ff091ddb 100644
--- a/yt_dlp/extractor/limelight.py
+++ b/yt_dlp/extractor/limelight.py
@@ -3,13 +3,13 @@
from .common import InfoExtractor
from ..networking.exceptions import HTTPError
from ..utils import (
+ ExtractorError,
determine_ext,
float_or_none,
int_or_none,
smuggle_url,
try_get,
unsmuggle_url,
- ExtractorError,
)
diff --git a/yt_dlp/extractor/linkedin.py b/yt_dlp/extractor/linkedin.py
index ad41c0e20..2a7c6f0e0 100644
--- a/yt_dlp/extractor/linkedin.py
+++ b/yt_dlp/extractor/linkedin.py
@@ -1,4 +1,4 @@
-from itertools import zip_longest
+import itertools
import re
from .common import InfoExtractor
@@ -7,8 +7,8 @@
extract_attributes,
float_or_none,
int_or_none,
- srt_subtitles_timecode,
mimetype2ext,
+ srt_subtitles_timecode,
traverse_obj,
try_get,
url_or_none,
@@ -156,7 +156,7 @@ class LinkedInLearningIE(LinkedInLearningBaseIE):
def json2srt(self, transcript_lines, duration=None):
srt_data = ''
- for line, (line_dict, next_dict) in enumerate(zip_longest(transcript_lines, transcript_lines[1:])):
+ for line, (line_dict, next_dict) in enumerate(itertools.zip_longest(transcript_lines, transcript_lines[1:])):
start_time, caption = line_dict['transcriptStartAt'] / 1000, line_dict['caption']
end_time = next_dict['transcriptStartAt'] / 1000 if next_dict else duration or start_time + 1
srt_data += '%d\n%s --> %s\n%s\n\n' % (line + 1, srt_subtitles_timecode(start_time),
diff --git a/yt_dlp/extractor/localnews8.py b/yt_dlp/extractor/localnews8.py
deleted file mode 100644
index 6f3f02c70..000000000
--- a/yt_dlp/extractor/localnews8.py
+++ /dev/null
@@ -1,42 +0,0 @@
-from .common import InfoExtractor
-
-
-class LocalNews8IE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?localnews8\.com/(?:[^/]+/)*(?P<display_id>[^/]+)/(?P<id>[0-9]+)'
- _TEST = {
- 'url': 'http://www.localnews8.com/news/rexburg-business-turns-carbon-fiber-scraps-into-wedding-rings/35183304',
- 'md5': 'be4d48aea61aa2bde7be2ee47691ad20',
- 'info_dict': {
- 'id': '35183304',
- 'display_id': 'rexburg-business-turns-carbon-fiber-scraps-into-wedding-rings',
- 'ext': 'mp4',
- 'title': 'Rexburg business turns carbon fiber scraps into wedding ring',
- 'description': 'The process was first invented by Lamborghini and less than a dozen companies around the world use it.',
- 'duration': 153,
- 'timestamp': 1441844822,
- 'upload_date': '20150910',
- 'uploader_id': 'api',
- }
- }
-
- def _real_extract(self, url):
- mobj = self._match_valid_url(url)
- video_id = mobj.group('id')
- display_id = mobj.group('display_id')
-
- webpage = self._download_webpage(url, display_id)
-
- partner_id = self._search_regex(
- r'partnerId\s*[:=]\s*(["\'])(?P<id>\d+)\1',
- webpage, 'partner id', group='id')
- kaltura_id = self._search_regex(
- r'videoIdString\s*[:=]\s*(["\'])kaltura:(?P<id>[0-9a-z_]+)\1',
- webpage, 'videl id', group='id')
-
- return {
- '_type': 'url_transparent',
- 'url': 'kaltura:%s:%s' % (partner_id, kaltura_id),
- 'ie_key': 'Kaltura',
- 'id': video_id,
- 'display_id': display_id,
- }
diff --git a/yt_dlp/extractor/loom.py b/yt_dlp/extractor/loom.py
new file mode 100644
index 000000000..1191aa17e
--- /dev/null
+++ b/yt_dlp/extractor/loom.py
@@ -0,0 +1,461 @@
+import json
+import textwrap
+import urllib.parse
+import uuid
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ determine_ext,
+ filter_dict,
+ get_first,
+ int_or_none,
+ parse_iso8601,
+ update_url,
+ url_or_none,
+ variadic,
+)
+from ..utils.traversal import traverse_obj
+
+
+class LoomIE(InfoExtractor):
+ IE_NAME = 'loom'
+ _VALID_URL = r'https?://(?:www\.)?loom\.com/(?:share|embed)/(?P<id>[\da-f]{32})'
+ _EMBED_REGEX = [rf'