mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-02 17:22:31 +01:00
[rcs] Improved extraction (See desc) (#170)
* improved `video_data` extraction
* added an extra fallback value for `description`
* improved regex in `RCSVariousIE`

Authored by: nixxo
This commit is contained in:
parent
e4beae703d
commit
18c1f04362
@ -15,6 +15,9 @@
|
|||||||
|
|
||||||
|
|
||||||
class RCSBaseIE(InfoExtractor):
|
class RCSBaseIE(InfoExtractor):
|
||||||
|
# based on VideoPlayerLoader.prototype.getVideoSrc
|
||||||
|
# and VideoPlayerLoader.prototype.transformSrc from
|
||||||
|
# https://js2.corriereobjects.it/includes2013/LIBS/js/corriere_video.sjs
|
||||||
_ALL_REPLACE = {
|
_ALL_REPLACE = {
|
||||||
'media2vam.corriere.it.edgesuite.net':
|
'media2vam.corriere.it.edgesuite.net':
|
||||||
'media2vam-corriere-it.akamaized.net',
|
'media2vam-corriere-it.akamaized.net',
|
||||||
@ -191,10 +194,10 @@ def _create_formats(self, urls, video_id):
|
|||||||
urls.get('m3u8'), video_id, 'mp4', entry_protocol='m3u8_native',
|
urls.get('m3u8'), video_id, 'mp4', entry_protocol='m3u8_native',
|
||||||
m3u8_id='hls', fatal=False)
|
m3u8_id='hls', fatal=False)
|
||||||
|
|
||||||
if not formats:
|
if urls.get('mp4'):
|
||||||
formats.append({
|
formats.append({
|
||||||
'format_id': 'http-mp4',
|
'format_id': 'http-mp4',
|
||||||
'url': urls.get('mp4')
|
'url': urls['mp4']
|
||||||
})
|
})
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
return formats
|
return formats
|
||||||
@ -216,10 +219,12 @@ def _real_extract(self, url):
|
|||||||
video_data = None
|
video_data = None
|
||||||
# look for json video data url
|
# look for json video data url
|
||||||
json = self._search_regex(
|
json = self._search_regex(
|
||||||
r'''(?x)var url\s*=\s*["']((?:https?:)?
|
r'''(?x)url\s*=\s*(["'])
|
||||||
//video\.rcs\.it
|
(?P<url>
|
||||||
/fragment-includes/video-includes/.+?\.json)["'];''',
|
(?:https?:)?//video\.rcs\.it
|
||||||
page, video_id, default=None)
|
/fragment-includes/video-includes/.+?\.json
|
||||||
|
)\1;''',
|
||||||
|
page, video_id, group='url', default=None)
|
||||||
if json:
|
if json:
|
||||||
if json.startswith('//'):
|
if json.startswith('//'):
|
||||||
json = 'https:%s' % json
|
json = 'https:%s' % json
|
||||||
@ -227,13 +232,16 @@ def _real_extract(self, url):
|
|||||||
|
|
||||||
# if json url not found, look for json video data directly in the page
|
# if json url not found, look for json video data directly in the page
|
||||||
else:
|
else:
|
||||||
|
# RCS normal pages and most of the embeds
|
||||||
json = self._search_regex(
|
json = self._search_regex(
|
||||||
r'[\s;]video\s*=\s*({[\s\S]+?})(?:;|,playlist=)',
|
r'[\s;]video\s*=\s*({[\s\S]+?})(?:;|,playlist=)',
|
||||||
page, video_id, default=None)
|
page, video_id, default=None)
|
||||||
if json:
|
if not json and 'video-embed' in url:
|
||||||
video_data = self._parse_json(
|
page = self._download_webpage(url.replace('video-embed', 'video-json'), video_id)
|
||||||
json, video_id, transform_source=js_to_json)
|
json = self._search_regex(
|
||||||
else:
|
r'##start-video##({[\s\S]+?})##end-video##',
|
||||||
|
page, video_id, default=None)
|
||||||
|
if not json:
|
||||||
# if no video data found try search for iframes
|
# if no video data found try search for iframes
|
||||||
emb = RCSEmbedsIE._extract_url(page)
|
emb = RCSEmbedsIE._extract_url(page)
|
||||||
if emb:
|
if emb:
|
||||||
@ -242,6 +250,9 @@ def _real_extract(self, url):
|
|||||||
'url': emb,
|
'url': emb,
|
||||||
'ie_key': RCSEmbedsIE.ie_key()
|
'ie_key': RCSEmbedsIE.ie_key()
|
||||||
}
|
}
|
||||||
|
if json:
|
||||||
|
video_data = self._parse_json(
|
||||||
|
json, video_id, transform_source=js_to_json)
|
||||||
|
|
||||||
if not video_data:
|
if not video_data:
|
||||||
raise ExtractorError('Video data not found in the page')
|
raise ExtractorError('Video data not found in the page')
|
||||||
@ -250,7 +261,8 @@ def _real_extract(self, url):
|
|||||||
self._get_video_src(video_data), video_id)
|
self._get_video_src(video_data), video_id)
|
||||||
|
|
||||||
description = (video_data.get('description')
|
description = (video_data.get('description')
|
||||||
or clean_html(video_data.get('htmlDescription')))
|
or clean_html(video_data.get('htmlDescription'))
|
||||||
|
or self._html_search_meta('description', page))
|
||||||
uploader = video_data.get('provider') or mobj.group('cdn')
|
uploader = video_data.get('provider') or mobj.group('cdn')
|
||||||
|
|
||||||
return {
|
return {
|
||||||
@ -283,6 +295,7 @@ class RCSEmbedsIE(RCSBaseIE):
|
|||||||
'uploader': 'rcs.it',
|
'uploader': 'rcs.it',
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
|
# redownload the page changing 'video-embed' in 'video-json'
|
||||||
'url': 'https://video.gazzanet.gazzetta.it/video-embed/gazzanet-mo05-0000260789',
|
'url': 'https://video.gazzanet.gazzetta.it/video-embed/gazzanet-mo05-0000260789',
|
||||||
'md5': 'a043e3fecbe4d9ed7fc5d888652a5440',
|
'md5': 'a043e3fecbe4d9ed7fc5d888652a5440',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@ -359,6 +372,7 @@ class RCSIE(RCSBaseIE):
|
|||||||
'uploader': 'Corriere Tv',
|
'uploader': 'Corriere Tv',
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
|
# video data inside iframe
|
||||||
'url': 'https://viaggi.corriere.it/video/norvegia-il-nuovo-ponte-spettacolare-sopra-la-cascata-di-voringsfossen/',
|
'url': 'https://viaggi.corriere.it/video/norvegia-il-nuovo-ponte-spettacolare-sopra-la-cascata-di-voringsfossen/',
|
||||||
'md5': 'da378e4918d2afbf7d61c35abb948d4c',
|
'md5': 'da378e4918d2afbf7d61c35abb948d4c',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@ -389,15 +403,15 @@ class RCSVariousIE(RCSBaseIE):
|
|||||||
(?P<cdn>
|
(?P<cdn>
|
||||||
leitv\.it|
|
leitv\.it|
|
||||||
youreporter\.it
|
youreporter\.it
|
||||||
)/(?:video/)?(?P<id>[^/]+?)(?:$|\?|/)'''
|
)/(?:[^/]+/)?(?P<id>[^/]+?)(?:$|\?|/)'''
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.leitv.it/video/marmellata-di-ciliegie-fatta-in-casa/',
|
'url': 'https://www.leitv.it/benessere/mal-di-testa-come-combatterlo-ed-evitarne-la-comparsa/',
|
||||||
'md5': '618aaabac32152199c1af86784d4d554',
|
'md5': '92b4e63667b8f95acb0a04da25ae28a1',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'marmellata-di-ciliegie-fatta-in-casa',
|
'id': 'mal-di-testa-come-combatterlo-ed-evitarne-la-comparsa',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Marmellata di ciliegie fatta in casa',
|
'title': 'Cervicalgia e mal di testa, il video con i suggerimenti dell\'esperto',
|
||||||
'description': 'md5:89133864d6aad456dbcf6e7a29f86263',
|
'description': 'md5:ae21418f34cee0b8d02a487f55bcabb5',
|
||||||
'uploader': 'leitv.it',
|
'uploader': 'leitv.it',
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
|
Loading…
Reference in New Issue
Block a user