1
0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2024-11-02 17:22:31 +01:00

[7plus] Add new extractor (closes #15043)

This commit is contained in:
Remita Amine 2017-12-23 13:21:33 +01:00
parent 9e3682d555
commit 4b7dd1705a
3 changed files with 128 additions and 56 deletions

View File

@ -464,7 +464,7 @@ class BrightcoveNewIE(AdobePassIE):
'timestamp': 1441391203, 'timestamp': 1441391203,
'upload_date': '20150904', 'upload_date': '20150904',
'uploader_id': '929656772001', 'uploader_id': '929656772001',
'formats': 'mincount:22', 'formats': 'mincount:20',
}, },
}, { }, {
# with rtmp streams # with rtmp streams
@ -478,7 +478,7 @@ class BrightcoveNewIE(AdobePassIE):
'timestamp': 1433556729, 'timestamp': 1433556729,
'upload_date': '20150606', 'upload_date': '20150606',
'uploader_id': '4036320279001', 'uploader_id': '4036320279001',
'formats': 'mincount:41', 'formats': 'mincount:39',
}, },
'params': { 'params': {
# m3u8 download # m3u8 download
@ -564,59 +564,7 @@ def _extract_urls(ie, webpage):
return entries return entries
def _real_extract(self, url): def _parse_brightcove_metadata(self, json_data, video_id):
url, smuggled_data = unsmuggle_url(url, {})
self._initialize_geo_bypass(smuggled_data.get('geo_countries'))
account_id, player_id, embed, video_id = re.match(self._VALID_URL, url).groups()
webpage = self._download_webpage(
'http://players.brightcove.net/%s/%s_%s/index.min.js'
% (account_id, player_id, embed), video_id)
policy_key = None
catalog = self._search_regex(
r'catalog\(({.+?})\);', webpage, 'catalog', default=None)
if catalog:
catalog = self._parse_json(
js_to_json(catalog), video_id, fatal=False)
if catalog:
policy_key = catalog.get('policyKey')
if not policy_key:
policy_key = self._search_regex(
r'policyKey\s*:\s*(["\'])(?P<pk>.+?)\1',
webpage, 'policy key', group='pk')
api_url = 'https://edge.api.brightcove.com/playback/v1/accounts/%s/videos/%s' % (account_id, video_id)
try:
json_data = self._download_json(api_url, video_id, headers={
'Accept': 'application/json;pk=%s' % policy_key
})
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
json_data = self._parse_json(e.cause.read().decode(), video_id)[0]
message = json_data.get('message') or json_data['error_code']
if json_data.get('error_subcode') == 'CLIENT_GEO':
self.raise_geo_restricted(msg=message)
raise ExtractorError(message, expected=True)
raise
errors = json_data.get('errors')
if errors and errors[0].get('error_subcode') == 'TVE_AUTH':
custom_fields = json_data['custom_fields']
tve_token = self._extract_mvpd_auth(
smuggled_data['source_url'], video_id,
custom_fields['bcadobepassrequestorid'],
custom_fields['bcadobepassresourceid'])
json_data = self._download_json(
api_url, video_id, headers={
'Accept': 'application/json;pk=%s' % policy_key
}, query={
'tveToken': tve_token,
})
title = json_data['name'].strip() title = json_data['name'].strip()
formats = [] formats = []
@ -682,6 +630,7 @@ def build_format_id(kind):
}) })
formats.append(f) formats.append(f)
errors = json_data.get('errors')
if not formats and errors: if not formats and errors:
error = errors[0] error = errors[0]
raise ExtractorError( raise ExtractorError(
@ -708,9 +657,64 @@ def build_format_id(kind):
'thumbnail': json_data.get('thumbnail') or json_data.get('poster'), 'thumbnail': json_data.get('thumbnail') or json_data.get('poster'),
'duration': duration, 'duration': duration,
'timestamp': parse_iso8601(json_data.get('published_at')), 'timestamp': parse_iso8601(json_data.get('published_at')),
'uploader_id': account_id, 'uploader_id': json_data.get('account_id'),
'formats': formats, 'formats': formats,
'subtitles': subtitles, 'subtitles': subtitles,
'tags': json_data.get('tags', []), 'tags': json_data.get('tags', []),
'is_live': is_live, 'is_live': is_live,
} }
def _real_extract(self, url):
    """Fetch the Brightcove playback JSON for ``url`` and parse it.

    Steps, in order:
      1. Split off any smuggled data and set up geo bypass from its
         ``geo_countries`` entry.
      2. Download the player's ``index.min.js`` and pull the policy key
         out of it (first from the ``catalog(...)`` call, otherwise from a
         bare ``policyKey: '...'`` assignment).
      3. Query the playback API using that policy key; if the response
         reports a ``TVE_AUTH`` error, obtain an MVPD token and query
         again with it.
      4. Hand the resulting JSON to ``_parse_brightcove_metadata``.

    Raises ExtractorError (expected) with the API's own message when the
    playback API answers HTTP 403; a ``CLIENT_GEO`` subcode is routed
    through ``raise_geo_restricted`` first.
    """
    url, smuggled_data = unsmuggle_url(url, {})
    self._initialize_geo_bypass(smuggled_data.get('geo_countries'))

    account_id, player_id, embed, video_id = re.match(self._VALID_URL, url).groups()

    player_js = self._download_webpage(
        'http://players.brightcove.net/%s/%s_%s/index.min.js'
        % (account_id, player_id, embed), video_id)

    # Preferred source of the policy key: the object literal passed to
    # catalog(...). Parsing is non-fatal, so either step may yield nothing.
    catalog_literal = self._search_regex(
        r'catalog\(({.+?})\);', player_js, 'catalog', default=None)
    catalog = self._parse_json(
        js_to_json(catalog_literal), video_id,
        fatal=False) if catalog_literal else None
    policy_key = catalog.get('policyKey') if catalog else None

    # Fallback: a plain `policyKey: "..."` anywhere in the player script.
    if not policy_key:
        policy_key = self._search_regex(
            r'policyKey\s*:\s*(["\'])(?P<pk>.+?)\1',
            player_js, 'policy key', group='pk')

    api_url = 'https://edge.api.brightcove.com/playback/v1/accounts/%s/videos/%s' % (account_id, video_id)
    # The policy key travels inside the Accept header; a fresh headers
    # dict is built for every request.
    accept = 'application/json;pk=%s' % policy_key

    try:
        json_data = self._download_json(
            api_url, video_id, headers={'Accept': accept})
    except ExtractorError as err:
        cause = err.cause
        if not (isinstance(cause, compat_HTTPError) and cause.code == 403):
            raise
        # A 403 body is a JSON list; its first entry describes the error.
        json_data = self._parse_json(cause.read().decode(), video_id)[0]
        message = json_data.get('message') or json_data['error_code']
        if json_data.get('error_subcode') == 'CLIENT_GEO':
            self.raise_geo_restricted(msg=message)
        raise ExtractorError(message, expected=True)

    errors = json_data.get('errors')
    needs_tve_auth = bool(
        errors and errors[0].get('error_subcode') == 'TVE_AUTH')
    if needs_tve_auth:
        # Adobe Pass parameters come from the video's custom fields; the
        # page that embedded the player must have been smuggled in as
        # 'source_url'.
        custom_fields = json_data['custom_fields']
        requestor_id = custom_fields['bcadobepassrequestorid']
        resource_id = custom_fields['bcadobepassresourceid']
        tve_token = self._extract_mvpd_auth(
            smuggled_data['source_url'], video_id,
            requestor_id, resource_id)
        json_data = self._download_json(
            api_url, video_id,
            headers={'Accept': accept},
            query={'tveToken': tve_token})

    return self._parse_brightcove_metadata(json_data, video_id)

View File

@ -926,6 +926,7 @@
from .sendtonews import SendtoNewsIE from .sendtonews import SendtoNewsIE
from .servingsys import ServingSysIE from .servingsys import ServingSysIE
from .servus import ServusIE from .servus import ServusIE
from .sevenplus import SevenPlusIE
from .sexu import SexuIE from .sexu import SexuIE
from .shahid import ( from .shahid import (
ShahidIE, ShahidIE,

View File

@ -0,0 +1,67 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .brightcove import BrightcoveNewIE
from ..utils import update_url_query
class SevenPlusIE(BrightcoveNewIE):
    """Extractor for 7plus.com.au episode URLs.

    The episode id is taken from the ``episode-id`` query parameter. Media
    data is requested from the SWM playback service and handed to the
    inherited ``_parse_brightcove_metadata``; title and description are
    then overridden, when available, from the SWM content service.
    """

    IE_NAME = '7plus'
    # `path` captures everything after the host (query string included) and
    # is reused verbatim when querying the content service.
    _VALID_URL = r'https?://(?:www\.)?7plus\.com\.au/(?P<path>[^?]+\?.*?\bepisode-id=(?P<id>[^&#]+))'
    _TESTS = [{
        'url': 'https://7plus.com.au/BEAT?episode-id=BEAT-001',
        'info_dict': {
            'id': 'BEAT-001',
            'ext': 'mp4',
            'title': 'S1 E1 - Help / Lucy In The Sky With Diamonds',
            'description': 'md5:37718bea20a8eedaca7f7361af566131',
            'uploader_id': '5303576322001',
            'upload_date': '20171031',
            'timestamp': 1509440068,
        },
        'params': {
            'format': 'bestvideo',
            'skip_download': True,
        }
    }, {
        'url': 'https://7plus.com.au/UUUU?episode-id=AUMS43-001',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the info dict for the episode addressed by ``url``.

        The playback request is mandatory (a failure propagates). The
        content request is best-effort (``fatal=False``) and only refines
        ``title`` / ``description``.
        """
        path, episode_id = re.match(self._VALID_URL, url).groups()

        media = self._download_json(
            'https://videoservice.swm.digital/playback', episode_id, query={
                'appId': '7plus',
                'deviceType': 'web',
                'platformType': 'web',
                'accountId': 5303576322001,
                'referenceId': 'ref:' + episode_id,
                'deliveryId': 'csai',
                'videoType': 'vod',
            })['media']

        # `or []` also covers an explicit JSON null, which a `.get()`
        # default would let through and then fail to iterate.
        for source in media.get('sources') or []:
            src = source.get('src')
            if not src:
                continue
            # Each source URL is rewritten in place with a `rule` query
            # parameter set to the empty string before format parsing.
            source['src'] = update_url_query(src, {'rule': ''})

        info = self._parse_brightcove_metadata(media, episode_id)

        content = self._download_json(
            'https://component-cdn.swm.digital/content/' + path,
            episode_id, headers={
                'market-id': 4,
            }, fatal=False) or {}

        # This lookup is optional metadata: tolerate null `items` and null
        # `componentData` instead of aborting an otherwise good extraction.
        for item in content.get('items') or []:
            component_data = item.get('componentData') or {}
            if component_data.get('componentType') != 'infoPanel':
                continue
            for src_key, dst_key in [('title', 'title'), ('shortSynopsis', 'description')]:
                value = item.get(src_key)
                if value:
                    info[dst_key] = value

        return info