From a646a8cf980a946cfc15d2286fcec6ee3987886f Mon Sep 17 00:00:00 2001 From: remitamine Date: Thu, 17 Mar 2016 02:02:18 +0100 Subject: [PATCH] [sbs] improve extraction (fixes #3811) - extract error messages - force the theplatform SMIL URL (previously the manifest param in the query was not respected, which made theplatform return non-working mp4 files for some videos) --- youtube_dl/extractor/sbs.py | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/sbs.py b/youtube_dl/extractor/sbs.py index d6ee2d9e2..2f96477ca 100644 --- a/youtube_dl/extractor/sbs.py +++ b/youtube_dl/extractor/sbs.py @@ -2,6 +2,10 @@ from __future__ import unicode_literals from .common import InfoExtractor +from ..utils import ( + smuggle_url, + ExtractorError, +) class SBSIE(InfoExtractor): @@ -31,21 +35,28 @@ class SBSIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) + player_params = self._download_json( + 'http://www.sbs.com.au/api/video_pdkvars/id/%s?form=json' % video_id, video_id) - webpage = self._download_webpage( - 'http://www.sbs.com.au/ondemand/video/single/%s?context=web' % video_id, video_id) - - player_params = self._parse_json( - self._search_regex( - r'(?s)var\s+playerParams\s*=\s*({.+?});', webpage, 'playerParams'), - video_id) + error = player_params.get('error') + if error: + error_message = 'Sorry, The video you are looking for does not exist.' + video_data = error.get('results') or {} + error_code = error.get('errorCode') + if error_code == 'ComingSoon': + error_message = '%s is not yet available.' % video_data.get('title', '') + elif error_code in ('Forbidden', 'intranetAccessOnly'): + error_message = 'Sorry, This video cannot be accessed via this website' + elif error_code == 'Expired': + error_message = 'Sorry, %s is no longer available.' 
% video_data.get('title', '') + raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message), expected=True) urls = player_params['releaseUrls'] - theplatform_url = (urls.get('progressive') or urls.get('standard') or - urls.get('html') or player_params['relatedItemsURL']) + theplatform_url = (urls.get('progressive') or urls.get('html') or + urls.get('standard') or player_params['relatedItemsURL']) return { '_type': 'url_transparent', 'id': video_id, - 'url': theplatform_url, + 'url': smuggle_url(theplatform_url, {'force_smil_url': True}), }