From e2dbcaa1bf65aa502718005d5fbd00189618469f Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Mon, 11 Jul 2016 00:52:25 +0800 Subject: [PATCH] [vuclip] Fix extraction --- youtube_dl/extractor/vuclip.py | 43 ++++++++++++---------------------- 1 file changed, 15 insertions(+), 28 deletions(-) diff --git a/youtube_dl/extractor/vuclip.py b/youtube_dl/extractor/vuclip.py index eaa888f00..b73da5cd0 100644 --- a/youtube_dl/extractor/vuclip.py +++ b/youtube_dl/extractor/vuclip.py @@ -9,7 +9,7 @@ from ..utils import ( ExtractorError, parse_duration, - qualities, + remove_end, ) @@ -22,7 +22,7 @@ class VuClipIE(InfoExtractor): 'id': '922692425', 'ext': '3gp', 'title': 'The Toy Soldiers - Hollywood Movie Trailer', - 'duration': 180, + 'duration': 177, } } @@ -46,34 +46,21 @@ def _real_extract(self, url): '%s said: %s' % (self.IE_NAME, error_msg), expected=True) # These clowns alternate between two page types - links_code = self._search_regex( - r'''(?xs) - (?: - | - \s* - ) - (.*?) - (?: - - ) - ''', webpage, 'links') - title = self._html_search_regex( - r'(.*?)-\s*Vuclip', webpage, 'title').strip() + video_url = self._search_regex( + r']+href="([^"]+)"[^>]*>]+src="[^"]*/play\.gif', + webpage, 'video URL', default=None) + if video_url: + formats = [{ + 'url': video_url, + }] + else: + formats = self._parse_html5_media_entries(url, webpage)[0]['formats'] - quality_order = qualities(['Reg', 'Hi']) - formats = [] - for url, q in re.findall( - r'[^"]+)".*?>(?:]*>)?(?P[^<]+)(?:)?', links_code): - format_id = compat_urllib_parse_urlparse(url).scheme + '-' + q - formats.append({ - 'format_id': format_id, - 'url': url, - 'quality': quality_order(q), - }) - self._sort_formats(formats) + title = remove_end(self._html_search_regex( + r'(.*?)-\s*Vuclip', webpage, 'title').strip(), ' - Video') - duration = parse_duration(self._search_regex( - r'\(([0-9:]+)\)', webpage, 'duration', fatal=False)) + duration = parse_duration(self._html_search_regex( + r'[(>]([0-9]+:[0-9]+)(?: