From 6fb8ace671db2f2bdcc9cd7ac6b9f81fbd356791 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?=
Date: Thu, 29 Oct 2015 22:44:01 +0600
Subject: [PATCH] [moniker] Add support for builtin embedded videos (Closes #7244)

---
 youtube_dl/extractor/moniker.py | 35 ++++++++++++++++++++++-----------
 1 file changed, 24 insertions(+), 11 deletions(-)

diff --git a/youtube_dl/extractor/moniker.py b/youtube_dl/extractor/moniker.py
index 69e4bcd1a..204c03c4a 100644
--- a/youtube_dl/extractor/moniker.py
+++ b/youtube_dl/extractor/moniker.py
@@ -17,7 +17,7 @@
 
 class MonikerIE(InfoExtractor):
     IE_DESC = 'allmyvideos.net and vidspot.net'
-    _VALID_URL = r'https?://(?:www\.)?(?:allmyvideos|vidspot)\.net/(?P<id>[a-zA-Z0-9_-]+)'
+    _VALID_URL = r'https?://(?:www\.)?(?:allmyvideos|vidspot)\.net/(?:(?:2|v)/v-)?(?P<id>[a-zA-Z0-9_-]+)'
 
     _TESTS = [{
         'url': 'http://allmyvideos.net/jih3nce3x6wn',
@@ -64,18 +64,30 @@ def _real_extract(self, url):
             raise ExtractorError(
                 '%s returned error: %s' % (self.IE_NAME, error), expected=True)
 
-        fields = re.findall(r'type="hidden" name="(.+?)"\s* value="?(.+?)">', orig_webpage)
-        data = dict(fields)
+        builtin_url = self._search_regex(
+            r'<iframe[^>]+src=(["\'])(?P<url>.+?/builtin-.+?)\1',
+            orig_webpage, 'builtin URL', default=None, group='url')
 
-        post = compat_urllib_parse.urlencode(data)
-        headers = {
-            b'Content-Type': b'application/x-www-form-urlencoded',
-        }
-        req = compat_urllib_request.Request(url, post, headers)
-        webpage = self._download_webpage(
-            req, video_id, note='Downloading video page ...')
+        if builtin_url:
+            req = compat_urllib_request.Request(builtin_url)
+            req.add_header('Referer', url)
+            webpage = self._download_webpage(req, video_id, 'Downloading builtin page')
+            title = self._og_search_title(orig_webpage).strip()
+            description = self._og_search_description(orig_webpage).strip()
+        else:
+            fields = re.findall(r'type="hidden" name="(.+?)"\s* value="?(.+?)">', orig_webpage)
+            data = dict(fields)
 
-        title = os.path.splitext(data['fname'])[0]
+            post = compat_urllib_parse.urlencode(data)
+            headers = {
+                b'Content-Type': b'application/x-www-form-urlencoded',
+            }
+            req = compat_urllib_request.Request(url, post, headers)
+            webpage = self._download_webpage(
+                req, video_id, note='Downloading video page ...')
+
+            title = os.path.splitext(data['fname'])[0]
+            description = None
 
         # Could be several links with different quality
         links = re.findall(r'"file" : "?(.+?)",', webpage)
@@ -89,5 +101,6 @@ def _real_extract(self, url):
         return {
             'id': video_id,
             'title': title,
+            'description': description,
             'formats': formats,
         }