1
0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2024-11-02 17:22:31 +01:00

[vk:uservideos] Improve extraction

This commit is contained in:
Sergey M․ 2015-07-18 17:22:25 +06:00
parent 74fe23ec35
commit dc786d3db5

View File

@@ -291,23 +291,32 @@ def _real_extract(self, url):
class VKUserVideosIE(InfoExtractor): class VKUserVideosIE(InfoExtractor):
IE_NAME = 'vk.com:user-videos' IE_NAME = 'vk.com:user-videos'
IE_DESC = 'vk.com:All of a user\'s videos' IE_DESC = 'vk.com:All of a user\'s videos'
_VALID_URL = r'https?://vk\.com/videos(?P<id>[0-9]+)(?:m\?.*)?' _VALID_URL = r'https?://vk\.com/videos(?P<id>-?[0-9]+)$'
_TEMPLATE_URL = 'https://vk.com/videos' _TEMPLATE_URL = 'https://vk.com/videos'
_TEST = { _TESTS = [{
'url': 'http://vk.com/videos205387401', 'url': 'http://vk.com/videos205387401',
'info_dict': { 'info_dict': {
'id': '205387401', 'id': '205387401',
'title': "Tom Cruise's Videos",
}, },
'playlist_mincount': 4, 'playlist_mincount': 4,
} }, {
'url': 'http://vk.com/videos-77521',
'only_matching': True,
}]
def _real_extract(self, url): def _real_extract(self, url):
page_id = self._match_id(url) page_id = self._match_id(url)
page = self._download_webpage(url, page_id)
video_ids = orderedSet( webpage = self._download_webpage(url, page_id)
m.group(1) for m in re.finditer(r'href="/video([0-9_]+)"', page))
url_entries = [ entries = [
self.url_result( self.url_result(
'http://vk.com/video' + video_id, 'VK', video_id=video_id) 'http://vk.com/video' + video_id, 'VK', video_id=video_id)
for video_id in video_ids] for video_id in set(re.findall(r'href="/video(-?[0-9_]+)"', webpage))]
return self.playlist_result(url_entries, page_id)
title = unescapeHTML(self._search_regex(
r'<title>\s*([^<]+?)\s+\|\s+\d+\s+videos',
webpage, 'title', default=page_id))
return self.playlist_result(entries, page_id, title)