From 6de8f1afb72d35560396817cbc2ed96180daa019 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sat, 20 Apr 2013 12:50:14 +0200 Subject: [PATCH] Allows to specify which IE should be used for extracting info for a result of type url --- youtube_dl/FileDownloader.py | 17 ++++++++++++++--- youtube_dl/InfoExtractors.py | 16 +++++++--------- 2 files changed, 21 insertions(+), 12 deletions(-) diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py index 03346ab04..9c0c42f8d 100644 --- a/youtube_dl/FileDownloader.py +++ b/youtube_dl/FileDownloader.py @@ -17,6 +17,7 @@ import ctypes from .utils import * +from .InfoExtractors import get_info_extractor class FileDownloader(object): @@ -425,13 +426,23 @@ def _match_entry(self, info_dict): return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"' return None - def extract_info(self, url, download = True): + def extract_info(self, url, download = True, ie_name = None): ''' Returns a list with a dictionary for each video we find. If 'download', also downloads the videos. ''' suitable_found = False - for ie in self._ies: + + #We copy the original list + ies = list(self._ies) + + if ie_name is not None: + #We put in the first place the given info extractor + first_ie = get_info_extractor(ie_name)() + first_ie.set_downloader(self) + ies.insert(0, first_ie) + + for ie in ies: # Go to next InfoExtractor if not suitable if not ie.suitable(url): continue @@ -486,7 +497,7 @@ def process_ie_result(self, ie_result, download = True): return ie_result elif result_type == 'url': #We get the video pointed by the url - result = self.extract_info(ie_result['url'], download)[0] + result = self.extract_info(ie_result['url'], download, ie_name = ie_result['ie_key'])[0] return result elif result_type == 'playlist': #We process each entry in the playlist diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index eeedcf792..e47d8b85d 100755 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -154,7 +154,8 @@ def url_result(self, url, ie=None): """Returns a url that points to a page that should be processed""" #TODO: ie should be the class used for getting the info video_info = {'_type': 'url', - 'url': url} + 'url': url, + 'ie_key': ie} return video_info def playlist_result(self, entries, playlist_id=None, playlist_title=None): """Returns a playlist""" @@ -728,7 +729,7 @@ def _real_extract(self, url): # Check if video comes from YouTube mobj2 = re.match(r'^yt-(.*)$', video_id) if mobj2 is not None: - return [self.url_result('http://www.youtube.com/watch?v=%s' % mobj2.group(1))] + return [self.url_result('http://www.youtube.com/watch?v=%s' % mobj2.group(1), 'Youtube')] # Retrieve video webpage to extract further information webpage = self._download_webpage('http://www.metacafe.com/watch/%s/' % video_id, video_id) @@ -1810,7 +1811,7 @@ def _real_extract(self, url): videos = [v[1] for v in sorted(videos)] - url_results = [self.url_result(url) for url in videos] + url_results = [self.url_result(url, 'Youtube') for url in videos] return [self.playlist_result(url_results, playlist_id)] @@ -1884,7 +1885,7 @@ def _real_extract(self, url): self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids))) urls = ['http://www.youtube.com/watch?v=%s' % id for id in video_ids] - url_entries = [self.url_result(url) for url in urls] + url_entries = [self.url_result(url, 'Youtube') for url in urls] return [self.playlist_result(url_entries, channel_id)] @@ -1956,7 +1957,7 @@ def _real_extract(self, url): pagenum += 1 urls = ['http://www.youtube.com/watch?v=%s' % video_id for video_id in video_ids] - url_results = [self.url_result(url) for url in urls] + url_results = [self.url_result(url, 'Youtube') for url in urls] return [self.playlist_result(url_results, playlist_title = username)] @@ -2035,11 +2036,8 @@ def _real_extract(self, url): pagenum += 1 - self._downloader.to_screen(u"[%s] user %s: Collected %d video ids (downloading %d of them)" % - (self.IE_NAME, username, all_ids_count, len(video_ids))) - urls = [u'http://blip.tv/%s' % video_id for video_id in video_ids] - url_entries = [self.url_result(url) for url in urls] + url_entries = [self.url_result(url, 'BlipTV') for url in urls] return [self.playlist_result(url_entries, playlist_title = username)]