From fd9cf738363e180bb3abf0829e1f322e12c68b21 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Fri, 6 Sep 2013 10:41:46 +0200 Subject: [PATCH] [youtube] Users: download from the api in json to simplify extraction (fixes #1358) There could be duplicate videos or other videos if the description have links. --- youtube_dl/extractor/youtube.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index cd5165c94..98a44f333 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1018,8 +1018,7 @@ class YoutubeUserIE(InfoExtractor): _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?)|ytuser:)([A-Za-z0-9_-]+)' _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s' _GDATA_PAGE_SIZE = 50 - _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d' - _VIDEO_INDICATOR = r'/watch\?v=(.+?)[\<&]' + _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json' IE_NAME = u'youtube:user' def suitable(cls, url): @@ -1048,13 +1047,15 @@ def _real_extract(self, url): page = self._download_webpage(gdata_url, username, u'Downloading video ids from %d to %d' % (start_index, start_index + self._GDATA_PAGE_SIZE)) + try: + response = json.loads(page) + except ValueError as err: + raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err)) + # Extract video identifiers ids_in_page = [] - - for mobj in re.finditer(self._VIDEO_INDICATOR, page): - if mobj.group(1) not in ids_in_page: - ids_in_page.append(mobj.group(1)) - + for entry in response['feed']['entry']: + ids_in_page.append(entry['id']['$t'].split('/')[-1]) video_ids.extend(ids_in_page) # A little optimization - if current page is not