From 8409501206e37d57f01e5fe72bfc54a5562e4e0a Mon Sep 17 00:00:00 2001 From: Filippo Valsorda Date: Fri, 7 Jun 2013 11:46:03 +0200 Subject: [PATCH] use search_regex in new IEs --- youtube_dl/InfoExtractors.py | 50 ++++++++++++++++-------------------- 1 file changed, 22 insertions(+), 28 deletions(-) diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index bd6fce3b6..5d54e93e7 100755 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -3347,7 +3347,7 @@ def _real_extract(self, url): title = clean_html(title) video_description = self._search_regex(r'[0-9]+)/.*\.html' @@ -4310,8 +4310,9 @@ def _real_extract(self,url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') - mrss_url='http://xhamster.com/movies/%s/.html' % video_id + mrss_url = 'http://xhamster.com/movies/%s/.html' % video_id webpage = self._download_webpage(mrss_url, video_id) + mobj = re.search(r'\'srv\': \'(?P[^\']*)\',\s*\'file\': \'(?P[^\']+)\',', webpage) if mobj is None: raise ExtractorError(u'Unable to extract media URL') @@ -4321,32 +4322,26 @@ def _real_extract(self,url): video_url = mobj.group('server')+'/key='+mobj.group('file') video_extension = video_url.split('.')[-1] - mobj = re.search(r'(?P<title>.+?) - xHamster\.com', webpage) - if mobj is None: - raise ExtractorError(u'Unable to extract title') - video_title = unescapeHTML(mobj.group('title')) + video_title = self._search_regex(r'(?P<title>.+?) - xHamster\.com', + webpage, u'title') + video_title = unescapeHTML(video_title) - mobj = re.search(r'Description: (?P[^<]+)', webpage) - if mobj is None: - video_description = u'' - else: - video_description = unescapeHTML(mobj.group('description')) + video_description = self._search_regex(r'Description: (?P[^<]+)', + webpage, u'description', fatal=False) + if video_description: video_description = unescapeHTML(video_description) mobj = re.search(r'hint=\'(?P[0-9]{4})-(?P[0-9]{2})-(?P[0-9]{2}) [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{3,4}\'', webpage) - if mobj is None: - raise ExtractorError(u'Unable to extract upload date') - video_upload_date = mobj.group('upload_date_Y')+mobj.group('upload_date_m')+mobj.group('upload_date_d') - - mobj = re.search(r']+>(?P[^>]+)', webpage) - if mobj is None: - video_uploader_id = u'anonymous' + if mobj: + video_upload_date = mobj.group('upload_date_Y')+mobj.group('upload_date_m')+mobj.group('upload_date_d') else: - video_uploader_id = mobj.group('uploader_id') + video_upload_date = None + self._downloader.report_warning(u'Unable to extract upload date') - mobj = re.search(r'\'image\':\'(?P[^\']+)\'', webpage) - if mobj is None: - raise ExtractorError(u'Unable to extract thumbnail URL') - video_thumbnail = mobj.group('thumbnail') + video_uploader_id = self._search_regex(r']+>(?P[^>]+)', + webpage, u'uploader id', default=u'anonymous') + + video_thumbnail = self._search_regex(r'\'image\':\'(?P[^\']+)\'', + webpage, u'thumbnail', fatal=False) return [{ 'id': video_id, @@ -4377,10 +4372,9 @@ def _real_extract(self, url): cookie = urlh.headers.get('Set-Cookie', '') self.report_extraction(track_id) - mobj = re.search(r'', response, flags=re.MULTILINE|re.DOTALL) - if mobj is None: - raise ExtractorError(u'Unable to extrack tracks') - html_tracks = mobj.group(1).strip() + + html_tracks = self._search_regex(r'', + response, u'tracks', flags=re.MULTILINE|re.DOTALL).strip() try: track_list = json.loads(html_tracks) track = track_list[u'tracks'][0]