[\S\s]+?
(?P.+?)
',
webpage, u'uploader', fatal=False)
- if uploader: uploader = clean_html(uploader)
info = {
'id': video_id,
@@ -3907,9 +3903,8 @@ def _real_extract(self, url):
webpage = self._download_webpage(url, video_id)
- video_title = self._search_regex(r'
(.*?)
',
+ video_title = self._html_search_regex(r'
(.*?)
',
webpage, u'title')
- video_title = unescapeHTML(video_title)
xml_url = u'http://video2.spiegel.de/flash/' + video_id + u'.xml'
xml_code = self._download_webpage(xml_url, video_id,
@@ -3948,15 +3943,13 @@ def _real_extract(self, url):
video_url = self._search_regex(r'file: "(.*?)",',
webpage, u'video URL')
- video_title = self._search_regex(r'
',
+ video_uploader = self._html_search_regex(r'By:.*?(\w+)',
webpage, u'uploader', fatal=False)
info = {
@@ -4033,9 +4026,8 @@ def _real_extract(self, url):
# The only place where you can get a title, it's not complete,
# but searching in other places doesn't work for all videos
- video_title = self._search_regex(r'
(?P.*?)',
+ video_title = self._html_search_regex(r'(?P.*?)',
webpage, u'title', flags=re.DOTALL)
- video_title = unescapeHTML(video_title)
return [{'id': video_id,
'url': video_url,
@@ -4105,10 +4097,10 @@ def _real_extract(self,url):
self.report_extraction(video_id)
- video_url = self._search_regex(r'',
@@ -4161,13 +4153,13 @@ def _real_extract(self, url):
video_url = self._search_regex(r'\'?file\'?: "(http://mobile-media\.howcast\.com/[0-9]+\.mp4)',
webpage, u'video URL')
- video_title = self._search_regex(r'.*?(.+?)
',
+ uploader = self._html_search_regex(r'.*?
(.+?)
',
webpage, u'uploader', fatal=False, flags=re.DOTALL)
return [{
@@ -4230,7 +4222,7 @@ def _real_extract(self, url):
first_url = 'https://secure.flickr.com/apps/video/video_mtl_xml.gne?v=x&photo_id=' + video_id + '&secret=' + secret + '&bitrate=700&target=_self'
first_xml = self._download_webpage(first_url, video_id, 'Downloading first data webpage')
- node_id = self._search_regex(r'
- (\d+-\d+)
',
+ node_id = self._html_search_regex(r'
- (\d+-\d+)
',
first_xml, u'node_id')
second_url = 'https://secure.flickr.com/video_playlist.gne?node_id=' + node_id + '&tech=flash&mode=playlist&bitrate=700&secret=' + secret + '&rd=video.yahoo.com&noad=1'
@@ -4243,13 +4235,13 @@ def _real_extract(self, url):
raise ExtractorError(u'Unable to extract video url')
video_url = mobj.group(1) + unescapeHTML(mobj.group(2))
- video_title = self._search_regex(r'
(.*?)',
+ video_url = self._html_search_regex(r'
(.*?)',
data, u'video URL')
return [{
@@ -4321,12 +4313,11 @@ def _real_extract(self,url):
video_url = mobj.group('server')+'/key='+mobj.group('file')
video_extension = video_url.split('.')[-1]
- video_title = self._search_regex(r'
(?P.+?) - xHamster\.com',
+ video_title = self._html_search_regex(r'(?P.+?) - xHamster\.com',
webpage, u'title')
- video_title = unescapeHTML(video_title)
# Can't see the description anywhere in the UI
- # video_description = self._search_regex(r'Description: (?P[^<]+)',
+ # video_description = self._html_search_regex(r'Description: (?P[^<]+)',
# webpage, u'description', fatal=False)
# if video_description: video_description = unescapeHTML(video_description)
@@ -4337,7 +4328,7 @@ def _real_extract(self,url):
video_upload_date = None
self._downloader.report_warning(u'Unable to extract upload date')
- video_uploader_id = self._search_regex(r']+>(?P[^>]+)',
+ video_uploader_id = self._html_search_regex(r']+>(?P[^>]+)',
webpage, u'uploader id', default=u'anonymous')
video_thumbnail = self._search_regex(r'\'image\':\'(?P[^\']+)\'',
@@ -4373,7 +4364,7 @@ def _real_extract(self, url):
self.report_extraction(track_id)
- html_tracks = self._search_regex(r'',
+ html_tracks = self._html_search_regex(r'',
response, u'tracks', flags=re.MULTILINE|re.DOTALL).strip()
try:
track_list = json.loads(html_tracks)