1
0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2024-11-02 17:22:31 +01:00

[hentaistigma] Add new extractor

This commit is contained in:
hojel 2014-05-12 03:58:07 -07:00
parent e399853d0c
commit 33c7ff861e
2 changed files with 44 additions and 0 deletions

View File

@ -109,6 +109,7 @@
from .googlesearch import GoogleSearchIE
from .hark import HarkIE
from .helsinki import HelsinkiIE
from .hentaistigma import HentaiStigmaIE
from .hotnewhiphop import HotNewHipHopIE
from .howcast import HowcastIE
from .huffpost import HuffPostIE

View File

@ -0,0 +1,43 @@
import re
from .common import InfoExtractor
class HentaiStigmaIE(InfoExtractor):
    """Extractor for video posts on hentai.animestigma.com.

    A post page carries the title and an ``<iframe>`` whose ``src`` ends in
    ``mp4``; that iframe points at a wrapper page, and the actual media URL
    is read from the ``clip: { url: "..." }`` snippet inside the wrapper.
    """

    # The first path component after the host is captured as the video id.
    _VALID_URL = r'^https?://hentai\.animestigma\.com/(?P<videoid>[^/]+)'

    _TEST = {
        u'url': u'http://hentai.animestigma.com/inyouchuu-etsu-bonus/',
        u'file': u'inyouchuu-etsu-bonus.mp4',
        u'md5': u'4e3d07422a68a4cc363d8f57c8bf0d23',
        u'info_dict': {
            u"title": u"Inyouchuu Etsu Bonus",
            u"age_limit": 18,
        }
    }

    def _real_extract(self, url):
        """Build the info dict for ``url``.

        Returns a one-element list containing a dict with the keys ``id``,
        ``url``, ``title``, ``format`` and ``age_limit``.
        """
        video_id = re.match(self._VALID_URL, url).group('videoid')

        # Stage 1: the post page provides the title and the wrapper iframe.
        post_page = self._download_webpage(url, video_id)
        title = self._html_search_regex(
            r'<h2 class="posttitle"><a[^>]*>([^<]+)</a>',
            post_page, u'title')
        title = title.strip()
        iframe_src = self._html_search_regex(
            r'<iframe src="([^"]+mp4)"', post_page, u'wrapper url')

        # Stage 2: the wrapper page embeds the player configuration that
        # names the media file.
        player_page = self._download_webpage(iframe_src, video_id)
        media_url = self._html_search_regex(
            r'clip:\s*{\s*url: "([^"]*)"', player_page, u'video url')

        return [{
            'id': video_id,
            'url': media_url,
            'title': title,
            'format': 'mp4',
            'age_limit': 18,
        }]