Merge branch 'peugeot-tnaflix'

2024-11-02 09:12:40 +01:00 · 2014-09-03 21:08:50 +07:00 · 2014-09-03 21:08:50 +07:00 · 88fc294f7f
commit 88fc294f7f
parent 884ae74785 a232bb9551
4 changed files with 97 additions and 45 deletions
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -86,7 +86,7 @@
    EllenTVClipsIE,
 )
 from .elpais import ElPaisIE
-from .empflix import EmpflixIE
+from .empflix import EMPFlixIE
 from .engadget import EngadgetIE
 from .eporner import EpornerIE
 from .escapist import EscapistIE
@@ -348,6 +348,7 @@
 from .thisav import ThisAVIE
 from .tinypic import TinyPicIE
 from .tlc import TlcIE, TlcDeIE
+from .tnaflix import TNAFlixIE
 from .toutv import TouTvIE
 from .toypics import ToypicsUserIE, ToypicsIE
 from .traileraddict import TrailerAddictIE
--- a/youtube_dl/extractor/empflix.py
+++ b/youtube_dl/extractor/empflix.py
@@ -1,58 +1,25 @@
 from __future__ import unicode_literals
-import re
+from .tnaflix import TNAFlixIE
 from .common import InfoExtractor
 from ..utils import fix_xml_ampersands
-class EmpflixIE(InfoExtractor):
+class EMPFlixIE(TNAFlixIE):
-    _VALID_URL = r'^https?://www\.empflix\.com/videos/.*?-(?P<id>[0-9]+)\.html'
+    _VALID_URL = r'^https?://www\.empflix\.com/videos/(?P<display_id>[0-9a-zA-Z-]+)-(?P<id>[0-9]+)\.html'
    _TITLE_REGEX = r'name="title" value="(?P<title>[^"]*)"'
    _DESCRIPTION_REGEX = r'name="description" value="([^"]*)"'
    _CONFIG_REGEX = r'flashvars\.config\s*=\s*escape\("([^"]+)"'
    _TEST = {
        'url': 'http://www.empflix.com/videos/Amateur-Finger-Fuck-33051.html',
        'md5': 'b1bc15b6412d33902d6e5952035fcabc',
        'info_dict': {
            'id': '33051',
            'display_id': 'Amateur-Finger-Fuck',
            'ext': 'mp4',
            'title': 'Amateur Finger Fuck',
            'description': 'Amateur solo finger fucking.',
            'thumbnail': 're:https?://.*\.jpg$',
            'age_limit': 18,
        }
    }
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        webpage = self._download_webpage(url, video_id)
        age_limit = self._rta_search(webpage)
        video_title = self._html_search_regex(
            r'name="title" value="(?P<title>[^"]*)"', webpage, 'title')
        video_description = self._html_search_regex(
            r'name="description" value="([^"]*)"', webpage, 'description', fatal=False)
        cfg_url = self._html_search_regex(
            r'flashvars\.config = escape\("([^"]+)"',
            webpage, 'flashvars.config')
        cfg_xml = self._download_xml(
            cfg_url, video_id, note='Downloading metadata',
            transform_source=fix_xml_ampersands)
        formats = [
            {
                'url': item.find('videoLink').text,
                'format_id': item.find('res').text,
            } for item in cfg_xml.findall('./quality/item')
        ]
        thumbnail = cfg_xml.find('./startThumb').text
        return {
            'id': video_id,
            'title': video_title,
            'description': video_description,
            'thumbnail': thumbnail,
            'formats': formats,
            'age_limit': age_limit,
        }
--- a/youtube_dl/extractor/tnaflix.py
+++ b/youtube_dl/extractor/tnaflix.py
@@ -0,0 +1,84 @@
 from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
 from ..utils import (
    parse_duration,
    fix_xml_ampersands,
 )
 class TNAFlixIE(InfoExtractor):
     """Extractor for tnaflix.com video pages.

     ``_real_extract`` reads ``_VALID_URL``, ``_TITLE_REGEX``,
     ``_DESCRIPTION_REGEX`` and ``_CONFIG_REGEX`` through ``self``, so a
     subclass can reuse the extraction logic by overriding these attributes.
     """

     _VALID_URL = r'https?://(?:www\.)?tnaflix\.com/(?P<cat_id>[\w-]+)/(?P<display_id>[\w-]+)/video(?P<id>\d+)'

     # When falsy, the title is obtained via ``_og_search_title`` instead.
     _TITLE_REGEX = None
     _DESCRIPTION_REGEX = r'<h3 itemprop="description">([^<]+)</h3>'
     # Captures the URL of the XML config document referenced by the page.
     _CONFIG_REGEX = r'flashvars\.config\s*=\s*escape\("([^"]+)"'

     _TEST = {
         'url': 'http://www.tnaflix.com/porn-stars/Carmella-Decesare-striptease/video553878',
         'md5': 'ecf3498417d09216374fc5907f9c6ec0',
         'info_dict': {
             'id': '553878',
             'display_id': 'Carmella-Decesare-striptease',
             'ext': 'mp4',
             'title': 'Carmella Decesare - striptease',
             'description': '',
             # Raw string: same value, but no invalid "\." escape sequence.
             'thumbnail': r're:https?://.*\.jpg$',
             'duration': 91,
             'age_limit': 18,
         }
     }

     def _real_extract(self, url):
         """Build the info dict for the video page at *url*.

         Downloads the web page, reads title/description/duration/age limit
         from it, then downloads the XML config referenced by the page and
         collects the thumbnail and one format per ``./quality/item`` entry.

         Returns a dict with the keys ``id``, ``display_id``, ``title``,
         ``description``, ``thumbnail``, ``duration``, ``age_limit`` and
         ``formats``.
         """
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('id')
         display_id = mobj.group('display_id')

         webpage = self._download_webpage(url, display_id)

         if self._TITLE_REGEX:
             title = self._html_search_regex(
                 self._TITLE_REGEX, webpage, 'title')
         else:
             title = self._og_search_title(webpage)

         description = self._html_search_regex(
             self._DESCRIPTION_REGEX, webpage, 'description',
             fatal=False, default='')

         age_limit = self._rta_search(webpage)

         duration = self._html_search_meta(
             'duration', webpage, 'duration', default=None)
         if duration:
             # The first character is dropped before parsing.
             # NOTE(review): presumably a leading "T" marker in the meta
             # value -- confirm against a live page.
             duration = parse_duration(duration[1:])

         cfg_url = self._html_search_regex(
             self._CONFIG_REGEX, webpage, 'flashvars.config')

         cfg_xml = self._download_xml(
             cfg_url, display_id, note='Downloading metadata',
             transform_source=fix_xml_ampersands)

         # The thumbnail is optional metadata; a config without <startThumb>
         # must not abort the extraction with an AttributeError.
         thumb_el = cfg_xml.find('./startThumb')
         thumbnail = thumb_el.text if thumb_el is not None else None

         formats = []
         for item in cfg_xml.findall('./quality/item'):
             link_el = item.find('videoLink')
             if link_el is None or not link_el.text:
                 # An item without a link cannot be turned into a format.
                 continue
             # Blank out the numeric value of the "speed" URL parameter.
             video_url = re.sub(r'speed=\d+', 'speed=', link_el.text)

             res_el = item.find('res')
             format_id = res_el.text if res_el is not None else None
             fmt = {
                 'url': video_url,
                 'format_id': format_id,
             }
             # A format id that starts with digits yields the height.
             m = re.search(r'^(\d+)', format_id or '')
             if m:
                 fmt['height'] = int(m.group(1))
             formats.append(fmt)
         self._sort_formats(formats)

         return {
             'id': video_id,
             'display_id': display_id,
             'title': title,
             'description': description,
             'thumbnail': thumbnail,
             'duration': duration,
             'age_limit': age_limit,
             'formats': formats,
         }
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -1336,7 +1336,7 @@ def parse_duration(s):
    s = s.strip()
    m = re.match(
-        r'(?:(?:(?P<hours>[0-9]+)\s*(?:[:h]|hours?)\s*)?(?P<mins>[0-9]+)\s*(?:[:m]|mins?|minutes?)\s*)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*(?:s|secs?|seconds?)?$', s)
+        r'(?i)(?:(?:(?P<hours>[0-9]+)\s*(?:[:h]|hours?)\s*)?(?P<mins>[0-9]+)\s*(?:[:m]|mins?|minutes?)\s*)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*(?:s|secs?|seconds?)?$', s)
    if not m:
        return None
    res = int(m.group('secs'))