Add support for www.ina.fr

2024-11-02 17:22:31 +01:00 · 2013-05-05 16:07:19 +02:00 · 2013-05-05 16:07:19 +02:00 · 7f5bd09baf
commit 7f5bd09baf
parent 02d5eb935f
2 changed files with 46 additions and 0 deletions
--- a/test/tests.json
+++ b/test/tests.json
@ -420,5 +420,14 @@
    "info_dict":{
      "title":"Tired of Link Building? Try BacklinkMyDomain.com!"
    }
  },
  {
    "name": "Ina",
    "url": "www.ina.fr/video/I12055569/francois-hollande-je-crois-que-c-est-clair-video.html",
    "file": "I12055569.mp4",
    "md5": "a667021bf2b41f8dc6049479d9bb38a3",
    "info_dict":{
      "title":"François Hollande \"Je crois que c'est clair\""
    }
  }
 ]
--- a/youtube_dl/InfoExtractors.py
+++ b/youtube_dl/InfoExtractors.py
@ -4124,7 +4124,43 @@ def _real_extract(self,url):
            'ext':      video_extension,
            'title':    video_title,
        }]
 class InaIE(InfoExtractor):
    """Information Extractor for Ina.fr

    Matches video page URLs of the form ``ina.fr/video/<id>/...`` where the
    id is the letter ``I`` followed by digits, then reads the media URL and
    the title out of the ``.mrss`` notice served by player.ina.fr for that id.
    """

    # The scheme and the "www." prefix are both optional. Dots are escaped so
    # that they only match a literal "." (an unescaped dot would also accept
    # hosts such as "wwwXina.fr").
    _VALID_URL = r'(?:https?://)?(?:www\.)?ina\.fr/video/(?P<id>I[0-9]+)/.*'
    IE_NAME = u'Ina'

    def _real_extract(self, url):
        """Return a one-element list describing the video behind *url*.

        The single entry is a dict with the keys 'id', 'url', 'ext' and
        'title'.

        Raises ExtractorError if *url* does not match _VALID_URL, or if the
        media URL or the title cannot be found in the downloaded notice.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        video_id = mobj.group('id')
        self.to_screen(u'video id : %s' % video_id)

        # The page URL itself is never fetched; everything needed is read
        # from the per-video notice addressed by the video id.
        mrss_url = 'http://player.ina.fr/notices/%s.mrss' % video_id
        self.to_screen(u'mrss url : %s' % mrss_url)
        webpage = self._download_webpage(mrss_url, video_id)
        self.report_extraction(video_id)

        # Only URLs ending in ".mp4" are accepted below, hence the fixed
        # extension.
        video_extension = 'mp4'
        media_re = (r'<media:player url="'
                    r'(?P<mp4url>http://mp4\.ina\.fr/[^"]+\.mp4)')
        mobj = re.search(media_re, webpage)
        if mobj is None:
            raise ExtractorError(u'Unable to extract media URL')
        video_url = mobj.group('mp4url')

        # The title is taken from the first <title> element whose content is
        # wrapped in a CDATA section.
        title_re = r'<title><!\[CDATA\[(?P<titre>.*?)]]></title>'
        mobj = re.search(title_re, webpage)
        if mobj is None:
            raise ExtractorError(u'Unable to extract title')
        video_title = mobj.group('titre')
        self.to_screen(u'Titre de la video : %s' % video_title)

        return [{
            'id':       video_id,
            'url':      video_url,
            'ext':      video_extension,
            'title':    video_title,
        }]
 def gen_extractors():
    """ Return a list of an instance of every supported extractor.
@ -4182,6 +4218,7 @@ def gen_extractors():
        TumblrIE(),
        BandcampIE(),
        RedTubeIE(),
        InaIE,
        GenericIE()
    ]