yt-dlp/youtube_dl/extractor/fc2.py

#! -*- coding: utf-8 -*-
from __future__ import unicode_literals

import hashlib

from .common import InfoExtractor
from ..compat import (
    compat_urllib_parse,
    compat_urllib_request,
    compat_urlparse,
)
from ..utils import (
    ExtractorError,
)


class FC2IE(InfoExtractor):
    """Information extractor for video.fc2.com content pages.

    Optionally logs in with the configured credentials, downloads the
    content page for its Open Graph title/thumbnail, then queries
    ``ginfo.php`` to obtain the media file location.
    """
    _VALID_URL = r'^http://video\.fc2\.com/(?:[^/]+/)?content/(?P<id>[^/]+)'
    IE_NAME = 'fc2'
    _NETRC_MACHINE = 'fc2'
    _TESTS = [{
        'url': 'http://video.fc2.com/en/content/20121103kUan1KHs',
        'md5': 'a6ebe8ebe0396518689d963774a54eb7',
        'info_dict': {
            'id': '20121103kUan1KHs',
            'ext': 'flv',
            'title': 'Boxing again with Puff',
        },
    }, {
        'url': 'http://video.fc2.com/en/content/20150125cEva0hDn/',
        'info_dict': {
            'id': '20150125cEva0hDn',
            'ext': 'mp4',
        },
        'params': {
            'username': 'ytdl@yt-dl.org',
            'password': '(snip)',
        },
        # 'skip' is a test-case key, not a download parameter, so it must
        # live next to 'params' rather than inside it.
        'skip': 'requires actual password',
    }]

    def _login(self):
        """Log in to FC2 with the configured credentials.

        Returns False when no username/password is available or when the
        login response does not contain the success marker (a warning is
        reported in the latter case); returns True after the follow-up
        redirect page has been fetched.
        """
        (username, password) = self._get_login_info()
        if username is None or password is None:
            return False

        # Log in
        login_form_strs = {
            'email': username,
            'password': password,
            'done': 'video',
            'Submit': ' Login ',
        }

        # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
        # chokes on unicode
        login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k, v in login_form_strs.items())
        login_data = compat_urllib_parse.urlencode(login_form).encode('utf-8')
        request = compat_urllib_request.Request(
            'https://secure.id.fc2.com/index.php?mode=login&switch_language=en', login_data)

        login_results = self._download_webpage(request, None, note='Logging in', errnote='Unable to log in')
        if 'mode=redirect&login=done' not in login_results:
            self.report_warning('unable to log in: bad username or password')
            return False

        # this is also needed
        login_redir = compat_urllib_request.Request('http://id.fc2.com/?mode=redirect&login=done')
        self._download_webpage(
            login_redir, None, note='Login redirect', errnote='Login redirect failed')

        return True

    def _real_extract(self, url):
        """Extract the info dict (id, title, url, ext, thumbnail) for ``url``.

        Raises ExtractorError when the info response lacks the ``filepath``
        or ``mid`` value needed to build the media URL.
        """
        video_id = self._match_id(url)
        self._login()
        webpage = self._download_webpage(url, video_id)
        self._downloader.cookiejar.clear_session_cookies()  # must clear
        self._login()

        title = self._og_search_title(webpage)
        thumbnail = self._og_search_thumbnail(webpage)
        refer = url.replace('/content/', '/a/content/')

        # Request token: md5 of the video id followed by a fixed suffix
        mimi = hashlib.md5((video_id + '_gGddgPfeaf_gzyr').encode('utf-8')).hexdigest()

        # quote() lives in the parse module; the request module only
        # re-exports it as an implementation detail. Dots are escaped by
        # hand because quote() never escapes them.
        info_url = (
            "http://video.fc2.com/ginfo.php?mimi={1:s}&href={2:s}&v={0:s}&fversion=WIN%2011%2C6%2C602%2C180&from=2&otag=0&upid={0:s}&tk=null&".
            format(video_id, mimi, compat_urllib_parse.quote(refer, safe='').replace('.', '%2E')))

        info_webpage = self._download_webpage(
            info_url, video_id, note='Downloading info page')
        # The info page body is a query string; every value is a list
        info = compat_urlparse.parse_qs(info_webpage)

        if 'err_code' in info:
            # most of the time we can still download video even if err_code is 403 or 602
            self.report_warning(
                'Error code was: %s... but still trying' % info['err_code'][0])

        if 'filepath' not in info:
            raise ExtractorError('Cannot download file. Are you logged in?')
        # parse_qs drops blank values, so an empty "mid=" shows up as a
        # missing key; fail with an extractor error instead of a KeyError.
        if 'mid' not in info:
            raise ExtractorError('Cannot download file: no "mid" value in info response')

        video_url = info['filepath'][0] + '?mid=' + info['mid'][0]
        # Prefer the title from the info response; otherwise keep the
        # Open Graph title taken from the content page.
        title_info = info.get('title')
        if title_info:
            title = title_info[0]

        return {
            'id': video_id,
            'title': title,
            'url': video_url,
            'ext': 'flv',
            'thumbnail': thumbnail,
        }
[fc2] Add new extractor (Fixes #2877) This commit has been recreated, since there seems to have been a problem with GitHub; the PR doesn't have a branch. 2014-05-13 09:58:32 +02:00			`#! -- coding: utf-8 --`
			`from __future__ import unicode_literals`

			`import hashlib`

			`from .common import InfoExtractor`
Fix imports and general cleanup · Import from compat what comes from compat. Yes, some names are available in utils too, but that's an implementation detail. · Use _match_id consistently whenever possible · Fix some outdated tests · Use consistent valid URL (always match the whole protocol, no ^ at start required) · Use modern test definitions 2014-12-13 12:24:42 +01:00			`from ..compat import (`
Fix issues with fc2 Fix issues #2912 and #3171 2014-07-06 01:42:41 +02:00			`compat_urllib_parse,`
[fc2] Add new extractor (Fixes #2877) This commit has been recreated, since there seems to have been a problem with GitHub; the PR doesn't have a branch. 2014-05-13 09:58:32 +02:00			`compat_urllib_request,`
			`compat_urlparse,`
			`)`
Fix imports and general cleanup · Import from compat what comes from compat. Yes, some names are available in utils too, but that's an implementation detail. · Use _match_id consistently whenever possible · Fix some outdated tests · Use consistent valid URL (always match the whole protocol, no ^ at start required) · Use modern test definitions 2014-12-13 12:24:42 +01:00			`from ..utils import (`
			`ExtractorError,`
			`)`
[fc2] Add new extractor (Fixes #2877) This commit has been recreated, since there seems to have been a problem with GitHub; the PR doesn't have a branch. 2014-05-13 09:58:32 +02:00

			`class FC2IE(InfoExtractor):`
Fix imports and general cleanup · Import from compat what comes from compat. Yes, some names are available in utils too, but that's an implementation detail. · Use _match_id consistently whenever possible · Fix some outdated tests · Use consistent valid URL (always match the whole protocol, no ^ at start required) · Use modern test definitions 2014-12-13 12:24:42 +01:00			`_VALID_URL = r'^http://video\.fc2\.com/(?:[^/]+/)?content/(?P<id>[^/]+)'`
[fc2] Add new extractor (Fixes #2877) This commit has been recreated, since there seems to have been a problem with GitHub; the PR doesn't have a branch. 2014-05-13 09:58:32 +02:00			`IE_NAME = 'fc2'`
Fix issues with fc2 Fix issues #2912 and #3171 2014-07-06 01:42:41 +02:00			`_NETRC_MACHINE = 'fc2'`
Merge remote-tracking branch 'h-collector/master' Conflicts: youtube_dl/extractor/fc2.py 2015-01-25 03:37:51 +01:00			`_TESTS = [{`
[fc2] Add new extractor (Fixes #2877) This commit has been recreated, since there seems to have been a problem with GitHub; the PR doesn't have a branch. 2014-05-13 09:58:32 +02:00			`'url': 'http://video.fc2.com/en/content/20121103kUan1KHs',`
			`'md5': 'a6ebe8ebe0396518689d963774a54eb7',`
			`'info_dict': {`
			`'id': '20121103kUan1KHs',`
			`'ext': 'flv',`
			`'title': 'Boxing again with Puff',`
			`},`
Merge remote-tracking branch 'h-collector/master' Conflicts: youtube_dl/extractor/fc2.py 2015-01-25 03:37:51 +01:00			`}, {`
			`'url': 'http://video.fc2.com/en/content/20150125cEva0hDn/',`
			`'info_dict': {`
			`'id': '20150125cEva0hDn',`
			`'ext': 'mp4',`
			`},`
			`'params': {`
			`'username': 'ytdl@yt-dl.org',`
			`'password': '(snip)',`
			`'skip': 'requires actual password'`
			`}`
			`}]`
Fix issues with fc2 Fix issues #2912 and #3171 2014-07-06 01:42:41 +02:00
			`def _login(self):`
			`(username, password) = self._get_login_info()`
Merge remote-tracking branch 'h-collector/master' Conflicts: youtube_dl/extractor/fc2.py 2015-01-25 03:37:51 +01:00			`if username is None or password is None:`
			`return False`
Fix issues with fc2 Fix issues #2912 and #3171 2014-07-06 01:42:41 +02:00
			`# Log in`
			`login_form_strs = {`
Merge remote-tracking branch 'h-collector/master' Conflicts: youtube_dl/extractor/fc2.py 2015-01-25 03:37:51 +01:00			`'email': username,`
Fix issues with fc2 Fix issues #2912 and #3171 2014-07-06 01:42:41 +02:00			`'password': password,`
Merge remote-tracking branch 'h-collector/master' Conflicts: youtube_dl/extractor/fc2.py 2015-01-25 03:37:51 +01:00			`'done': 'video',`
			`'Submit': ' Login ',`
Fix issues with fc2 Fix issues #2912 and #3171 2014-07-06 01:42:41 +02:00			`}`

			`# Convert to UTF-8 before urlencode because Python 2.x's urlencode`
			`# chokes on unicode`
			`login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k, v in login_form_strs.items())`
			`login_data = compat_urllib_parse.urlencode(login_form).encode('utf-8')`
Merge remote-tracking branch 'h-collector/master' Conflicts: youtube_dl/extractor/fc2.py 2015-01-25 03:37:51 +01:00			`request = compat_urllib_request.Request(`
Fix issues with fc2 Fix issues #2912 and #3171 2014-07-06 01:42:41 +02:00			`'https://secure.id.fc2.com/index.php?mode=login&switch_language=en', login_data)`

			`login_results = self._download_webpage(request, None, note='Logging in', errnote='Unable to log in')`
			`if 'mode=redirect&login=done' not in login_results:`
Merge remote-tracking branch 'h-collector/master' Conflicts: youtube_dl/extractor/fc2.py 2015-01-25 03:37:51 +01:00			`self.report_warning('unable to log in: bad username or password')`
Fix issues with fc2 Fix issues #2912 and #3171 2014-07-06 01:42:41 +02:00			`return False`
Merge remote-tracking branch 'h-collector/master' Conflicts: youtube_dl/extractor/fc2.py 2015-01-25 03:37:51 +01:00
Fix issues with fc2 Fix issues #2912 and #3171 2014-07-06 01:42:41 +02:00			`# this is also needed`
			`login_redir = compat_urllib_request.Request('http://id.fc2.com/?mode=redirect&login=done')`
Merge remote-tracking branch 'h-collector/master' Conflicts: youtube_dl/extractor/fc2.py 2015-01-25 03:37:51 +01:00			`self._download_webpage(`
			`login_redir, None, note='Login redirect', errnote='Login redirect failed')`
Fix issues with fc2 Fix issues #2912 and #3171 2014-07-06 01:42:41 +02:00
			`return True`
[fc2] Add new extractor (Fixes #2877) This commit has been recreated, since there seems to have been a problem with GitHub; the PR doesn't have a branch. 2014-05-13 09:58:32 +02:00
			`def _real_extract(self, url):`
Fix imports and general cleanup · Import from compat what comes from compat. Yes, some names are available in utils too, but that's an implementation detail. · Use _match_id consistently whenever possible · Fix some outdated tests · Use consistent valid URL (always match the whole protocol, no ^ at start required) · Use modern test definitions 2014-12-13 12:24:42 +01:00			`video_id = self._match_id(url)`
Merge remote-tracking branch 'h-collector/master' Conflicts: youtube_dl/extractor/fc2.py 2015-01-25 03:37:51 +01:00			`self._login()`
[fc2] Add new extractor (Fixes #2877) This commit has been recreated, since there seems to have been a problem with GitHub; the PR doesn't have a branch. 2014-05-13 09:58:32 +02:00			`webpage = self._download_webpage(url, video_id)`
			`self._downloader.cookiejar.clear_session_cookies() # must clear`
Fix issues with fc2 Fix issues #2912 and #3171 2014-07-06 01:42:41 +02:00			`self._login()`
[fc2] Add new extractor (Fixes #2877) This commit has been recreated, since there seems to have been a problem with GitHub; the PR doesn't have a branch. 2014-05-13 09:58:32 +02:00
			`title = self._og_search_title(webpage)`
			`thumbnail = self._og_search_thumbnail(webpage)`
			`refer = url.replace('/content/', '/a/content/')`

[fc2] Encode the string used for the md5 checksum In python 3 it must be a bytes object. 2014-05-31 14:40:05 +02:00			`mimi = hashlib.md5((video_id + '_gGddgPfeaf_gzyr').encode('utf-8')).hexdigest()`
[fc2] Add new extractor (Fixes #2877) This commit has been recreated, since there seems to have been a problem with GitHub; the PR doesn't have a branch. 2014-05-13 09:58:32 +02:00
			`info_url = (`
			`"http://video.fc2.com/ginfo.php?mimi={1:s}&href={2:s}&v={0:s}&fversion=WIN%2011%2C6%2C602%2C180&from=2&otag=0&upid={0:s}&tk=null&".`
PEP8 applied 2014-11-23 20:41:03 +01:00			`format(video_id, mimi, compat_urllib_request.quote(refer, safe='').replace('.', '%2E')))`
[fc2] Add new extractor (Fixes #2877) This commit has been recreated, since there seems to have been a problem with GitHub; the PR doesn't have a branch. 2014-05-13 09:58:32 +02:00
			`info_webpage = self._download_webpage(`
			`info_url, video_id, note='Downloading info page')`
			`info = compat_urlparse.parse_qs(info_webpage)`

			`if 'err_code' in info:`
Fix issues with fc2 Fix issues #2912 and #3171 2014-07-06 01:42:41 +02:00			`# most of the time we can still download wideo even if err_code is 403 or 602`
Merge remote-tracking branch 'h-collector/master' Conflicts: youtube_dl/extractor/fc2.py 2015-01-25 03:37:51 +01:00			`self.report_warning(`
			`'Error code was: %s... but still trying' % info['err_code'][0])`
Update fc2.py 2014-07-06 01:48:07 +02:00
Fix issues with fc2 Fix issues #2912 and #3171 2014-07-06 01:42:41 +02:00			`if 'filepath' not in info:`
Merge remote-tracking branch 'h-collector/master' Conflicts: youtube_dl/extractor/fc2.py 2015-01-25 03:37:51 +01:00			`raise ExtractorError('Cannot download file. Are you logged in?')`
[fc2] Add new extractor (Fixes #2877) This commit has been recreated, since there seems to have been a problem with GitHub; the PR doesn't have a branch. 2014-05-13 09:58:32 +02:00
			`video_url = info['filepath'][0] + '?mid=' + info['mid'][0]`
[fc2] Fall back to webpage title if needed 2014-06-07 16:52:11 +02:00			`title_info = info.get('title')`
			`if title_info:`
			`title = title_info[0]`
[fc2] Add new extractor (Fixes #2877) This commit has been recreated, since there seems to have been a problem with GitHub; the PR doesn't have a branch. 2014-05-13 09:58:32 +02:00
			`return {`
			`'id': video_id,`
[fc2] Fall back to webpage title if needed 2014-06-07 16:52:11 +02:00			`'title': title,`
[fc2] Add new extractor (Fixes #2877) This commit has been recreated, since there seems to have been a problem with GitHub; the PR doesn't have a branch. 2014-05-13 09:58:32 +02:00			`'url': video_url,`
			`'ext': 'flv',`
			`'thumbnail': thumbnail,`
			`}`