yt-dlp/yt_dlp/extractor/chirbit.py

import re

from .common import InfoExtractor
from ..compat import compat_b64decode
from ..utils import parse_duration


class ChirbitIE(InfoExtractor):
    """Extractor for a single audio clip addressed by a chirb.it URL.

    Accepts the plain short link as well as the ``wp/``, ``pl/`` and
    ``fb_chirbit_player.swf?key=`` variants; all of them carry the same
    alphanumeric clip id.
    """

    IE_NAME = 'chirbit'
    _VALID_URL = r'https?://(?:www\.)?chirb\.it/(?:(?:wp|pl)/|fb_chirbit_player\.swf\?key=)?(?P<id>[\da-zA-Z]+)'
    _TESTS = [
        {
            'url': 'http://chirb.it/be2abG',
            'info_dict': {
                'id': 'be2abG',
                'ext': 'mp3',
                'title': 'md5:f542ea253f5255240be4da375c6a5d7e',
                'description': 'md5:f24a4e22a71763e32da5fed59e47c770',
                'duration': 306,
                'uploader': 'Gerryaudio',
            },
            'params': {
                'skip_download': True,
            },
        },
        {
            'url': 'https://chirb.it/fb_chirbit_player.swf?key=PrIPv5',
            'only_matching': True,
        },
        {
            'url': 'https://chirb.it/wp/MN58c2',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Build the info dict for one clip.

        The clip id is taken from ``url`` and the canonical
        ``http://chirb.it/<id>`` page is downloaded regardless of which URL
        variant was supplied. The media URL and the title are required;
        description, duration and uploader are optional and end up as
        ``None`` when they cannot be found.
        """
        clip_id = self._match_id(url)
        page = self._download_webpage('http://chirb.it/%s' % clip_id, clip_id)

        # The page stores the media location in a quoted ``data-fd``
        # attribute; the back-reference makes the value end at the same
        # quote character that opened it.
        obfuscated = self._search_regex(
            r'data-fd=(["\'])(?P<url>(?:(?!\1).)+)\1',
            page, 'data fd', group='url')

        # Reverse engineered from https://chirb.it/js/chirbit.player.js (look
        # for soundURL): the attribute is base64 text written backwards.
        base64_text = obfuscated[::-1]
        media_url = compat_b64decode(base64_text).decode('utf-8')

        info = {
            'id': clip_id,
            'url': media_url,
        }

        info['title'] = self._search_regex(
            r'class=["\']chirbit-title["\'][^>]*>([^<]+)', page, 'title')

        info['description'] = self._search_regex(
            r'<h3>Description</h3>\s*<pre[^>]*>([^<]+)</pre>',
            page, 'description', default=None)

        length_text = self._search_regex(
            r'class=["\']c-length["\'][^>]*>([^<]+)',
            page, 'duration', fatal=False)
        info['duration'] = parse_duration(length_text)

        info['uploader'] = self._search_regex(
            r'id=["\']chirbit-username["\'][^>]*>([^<]+)',
            page, 'uploader', fatal=False)

        return info


class ChirbitProfileIE(InfoExtractor):
    """Extractor that turns a chirbit.com profile page into a playlist.

    Every clip found on the profile page becomes a playlist entry pointing
    at its ``chirb.it/<id>`` short link.
    """

    IE_NAME = 'chirbit:profile'
    # The id stops at '/', '?' or '#', so a query string or fragment appended
    # to the profile URL does not leak into the playlist id.
    _VALID_URL = r'https?://(?:www\.)?chirbit\.com/(?:rss/)?(?P<id>[^/?#]+)'
    _TEST = {
        'url': 'http://chirbit.com/ScarletBeauty',
        'info_dict': {
            'id': 'ScarletBeauty',
        },
        'playlist_mincount': 3,
    }

    def _real_extract(self, url):
        """Return a playlist result for the profile at ``url``.

        The profile id comes from ``_VALID_URL``; the page at ``url`` itself
        is downloaded and scanned for clip ids.
        """
        profile_id = self._match_id(url)

        webpage = self._download_webpage(url, profile_id)

        # Clip ids are read from the ``copy-btn-<id>`` id attribute of
        # <input> elements. The first regex group is only the quote
        # character and is discarded.
        entries = [
            self.url_result(self._proto_relative_url('//chirb.it/' + video_id))
            for _, video_id in re.findall(
                r'<input[^>]+id=([\'"])copy-btn-(?P<id>[0-9a-zA-Z]+)\1',
                webpage)]

        return self.playlist_result(entries, profile_id)
[chirbit:profile] Fix extraction 2016-10-14 18:01:46 +02:00			`import re`
[chirbit] Fix extraction (Closes #10296) 2016-08-11 18:37:56 +02:00
[chirbit] Add new extractor. 2015-02-20 10:49:45 +01:00			`from .common import InfoExtractor`
Switch codebase to use compat_b64decode 2018-01-23 16:23:12 +01:00			`from ..compat import compat_b64decode`
[chirbit] Fix extraction (Closes #10296) 2016-08-11 18:37:56 +02:00			`from ..utils import parse_duration`
[chirbit] Add new extractor. 2015-02-20 10:49:45 +01:00

			`class ChirbitIE(InfoExtractor):`
[chirbit] Clarify extractors' IE_NAMEs 2015-02-23 16:28:14 +01:00			`IE_NAME = 'chirbit'`
[chirbit] Simplify and extract profile from RSS (#5032) 2015-02-23 16:15:16 +01:00			`_VALID_URL = r'https?://(?:www\.)?chirb\.it/(?:(?:wp\|pl)/\|fb_chirbit_player\.swf\?key=)?(?P<id>[\da-zA-Z]+)'`
			`_TESTS = [{`
[chirbit] Fix extraction (Closes #10296) 2016-08-11 18:37:56 +02:00			`'url': 'http://chirb.it/be2abG',`
[chirbit] Add new extractor. 2015-02-20 10:49:45 +01:00			`'info_dict': {`
[chirbit] Fix extraction (Closes #10296) 2016-08-11 18:37:56 +02:00			`'id': 'be2abG',`
[chirbit] Add new extractor. 2015-02-20 10:49:45 +01:00			`'ext': 'mp3',`
[chirbit] Fix extraction (Closes #10296) 2016-08-11 18:37:56 +02:00			`'title': 'md5:f542ea253f5255240be4da375c6a5d7e',`
			`'description': 'md5:f24a4e22a71763e32da5fed59e47c770',`
			`'duration': 306,`
[chirbit] Extract uploader 2017-01-22 20:27:38 +01:00			`'uploader': 'Gerryaudio',`
[chirbit] Fix extraction (Closes #10296) 2016-08-11 18:37:56 +02:00			`},`
			`'params': {`
			`'skip_download': True,`
[chirbit] Add new extractor. 2015-02-20 10:49:45 +01:00			`}`
[chirbit] Simplify and extract profile from RSS (#5032) 2015-02-23 16:15:16 +01:00			`}, {`
			`'url': 'https://chirb.it/fb_chirbit_player.swf?key=PrIPv5',`
			`'only_matching': True,`
[chirbit] Fix extraction (Closes #10296) 2016-08-11 18:37:56 +02:00			`}, {`
			`'url': 'https://chirb.it/wp/MN58c2',`
			`'only_matching': True,`
[chirbit] Simplify and extract profile from RSS (#5032) 2015-02-23 16:15:16 +01:00			`}]`
[chirbit] Add new extractor. 2015-02-20 10:49:45 +01:00
			`def _real_extract(self, url):`
[chirbit] Simplify and extract profile from RSS (#5032) 2015-02-23 16:15:16 +01:00			`audio_id = self._match_id(url)`

			`webpage = self._download_webpage(`
			`'http://chirb.it/%s' % audio_id, audio_id)`

[chirbit] Fix extraction (Closes #10296) 2016-08-11 18:37:56 +02:00			`data_fd = self._search_regex(`
			`r'data-fd=(["\'])(?P<url>(?:(?!\1).)+)\1',`
			`webpage, 'data fd', group='url')`

			`# Reverse engineered from https://chirb.it/js/chirbit.player.js (look`
			`# for soundURL)`
Switch codebase to use compat_b64decode 2018-01-23 16:23:12 +01:00			`audio_url = compat_b64decode(data_fd[::-1]).decode('utf-8')`
[chirbit] Add new extractor. 2015-02-20 10:49:45 +01:00
[chirbit] Simplify and extract profile from RSS (#5032) 2015-02-23 16:15:16 +01:00			`title = self._search_regex(`
[chirbit] Fix extraction (Closes #10296) 2016-08-11 18:37:56 +02:00			`r'class=["\']chirbit-title["\'][^>]*>([^<]+)', webpage, 'title')`
			`description = self._search_regex(`
			`r'<h3>Description</h3>\s*<pre[^>]*>([^<]+)</pre>',`
			`webpage, 'description', default=None)`
			`duration = parse_duration(self._search_regex(`
			`r'class=["\']c-length["\'][^>]*>([^<]+)',`
			`webpage, 'duration', fatal=False))`
[chirbit] Extract uploader 2017-01-22 20:27:38 +01:00			`uploader = self._search_regex(`
			`r'id=["\']chirbit-username["\'][^>]*>([^<]+)',`
			`webpage, 'uploader', fatal=False)`
[chirbit] Add new extractor. 2015-02-20 10:49:45 +01:00
			`return {`
[chirbit] Simplify and extract profile from RSS (#5032) 2015-02-23 16:15:16 +01:00			`'id': audio_id,`
			`'url': audio_url,`
			`'title': title,`
[chirbit] Fix extraction (Closes #10296) 2016-08-11 18:37:56 +02:00			`'description': description,`
[chirbit] Simplify and extract profile from RSS (#5032) 2015-02-23 16:15:16 +01:00			`'duration': duration,`
[chirbit] Extract uploader 2017-01-22 20:27:38 +01:00			`'uploader': uploader,`
[chirbit] Add new extractor. 2015-02-20 10:49:45 +01:00			`}`
[chirbit] add profile extractor. 2015-02-20 14:48:12 +01:00
[chirbit] Simplify and extract profile from RSS (#5032) 2015-02-23 16:15:16 +01:00
[chirbit] add profile extractor. 2015-02-20 14:48:12 +01:00			`class ChirbitProfileIE(InfoExtractor):`
[chirbit] Clarify extractors' IE_NAMEs 2015-02-23 16:28:14 +01:00			`IE_NAME = 'chirbit:profile'`
Improve some _VALID_URLs 2016-09-08 13:29:05 +02:00			`_VALID_URL = r'https?://(?:www\.)?chirbit\.com/(?:rss/)?(?P<id>[^/]+)'`
[chirbit] add profile extractor. 2015-02-20 14:48:12 +01:00			`_TEST = {`
			`'url': 'http://chirbit.com/ScarletBeauty',`
			`'info_dict': {`
[chirbit] Simplify and extract profile from RSS (#5032) 2015-02-23 16:15:16 +01:00			`'id': 'ScarletBeauty',`
			`},`
			`'playlist_mincount': 3,`
[chirbit] add profile extractor. 2015-02-20 14:48:12 +01:00			`}`

			`def _real_extract(self, url):`
			`profile_id = self._match_id(url)`

[chirbit:profile] Fix extraction 2016-10-14 18:01:46 +02:00			`webpage = self._download_webpage(url, profile_id)`
[chirbit] add profile extractor. 2015-02-20 14:48:12 +01:00
[chirbit] Simplify and extract profile from RSS (#5032) 2015-02-23 16:15:16 +01:00			`entries = [`
[chirbit:profile] Fix extraction 2016-10-14 18:01:46 +02:00			`self.url_result(self._proto_relative_url('//chirb.it/' + video_id))`
			`for _, video_id in re.findall(r'<input[^>]+id=([\'"])copy-btn-(?P<id>[0-9a-zA-Z]+)\1', webpage)]`
[chirbit] add profile extractor. 2015-02-20 14:48:12 +01:00
[chirbit:profile] Fix extraction 2016-10-14 18:01:46 +02:00			`return self.playlist_result(entries, profile_id)`