yt-dlp/yt_dlp/extractor/rtrfm.py

from __future__ import unicode_literals

from .common import InfoExtractor


class RTRFMIE(InfoExtractor):
    """Extractor for RTRFM (rtrfm.com.au) show and show-episode pages.

    Both ``/shows/<name>/`` and ``/show-episode/<name>-<date>/`` pages are
    accepted. The show key, broadcast date and show title are read from the
    player call embedded in the page, and the MP3 location is then requested
    from the restreams endpoint.
    """

    _VALID_URL = r'https?://(?:www\.)?rtrfm\.com\.au/(?:shows|show-episode)/(?P<id>[^/?\#&]+)'
    _TESTS = [
        {
            'url': 'https://rtrfm.com.au/shows/breakfast/',
            'md5': '46168394d3a5ce237cf47e85d0745413',
            'info_dict': {
                'id': 'breakfast-2021-11-16',
                'ext': 'mp3',
                'series': 'Breakfast with Taylah',
                'title': r're:^Breakfast with Taylah \d{4}-\d{2}-\d{2}$',
                'description': 'md5:0979c3ab1febfbec3f1ccb743633c611',
                'release_date': r're:^\d{8}$',
            },
            'skip': 'ID and md5 changes daily',
        },
        {
            'url': 'https://rtrfm.com.au/show-episode/breakfast-2021-11-11/',
            'md5': '396bedf1e40f96c62b30d4999202a790',
            'info_dict': {
                'id': 'breakfast-2021-11-11',
                'ext': 'mp3',
                'series': 'Breakfast with Taylah',
                'title': 'Breakfast with Taylah 2021-11-11',
                'description': 'md5:0979c3ab1febfbec3f1ccb743633c611',
                'release_date': '20211111',
            },
        },
        {
            'url': 'https://rtrfm.com.au/show-episode/breakfast-2020-06-01/',
            'md5': '594027f513ec36a24b15d65007a24dff',
            'info_dict': {
                'id': 'breakfast-2020-06-01',
                'ext': 'mp3',
                'series': 'Breakfast with Taylah',
                'title': 'Breakfast with Taylah 2020-06-01',
                'description': r're:^Breakfast with Taylah ',
                'release_date': '20200601',
            },
            'skip': 'This audio has expired',
        },
    ]

    def _real_extract(self, url):
        """Build the info dict for the episode referenced by ``url``.

        The page is downloaded and searched for a ``.playShow(...)`` or
        ``.playShowFrom(...)`` call whose first three quoted arguments are
        the show key, a ``YYYY-MM-DD`` date and the show title. The show key
        and date are then sent to the restreams endpoint, whose JSON reply
        carries the media URL under the ``u`` key.

        Returns a dict with ``id``, ``title``, ``series``, ``url``,
        ``release_date`` (``YYYYMMDD``) and ``description``.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        show, date, title = self._search_regex(
            r'''\.playShow(?:From)?\(['"](?P<show>[^'"]+)['"],\s*['"](?P<date>[0-9]{4}-[0-9]{2}-[0-9]{2})['"],\s*['"](?P<title>[^'"]+)['"]''',
            webpage, 'details', group=('show', 'date', 'title'))
        url = self._download_json(
            'https://restreams.rtrfm.com.au/rzz',
            show, 'Downloading MP3 URL', query={'n': show, 'd': date})['u']
        # This is the only indicator of an error until trying to download the URL and
        # downloads of mp4 URLs always fail (403 for current episodes, 404 for missing).
        if '.mp4' in url:
            # Drop the unusable URL first so that, should raise_no_formats
            # return instead of raising, no broken link is handed back.
            url = None
            self.raise_no_formats('Expired or no episode on this date', expected=True)
        return {
            'id': '%s-%s' % (show, date),
            'title': '%s %s' % (title, date),
            'series': title,
            'url': url,
            # The page supplies YYYY-MM-DD (enforced by the regex above), while
            # the release_date field is specified as YYYYMMDD.
            'release_date': date.replace('-', ''),
            'description': self._og_search_description(webpage),
        }
[rtrfm] Add extractor (#1628) Authored by: pabs3 2021-11-18 23:14:38 +01:00			`from __future__ import unicode_literals`

			`from .common import InfoExtractor`


			`class RTRFMIE(InfoExtractor):`
			`_VALID_URL = r'https?://(?:www\.)?rtrfm\.com\.au/(?:shows|show-episode)/(?P<id>[^/?\#&]+)'`
			`_TESTS = [`
			`{`
			`'url': 'https://rtrfm.com.au/shows/breakfast/',`
			`'md5': '46168394d3a5ce237cf47e85d0745413',`
			`'info_dict': {`
			`'id': 'breakfast-2021-11-16',`
			`'ext': 'mp3',`
			`'series': 'Breakfast with Taylah',`
			`'title': r're:^Breakfast with Taylah \d{4}-\d{2}-\d{2}$',`
			`'description': 'md5:0979c3ab1febfbec3f1ccb743633c611',`
			`},`
			`'skip': 'ID and md5 changes daily',`
			`},`
			`{`
			`'url': 'https://rtrfm.com.au/show-episode/breakfast-2021-11-11/',`
			`'md5': '396bedf1e40f96c62b30d4999202a790',`
			`'info_dict': {`
			`'id': 'breakfast-2021-11-11',`
			`'ext': 'mp3',`
			`'series': 'Breakfast with Taylah',`
			`'title': 'Breakfast with Taylah 2021-11-11',`
			`'description': 'md5:0979c3ab1febfbec3f1ccb743633c611',`
			`},`
			`},`
			`{`
			`'url': 'https://rtrfm.com.au/show-episode/breakfast-2020-06-01/',`
			`'md5': '594027f513ec36a24b15d65007a24dff',`
			`'info_dict': {`
			`'id': 'breakfast-2020-06-01',`
			`'ext': 'mp3',`
			`'series': 'Breakfast with Taylah',`
			`'title': 'Breakfast with Taylah 2020-06-01',`
			`'description': r're:^Breakfast with Taylah ',`
			`},`
			`'skip': 'This audio has expired',`
			`},`
			`]`

			`def _real_extract(self, url):`
			`display_id = self._match_id(url)`
			`webpage = self._download_webpage(url, display_id)`
			`show, date, title = self._search_regex(`
			`r'''\.playShow(?:From)?\(['"](?P<show>[^'"]+)['"],\s*['"](?P<date>[0-9]{4}-[0-9]{2}-[0-9]{2})['"],\s*['"](?P<title>[^'"]+)['"]''',`
			`webpage, 'details', group=('show', 'date', 'title'))`
			`url = self._download_json(`
			`'https://restreams.rtrfm.com.au/rzz',`
			`show, 'Downloading MP3 URL', query={'n': show, 'd': date})['u']`
			`# This is the only indicator of an error until trying to download the URL and`
			`# downloads of mp4 URLs always fail (403 for current episodes, 404 for missing).`
			`if '.mp4' in url:`
			`url = None`
			`self.raise_no_formats('Expired or no episode on this date', expected=True)`
			`return {`
			`'id': '%s-%s' % (show, date),`
			`'title': '%s %s' % (title, date),`
			`'series': title,`
			`'url': url,`
			`'release_date': date,`
			`'description': self._og_search_description(webpage),`
			`}`