yt-dlp/yt_dlp/extractor/hearthisat.py

# coding: utf-8
from __future__ import unicode_literals


from .common import InfoExtractor
from ..utils import (
    determine_ext,
    KNOWN_EXTENSIONS,
    str_to_int,
)


class HearThisAtIE(InfoExtractor):
    """Extractor for single tracks hosted on hearthis.at.

    Track metadata and media URLs are read from the JSON document served by
    the ``api-v2.hearthis.at`` host under the same path as the public track
    page.
    """

    _VALID_URL = r'https?://(?:www\.)?hearthis\.at/(?P<artist>[^/]+)/(?P<title>[A-Za-z0-9\-]+)/?$'
    _PLAYLIST_URL = 'https://hearthis.at/playlist.php'
    _TESTS = [{
        'url': 'https://hearthis.at/moofi/dr-kreep',
        'md5': 'ab6ec33c8fed6556029337c7885eb4e0',
        'info_dict': {
            'id': '150939',
            'ext': 'wav',
            'title': 'Moofi - Dr. Kreep',
            'thumbnail': r're:^https?://.*\.jpg$',
            'timestamp': 1421564134,
            'description': 'md5:1adb0667b01499f9d27e97ddfd53852a',
            'upload_date': '20150118',
            'view_count': int,
            'duration': 71,
            'genre': 'Experimental',
        }
    }, {
        # 'download' link redirects to the original webpage
        'url': 'https://hearthis.at/twitchsf/dj-jim-hopkins-totally-bitchin-80s-dance-mix/',
        'md5': '5980ceb7c461605d30f1f039df160c6e',
        'info_dict': {
            'id': '811296',
            'ext': 'mp3',
            'title': 'TwitchSF - DJ Jim Hopkins -  Totally Bitchin\' 80\'s Dance Mix!',
            'description': 'md5:ef26815ca8f483272a87b137ff175be2',
            'upload_date': '20160328',
            'timestamp': 1459186146,
            'thumbnail': r're:^https?://.*\.jpg$',
            'view_count': int,
            'duration': 4360,
            'genre': 'Dance',
        },
    }]

    def _real_extract(self, url):
        """Build the info dict for the track at ``url``.

        The track JSON is downloaded from the API host and mapped onto the
        returned dict: ``id``, ``display_id``, ``title``, ``formats``,
        ``thumbnail``, ``description``, ``duration``, ``timestamp``,
        ``view_count`` and ``genre``. Up to two formats are offered: the MP3
        stream (``stream_url``) and, when its file extension is recognised,
        the uploader-provided download (``download_url``).
        """
        m = self._match_valid_url(url)
        display_id = '{artist:s} - {title:s}'.format(**m.groupdict())

        # Only the scheme/host prefix is rewritten to point at the API host.
        # The artist/title path (and any trailing slash) is kept verbatim so
        # that slugs containing 'www.' or 'hearthis.at' are not corrupted.
        path_start = m.start('artist')
        api_host = url[:path_start].replace('www.', '').replace('hearthis.at', 'api-v2.hearthis.at')
        api_url = api_host + url[path_start:]
        data_json = self._download_json(api_url, display_id)

        track_id = data_json.get('id')
        # 'user' may be absent or null; fall back to an empty mapping instead
        # of failing on the attribute lookup below.
        artist_json = data_json.get('user') or {}
        username = artist_json.get('username')
        track_title = data_json.get('title')
        # Prefix the uploader name only when there is one, so the literal
        # text 'None' never ends up in the title.
        title = '{} - {}'.format(username, track_title) if username else track_title
        genre = data_json.get('genre')
        description = data_json.get('description')
        thumbnail = data_json.get('artwork_url') or data_json.get('thumb')
        view_count = str_to_int(data_json.get('playback_count'))
        duration = str_to_int(data_json.get('duration'))
        timestamp = data_json.get('release_timestamp')

        formats = []
        mp3_url = data_json.get('stream_url')

        if mp3_url:
            formats.append({
                'format_id': 'mp3',
                'vcodec': 'none',
                'acodec': 'mp3',
                'url': mp3_url,
                'ext': 'mp3',
            })

        download_url = data_json.get('download_url')
        if download_url:
            # A missing 'download_filename' must not abort extraction: the
            # extension is then unrecognised and the download format is
            # skipped, leaving the MP3 stream (if any) usable.
            ext = determine_ext(data_json.get('download_filename'))
            if ext in KNOWN_EXTENSIONS:
                formats.append({
                    'format_id': ext,
                    'vcodec': 'none',
                    'ext': ext,
                    'url': download_url,
                    'acodec': ext,
                    'quality': 2,  # Usually better quality
                })
        self._sort_formats(formats)

        return {
            'id': track_id,
            'display_id': display_id,
            'title': title,
            'formats': formats,
            'thumbnail': thumbnail,
            'description': description,
            'duration': duration,
            'timestamp': timestamp,
            'view_count': view_count,
            'genre': genre,
        }
[hearthisat] Add new extractor (Closes #4743) 2015-01-21 20:47:55 +01:00			`# coding: utf-8`
			`from __future__ import unicode_literals`


			`from .common import InfoExtractor`
			`from ..utils import (`
[HearThisAtIE] Fix extractor (#742) Closes: #740 Authored by: Ashish0804 2021-08-20 21:39:59 +02:00			`determine_ext,`
[hearthisat] Detect invalid download links (fixes #9440) 2016-05-15 09:35:31 +02:00			`KNOWN_EXTENSIONS,`
[hearthisat] Add new extractor (Closes #4743) 2015-01-21 20:47:55 +01:00			`str_to_int,`
			`)`


			`class HearThisAtIE(InfoExtractor):`
			`_VALID_URL = r'https?://(?:www\.)?hearthis\.at/(?P<artist>[^/]+)/(?P<title>[A-Za-z0-9\-]+)/?$'`
			`_PLAYLIST_URL = 'https://hearthis.at/playlist.php'`
[hearthisat] Detect invalid download links (fixes #9440) 2016-05-15 09:35:31 +02:00			`_TESTS = [{`
[hearthisat] Add new extractor (Closes #4743) 2015-01-21 20:47:55 +01:00			`'url': 'https://hearthis.at/moofi/dr-kreep',`
[hearthisat] Add support for more high-quality download links 2015-01-22 12:04:07 +01:00			`'md5': 'ab6ec33c8fed6556029337c7885eb4e0',`
[hearthisat] Add new extractor (Closes #4743) 2015-01-21 20:47:55 +01:00			`'info_dict': {`
			`'id': '150939',`
[hearthisat] Add support for more high-quality download links 2015-01-22 12:04:07 +01:00			`'ext': 'wav',`
[hearthisat] Add new extractor (Closes #4743) 2015-01-21 20:47:55 +01:00			`'title': 'Moofi - Dr. Kreep',`
Fix "invalid escape sequences" error on Python 3.6 2017-01-02 13:08:07 +01:00			`'thumbnail': r're:^https?://.*\.jpg$',`
[hearthisat] Add new extractor (Closes #4743) 2015-01-21 20:47:55 +01:00			`'timestamp': 1421564134,`
[HearThisAtIE] Fix extractor (#742) Closes: #740 Authored by: Ashish0804 2021-08-20 21:39:59 +02:00			`'description': 'md5:1adb0667b01499f9d27e97ddfd53852a',`
[hearthisat] Add new extractor (Closes #4743) 2015-01-21 20:47:55 +01:00			`'upload_date': '20150118',`
			`'view_count': int,`
			`'duration': 71,`
[HearThisAtIE] Fix extractor (#742) Closes: #740 Authored by: Ashish0804 2021-08-20 21:39:59 +02:00			`'genre': 'Experimental',`
[hearthisat] Add new extractor (Closes #4743) 2015-01-21 20:47:55 +01:00			`}`
[hearthisat] Detect invalid download links (fixes #9440) 2016-05-15 09:35:31 +02:00			`}, {`
			`# 'download' link redirects to the original webpage`
			`'url': 'https://hearthis.at/twitchsf/dj-jim-hopkins-totally-bitchin-80s-dance-mix/',`
			`'md5': '5980ceb7c461605d30f1f039df160c6e',`
			`'info_dict': {`
			`'id': '811296',`
			`'ext': 'mp3',`
			`'title': 'TwitchSF - DJ Jim Hopkins - Totally Bitchin\' 80\'s Dance Mix!',`
[HearThisAtIE] Fix extractor (#742) Closes: #740 Authored by: Ashish0804 2021-08-20 21:39:59 +02:00			`'description': 'md5:ef26815ca8f483272a87b137ff175be2',`
[hearthisat] Detect invalid download links (fixes #9440) 2016-05-15 09:35:31 +02:00			`'upload_date': '20160328',`
			`'timestamp': 1459186146,`
Fix "invalid escape sequences" error on Python 3.6 2017-01-02 13:08:07 +01:00			`'thumbnail': r're:^https?://.*\.jpg$',`
[hearthisat] Detect invalid download links (fixes #9440) 2016-05-15 09:35:31 +02:00			`'view_count': int,`
			`'duration': 4360,`
[HearThisAtIE] Fix extractor (#742) Closes: #740 Authored by: Ashish0804 2021-08-20 21:39:59 +02:00			`'genre': 'Dance',`
[hearthisat] Detect invalid download links (fixes #9440) 2016-05-15 09:35:31 +02:00			`},`
			`}]`
[hearthisat] Add new extractor (Closes #4743) 2015-01-21 20:47:55 +01:00
			`def _real_extract(self, url):`
[extractor] Common function `_match_valid_url` 2021-08-19 03:41:24 +02:00			`m = self._match_valid_url(url)`
[hearthisat] Add new extractor (Closes #4743) 2015-01-21 20:47:55 +01:00			`display_id = '{artist:s} - {title:s}'.format(**m.groupdict())`
[HearThisAtIE] Fix extractor (#742) Closes: #740 Authored by: Ashish0804 2021-08-20 21:39:59 +02:00			`api_url = url.replace('www.', '').replace('hearthis.at', 'api-v2.hearthis.at')`
			`data_json = self._download_json(api_url, display_id)`
			`track_id = data_json.get('id')`
			`artist_json = data_json.get('user')`
			`title = '{} - {}'.format(artist_json.get('username'), data_json.get('title'))`
			`genre = data_json.get('genre')`
			`description = data_json.get('description')`
			`thumbnail = data_json.get('artwork_url') or data_json.get('thumb')`
			`view_count = str_to_int(data_json.get('playback_count'))`
			`duration = str_to_int(data_json.get('duration'))`
			`timestamp = data_json.get('release_timestamp')`
[hearthisat] Add new extractor (Closes #4743) 2015-01-21 20:47:55 +01:00
[hearthisat] Add support for more high-quality download links 2015-01-22 12:04:07 +01:00			`formats = []`
[HearThisAtIE] Fix extractor (#742) Closes: #740 Authored by: Ashish0804 2021-08-20 21:39:59 +02:00			`mp3_url = data_json.get('stream_url')`

[hearthisat] Add support for more high-quality download links 2015-01-22 12:04:07 +01:00			`if mp3_url:`
			`formats.append({`
			`'format_id': 'mp3',`
			`'vcodec': 'none',`
			`'acodec': 'mp3',`
			`'url': mp3_url,`
[HearThisAtIE] Fix extractor (#742) Closes: #740 Authored by: Ashish0804 2021-08-20 21:39:59 +02:00			`'ext': 'mp3',`
[hearthisat] Add support for more high-quality download links 2015-01-22 12:04:07 +01:00			`})`
[HearThisAtIE] Fix extractor (#742) Closes: #740 Authored by: Ashish0804 2021-08-20 21:39:59 +02:00
			`if data_json.get('download_url'):`
			`download_url = data_json['download_url']`
			`ext = determine_ext(data_json['download_filename'])`
[hearthisat] Detect invalid download links (fixes #9440) 2016-05-15 09:35:31 +02:00			`if ext in KNOWN_EXTENSIONS:`
			`formats.append({`
[HearThisAtIE] Fix extractor (#742) Closes: #740 Authored by: Ashish0804 2021-08-20 21:39:59 +02:00			`'format_id': ext,`
[hearthisat] Detect invalid download links (fixes #9440) 2016-05-15 09:35:31 +02:00			`'vcodec': 'none',`
			`'ext': ext,`
			`'url': download_url,`
[HearThisAtIE] Fix extractor (#742) Closes: #740 Authored by: Ashish0804 2021-08-20 21:39:59 +02:00			`'acodec': ext,`
[formatsort] Remove misuse of 'preference' 'preference' is to be used only when the format is better that ALL qualities of a lower preference irrespective of ANY sorting order the user requests. See deezer.py for correct use of this In the older sorting method, `preference`, `quality` and `language_preference` were functionally almost equivalent. So these disparities doesn't really matter there Also, despite what the documentation says, the default for `preference` was actually 0 and not -1. I have tried to correct this and also account for it when converting `preference` to `quality` 2021-02-18 23:03:16 +01:00			`'quality': 2, # Usually better quality`
[hearthisat] Detect invalid download links (fixes #9440) 2016-05-15 09:35:31 +02:00			`})`
[hearthisat] Add support for more high-quality download links 2015-01-22 12:04:07 +01:00			`self._sort_formats(formats)`
[hearthisat] Add new extractor (Closes #4743) 2015-01-21 20:47:55 +01:00
			`return {`
			`'id': track_id,`
[hearthisat] Add support for more high-quality download links 2015-01-22 12:04:07 +01:00			`'display_id': display_id,`
[hearthisat] Add new extractor (Closes #4743) 2015-01-21 20:47:55 +01:00			`'title': title,`
			`'formats': formats,`
			`'thumbnail': thumbnail,`
			`'description': description,`
			`'duration': duration,`
			`'timestamp': timestamp,`
			`'view_count': view_count,`
[HearThisAtIE] Fix extractor (#742) Closes: #740 Authored by: Ashish0804 2021-08-20 21:39:59 +02:00			`'genre': genre,`
[hearthisat] Add new extractor (Closes #4743) 2015-01-21 20:47:55 +01:00			`}`