From 14183d1f80a5417e3870bbe79dc0a7b7bb48d2a6 Mon Sep 17 00:00:00 2001 From: Ashish <39122144+Ashish0804@users.noreply.github.com> Date: Fri, 20 Aug 2021 23:46:59 +0530 Subject: [PATCH] [Hungama] Fix `HungamaSongIE` and add `HungamaAlbumPlaylistIE` (#744) Authored by: Ashish0804 --- yt_dlp/extractor/extractors.py | 1 + yt_dlp/extractor/hungama.py | 56 ++++++++++++++++++++++++++-------- 2 files changed, 44 insertions(+), 13 deletions(-) diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 955a44a90..1997e3c5b 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -538,6 +538,7 @@ from .hungama import ( HungamaIE, HungamaSongIE, + HungamaAlbumPlaylistIE, ) from .hypem import HypemIE from .ign import ( diff --git a/yt_dlp/extractor/hungama.py b/yt_dlp/extractor/hungama.py index 3fdaac5b6..c33e387e7 100644 --- a/yt_dlp/extractor/hungama.py +++ b/yt_dlp/extractor/hungama.py @@ -1,9 +1,12 @@ # coding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor from ..utils import ( int_or_none, + try_get, urlencode_postdata, ) @@ -71,14 +74,14 @@ class HungamaSongIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?hungama\.com/song/[^/]+/(?P<id>\d+)' _TEST = { 'url': 'https://www.hungama.com/song/kitni-haseen-zindagi/2931166/', - 'md5': 'a845a6d1ebd08d80c1035126d49bd6a0', + 'md5': 'd4a6a05a394ad0453a9bea3ca00e6024', 'info_dict': { 'id': '2931166', - 'ext': 'mp4', + 'ext': 'mp3', 'title': 'Lucky Ali - Kitni Haseen Zindagi', 'track': 'Kitni Haseen Zindagi', 'artist': 'Lucky Ali', - 'album': 'Aks', + 'album': None, 'release_year': 2000, } } @@ -89,18 +92,20 @@ def _real_extract(self, url): data = self._download_json( 'https://www.hungama.com/audio-player-data/track/%s' % audio_id, audio_id, query={'_country': 'IN'})[0] - track = data['song_name'] artist = data.get('singer_name') + formats = [] + media_json = self._download_json(data.get('file') or data['preview_link'], audio_id) + 
media_url = try_get(media_json, lambda x: x['response']['media_url'], str) + media_type = try_get(media_json, lambda x: x['response']['type'], str) - m3u8_url = self._download_json( - data.get('file') or data['preview_link'], - audio_id)['response']['media_url'] - - formats = self._extract_m3u8_formats( - m3u8_url, audio_id, ext='mp4', entry_protocol='m3u8_native', - m3u8_id='hls') - self._sort_formats(formats) + if media_url: + formats.append({ + 'url': media_url, + 'ext': media_type, + 'vcodec': 'none', + 'acodec': media_type, + }) title = '%s - %s' % (artist, track) if artist else track thumbnail = data.get('img_src') or data.get('album_image') @@ -111,7 +116,32 @@ def _real_extract(self, url): 'thumbnail': thumbnail, 'track': track, 'artist': artist, - 'album': data.get('album_name'), + 'album': data.get('album_name') or None, 'release_year': int_or_none(data.get('date')), 'formats': formats, } + + +class HungamaAlbumPlaylistIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?hungama\.com/(?:playlists|album)/[^/]+/(?P<id>\d+)' + _TESTS = [{ + 'url': 'https://www.hungama.com/album/bhuj-the-pride-of-india/69481490/', + 'playlist_mincount': 7, + 'info_dict': { + 'id': '69481490', + }, + }, { + 'url': 'https://www.hungama.com/playlists/hindi-jan-to-june-2021/123063/', + 'playlist_mincount': 50, + 'info_dict': { + 'id': '123063', + }, + }] + + def _real_extract(self, url): + id = self._match_id(url) + webpage = self._download_webpage(url, id) + ptrn = r'<meta[^>]+?property=[\"\']?music:song:url[\"\']?[^>]+?content=[\"\']?([^\"\']+)' + items = re.findall(ptrn, webpage) + entries = [self.url_result(item, ie=HungamaSongIE.ie_key()) for item in items] + return self.playlist_result(entries, id)