1
0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2024-11-02 09:12:40 +01:00

[extractor/amazonminitv] Cleanup 48652590ec

This commit is contained in:
pukkandan 2022-11-29 07:50:58 +05:30
parent 48652590ec
commit a9d069f5b8
No known key found for this signature in database
GPG Key ID: 7EEE9E1E817D0A39

View File

@ -4,16 +4,43 @@
from ..utils import ExtractorError, int_or_none, traverse_obj, try_get from ..utils import ExtractorError, int_or_none, traverse_obj, try_get
class AmazonMiniTVIE(InfoExtractor): class AmazonMiniTVBaseIE(InfoExtractor):
def _real_initialize(self):
self._download_webpage(
'https://www.amazon.in/minitv', None,
note='Fetching guest session cookies')
AmazonMiniTVBaseIE.session_id = self._get_cookies('https://www.amazon.in')['session-id'].value
def _call_api(self, asin, data=None, note=None):
device = {'clientId': 'ATVIN', 'deviceLocale': 'en_GB'}
if data:
data['variables'].update({
'contentType': 'VOD',
'sessionIdToken': self.session_id,
**device,
})
resp = self._download_json(
f'https://www.amazon.in/minitv/api/web/{"graphql" if data else "prs"}',
asin, note=note, headers={'Content-Type': 'application/json'},
data=json.dumps(data).encode() if data else None,
query=None if data else {
'deviceType': 'A1WMMUXPCUJL4N',
'contentId': asin,
**device,
})
if resp.get('errors'):
raise ExtractorError(f'MiniTV said: {resp["errors"][0]["message"]}')
elif not data:
return resp
return resp['data'][data['operationName']]
class AmazonMiniTVIE(AmazonMiniTVBaseIE):
_VALID_URL = r'(?:https?://(?:www\.)?amazon\.in/minitv/tp/|amazonminitv:(?:amzn1\.dv\.gti\.)?)(?P<id>[a-f0-9-]+)' _VALID_URL = r'(?:https?://(?:www\.)?amazon\.in/minitv/tp/|amazonminitv:(?:amzn1\.dv\.gti\.)?)(?P<id>[a-f0-9-]+)'
_HEADERS = {
'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Mobile Safari/537.36',
}
_CLIENT_ID = 'ATVIN'
_DEVICE_LOCALE = 'en_GB'
_TESTS = [{ _TESTS = [{
'url': 'https://www.amazon.in/minitv/tp/75fe3a75-b8fe-4499-8100-5c9424344840?referrer=https%3A%2F%2Fwww.amazon.in%2Fminitv', 'url': 'https://www.amazon.in/minitv/tp/75fe3a75-b8fe-4499-8100-5c9424344840?referrer=https%3A%2F%2Fwww.amazon.in%2Fminitv',
'md5': '0045a5ea38dddd4de5a5fcec7274b476',
'info_dict': { 'info_dict': {
'id': 'amzn1.dv.gti.75fe3a75-b8fe-4499-8100-5c9424344840', 'id': 'amzn1.dv.gti.75fe3a75-b8fe-4499-8100-5c9424344840',
'ext': 'mp4', 'ext': 'mp4',
@ -24,11 +51,7 @@ class AmazonMiniTVIE(InfoExtractor):
'release_timestamp': 1644710400, 'release_timestamp': 1644710400,
'release_date': '20220213', 'release_date': '20220213',
'duration': 846, 'duration': 846,
'chapters': [{ 'chapters': 'count:2',
'start_time': 815.0,
'end_time': 846,
'title': 'End Credits',
}],
'series': 'Couple Goals', 'series': 'Couple Goals',
'series_id': 'amzn1.dv.gti.56521d46-b040-4fd5-872e-3e70476a04b0', 'series_id': 'amzn1.dv.gti.56521d46-b040-4fd5-872e-3e70476a04b0',
'season': 'Season 3', 'season': 'Season 3',
@ -40,7 +63,6 @@ class AmazonMiniTVIE(InfoExtractor):
}, },
}, { }, {
'url': 'https://www.amazon.in/minitv/tp/280d2564-584f-452f-9c98-7baf906e01ab?referrer=https%3A%2F%2Fwww.amazon.in%2Fminitv', 'url': 'https://www.amazon.in/minitv/tp/280d2564-584f-452f-9c98-7baf906e01ab?referrer=https%3A%2F%2Fwww.amazon.in%2Fminitv',
'md5': '9a977bffd5d99c4dd2a32b360aee1863',
'info_dict': { 'info_dict': {
'id': 'amzn1.dv.gti.280d2564-584f-452f-9c98-7baf906e01ab', 'id': 'amzn1.dv.gti.280d2564-584f-452f-9c98-7baf906e01ab',
'ext': 'mp4', 'ext': 'mp4',
@ -63,6 +85,7 @@ class AmazonMiniTVIE(InfoExtractor):
'url': 'amazonminitv:280d2564-584f-452f-9c98-7baf906e01ab', 'url': 'amazonminitv:280d2564-584f-452f-9c98-7baf906e01ab',
'only_matching': True, 'only_matching': True,
}] }]
_GRAPHQL_QUERY_CONTENT = ''' _GRAPHQL_QUERY_CONTENT = '''
query content($sessionIdToken: String!, $deviceLocale: String, $contentId: ID!, $contentType: ContentType!, $clientId: String) { query content($sessionIdToken: String!, $deviceLocale: String, $contentId: ID!, $contentType: ContentType!, $clientId: String) {
content( content(
@ -107,68 +130,13 @@ class AmazonMiniTVIE(InfoExtractor):
} }
}''' }'''
def _call_api(self, asin, data=None, note=None):
query = {}
headers = self._HEADERS.copy()
if data:
name = 'graphql'
data['variables'].update({
'clientId': self._CLIENT_ID,
'contentType': 'VOD',
'deviceLocale': self._DEVICE_LOCALE,
'sessionIdToken': self.session_id,
})
headers.update({'Content-Type': 'application/json'})
else:
name = 'prs'
query.update({
'clientId': self._CLIENT_ID,
'deviceType': 'A1WMMUXPCUJL4N',
'contentId': asin,
'deviceLocale': self._DEVICE_LOCALE,
})
resp = self._download_json(
f'https://www.amazon.in/minitv/api/web/{name}',
asin, query=query, data=json.dumps(data).encode() if data else None,
headers=headers, note=note)
if 'errors' in resp:
raise ExtractorError(f'MiniTV said: {resp["errors"][0]["message"]}')
if data:
resp = resp['data'][data['operationName']]
return resp
def _real_initialize(self):
# Download webpage to get the required guest session cookies
self._download_webpage(
'https://www.amazon.in/minitv',
None,
headers=self._HEADERS,
note='Downloading webpage')
self.session_id = self._get_cookies('https://www.amazon.in')['session-id'].value
def _real_extract(self, url): def _real_extract(self, url):
asin = f'amzn1.dv.gti.{self._match_id(url)}' asin = f'amzn1.dv.gti.{self._match_id(url)}'
title_info = self._call_api(
asin, data={
'operationName': 'content',
'variables': {
'contentId': asin,
},
'query': self._GRAPHQL_QUERY_CONTENT,
},
note='Downloading title info')
prs = self._call_api(asin, note='Downloading playback info') prs = self._call_api(asin, note='Downloading playback info')
formats = [] formats, subtitles = [], {}
subtitles = {}
for type_, asset in prs['playbackAssets'].items(): for type_, asset in prs['playbackAssets'].items():
if not isinstance(asset, dict): if not traverse_obj(asset, 'manifestUrl'):
continue continue
if type_ == 'hls': if type_ == 'hls':
m3u8_fmts, m3u8_subs = self._extract_m3u8_formats_and_subtitles( m3u8_fmts, m3u8_subs = self._extract_m3u8_formats_and_subtitles(
@ -181,14 +149,16 @@ def _real_extract(self, url):
asset['manifestUrl'], asin, mpd_id=type_, fatal=False) asset['manifestUrl'], asin, mpd_id=type_, fatal=False)
formats.extend(mpd_fmts) formats.extend(mpd_fmts)
subtitles = self._merge_subtitles(subtitles, mpd_subs) subtitles = self._merge_subtitles(subtitles, mpd_subs)
else:
self.report_warning(f'Unknown asset type: {type_}')
duration = traverse_obj(title_info, ('description', 'contentLengthInSeconds')) title_info = self._call_api(
asin, note='Downloading title info', data={
'operationName': 'content',
'variables': {'contentId': asin},
'query': self._GRAPHQL_QUERY_CONTENT,
})
credits_time = try_get(title_info, lambda x: x['timecode']['endCreditsTime'] / 1000) credits_time = try_get(title_info, lambda x: x['timecode']['endCreditsTime'] / 1000)
chapters = [{
'start_time': credits_time,
'end_time': duration + credits_time, # FIXME: I suppose this is correct
'title': 'End Credits',
}] if credits_time and duration else []
is_episode = title_info.get('vodType') == 'EPISODE' is_episode = title_info.get('vodType') == 'EPISODE'
return { return {
@ -203,8 +173,11 @@ def _real_extract(self, url):
} for type_, url in (title_info.get('images') or {}).items()], } for type_, url in (title_info.get('images') or {}).items()],
'description': traverse_obj(title_info, ('description', 'synopsis')), 'description': traverse_obj(title_info, ('description', 'synopsis')),
'release_timestamp': int_or_none(try_get(title_info, lambda x: x['publicReleaseDateUTC'] / 1000)), 'release_timestamp': int_or_none(try_get(title_info, lambda x: x['publicReleaseDateUTC'] / 1000)),
'duration': duration, 'duration': traverse_obj(title_info, ('description', 'contentLengthInSeconds')),
'chapters': chapters, 'chapters': [{
'start_time': credits_time,
'title': 'End Credits',
}] if credits_time else [],
'series': title_info.get('seriesName'), 'series': title_info.get('seriesName'),
'series_id': title_info.get('seriesId'), 'series_id': title_info.get('seriesId'),
'season_number': title_info.get('seasonNumber'), 'season_number': title_info.get('seasonNumber'),
@ -215,7 +188,7 @@ def _real_extract(self, url):
} }
class AmazonMiniTVSeasonIE(AmazonMiniTVIE): class AmazonMiniTVSeasonIE(AmazonMiniTVBaseIE):
IE_NAME = 'amazonminitv:season' IE_NAME = 'amazonminitv:season'
_VALID_URL = r'amazonminitv:season:(?:amzn1\.dv\.gti\.)?(?P<id>[a-f0-9-]+)' _VALID_URL = r'amazonminitv:season:(?:amzn1\.dv\.gti\.)?(?P<id>[a-f0-9-]+)'
IE_DESC = 'Amazon MiniTV Series, "minitv:season:" prefix' IE_DESC = 'Amazon MiniTV Series, "minitv:season:" prefix'
@ -229,6 +202,7 @@ class AmazonMiniTVSeasonIE(AmazonMiniTVIE):
'url': 'amazonminitv:season:0aa996eb-6a1b-4886-a342-387fbd2f1db0', 'url': 'amazonminitv:season:0aa996eb-6a1b-4886-a342-387fbd2f1db0',
'only_matching': True, 'only_matching': True,
}] }]
_GRAPHQL_QUERY = ''' _GRAPHQL_QUERY = '''
query getEpisodes($sessionIdToken: String!, $clientId: String, $episodeOrSeasonId: ID!, $deviceLocale: String) { query getEpisodes($sessionIdToken: String!, $clientId: String, $episodeOrSeasonId: ID!, $deviceLocale: String) {
getEpisodes( getEpisodes(
@ -258,25 +232,22 @@ class AmazonMiniTVSeasonIE(AmazonMiniTVIE):
def _entries(self, asin): def _entries(self, asin):
season_info = self._call_api( season_info = self._call_api(
asin, asin, note='Downloading season info', data={
data={
'operationName': 'getEpisodes', 'operationName': 'getEpisodes',
'variables': { 'variables': {'episodeOrSeasonId': asin},
'episodeOrSeasonId': asin,
},
'query': self._GRAPHQL_QUERY, 'query': self._GRAPHQL_QUERY,
}, })
note='Downloading season info')
for episode in season_info['episodes']: for episode in season_info['episodes']:
yield self.url_result(f'amazonminitv:{episode["contentId"]}', AmazonMiniTVIE, episode['contentId']) yield self.url_result(
f'amazonminitv:{episode["contentId"]}', AmazonMiniTVIE, episode['contentId'])
def _real_extract(self, url): def _real_extract(self, url):
asin = f'amzn1.dv.gti.{self._match_id(url)}' asin = f'amzn1.dv.gti.{self._match_id(url)}'
return self.playlist_result(self._entries(asin), playlist_id=asin) return self.playlist_result(self._entries(asin), asin)
class AmazonMiniTVSeriesIE(AmazonMiniTVIE): class AmazonMiniTVSeriesIE(AmazonMiniTVBaseIE):
IE_NAME = 'amazonminitv:series' IE_NAME = 'amazonminitv:series'
_VALID_URL = r'amazonminitv:series:(?:amzn1\.dv\.gti\.)?(?P<id>[a-f0-9-]+)' _VALID_URL = r'amazonminitv:series:(?:amzn1\.dv\.gti\.)?(?P<id>[a-f0-9-]+)'
_TESTS = [{ _TESTS = [{
@ -289,6 +260,7 @@ class AmazonMiniTVSeriesIE(AmazonMiniTVIE):
'url': 'amazonminitv:series:56521d46-b040-4fd5-872e-3e70476a04b0', 'url': 'amazonminitv:series:56521d46-b040-4fd5-872e-3e70476a04b0',
'only_matching': True, 'only_matching': True,
}] }]
_GRAPHQL_QUERY = ''' _GRAPHQL_QUERY = '''
query getSeasons($sessionIdToken: String!, $deviceLocale: String, $episodeOrSeasonOrSeriesId: ID!, $clientId: String) { query getSeasons($sessionIdToken: String!, $deviceLocale: String, $episodeOrSeasonOrSeriesId: ID!, $clientId: String) {
getSeasons( getSeasons(
@ -304,19 +276,15 @@ class AmazonMiniTVSeriesIE(AmazonMiniTVIE):
def _entries(self, asin): def _entries(self, asin):
season_info = self._call_api( season_info = self._call_api(
asin, asin, note='Downloading series info', data={
data={
'operationName': 'getSeasons', 'operationName': 'getSeasons',
'variables': { 'variables': {'episodeOrSeasonOrSeriesId': asin},
'episodeOrSeasonOrSeriesId': asin,
},
'query': self._GRAPHQL_QUERY, 'query': self._GRAPHQL_QUERY,
}, })
note='Downloading series info')
for season in season_info['seasons']: for season in season_info['seasons']:
yield self.url_result(f'amazonminitv:season:{season["seasonId"]}', AmazonMiniTVSeasonIE, season['seasonId']) yield self.url_result(f'amazonminitv:season:{season["seasonId"]}', AmazonMiniTVSeasonIE, season['seasonId'])
def _real_extract(self, url): def _real_extract(self, url):
asin = f'amzn1.dv.gti.{self._match_id(url)}' asin = f'amzn1.dv.gti.{self._match_id(url)}'
return self.playlist_result(self._entries(asin), playlist_id=asin) return self.playlist_result(self._entries(asin), asin)