mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-07-08 21:04:38 +02:00
correction and update
This commit is contained in:
parent
4399f5e0e9
commit
80637a220c
@ -97,11 +97,11 @@ def _extract_info(self, gif_data, video_id):
|
|||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
}
|
}
|
||||||
|
|
||||||
def _api_channel_feed(self, channel_id, max_offset=_GIPHY_MAX): # offset cannot exceed 5000
|
def _api_channel_feed(self, channel_id, max_offset=_GIPHY_MAX):
|
||||||
offset = 0
|
offset = 0
|
||||||
query_url = f'https://giphy.com/api/v4/channels/{channel_id}/feed/?offset={offset}'
|
query_url = f'https://giphy.com/api/v4/channels/{channel_id}/feed/?offset={offset}'
|
||||||
for _ in itertools.count(1):
|
for _ in itertools.count(1):
|
||||||
search_results = self._download_json(query_url, channel_id,
|
search_results = self._download_json(query_url, channel_id, fatal=False,
|
||||||
note=f'Fetching feed {offset + 1}-{offset + 25}')
|
note=f'Fetching feed {offset + 1}-{offset + 25}')
|
||||||
if not search_results.get('results'):
|
if not search_results.get('results'):
|
||||||
return
|
return
|
||||||
@ -110,9 +110,10 @@ def _api_channel_feed(self, channel_id, max_offset=_GIPHY_MAX): # offset cann
|
|||||||
**self._extract_info(video, video['id']),
|
**self._extract_info(video, video['id']),
|
||||||
'webpage_url': video['url'],
|
'webpage_url': video['url'],
|
||||||
}
|
}
|
||||||
offset += len(search_results['results'])
|
query_url = url_or_none(search_results.get('next')) or ''
|
||||||
query_url = url_or_none(search_results.get('next'))
|
offset = int(self._search_regex(r'offset=(\d+)', query_url, 'offset', default=0))
|
||||||
if not query_url or offset > max_offset:
|
# offset cannot exceed 5000
|
||||||
|
if not query_url or offset > min((max_offset or self._GIPHY_MAX) + 1, self._GIPHY_MAX):
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
||||||
@ -164,12 +165,12 @@ class GiphyIE(GiphyBaseIE):
|
|||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'You Can\'t Break Up With Me',
|
'title': 'You Can\'t Break Up With Me',
|
||||||
'description': 'South Park, Season 20, Episode 4, Wieners Out',
|
'description': 'South Park, Season 20, Episode 4, Wieners Out',
|
||||||
'tags': 'count:17',
|
'tags': 'count:16',
|
||||||
'thumbnail': r're:^https?://.*',
|
'thumbnail': r're:^https?://.*',
|
||||||
'upload_date': '20220516',
|
'upload_date': '20220516',
|
||||||
'uploader': 'South Park',
|
'uploader': 'South Park',
|
||||||
'uploader_id': 'southpark',
|
'uploader_id': 'southpark',
|
||||||
'uploader_url': 'https://giphy.com/southpark/',
|
'uploader_url': 'https://giphy.com/southpark',
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://giphy.com/embed/00xGP4zv8xENZ2tc3Y',
|
'url': 'https://giphy.com/embed/00xGP4zv8xENZ2tc3Y',
|
||||||
@ -234,7 +235,8 @@ class GiphyIE(GiphyBaseIE):
|
|||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
webpage = self._download_webpage(url, 'giphy')
|
video_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(url.replace('/embed/', '/gifs/'), video_id)
|
||||||
|
|
||||||
# {"channelId": ...}
|
# {"channelId": ...}
|
||||||
if channel_id := self._html_search_regex(r'\{"channelId":\s*([^\}]+)\}', webpage, 'channel_id', default=None):
|
if channel_id := self._html_search_regex(r'\{"channelId":\s*([^\}]+)\}', webpage, 'channel_id', default=None):
|
||||||
@ -248,18 +250,15 @@ def _real_extract(self, url):
|
|||||||
'title': (self._html_search_meta('twitter:title', webpage)
|
'title': (self._html_search_meta('twitter:title', webpage)
|
||||||
or self._og_search_title(webpage)
|
or self._og_search_title(webpage)
|
||||||
).replace(' GIFs on GIPHY - Be Animated', '').strip(),
|
).replace(' GIFs on GIPHY - Be Animated', '').strip(),
|
||||||
'uploader_id': self._html_search_meta('twitter:creator', webpage).replace('@', '').lower(),
|
'uploader_id': uploader_id,
|
||||||
'uploader_url': f'https://giphy.com/channel/{uploader_id}' if uploader_id != 'giphy' else None,
|
'uploader_url': f'https://giphy.com/channel/{uploader_id}' if uploader_id != 'giphy' else None,
|
||||||
'_type': 'playlist',
|
'_type': 'playlist',
|
||||||
'entries': entries,
|
'entries': entries,
|
||||||
}
|
}
|
||||||
else:
|
else:
|
||||||
video_id = self._match_id(url)
|
title = (self._html_search_meta('twitter:title', webpage, default=None)
|
||||||
webpage = self._download_webpage(f'https://giphy.com/gifs/{video_id}', video_id)
|
|
||||||
|
|
||||||
title = (self._html_search_meta('twitter:title', webpage)
|
|
||||||
or self._og_search_title(webpage).replace(' - Find & Share on GIPHY', '').strip())
|
or self._og_search_title(webpage).replace(' - Find & Share on GIPHY', '').strip())
|
||||||
description = (self._html_search_meta('twitter:description', webpage)
|
description = (self._html_search_meta('twitter:description', webpage, default=None)
|
||||||
or self._og_search_description(webpage))
|
or self._og_search_description(webpage))
|
||||||
description = description if not description.startswith('Discover & share') else None
|
description = description if not description.startswith('Discover & share') else None
|
||||||
|
|
||||||
@ -268,31 +267,31 @@ def _real_extract(self, url):
|
|||||||
webpage, 'video_data', default=None):
|
webpage, 'video_data', default=None):
|
||||||
gif_data = self._parse_json(json_str.encode('utf-8').decode('unicode_escape'), video_id)
|
gif_data = self._parse_json(json_str.encode('utf-8').decode('unicode_escape'), video_id)
|
||||||
# gif: {"id":...},
|
# gif: {"id":...},
|
||||||
elif json_str := self._html_search_regex(r'\s+\w+:\s*({".*?}),\n\s+', webpage, 'video_data', default={}):
|
elif json_str := self._html_search_regex(r'\s+\w+:\s*({".*?}),\n\s+', webpage, 'video_data', default='{}'):
|
||||||
gif_data = self._parse_json(json_str, video_id)
|
gif_data = self._parse_json(json_str, video_id)
|
||||||
|
|
||||||
info = self._extract_info(gif_data, video_id)
|
info = self._extract_info(gif_data, video_id)
|
||||||
|
|
||||||
if not info.get('formats'):
|
if not info.get('formats'):
|
||||||
formats = []
|
formats = []
|
||||||
if url := self._og_search_video_url(webpage):
|
if url := self._og_search_video_url(webpage, default=None):
|
||||||
formats.append({
|
formats.append({
|
||||||
'format_id': determine_ext(url),
|
'format_id': determine_ext(url),
|
||||||
'width': int_or_none(self._og_search_property('video:width', webpage)),
|
'width': int_or_none(self._og_search_property('video:width', webpage)),
|
||||||
'height': int_or_none(self._og_search_property('video:height', webpage)),
|
'height': int_or_none(self._og_search_property('video:height', webpage)),
|
||||||
'url': url,
|
'url': url,
|
||||||
})
|
})
|
||||||
if url := self._og_search_thumbnail(webpage):
|
if url := self._og_search_thumbnail(webpage, default=None):
|
||||||
formats.append({
|
formats.append({
|
||||||
'format_id': determine_ext(url),
|
'format_id': determine_ext(url),
|
||||||
'width': int_or_none(self._og_search_property('image:width', webpage)),
|
'width': int_or_none(self._og_search_property('image:width', webpage)),
|
||||||
'height': int_or_none(self._og_search_property('image:height', webpage)),
|
'height': int_or_none(self._og_search_property('image:height', webpage)),
|
||||||
'url': url,
|
'url': url,
|
||||||
})
|
})
|
||||||
if url := self._html_search_meta('twitter:image', webpage):
|
if url := self._html_search_meta('twitter:image', webpage, default=None):
|
||||||
thumbnails = [{
|
thumbnails = [{
|
||||||
'width': int_or_none(self._html_search_meta('twitter:image:width', webpage)),
|
'width': int_or_none(self._html_search_meta('twitter:image:width', webpage, default=None)),
|
||||||
'height': int_or_none(self._html_search_meta('twitter:image:height', webpage)),
|
'height': int_or_none(self._html_search_meta('twitter:image:height', webpage, default=None)),
|
||||||
'url': url,
|
'url': url,
|
||||||
}]
|
}]
|
||||||
info['formats'] = formats
|
info['formats'] = formats
|
||||||
@ -386,7 +385,7 @@ def search_query(query, offset, limit, category):
|
|||||||
return self._download_json(
|
return self._download_json(
|
||||||
f'https://api.giphy.com/v1/{category}/search', query,
|
f'https://api.giphy.com/v1/{category}/search', query,
|
||||||
note=f'Fetching {category} result {offset + 1}-{offset + limit}', query={
|
note=f'Fetching {category} result {offset + 1}-{offset + limit}', query={
|
||||||
'rating': 'pg-13',
|
'rating': 'r', # MPA film rating
|
||||||
'offset': offset,
|
'offset': offset,
|
||||||
'limit': limit,
|
'limit': limit,
|
||||||
'type': category, # known types: 'gifs', 'stickers', 'text', 'videos'
|
'type': category, # known types: 'gifs', 'stickers', 'text', 'videos'
|
||||||
@ -396,12 +395,12 @@ def search_query(query, offset, limit, category):
|
|||||||
})
|
})
|
||||||
|
|
||||||
# type: comma delimited list
|
# type: comma delimited list
|
||||||
types = self._search_regex(r'&type=([^&]+)', query, 'type', default='gifs,videos')
|
types = self._search_regex(r'&type=([^&]+)', query, 'type', default='gifs,stickers,videos')
|
||||||
types = [(f'{x}s' if x[-1] != 's' and any(x in t for t in ['gifs', 'stickers', 'videos']) else x)
|
types = [(f'{x}s' if x[-1] != 's' and any(x in t for t in ['gifs', 'stickers', 'videos']) else x)
|
||||||
for x in [x.strip() for x in types.lower().split(',')]]
|
for x in [x.strip() for x in types.lower().split(',')]]
|
||||||
query = query.split('&type=')[0]
|
query = query.split('&type=')[0]
|
||||||
|
|
||||||
offset, limit = 0, 15 # 'offset' max: 5000, 'limit' max: 100
|
offset, limit = 0, 50
|
||||||
types_done = []
|
types_done = []
|
||||||
for _ in itertools.count(1):
|
for _ in itertools.count(1):
|
||||||
for t in types:
|
for t in types:
|
||||||
@ -422,7 +421,7 @@ def search_query(query, offset, limit, category):
|
|||||||
types_done.append(t)
|
types_done.append(t)
|
||||||
if len(types) > len(types_done):
|
if len(types) > len(types_done):
|
||||||
offset += limit
|
offset += limit
|
||||||
if offset > self._GIPHY_MAX:
|
if offset >= self._GIPHY_MAX:
|
||||||
return
|
return
|
||||||
else:
|
else:
|
||||||
return
|
return
|
||||||
@ -478,8 +477,7 @@ class GiphyStoriesIE(GiphyBaseIE):
|
|||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
slug = self._match_id(url)
|
slug = self._match_id(url)
|
||||||
# https://x.giphy.com/v1/stories/slug/{slug}?api_key=3eFQvabDx69SMoOemSPiYfh9FY0nzO9x
|
# https://x.giphy.com/v1/stories/slug/{slug}?api_key=3eFQvabDx69SMoOemSPiYfh9FY0nzO9x
|
||||||
story = self._download_json(f'https://x.giphy.com/v1/stories/slug/{slug}?api_key={self._GIPHY_FE_STORIES_AND_GIPHY_TV_API_KEY}',
|
story = self._download_json(f'https://x.giphy.com/v1/stories/slug/{slug}?api_key={self._GIPHY_FE_STORIES_AND_GIPHY_TV_API_KEY}', slug)
|
||||||
slug, note=f'Extracting URL: {url}')
|
|
||||||
|
|
||||||
if data := story.get('data'):
|
if data := story.get('data'):
|
||||||
entries = []
|
entries = []
|
||||||
|
Loading…
Reference in New Issue
Block a user