1
0
mirror of https://github.com/mikf/gallery-dl.git synced 2024-11-22 10:42:34 +01:00

[twitter] improve results for regular user URLs

- continuation of 3346f58a
- use media timeline results (or tweet timeline if retweets are enabled)
  plus search results starting from the last tweet id of the first
  timeline, similar to how Twitter Media Downloader operates
- the old behavior can be forced by appending '/tweets' to a user URL
  (https://twitter.com/USER/tweets), the same way '/media' can be appended,
  although there should be no need to ever do that
This commit is contained in:
Mike Fährmann 2022-05-23 18:23:21 +02:00
parent 6ad39f2b68
commit 915dba8345
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88
3 changed files with 30 additions and 6 deletions

View File

@ -2366,6 +2366,7 @@ Description
Special values:
* ``"timeline"``: ``https://twitter.com/i/user/{rest_id}``
* ``"tweets"``: ``https://twitter.com/id:{rest_id}/tweets``
* ``"media"``: ``https://twitter.com/id:{rest_id}/media``
Note: To allow gallery-dl to follow custom URL formats, set the blacklist__

View File

@ -323,6 +323,9 @@ class TwitterExtractor(Extractor):
elif userfmt == "media":
cls = TwitterMediaExtractor
fmt = (self.root + "/id:{rest_id}/media").format_map
elif userfmt == "tweets":
cls = TwitterTweetsExtractor
fmt = (self.root + "/id:{rest_id}/tweets").format_map
else:
cls = None
fmt = userfmt.format_map
@ -383,7 +386,7 @@ class TwitterExtractor(Extractor):
class TwitterTimelineExtractor(TwitterExtractor):
"""Extractor for Tweets from a user's timeline"""
"""Extractor for a Twitter user timeline"""
subcategory = "timeline"
pattern = (BASE_PATTERN + r"/(?!search)(?:([^/?#]+)/?(?:$|[?#])"
r"|i(?:/user/|ntent/user\?user_id=)(\d+))")
@ -409,18 +412,18 @@ class TwitterTimelineExtractor(TwitterExtractor):
self.user = "id:" + user_id
def tweets(self):
if not self.config("strategy"):
return self._tweets_twMediaDownloader()
return self.api.user_tweets(self.user)
tweets = (self.api.user_tweets(self.user) if self.retweets else
self.api.user_media(self.user))
def _tweets_twMediaDownloader(self):
# yield initial batch of (media) tweets
tweet = None
for tweet in self.api.user_media(self.user):
for tweet in tweets:
yield tweet
if tweet is None:
return
# get username
if not self.user.startswith("id:"):
username = self.user
elif "core" in tweet:
@ -429,9 +432,11 @@ class TwitterTimelineExtractor(TwitterExtractor):
else:
username = tweet["user"]["screen_name"]
# get tweet data
if "legacy" in tweet:
tweet = tweet["legacy"]
# yield search results starting from last tweet id
yield from self.api.search_adaptive(
"from:{} include:retweets include:nativeretweets max_id:{} "
"filter:images OR card_name:animated_gif OR filter:native_video"
@ -439,6 +444,23 @@ class TwitterTimelineExtractor(TwitterExtractor):
)
class TwitterTweetsExtractor(TwitterExtractor):
    """Extractor for the Tweets shown on a user's '/tweets' timeline"""
    subcategory = "tweets"
    # one path segment (not starting with 'search') followed by '/tweets'
    pattern = BASE_PATTERN + r"/(?!search)([^/?#]+)/tweets(?!\w)"
    test = (
        (
            "https://twitter.com/supernaturepics/tweets",
            {
                "range": "1-40",
                "url": "c570ac1aae38ed1463be726cc46f31cac3d82a40",
            },
        ),
        "https://mobile.twitter.com/supernaturepics/tweets#t",
        "https://www.twitter.com/id:2976459548/tweets",
    )

    def tweets(self):
        """Return the result of the API's user_tweets() call for this user"""
        fetch_user_tweets = self.api.user_tweets
        return fetch_user_tweets(self.user)
class TwitterRepliesExtractor(TwitterExtractor):
"""Extractor for Tweets from a user's timeline including replies"""
subcategory = "replies"

View File

@ -208,6 +208,7 @@ SUBCATEGORY_MAP = {
},
"twitter": {
"media": "Media Timelines",
"tweets": "",
"replies": "",
"list-members": "List Members",
},