1
0
mirror of https://github.com/mikf/gallery-dl.git synced 2024-11-25 12:12:34 +01:00

[twitter] add 'quotes' extractor (#5262)

https://github.com/mikf/gallery-dl/issues/5262#issuecomment-1981571924

It's implemented as a search for 'quoted_tweet_id:…' on Twitter.
This commit is contained in:
Mike Fährmann 2024-03-07 00:52:50 +01:00
parent 790c0ffb8d
commit 40c0553523
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88
3 changed files with 24 additions and 4 deletions

View File

@ -898,7 +898,7 @@ Consider all listed sites to potentially be NSFW.
<tr> <tr>
<td>Twitter</td> <td>Twitter</td>
<td>https://twitter.com/</td> <td>https://twitter.com/</td>
<td>Avatars, Backgrounds, Bookmarks, Communities, Events, Followed Users, Hashtags, individual Images, Likes, Lists, List Members, Media Timelines, Search Results, Timelines, Tweets, User Profiles</td> <td>Avatars, Backgrounds, Bookmarks, Communities, Events, Followed Users, Hashtags, individual Images, Likes, Lists, List Members, Media Timelines, Quotes, Search Results, Timelines, Tweets, User Profiles</td>
<td>Supported</td> <td>Supported</td>
</tr> </tr>
<tr> <tr>

View File

@ -731,9 +731,9 @@ class TwitterEventExtractor(TwitterExtractor):
class TwitterTweetExtractor(TwitterExtractor): class TwitterTweetExtractor(TwitterExtractor):
"""Extractor for images from individual tweets""" """Extractor for individual tweets"""
subcategory = "tweet" subcategory = "tweet"
pattern = BASE_PATTERN + r"/([^/?#]+|i/web)/status/(\d+)" pattern = BASE_PATTERN + r"/([^/?#]+|i/web)/status/(\d+)/?$"
example = "https://twitter.com/USER/status/12345" example = "https://twitter.com/USER/status/12345"
def __init__(self, match): def __init__(self, match):
@ -810,6 +810,18 @@ class TwitterTweetExtractor(TwitterExtractor):
return itertools.chain(buffer, tweets) return itertools.chain(buffer, tweets)
class TwitterQuotesExtractor(TwitterExtractor):
    """Extractor for Tweets that quote a given Tweet"""
    subcategory = "quotes"
    # The user segment is non-capturing, so the only capture group
    # of this pattern is the numeric ID of the quoted Tweet.
    pattern = BASE_PATTERN + r"/(?:[^/?#]+|i/web)/status/(\d+)/quotes"
    example = "https://twitter.com/USER/status/12345/quotes"

    def items(self):
        """Queue a 'quoted_tweet_id:<ID>' search instead of fetching quotes

        Yields a single Message.Queue entry whose '_extractor' hint
        selects TwitterSearchExtractor for the generated search URL.
        """
        # NOTE(review): 'self.user' is expected to hold the Tweet ID here,
        # presumably because the base class stores the first pattern group
        # in it - confirm against TwitterExtractor.__init__.
        search_url = "{}/search?q=quoted_tweet_id:{}".format(
            self.root, self.user)
        queue_data = {"_extractor": TwitterSearchExtractor}
        yield Message.Queue, search_url, queue_data
class TwitterAvatarExtractor(TwitterExtractor): class TwitterAvatarExtractor(TwitterExtractor):
subcategory = "avatar" subcategory = "avatar"
filename_fmt = "avatar {date}.{extension}" filename_fmt = "avatar {date}.{extension}"

View File

@ -218,7 +218,7 @@ __tests__ = (
"#category": ("", "twitter", "hashtag"), "#category": ("", "twitter", "hashtag"),
"#class" : twitter.TwitterHashtagExtractor, "#class" : twitter.TwitterHashtagExtractor,
"#pattern" : twitter.TwitterSearchExtractor.pattern, "#pattern" : twitter.TwitterSearchExtractor.pattern,
"#sha1_url": "3571c3a53b7647ea35517041fdc17f77ec5b2cb9", "#urls" : "https://twitter.com/search?q=%23nature",
}, },
{ {
@ -537,6 +537,14 @@ The Washington Post writes, "Three weeks after the toxic train derailment in Ohi
"The analysis by Texas A&M University seems to contradict statements by state and federal regulators that air near the crash site is completely safe, despite residents complaining about rashes, breathing problems and other health effects." Your reaction.""", "The analysis by Texas A&M University seems to contradict statements by state and federal regulators that air near the crash site is completely safe, despite residents complaining about rashes, breathing problems and other health effects." Your reaction.""",
}, },
{
"#url" : "https://twitter.com/playpokemon/status/1263832915173048321/quotes",
"#category": ("", "twitter", "quotes"),
"#class" : twitter.TwitterQuotesExtractor,
"#pattern" : twitter.TwitterSearchExtractor.pattern,
"#urls" : "https://twitter.com/search?q=quoted_tweet_id:1263832915173048321",
},
{ {
"#url" : "https://twitter.com/supernaturepics/photo", "#url" : "https://twitter.com/supernaturepics/photo",
"#category": ("", "twitter", "avatar"), "#category": ("", "twitter", "avatar"),