1
0
mirror of https://github.com/mikf/gallery-dl.git synced 2024-11-23 03:02:50 +01:00

[twitter] small improvements

- handle reply tweets (#403)
- unset cookies in Tweet extractor to "force" the legacy interface
This commit is contained in:
Mike Fährmann 2019-09-01 17:37:48 +02:00
parent 682105b8ee
commit bc0ca66c99
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

View File

@@ -151,12 +151,15 @@ class TwitterTimelineExtractor(TwitterExtractor):
"""Extractor for all images from a user's timeline""" """Extractor for all images from a user's timeline"""
subcategory = "timeline" subcategory = "timeline"
pattern = (r"(?:https?://)?(?:www\.|mobile\.)?twitter\.com" pattern = (r"(?:https?://)?(?:www\.|mobile\.)?twitter\.com"
r"/([^/?&#]+)/?$") r"/([^/?&#]+)/?(?:[?#].*)?$")
test = ("https://twitter.com/supernaturepics", { test = (
"range": "1-40", ("https://twitter.com/supernaturepics", {
"url": "0106229d408f4111d9a52c8fd2ad687f64842aa4", "range": "1-40",
"keyword": "7210d679606240405e0cf62cbc67596e81a7a250", "url": "0106229d408f4111d9a52c8fd2ad687f64842aa4",
}) "keyword": "7210d679606240405e0cf62cbc67596e81a7a250",
}),
("https://mobile.twitter.com/supernaturepics?p=i"),
)
def tweets(self): def tweets(self):
url = "{}/i/profiles/show/{}/timeline/tweets".format( url = "{}/i/profiles/show/{}/timeline/tweets".format(
@@ -169,10 +172,13 @@ class TwitterMediaExtractor(TwitterExtractor):
subcategory = "media" subcategory = "media"
pattern = (r"(?:https?://)?(?:www\.|mobile\.)?twitter\.com" pattern = (r"(?:https?://)?(?:www\.|mobile\.)?twitter\.com"
r"/([^/?&#]+)/media(?!\w)") r"/([^/?&#]+)/media(?!\w)")
test = ("https://twitter.com/supernaturepics/media", { test = (
"range": "1-40", ("https://twitter.com/supernaturepics/media", {
"url": "0106229d408f4111d9a52c8fd2ad687f64842aa4", "range": "1-40",
}) "url": "0106229d408f4111d9a52c8fd2ad687f64842aa4",
}),
("https://mobile.twitter.com/supernaturepics/media#t"),
)
def tweets(self): def tweets(self):
url = "{}/i/profiles/show/{}/media_timeline".format( url = "{}/i/profiles/show/{}/media_timeline".format(
@@ -206,6 +212,11 @@ class TwitterTweetExtractor(TwitterExtractor):
"options": (("content", True),), "options": (("content", True),),
"keyword": "b13b6c4cd0b0c15b2ea7685479e7fedde3c47b9e", "keyword": "b13b6c4cd0b0c15b2ea7685479e7fedde3c47b9e",
}), }),
# Reply to another tweet (#403)
("https://twitter.com/tyson_hesse/status/1103767554424598528", {
"options": (("videos", True),),
"pattern": r"ytdl:https://twitter.com/.*/1103767554424598528$",
}),
) )
def __init__(self, match): def __init__(self, match):
@@ -216,7 +227,9 @@ class TwitterTweetExtractor(TwitterExtractor):
return {"user": self.user, "tweet_id": self.tweet_id} return {"user": self.user, "tweet_id": self.tweet_id}
def tweets(self): def tweets(self):
self.session.cookies.clear()
url = "{}/{}/status/{}".format(self.root, self.user, self.tweet_id) url = "{}/{}/status/{}".format(self.root, self.user, self.tweet_id)
page = self.request(url).text page = self.request(url).text
return (text.extract( end = page.index('class="js-tweet-stats-container')
page, '<div class="tweet ', 'class="js-tweet-stats-container')[0],) beg = page.rindex('<div class="tweet ', 0, end)
return (page[beg:end],)