1
0
mirror of https://github.com/mikf/gallery-dl.git synced 2024-11-26 12:42:29 +01:00

[twitter] restore truncated retweet text (#3430, #4690)

This commit is contained in:
Mike Fährmann 2023-10-27 23:01:43 +02:00
parent 218295a4c6
commit fd36eafe32
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

View File

@@ -272,28 +272,23 @@ class TwitterExtractor(Extractor):
author = tweet["user"] author = tweet["user"]
author = self._transform_user(author) author = self._transform_user(author)
if "note_tweet" in tweet:
note = tweet["note_tweet"]["note_tweet_results"]["result"]
else:
note = None
source = tweet["source"]
if "legacy" in tweet: if "legacy" in tweet:
tweet = tweet["legacy"] legacy = tweet["legacy"]
else:
legacy = tweet
tget = legacy.get
tweet_id = int(tweet["id_str"]) tweet_id = int(legacy["id_str"])
if tweet_id >= 300000000000000: if tweet_id >= 300000000000000:
date = text.parse_timestamp( date = text.parse_timestamp(
((tweet_id >> 22) + 1288834974657) // 1000) ((tweet_id >> 22) + 1288834974657) // 1000)
else: else:
try: try:
date = text.parse_datetime( date = text.parse_datetime(
tweet["created_at"], "%a %b %d %H:%M:%S %z %Y") legacy["created_at"], "%a %b %d %H:%M:%S %z %Y")
except Exception: except Exception:
date = util.NONE date = util.NONE
tget = tweet.get
tdata = { tdata = {
"tweet_id" : tweet_id, "tweet_id" : tweet_id,
"retweet_id" : text.parse_int( "retweet_id" : text.parse_int(
@@ -307,8 +302,8 @@ class TwitterExtractor(Extractor):
"date" : date, "date" : date,
"author" : author, "author" : author,
"user" : self._user or author, "user" : self._user or author,
"lang" : tweet["lang"], "lang" : legacy["lang"],
"source" : text.extr(source, ">", "<"), "source" : text.extr(tweet["source"], ">", "<"),
"sensitive" : tget("possibly_sensitive"), "sensitive" : tget("possibly_sensitive"),
"favorite_count": tget("favorite_count"), "favorite_count": tget("favorite_count"),
"quote_count" : tget("quote_count"), "quote_count" : tget("quote_count"),
@@ -316,7 +311,13 @@ class TwitterExtractor(Extractor):
"retweet_count" : tget("retweet_count"), "retweet_count" : tget("retweet_count"),
} }
entities = note["entity_set"] if note else tweet["entities"] if "note_tweet" in tweet:
note = tweet["note_tweet"]["note_tweet_results"]["result"]
content = note["text"]
entities = note["entity_set"]
else:
content = tget("full_text") or tget("text") or ""
entities = legacy["entities"]
hashtags = entities.get("hashtags") hashtags = entities.get("hashtags")
if hashtags: if hashtags:
@@ -330,8 +331,7 @@ class TwitterExtractor(Extractor):
"nick": u["name"], "nick": u["name"],
} for u in mentions] } for u in mentions]
content = text.unescape( content = text.unescape(content)
note["text"] if note else tget("full_text") or tget("text") or "")
urls = entities.get("urls") urls = entities.get("urls")
if urls: if urls:
for url in urls: for url in urls:
@@ -339,11 +339,13 @@ class TwitterExtractor(Extractor):
txt, _, tco = content.rpartition(" ") txt, _, tco = content.rpartition(" ")
tdata["content"] = txt if tco.startswith("https://t.co/") else content tdata["content"] = txt if tco.startswith("https://t.co/") else content
if "in_reply_to_screen_name" in tweet: if "in_reply_to_screen_name" in legacy:
tdata["reply_to"] = tweet["in_reply_to_screen_name"] tdata["reply_to"] = legacy["in_reply_to_screen_name"]
if "quoted_by" in tweet: if "quoted_by" in legacy:
tdata["quote_by"] = tweet["quoted_by"] tdata["quote_by"] = legacy["quoted_by"]
if tdata["retweet_id"]: if tdata["retweet_id"]:
tdata["content"] = "RT @{}: {}".format(
author["name"], tdata["content"])
tdata["date_original"] = text.parse_timestamp( tdata["date_original"] = text.parse_timestamp(
((tdata["retweet_id"] >> 22) + 1288834974657) // 1000) ((tdata["retweet_id"] >> 22) + 1288834974657) // 1000)
@@ -1532,15 +1534,21 @@ class TwitterAPI():
retweet["core"]["user_results"]["result"] retweet["core"]["user_results"]["result"]
rtlegacy = retweet["legacy"] rtlegacy = retweet["legacy"]
if "note_tweet" in retweet:
tweet["note_tweet"] = retweet["note_tweet"]
if "extended_entities" in rtlegacy and \ if "extended_entities" in rtlegacy and \
"extended_entities" not in legacy: "extended_entities" not in legacy:
legacy["extended_entities"] = \ legacy["extended_entities"] = \
rtlegacy["extended_entities"] rtlegacy["extended_entities"]
if "withheld_scope" in rtlegacy and \ if "withheld_scope" in rtlegacy and \
"withheld_scope" not in legacy: "withheld_scope" not in legacy:
legacy["withheld_scope"] = \ legacy["withheld_scope"] = \
rtlegacy["withheld_scope"] rtlegacy["withheld_scope"]
legacy["full_text"] = rtlegacy["full_text"]
legacy["full_text"] = rtlegacy["full_text"]
except KeyError: except KeyError:
pass pass