mirror of
https://github.com/mikf/gallery-dl.git
synced 2024-11-22 10:42:34 +01:00
[twitter] implement 'cursor' support (#5753)
This commit is contained in:
parent
162756b684
commit
97a50a23d2
5
.github/workflows/docker.yml
vendored
5
.github/workflows/docker.yml
vendored
@ -22,7 +22,10 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
# on release commits, run only for tag event
|
||||
if: ${{ ! startsWith( github.event.head_commit.message , 'release version ' ) || startsWith( github.ref , 'refs/tags/v' ) }}
|
||||
if: |
|
||||
github.repository == 'mikf/gallery-dl' &&
|
||||
( ! startsWith( github.event.head_commit.message , 'release version ' ) ||
|
||||
startsWith( github.ref , 'refs/tags/v' ) )
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
1
.github/workflows/executables.yml
vendored
1
.github/workflows/executables.yml
vendored
@ -14,6 +14,7 @@ env:
|
||||
jobs:
|
||||
build:
|
||||
|
||||
if: github.repository == 'mikf/gallery-dl'
|
||||
runs-on: ${{ matrix.os }}
|
||||
defaults:
|
||||
run:
|
||||
|
1
.github/workflows/pages.yml
vendored
1
.github/workflows/pages.yml
vendored
@ -20,6 +20,7 @@ concurrency:
|
||||
jobs:
|
||||
dispatch:
|
||||
|
||||
if: github.repository == 'mikf/gallery-dl'
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
|
@ -51,6 +51,8 @@ class TwitterExtractor(Extractor):
|
||||
if not self.config("transform", True):
|
||||
self._transform_user = util.identity
|
||||
self._transform_tweet = util.identity
|
||||
|
||||
self._cursor = None
|
||||
self._user = None
|
||||
self._user_obj = None
|
||||
self._user_cache = {}
|
||||
@ -501,6 +503,14 @@ class TwitterExtractor(Extractor):
|
||||
},
|
||||
}
|
||||
|
||||
def _init_cursor(self):
|
||||
return self.config("cursor") or None
|
||||
|
||||
def _update_cursor(self, cursor):
|
||||
self.log.debug("Cursor: %s", cursor)
|
||||
self._cursor = cursor
|
||||
return cursor
|
||||
|
||||
def metadata(self):
|
||||
"""Return general metadata"""
|
||||
return {}
|
||||
@ -508,6 +518,11 @@ class TwitterExtractor(Extractor):
|
||||
def tweets(self):
|
||||
"""Yield all relevant tweet objects"""
|
||||
|
||||
def finalize(self):
|
||||
if self._cursor:
|
||||
self.log.info("Use '-o cursor=%s' to continue downloading "
|
||||
"from the current position", self._cursor)
|
||||
|
||||
def login(self):
|
||||
if self.cookies_check(self.cookies_names):
|
||||
return
|
||||
@ -539,6 +554,9 @@ class TwitterUserExtractor(TwitterExtractor):
|
||||
def initialize(self):
|
||||
pass
|
||||
|
||||
def finalize(self):
|
||||
pass
|
||||
|
||||
def items(self):
|
||||
base = "{}/{}/".format(self.root, self.user)
|
||||
return self._dispatch_extractors((
|
||||
@ -558,30 +576,76 @@ class TwitterTimelineExtractor(TwitterExtractor):
|
||||
pattern = BASE_PATTERN + r"/(?!search)([^/?#]+)/timeline(?!\w)"
|
||||
example = "https://x.com/USER/timeline"
|
||||
|
||||
def _init_cursor(self):
|
||||
if self._cursor:
|
||||
return self._cursor.partition("/")[2] or None
|
||||
return None
|
||||
|
||||
def _update_cursor(self, cursor):
|
||||
if cursor:
|
||||
self._cursor = self._cursor_prefix + cursor
|
||||
self.log.debug("Cursor: %s", self._cursor)
|
||||
else:
|
||||
self._cursor = None
|
||||
return cursor
|
||||
|
||||
def tweets(self):
|
||||
# yield initial batch of (media) tweets
|
||||
tweet = None
|
||||
for tweet in self._select_tweet_source()(self.user):
|
||||
yield tweet
|
||||
if tweet is None:
|
||||
return
|
||||
self._cursor = cursor = self.config("cursor") or None
|
||||
reset = False
|
||||
|
||||
if cursor:
|
||||
state = cursor.partition("/")[0]
|
||||
state, _, tweet_id = state.partition("_")
|
||||
state = text.parse_int(state, 1)
|
||||
else:
|
||||
state = 1
|
||||
|
||||
if state <= 1:
|
||||
self._cursor_prefix = "1/"
|
||||
|
||||
# yield initial batch of (media) tweets
|
||||
tweet = None
|
||||
for tweet in self._select_tweet_source()(self.user):
|
||||
yield tweet
|
||||
if tweet is None and not cursor:
|
||||
return
|
||||
|
||||
user = self._user["name"]
|
||||
tweet_id = tweet["rest_id"]
|
||||
|
||||
state = reset = 2
|
||||
else:
|
||||
user = self.user
|
||||
|
||||
# build search query
|
||||
query = "from:{} max_id:{}".format(
|
||||
self._user["name"], tweet["rest_id"])
|
||||
query = "from:{} max_id:{}".format(user, tweet_id)
|
||||
if self.retweets:
|
||||
query += " include:retweets include:nativeretweets"
|
||||
|
||||
if not self.textonly:
|
||||
# try to search for media-only tweets
|
||||
tweet = None
|
||||
for tweet in self.api.search_timeline(query + " filter:links"):
|
||||
yield tweet
|
||||
if tweet is not None:
|
||||
return
|
||||
if state <= 2:
|
||||
self._cursor_prefix = "2_{}/".format(tweet_id)
|
||||
if reset:
|
||||
self._cursor = self._cursor_prefix
|
||||
|
||||
# yield unfiltered search results
|
||||
yield from self.api.search_timeline(query)
|
||||
if not self.textonly:
|
||||
# try to search for media-only tweets
|
||||
tweet = None
|
||||
for tweet in self.api.search_timeline(query + " filter:links"):
|
||||
yield tweet
|
||||
break
|
||||
if tweet is not None:
|
||||
return self._update_cursor(None)
|
||||
|
||||
state = reset = 3
|
||||
|
||||
if state <= 3:
|
||||
# yield unfiltered search results
|
||||
self._cursor_prefix = "3_{}/".format(tweet_id)
|
||||
if reset:
|
||||
self._cursor = self._cursor_prefix
|
||||
|
||||
yield from self.api.search_timeline(query)
|
||||
return self._update_cursor(None)
|
||||
|
||||
def _select_tweet_source(self):
|
||||
strategy = self.config("strategy")
|
||||
@ -1415,7 +1479,9 @@ class TwitterAPI():
|
||||
"%s %s (%s)", response.status_code, response.reason, errors)
|
||||
|
||||
def _pagination_legacy(self, endpoint, params):
|
||||
original_retweets = (self.extractor.retweets == "original")
|
||||
extr = self.extractor
|
||||
params["cursor"] = extr._init_cursor()
|
||||
original_retweets = (extr.retweets == "original")
|
||||
bottom = ("cursor-bottom-", "sq-cursor-bottom")
|
||||
|
||||
while True:
|
||||
@ -1423,7 +1489,7 @@ class TwitterAPI():
|
||||
|
||||
instructions = data["timeline"]["instructions"]
|
||||
if not instructions:
|
||||
return
|
||||
return extr._update_cursor(None)
|
||||
|
||||
tweets = data["globalObjects"]["tweets"]
|
||||
users = data["globalObjects"]["users"]
|
||||
@ -1504,8 +1570,8 @@ class TwitterAPI():
|
||||
|
||||
# stop on empty response
|
||||
if not cursor or (not tweets and not tweet_id):
|
||||
return
|
||||
params["cursor"] = cursor
|
||||
return extr._update_cursor(None)
|
||||
params["cursor"] = extr._update_cursor(cursor)
|
||||
|
||||
def _pagination_tweets(self, endpoint, variables,
|
||||
path=None, stop_tweets=True, features=None):
|
||||
@ -1514,6 +1580,7 @@ class TwitterAPI():
|
||||
pinned_tweet = extr.pinned
|
||||
|
||||
params = {"variables": None}
|
||||
variables["cursor"] = extr._init_cursor()
|
||||
if features is None:
|
||||
features = self.features_pagination
|
||||
if features:
|
||||
@ -1550,7 +1617,7 @@ class TwitterAPI():
|
||||
cursor = entry["content"]["value"]
|
||||
if entries is None:
|
||||
if not cursor:
|
||||
return
|
||||
return extr._update_cursor(None)
|
||||
entries = ()
|
||||
|
||||
except LookupError:
|
||||
@ -1699,12 +1766,14 @@ class TwitterAPI():
|
||||
continue
|
||||
|
||||
if stop_tweets and not tweet:
|
||||
return
|
||||
return extr._update_cursor(None)
|
||||
if not cursor or cursor == variables.get("cursor"):
|
||||
return
|
||||
variables["cursor"] = cursor
|
||||
return extr._update_cursor(None)
|
||||
variables["cursor"] = extr._update_cursor(cursor)
|
||||
|
||||
def _pagination_users(self, endpoint, variables, path=None):
|
||||
extr = self.extractor
|
||||
variables["cursor"] = extr._init_cursor()
|
||||
params = {
|
||||
"variables": None,
|
||||
"features" : self._json_dumps(self.features_pagination),
|
||||
@ -1724,7 +1793,7 @@ class TwitterAPI():
|
||||
data = data[key]
|
||||
instructions = data["instructions"]
|
||||
except KeyError:
|
||||
return
|
||||
return extr._update_cursor(None)
|
||||
|
||||
for instr in instructions:
|
||||
if instr["type"] == "TimelineAddEntries":
|
||||
@ -1742,8 +1811,8 @@ class TwitterAPI():
|
||||
cursor = entry["content"]["value"]
|
||||
|
||||
if not cursor or cursor.startswith(("-1|", "0|")) or not entry:
|
||||
return
|
||||
variables["cursor"] = cursor
|
||||
return extr._update_cursor(None)
|
||||
variables["cursor"] = extr._update_cursor(cursor)
|
||||
|
||||
def _handle_ratelimit(self, response):
|
||||
rl = self.extractor.config("ratelimit")
|
||||
|
Loading…
Reference in New Issue
Block a user