mirror of
https://github.com/mikf/gallery-dl.git
synced 2024-11-22 18:53:21 +01:00
[twitter] implement 'cursor' support (#5753)
This commit is contained in:
parent
162756b684
commit
97a50a23d2
5
.github/workflows/docker.yml
vendored
5
.github/workflows/docker.yml
vendored
@ -22,7 +22,10 @@ jobs:
|
|||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
|
|
||||||
# on release commits, run only for tag event
|
# on release commits, run only for tag event
|
||||||
if: ${{ ! startsWith( github.event.head_commit.message , 'release version ' ) || startsWith( github.ref , 'refs/tags/v' ) }}
|
if: |
|
||||||
|
github.repository == 'mikf/gallery-dl' &&
|
||||||
|
( ! startsWith( github.event.head_commit.message , 'release version ' ) ||
|
||||||
|
startsWith( github.ref , 'refs/tags/v' ) )
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@v4
|
||||||
|
1
.github/workflows/executables.yml
vendored
1
.github/workflows/executables.yml
vendored
@ -14,6 +14,7 @@ env:
|
|||||||
jobs:
|
jobs:
|
||||||
build:
|
build:
|
||||||
|
|
||||||
|
if: github.repository == 'mikf/gallery-dl'
|
||||||
runs-on: ${{ matrix.os }}
|
runs-on: ${{ matrix.os }}
|
||||||
defaults:
|
defaults:
|
||||||
run:
|
run:
|
||||||
|
1
.github/workflows/pages.yml
vendored
1
.github/workflows/pages.yml
vendored
@ -20,6 +20,7 @@ concurrency:
|
|||||||
jobs:
|
jobs:
|
||||||
dispatch:
|
dispatch:
|
||||||
|
|
||||||
|
if: github.repository == 'mikf/gallery-dl'
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
|
@ -51,6 +51,8 @@ class TwitterExtractor(Extractor):
|
|||||||
if not self.config("transform", True):
|
if not self.config("transform", True):
|
||||||
self._transform_user = util.identity
|
self._transform_user = util.identity
|
||||||
self._transform_tweet = util.identity
|
self._transform_tweet = util.identity
|
||||||
|
|
||||||
|
self._cursor = None
|
||||||
self._user = None
|
self._user = None
|
||||||
self._user_obj = None
|
self._user_obj = None
|
||||||
self._user_cache = {}
|
self._user_cache = {}
|
||||||
@ -501,6 +503,14 @@ class TwitterExtractor(Extractor):
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
def _init_cursor(self):
|
||||||
|
return self.config("cursor") or None
|
||||||
|
|
||||||
|
def _update_cursor(self, cursor):
|
||||||
|
self.log.debug("Cursor: %s", cursor)
|
||||||
|
self._cursor = cursor
|
||||||
|
return cursor
|
||||||
|
|
||||||
def metadata(self):
|
def metadata(self):
|
||||||
"""Return general metadata"""
|
"""Return general metadata"""
|
||||||
return {}
|
return {}
|
||||||
@ -508,6 +518,11 @@ class TwitterExtractor(Extractor):
|
|||||||
def tweets(self):
|
def tweets(self):
|
||||||
"""Yield all relevant tweet objects"""
|
"""Yield all relevant tweet objects"""
|
||||||
|
|
||||||
|
def finalize(self):
|
||||||
|
if self._cursor:
|
||||||
|
self.log.info("Use '-o cursor=%s' to continue downloading "
|
||||||
|
"from the current position", self._cursor)
|
||||||
|
|
||||||
def login(self):
|
def login(self):
|
||||||
if self.cookies_check(self.cookies_names):
|
if self.cookies_check(self.cookies_names):
|
||||||
return
|
return
|
||||||
@ -539,6 +554,9 @@ class TwitterUserExtractor(TwitterExtractor):
|
|||||||
def initialize(self):
|
def initialize(self):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
def finalize(self):
|
||||||
|
pass
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
base = "{}/{}/".format(self.root, self.user)
|
base = "{}/{}/".format(self.root, self.user)
|
||||||
return self._dispatch_extractors((
|
return self._dispatch_extractors((
|
||||||
@ -558,30 +576,76 @@ class TwitterTimelineExtractor(TwitterExtractor):
|
|||||||
pattern = BASE_PATTERN + r"/(?!search)([^/?#]+)/timeline(?!\w)"
|
pattern = BASE_PATTERN + r"/(?!search)([^/?#]+)/timeline(?!\w)"
|
||||||
example = "https://x.com/USER/timeline"
|
example = "https://x.com/USER/timeline"
|
||||||
|
|
||||||
|
def _init_cursor(self):
|
||||||
|
if self._cursor:
|
||||||
|
return self._cursor.partition("/")[2] or None
|
||||||
|
return None
|
||||||
|
|
||||||
|
def _update_cursor(self, cursor):
|
||||||
|
if cursor:
|
||||||
|
self._cursor = self._cursor_prefix + cursor
|
||||||
|
self.log.debug("Cursor: %s", self._cursor)
|
||||||
|
else:
|
||||||
|
self._cursor = None
|
||||||
|
return cursor
|
||||||
|
|
||||||
def tweets(self):
|
def tweets(self):
|
||||||
# yield initial batch of (media) tweets
|
self._cursor = cursor = self.config("cursor") or None
|
||||||
tweet = None
|
reset = False
|
||||||
for tweet in self._select_tweet_source()(self.user):
|
|
||||||
yield tweet
|
if cursor:
|
||||||
if tweet is None:
|
state = cursor.partition("/")[0]
|
||||||
return
|
state, _, tweet_id = state.partition("_")
|
||||||
|
state = text.parse_int(state, 1)
|
||||||
|
else:
|
||||||
|
state = 1
|
||||||
|
|
||||||
|
if state <= 1:
|
||||||
|
self._cursor_prefix = "1/"
|
||||||
|
|
||||||
|
# yield initial batch of (media) tweets
|
||||||
|
tweet = None
|
||||||
|
for tweet in self._select_tweet_source()(self.user):
|
||||||
|
yield tweet
|
||||||
|
if tweet is None and not cursor:
|
||||||
|
return
|
||||||
|
|
||||||
|
user = self._user["name"]
|
||||||
|
tweet_id = tweet["rest_id"]
|
||||||
|
|
||||||
|
state = reset = 2
|
||||||
|
else:
|
||||||
|
user = self.user
|
||||||
|
|
||||||
# build search query
|
# build search query
|
||||||
query = "from:{} max_id:{}".format(
|
query = "from:{} max_id:{}".format(user, tweet_id)
|
||||||
self._user["name"], tweet["rest_id"])
|
|
||||||
if self.retweets:
|
if self.retweets:
|
||||||
query += " include:retweets include:nativeretweets"
|
query += " include:retweets include:nativeretweets"
|
||||||
|
|
||||||
if not self.textonly:
|
if state <= 2:
|
||||||
# try to search for media-only tweets
|
self._cursor_prefix = "2_{}/".format(tweet_id)
|
||||||
tweet = None
|
if reset:
|
||||||
for tweet in self.api.search_timeline(query + " filter:links"):
|
self._cursor = self._cursor_prefix
|
||||||
yield tweet
|
|
||||||
if tweet is not None:
|
|
||||||
return
|
|
||||||
|
|
||||||
# yield unfiltered search results
|
if not self.textonly:
|
||||||
yield from self.api.search_timeline(query)
|
# try to search for media-only tweets
|
||||||
|
tweet = None
|
||||||
|
for tweet in self.api.search_timeline(query + " filter:links"):
|
||||||
|
yield tweet
|
||||||
|
break
|
||||||
|
if tweet is not None:
|
||||||
|
return self._update_cursor(None)
|
||||||
|
|
||||||
|
state = reset = 3
|
||||||
|
|
||||||
|
if state <= 3:
|
||||||
|
# yield unfiltered search results
|
||||||
|
self._cursor_prefix = "3_{}/".format(tweet_id)
|
||||||
|
if reset:
|
||||||
|
self._cursor = self._cursor_prefix
|
||||||
|
|
||||||
|
yield from self.api.search_timeline(query)
|
||||||
|
return self._update_cursor(None)
|
||||||
|
|
||||||
def _select_tweet_source(self):
|
def _select_tweet_source(self):
|
||||||
strategy = self.config("strategy")
|
strategy = self.config("strategy")
|
||||||
@ -1415,7 +1479,9 @@ class TwitterAPI():
|
|||||||
"%s %s (%s)", response.status_code, response.reason, errors)
|
"%s %s (%s)", response.status_code, response.reason, errors)
|
||||||
|
|
||||||
def _pagination_legacy(self, endpoint, params):
|
def _pagination_legacy(self, endpoint, params):
|
||||||
original_retweets = (self.extractor.retweets == "original")
|
extr = self.extractor
|
||||||
|
params["cursor"] = extr._init_cursor()
|
||||||
|
original_retweets = (extr.retweets == "original")
|
||||||
bottom = ("cursor-bottom-", "sq-cursor-bottom")
|
bottom = ("cursor-bottom-", "sq-cursor-bottom")
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
@ -1423,7 +1489,7 @@ class TwitterAPI():
|
|||||||
|
|
||||||
instructions = data["timeline"]["instructions"]
|
instructions = data["timeline"]["instructions"]
|
||||||
if not instructions:
|
if not instructions:
|
||||||
return
|
return extr._update_cursor(None)
|
||||||
|
|
||||||
tweets = data["globalObjects"]["tweets"]
|
tweets = data["globalObjects"]["tweets"]
|
||||||
users = data["globalObjects"]["users"]
|
users = data["globalObjects"]["users"]
|
||||||
@ -1504,8 +1570,8 @@ class TwitterAPI():
|
|||||||
|
|
||||||
# stop on empty response
|
# stop on empty response
|
||||||
if not cursor or (not tweets and not tweet_id):
|
if not cursor or (not tweets and not tweet_id):
|
||||||
return
|
return extr._update_cursor(None)
|
||||||
params["cursor"] = cursor
|
params["cursor"] = extr._update_cursor(cursor)
|
||||||
|
|
||||||
def _pagination_tweets(self, endpoint, variables,
|
def _pagination_tweets(self, endpoint, variables,
|
||||||
path=None, stop_tweets=True, features=None):
|
path=None, stop_tweets=True, features=None):
|
||||||
@ -1514,6 +1580,7 @@ class TwitterAPI():
|
|||||||
pinned_tweet = extr.pinned
|
pinned_tweet = extr.pinned
|
||||||
|
|
||||||
params = {"variables": None}
|
params = {"variables": None}
|
||||||
|
variables["cursor"] = extr._init_cursor()
|
||||||
if features is None:
|
if features is None:
|
||||||
features = self.features_pagination
|
features = self.features_pagination
|
||||||
if features:
|
if features:
|
||||||
@ -1550,7 +1617,7 @@ class TwitterAPI():
|
|||||||
cursor = entry["content"]["value"]
|
cursor = entry["content"]["value"]
|
||||||
if entries is None:
|
if entries is None:
|
||||||
if not cursor:
|
if not cursor:
|
||||||
return
|
return extr._update_cursor(None)
|
||||||
entries = ()
|
entries = ()
|
||||||
|
|
||||||
except LookupError:
|
except LookupError:
|
||||||
@ -1699,12 +1766,14 @@ class TwitterAPI():
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
if stop_tweets and not tweet:
|
if stop_tweets and not tweet:
|
||||||
return
|
return extr._update_cursor(None)
|
||||||
if not cursor or cursor == variables.get("cursor"):
|
if not cursor or cursor == variables.get("cursor"):
|
||||||
return
|
return extr._update_cursor(None)
|
||||||
variables["cursor"] = cursor
|
variables["cursor"] = extr._update_cursor(cursor)
|
||||||
|
|
||||||
def _pagination_users(self, endpoint, variables, path=None):
|
def _pagination_users(self, endpoint, variables, path=None):
|
||||||
|
extr = self.extractor
|
||||||
|
variables["cursor"] = extr._init_cursor()
|
||||||
params = {
|
params = {
|
||||||
"variables": None,
|
"variables": None,
|
||||||
"features" : self._json_dumps(self.features_pagination),
|
"features" : self._json_dumps(self.features_pagination),
|
||||||
@ -1724,7 +1793,7 @@ class TwitterAPI():
|
|||||||
data = data[key]
|
data = data[key]
|
||||||
instructions = data["instructions"]
|
instructions = data["instructions"]
|
||||||
except KeyError:
|
except KeyError:
|
||||||
return
|
return extr._update_cursor(None)
|
||||||
|
|
||||||
for instr in instructions:
|
for instr in instructions:
|
||||||
if instr["type"] == "TimelineAddEntries":
|
if instr["type"] == "TimelineAddEntries":
|
||||||
@ -1742,8 +1811,8 @@ class TwitterAPI():
|
|||||||
cursor = entry["content"]["value"]
|
cursor = entry["content"]["value"]
|
||||||
|
|
||||||
if not cursor or cursor.startswith(("-1|", "0|")) or not entry:
|
if not cursor or cursor.startswith(("-1|", "0|")) or not entry:
|
||||||
return
|
return extr._update_cursor(None)
|
||||||
variables["cursor"] = cursor
|
variables["cursor"] = extr._update_cursor(cursor)
|
||||||
|
|
||||||
def _handle_ratelimit(self, response):
|
def _handle_ratelimit(self, response):
|
||||||
rl = self.extractor.config("ratelimit")
|
rl = self.extractor.config("ratelimit")
|
||||||
|
Loading…
Reference in New Issue
Block a user