mirror of
https://github.com/mikf/gallery-dl.git
synced 2024-11-25 12:12:34 +01:00
[twitter] add experimental 'videos' option (#99)
Enabling this option will detect videos in tweets and output them as "unsupported" URLs, so that these can then be downloaded with youtube-dl. There are a lot of improvements to be made to the current implementation, but it works and does what it is supposed to, even if it is as inefficient as can be ...
This commit is contained in:
parent
5507f5ce2e
commit
f8b3b00249
@ -654,6 +654,15 @@ Description Extract images from retweets.
|
||||
=========== =====
|
||||
|
||||
|
||||
extractor.twitter.videos
|
||||
------------------------
|
||||
=========== =====
|
||||
Type ``bool``
|
||||
Default ``false``
|
||||
Description Output video tweets as unsupported URLs.
|
||||
=========== =====
|
||||
|
||||
|
||||
extractor.[booru].tags
|
||||
----------------------
|
||||
=========== =====
|
||||
|
@ -110,7 +110,8 @@
|
||||
},
|
||||
"twitter":
|
||||
{
|
||||
"retweets": true
|
||||
"retweets": true,
|
||||
"videos": false
|
||||
},
|
||||
"booru":
|
||||
{
|
||||
|
@ -9,7 +9,7 @@
|
||||
"""Extract images from https://twitter.com/"""
|
||||
|
||||
from .common import Extractor, Message
|
||||
from .. import text
|
||||
from .. import text, extractor
|
||||
|
||||
|
||||
class TwitterExtractor(Extractor):
|
||||
@ -24,32 +24,38 @@ class TwitterExtractor(Extractor):
|
||||
Extractor.__init__(self)
|
||||
self.user = match.group(1)
|
||||
self.retweets = self.config("retweets", True)
|
||||
self.videos = self.config("videos", False)
|
||||
|
||||
if self.videos:
|
||||
self._blacklist = extractor.blacklist(("twitter",))
|
||||
|
||||
def items(self):
|
||||
yield Message.Version, 1
|
||||
yield Message.Directory, self.metadata()
|
||||
|
||||
for tweet in self.tweets():
|
||||
images = list(text.extract_iter(
|
||||
tweet, 'data-image-url="', '"'))
|
||||
if not images:
|
||||
continue
|
||||
|
||||
data = self._data_from_tweet(tweet)
|
||||
if not self.retweets and data["retweet_id"]:
|
||||
continue
|
||||
|
||||
images = text.extract_iter(
|
||||
tweet, 'data-image-url="', '"')
|
||||
for data["num"], url in enumerate(images, 1):
|
||||
text.nameext_from_url(url, data)
|
||||
yield Message.Url, url + ":orig", data
|
||||
|
||||
if self.videos and "-videoContainer" in tweet:
|
||||
url = "{}/{}/status/{}".format(
|
||||
self.root, data["user"], data["tweet_id"])
|
||||
with self._blacklist:
|
||||
yield Message.Queue, url, data
|
||||
|
||||
def metadata(self):
|
||||
"""Return general metadata"""
|
||||
return {"user": self.user}
|
||||
|
||||
def tweets(self):
|
||||
"""Yield HTML content of all relevant tweets"""
|
||||
return ()
|
||||
|
||||
@staticmethod
|
||||
def _data_from_tweet(tweet):
|
||||
|
Loading…
Reference in New Issue
Block a user