1
0
mirror of https://github.com/mikf/gallery-dl.git synced 2024-11-25 12:12:34 +01:00

[twitter] add experimental 'videos' option (#99)

Enabling this option will detect videos in tweets and output them as
"unsupported" URLs, so that they can then be downloaded with youtube-dl.

There are a lot of improvements to be made to the current
implementation, but it works and does what it is supposed to, even if
it is as inefficient as can be ...
This commit is contained in:
Mike Fährmann 2018-09-30 18:41:39 +02:00
parent 5507f5ce2e
commit f8b3b00249
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88
3 changed files with 24 additions and 8 deletions

View File

@ -654,6 +654,15 @@ Description Extract images from retweets.
=========== =====
extractor.twitter.videos
------------------------
=========== =====
Type ``bool``
Default ``false``
Description Output video tweets as unsupported URLs.
=========== =====
extractor.[booru].tags
----------------------
=========== =====

View File

@ -110,7 +110,8 @@
},
"twitter":
{
"retweets": true
"retweets": true,
"videos": false
},
"booru":
{

View File

@ -9,7 +9,7 @@
"""Extract images from https://twitter.com/"""
from .common import Extractor, Message
from .. import text
from .. import text, extractor
class TwitterExtractor(Extractor):
@ -24,32 +24,38 @@ class TwitterExtractor(Extractor):
Extractor.__init__(self)
self.user = match.group(1)
self.retweets = self.config("retweets", True)
self.videos = self.config("videos", False)
if self.videos:
self._blacklist = extractor.blacklist(("twitter",))
def items(self):
    """Yield download messages for every relevant tweet

    Emits the version message and the directory message (built from
    self.metadata()) first. Then, for each tweet returned by
    self.tweets(), it yields one Message.Url per embedded image and,
    if the 'videos' option is enabled and the tweet contains a video
    container, one Message.Queue with the tweet's status URL.

    Retweets are skipped entirely when the 'retweets' option is off.
    """
    yield Message.Version, 1
    yield Message.Directory, self.metadata()

    for tweet in self.tweets():
        data = self._data_from_tweet(tweet)
        if not self.retweets and data["retweet_id"]:
            continue

        # Do not bail out early on tweets without images: a video-only
        # tweet has no 'data-image-url' attribute but must still reach
        # the video check below.
        images = text.extract_iter(
            tweet, 'data-image-url="', '"')
        for data["num"], url in enumerate(images, 1):
            text.nameext_from_url(url, data)
            yield Message.Url, url + ":orig", data

        # NOTE(review): for a tweet without images the loop above never
        # assigns data["num"]; whether _data_from_tweet() provides it is
        # not visible from here -- confirm before relying on it.
        if self.videos and "-videoContainer" in tweet:
            url = "{}/{}/status/{}".format(
                self.root, data["user"], data["tweet_id"])
            # self._blacklist only exists when the 'videos' option is
            # enabled, hence the access behind the self.videos check.
            # Presumably it keeps the twitter extractors from picking
            # the queued URL up again -- verify against
            # extractor.blacklist().
            with self._blacklist:
                yield Message.Queue, url, data
def metadata(self):
    """Build the general metadata dict, holding the requested user name"""
    info = {"user": self.user}
    return info
def tweets(self):
    """Return an iterable with the HTML content of all relevant tweets

    This implementation returns an empty tuple.
    """
    return tuple()
@staticmethod
def _data_from_tweet(tweet):