1
0
mirror of https://github.com/mikf/gallery-dl.git synced 2024-11-25 12:12:34 +01:00

[ytdl] add extractor for sites supported by youtube-dl

(#1680, #878)

Can be used by prefixing any URL with 'ytdl:',
or by setting 'extractor.ytdl.enabled' to 'true'.
This commit is contained in:
Mike Fährmann 2021-07-10 20:47:33 +02:00
parent 64240c8d42
commit 36ac2197db
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88
8 changed files with 195 additions and 9 deletions

View File

@ -1955,6 +1955,72 @@ Description
Download video files. Download video files.
extractor.ytdl.enabled
----------------------
Type
``bool``
Default
``false``
Description
Match **all** URLs, even ones without a ``ytdl:`` prefix.
extractor.ytdl.format
---------------------
Type
``string``
Default
youtube-dl's default, currently ``"bestvideo+bestaudio/best"``
Description
Video `format selection
<https://github.com/ytdl-org/youtube-dl#format-selection>`__
directly passed to youtube-dl.
extractor.ytdl.logging
----------------------
Type
``bool``
Default
``true``
Description
Route youtube-dl's output through gallery-dl's logging system.
Otherwise youtube-dl will write its output directly to stdout/stderr.
Note: Set ``quiet`` and ``no_warnings`` in
`extractor.ytdl.raw-options`_ to ``true`` to suppress all output.
extractor.ytdl.module
---------------------
Type
``string``
Default
``"youtube_dl"``
Description
Name of the youtube-dl Python module to import.
extractor.ytdl.raw-options
--------------------------
Type
``object``
Example
.. code:: json
{
"quiet": true,
"writesubtitles": true,
"merge_output_format": "mkv"
}
Description
Additional options passed directly to the ``YoutubeDL`` constructor.
All available options can be found in `youtube-dl's docstrings
<https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/YoutubeDL.py#L138-L318>`__.
extractor.[booru].tags extractor.[booru].tags
---------------------- ----------------------
Type Type
@ -1967,6 +2033,7 @@ Description
Note: This requires 1 additional HTTP request for each post. Note: This requires 1 additional HTTP request for each post.
extractor.[booru].notes extractor.[booru].notes
----------------------- -----------------------
Type Type
@ -1978,6 +2045,7 @@ Description
Note: This requires 1 additional HTTP request for each post. Note: This requires 1 additional HTTP request for each post.
extractor.[manga-extractor].chapter-reverse extractor.[manga-extractor].chapter-reverse
------------------------------------------- -------------------------------------------
Type Type

View File

@ -287,6 +287,14 @@
"retweets": true, "retweets": true,
"videos": true "videos": true
}, },
"ytdl":
{
"enabled": false,
"format": null,
"logging": true,
"module": "youtube_dl",
"raw-options": null
},
"booru": "booru":
{ {
"tags": false, "tags": false,

View File

@ -41,7 +41,10 @@ class YoutubeDLDownloader(DownloaderBase):
"max_filesize": text.parse_bytes( "max_filesize": text.parse_bytes(
self.config("filesize-max"), None), self.config("filesize-max"), None),
} }
options.update(self.config("raw-options") or {})
raw_options = self.config("raw-options")
if raw_options:
options.update(raw_options)
if self.config("logging", True): if self.config("logging", True):
options["logger"] = self.log options["logger"] = self.log
@ -59,19 +62,22 @@ class YoutubeDLDownloader(DownloaderBase):
for cookie in self.session.cookies: for cookie in self.session.cookies:
set_cookie(cookie) set_cookie(cookie)
try: kwdict = pathfmt.kwdict
info_dict = self.ytdl.extract_info(url[5:], download=False) info_dict = kwdict.pop("_ytdl_info_dict", None)
except Exception: if not info_dict:
return False try:
info_dict = self.ytdl.extract_info(url[5:], download=False)
except Exception:
return False
if "entries" in info_dict: if "entries" in info_dict:
index = pathfmt.kwdict.get("_ytdl_index") index = kwdict.get("_ytdl_index")
if index is None: if index is None:
return self._download_playlist(pathfmt, info_dict) return self._download_playlist(pathfmt, info_dict)
else: else:
info_dict = info_dict["entries"][index] info_dict = info_dict["entries"][index]
extra = pathfmt.kwdict.get("_ytdl_extra") extra = kwdict.get("_ytdl_extra")
if extra: if extra:
info_dict.update(extra) info_dict.update(extra)
@ -121,6 +127,7 @@ class YoutubeDLDownloader(DownloaderBase):
def compatible_formats(formats): def compatible_formats(formats):
"""Returns True if 'formats' are compatible for merge"""
video_ext = formats[0].get("ext") video_ext = formats[0].get("ext")
audio_ext = formats[1].get("ext") audio_ext = formats[1].get("ext")

View File

@ -143,6 +143,7 @@ modules = [
"recursive", "recursive",
"oauth", "oauth",
"test", "test",
"ytdl",
] ]

View File

@ -0,0 +1,101 @@
# -*- coding: utf-8 -*-
# Copyright 2021 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Extractors for sites supported by youtube-dl"""
from .common import Extractor, Message
from .. import config
class YoutubeDLExtractor(Extractor):
    """Generic extractor for youtube-dl supported URLs"""
    category = "ytdl"
    directory_fmt = ("{category}", "{subcategory}")
    filename_fmt = "{title}-{id}.{extension}"
    archive_fmt = "{extractor_key} {id}"
    # imported youtube-dl module; set on first instantiation and then
    # shared by all instances through the class attribute
    ytdl_module = None
    pattern = r"ytdl:(.*)"
    test = ("ytdl:https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9",)

    def __init__(self, match):
        """Import the youtube-dl module and select its extractor for the URL

        'match' is the regex match of 'pattern'; its first group is the
        URL that gets handed to youtube-dl.
        """
        # import youtube_dl module
        module = self.ytdl_module
        if not module:
            name = config.get(("extractor", "ytdl"), "module") or "youtube_dl"
            module = YoutubeDLExtractor.ytdl_module = __import__(name)

        # find suitable youtube_dl extractor
        self.ytdl_url = url = match.group(1)
        for ie in module.extractor.gen_extractor_classes():
            if ie.suitable(url):
                self.ytdl_ie = ie
                break

        # set subcategory to youtube_dl extractor's key
        # (must happen before Extractor.__init__ is called)
        self.subcategory = ie.ie_key()
        Extractor.__init__(self, match)

    def items(self):
        """Yield a Directory and a Url message for every extracted video"""
        # construct YoutubeDL object
        options = {
            "format": self.config("format"),
            "socket_timeout": self._timeout,
            "nocheckcertificate": not self._verify,
            "proxy": self.session.proxies.get("http"),
        }

        # user-supplied raw options override the defaults above
        raw_options = self.config("raw-options")
        if raw_options:
            options.update(raw_options)
        if self.config("logging", True):
            options["logger"] = self.log
        # always set last, so raw-options cannot override it
        options["extract_flat"] = "in_playlist"

        ytdl = self.ytdl_module.YoutubeDL(options)

        # extract youtube_dl info_dict
        info_dict = ytdl._YoutubeDL__extract_info(
            self.ytdl_url,
            ytdl.get_info_extractor(self.ytdl_ie.ie_key()),
            False, {}, True)

        if not info_dict:
            # youtube-dl returned nothing, e.g. because extraction failed
            # while errors are being ignored via 'raw-options'
            return

        if "entries" in info_dict:
            results = self._process_entries(ytdl, info_dict["entries"])
        else:
            results = (info_dict,)

        # yield results
        for result in results:
            result["extension"] = None
            # store the info_dict in its own metadata under a
            # '_ytdl_info_dict' key
            result["_ytdl_info_dict"] = result

            url = "ytdl:" + (result.get("url") or
                             result.get("webpage_url") or
                             self.ytdl_url)

            yield Message.Directory, result
            yield Message.Url, url, result

    def _process_entries(self, ytdl, entries):
        """Recursively resolve playlist 'entries' into single info_dicts"""
        for entry in entries:
            if not entry:
                # skip empty entries instead of failing on entry.get()
                continue

            if entry.get("_type") not in ("url", "url_transparent"):
                yield entry
                continue

            # flat entries only reference another URL; resolve it
            info_dict = ytdl.extract_info(
                entry["url"], False,
                ie_key=entry.get("ie_key"))

            if not info_dict:
                # nothing could be extracted for this entry
                continue

            if "entries" in info_dict:
                yield from self._process_entries(
                    ytdl, info_dict["entries"])
            else:
                yield info_dict
# With 'extractor.ytdl.enabled' set, the 'ytdl:' prefix becomes optional
# and this extractor's pattern matches any URL.
if config.get(("extractor", "ytdl"), "enabled"):
    YoutubeDLExtractor.pattern = r"(?:ytdl:)?(.*)"

View File

@ -6,4 +6,4 @@
# it under the terms of the GNU General Public License version 2 as # it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation. # published by the Free Software Foundation.
__version__ = "1.18.2-dev" __version__ = "1.19.0-dev"

View File

@ -254,6 +254,7 @@ IGNORE_LIST = (
"oauth", "oauth",
"recursive", "recursive",
"test", "test",
"ytdl",
) )

View File

@ -147,7 +147,7 @@ class TestExtractorModule(unittest.TestCase):
return c.capitalize() return c.capitalize()
for extr in extractor.extractors(): for extr in extractor.extractors():
if extr.category not in ("", "oauth"): if extr.category not in ("", "oauth", "ytdl"):
expected = "{}{}Extractor".format( expected = "{}{}Extractor".format(
capitalize(extr.category), capitalize(extr.category),
capitalize(extr.subcategory), capitalize(extr.subcategory),