mirror of
https://github.com/mikf/gallery-dl.git
synced 2024-11-25 12:12:34 +01:00
[ytdl] add extractor for sites supported by youtube-dl
(#1680, #878) Can be used by prefixing any URL with 'ytdl:', or by setting 'extractor.ytdl.enabled' to 'true'.
This commit is contained in:
parent
64240c8d42
commit
36ac2197db
@ -1955,6 +1955,72 @@ Description
|
|||||||
Download video files.
|
Download video files.
|
||||||
|
|
||||||
|
|
||||||
|
extractor.ytdl.enabled
|
||||||
|
----------------------
|
||||||
|
Type
|
||||||
|
``bool``
|
||||||
|
Default
|
||||||
|
``false``
|
||||||
|
Description
|
||||||
|
Match **all** URLs, even ones without a ``ytdl:`` prefix.
|
||||||
|
|
||||||
|
|
||||||
|
extractor.ytdl.format
|
||||||
|
---------------------
|
||||||
|
Type
|
||||||
|
``string``
|
||||||
|
Default
|
||||||
|
youtube-dl's default, currently ``"bestvideo+bestaudio/best"``
|
||||||
|
Description
|
||||||
|
Video `format selection
|
||||||
|
<https://github.com/ytdl-org/youtube-dl#format-selection>`__
|
||||||
|
directly passed to youtube-dl.
|
||||||
|
|
||||||
|
|
||||||
|
extractor.ytdl.logging
|
||||||
|
----------------------
|
||||||
|
Type
|
||||||
|
``bool``
|
||||||
|
Default
|
||||||
|
``true``
|
||||||
|
Description
|
||||||
|
Route youtube-dl's output through gallery-dl's logging system.
|
||||||
|
Otherwise youtube-dl will write its output directly to stdout/stderr.
|
||||||
|
|
||||||
|
Note: Set ``quiet`` and ``no_warnings`` in
|
||||||
|
`extractor.ytdl.raw-options`_ to ``true`` to suppress all output.
|
||||||
|
|
||||||
|
|
||||||
|
extractor.ytdl.module
|
||||||
|
---------------------
|
||||||
|
Type
|
||||||
|
``string``
|
||||||
|
Default
|
||||||
|
``"youtube_dl"``
|
||||||
|
Description
|
||||||
|
Name of the youtube-dl Python module to import.
|
||||||
|
|
||||||
|
|
||||||
|
extractor.ytdl.raw-options
|
||||||
|
--------------------------
|
||||||
|
Type
|
||||||
|
``object``
|
||||||
|
Example
|
||||||
|
.. code:: json
|
||||||
|
|
||||||
|
{
|
||||||
|
"quiet": true,
|
||||||
|
"writesubtitles": true,
|
||||||
|
"merge_output_format": "mkv"
|
||||||
|
}
|
||||||
|
|
||||||
|
Description
|
||||||
|
Additional options passed directly to the ``YoutubeDL`` constructor.
|
||||||
|
|
||||||
|
All available options can be found in `youtube-dl's docstrings
|
||||||
|
<https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/YoutubeDL.py#L138-L318>`__.
|
||||||
|
|
||||||
|
|
||||||
extractor.[booru].tags
|
extractor.[booru].tags
|
||||||
----------------------
|
----------------------
|
||||||
Type
|
Type
|
||||||
@ -1967,6 +2033,7 @@ Description
|
|||||||
|
|
||||||
Note: This requires 1 additional HTTP request for each post.
|
Note: This requires 1 additional HTTP request for each post.
|
||||||
|
|
||||||
|
|
||||||
extractor.[booru].notes
|
extractor.[booru].notes
|
||||||
-----------------------
|
-----------------------
|
||||||
Type
|
Type
|
||||||
@ -1978,6 +2045,7 @@ Description
|
|||||||
|
|
||||||
Note: This requires 1 additional HTTP request for each post.
|
Note: This requires 1 additional HTTP request for each post.
|
||||||
|
|
||||||
|
|
||||||
extractor.[manga-extractor].chapter-reverse
|
extractor.[manga-extractor].chapter-reverse
|
||||||
-------------------------------------------
|
-------------------------------------------
|
||||||
Type
|
Type
|
||||||
|
@ -287,6 +287,14 @@
|
|||||||
"retweets": true,
|
"retweets": true,
|
||||||
"videos": true
|
"videos": true
|
||||||
},
|
},
|
||||||
|
"ytdl":
|
||||||
|
{
|
||||||
|
"enabled": false,
|
||||||
|
"format": null,
|
||||||
|
"logging": true,
|
||||||
|
"module": "youtube_dl",
|
||||||
|
"raw-options": null
|
||||||
|
},
|
||||||
"booru":
|
"booru":
|
||||||
{
|
{
|
||||||
"tags": false,
|
"tags": false,
|
||||||
|
@ -41,7 +41,10 @@ class YoutubeDLDownloader(DownloaderBase):
|
|||||||
"max_filesize": text.parse_bytes(
|
"max_filesize": text.parse_bytes(
|
||||||
self.config("filesize-max"), None),
|
self.config("filesize-max"), None),
|
||||||
}
|
}
|
||||||
options.update(self.config("raw-options") or {})
|
|
||||||
|
raw_options = self.config("raw-options")
|
||||||
|
if raw_options:
|
||||||
|
options.update(raw_options)
|
||||||
|
|
||||||
if self.config("logging", True):
|
if self.config("logging", True):
|
||||||
options["logger"] = self.log
|
options["logger"] = self.log
|
||||||
@ -59,19 +62,22 @@ class YoutubeDLDownloader(DownloaderBase):
|
|||||||
for cookie in self.session.cookies:
|
for cookie in self.session.cookies:
|
||||||
set_cookie(cookie)
|
set_cookie(cookie)
|
||||||
|
|
||||||
try:
|
kwdict = pathfmt.kwdict
|
||||||
info_dict = self.ytdl.extract_info(url[5:], download=False)
|
info_dict = kwdict.pop("_ytdl_info_dict", None)
|
||||||
except Exception:
|
if not info_dict:
|
||||||
return False
|
try:
|
||||||
|
info_dict = self.ytdl.extract_info(url[5:], download=False)
|
||||||
|
except Exception:
|
||||||
|
return False
|
||||||
|
|
||||||
if "entries" in info_dict:
|
if "entries" in info_dict:
|
||||||
index = pathfmt.kwdict.get("_ytdl_index")
|
index = kwdict.get("_ytdl_index")
|
||||||
if index is None:
|
if index is None:
|
||||||
return self._download_playlist(pathfmt, info_dict)
|
return self._download_playlist(pathfmt, info_dict)
|
||||||
else:
|
else:
|
||||||
info_dict = info_dict["entries"][index]
|
info_dict = info_dict["entries"][index]
|
||||||
|
|
||||||
extra = pathfmt.kwdict.get("_ytdl_extra")
|
extra = kwdict.get("_ytdl_extra")
|
||||||
if extra:
|
if extra:
|
||||||
info_dict.update(extra)
|
info_dict.update(extra)
|
||||||
|
|
||||||
@ -121,6 +127,7 @@ class YoutubeDLDownloader(DownloaderBase):
|
|||||||
|
|
||||||
|
|
||||||
def compatible_formats(formats):
|
def compatible_formats(formats):
|
||||||
|
"""Returns True if 'formats' are compatible for merge"""
|
||||||
video_ext = formats[0].get("ext")
|
video_ext = formats[0].get("ext")
|
||||||
audio_ext = formats[1].get("ext")
|
audio_ext = formats[1].get("ext")
|
||||||
|
|
||||||
|
@ -143,6 +143,7 @@ modules = [
|
|||||||
"recursive",
|
"recursive",
|
||||||
"oauth",
|
"oauth",
|
||||||
"test",
|
"test",
|
||||||
|
"ytdl",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
101
gallery_dl/extractor/ytdl.py
Normal file
101
gallery_dl/extractor/ytdl.py
Normal file
@ -0,0 +1,101 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
# Copyright 2021 Mike Fährmann
|
||||||
|
#
|
||||||
|
# This program is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU General Public License version 2 as
|
||||||
|
# published by the Free Software Foundation.
|
||||||
|
|
||||||
|
"""Extractors for sites supported by youtube-dl"""
|
||||||
|
|
||||||
|
from .common import Extractor, Message
|
||||||
|
from .. import config
|
||||||
|
|
||||||
|
|
||||||
|
class YoutubeDLExtractor(Extractor):
    """Generic extractor for youtube-dl supported URLs"""
    category = "ytdl"
    directory_fmt = ("{category}", "{subcategory}")
    filename_fmt = "{title}-{id}.{extension}"
    archive_fmt = "{extractor_key} {id}"
    # youtube-dl module object; imported by the first instance that needs
    # it and then cached on the class for every later instance
    ytdl_module = None
    pattern = r"ytdl:(.*)"
    test = ("ytdl:https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9",)

    def __init__(self, match):
        """Import youtube-dl and select its extractor class for the URL

        'match' is the regex match object produced by 'pattern';
        its first group is the URL handed over to youtube-dl.
        """
        # import youtube_dl module (only once; cached as class attribute)
        module = self.ytdl_module
        if not module:
            name = config.get(("extractor", "ytdl"), "module") or "youtube_dl"
            module = YoutubeDLExtractor.ytdl_module = __import__(name)

        # find suitable youtube_dl extractor
        # NOTE(review): presumably youtube-dl's generic extractor accepts
        # any URL, so this loop always ends via 'break' -- confirm
        self.ytdl_url = url = match.group(1)
        for ie in module.extractor.gen_extractor_classes():
            if ie.suitable(url):
                self.ytdl_ie = ie
                break

        # set subcategory to youtube_dl extractor's key
        # (must happen before calling the base class constructor)
        self.subcategory = ie.ie_key()
        Extractor.__init__(self, match)

    def items(self):
        """Yield a Directory and a Url message for each extracted video"""
        # construct YoutubeDL object
        options = {
            "format": self.config("format"),
            "socket_timeout": self._timeout,
            "nocheckcertificate": not self._verify,
            "proxy": self.session.proxies.get("http"),
        }

        # user-supplied raw options override the defaults from above
        raw_options = self.config("raw-options")
        if raw_options:
            options.update(raw_options)
        if self.config("logging", True):
            options["logger"] = self.log
        # always set last, so 'raw-options' cannot override it
        options["extract_flat"] = "in_playlist"

        ytdl = self.ytdl_module.YoutubeDL(options)

        # extract youtube_dl info_dict
        # (name-mangled private method, called with the extractor that
        #  was selected in __init__; positional arguments are presumably
        #  download=False, extra_info={}, process=True -- verify against
        #  the youtube-dl version in use)
        info_dict = ytdl._YoutubeDL__extract_info(
            self.ytdl_url,
            ytdl.get_info_extractor(self.ytdl_ie.ie_key()),
            False, {}, True)

        # youtube-dl returns None instead of raising when errors are
        # ignored (e.g. 'ignoreerrors' in raw-options): nothing to yield
        if not info_dict:
            return

        if "entries" in info_dict:
            results = self._process_entries(ytdl, info_dict["entries"])
        else:
            results = (info_dict,)

        # yield results
        for info_dict in results:
            info_dict["extension"] = None
            # self-reference; the ytdl downloader pops this key and uses
            # it instead of extracting the same info a second time
            info_dict["_ytdl_info_dict"] = info_dict

            url = "ytdl:" + (info_dict.get("url") or
                             info_dict.get("webpage_url") or
                             self.ytdl_url)

            yield Message.Directory, info_dict
            yield Message.Url, url, info_dict

    def _process_entries(self, ytdl, entries):
        """Recursively flatten playlist 'entries' into single info dicts

        Entries of type "url" or "url_transparent" get resolved with
        'ytdl.extract_info()'; nested playlists are expanded in place.
        Empty entries and entries that fail to resolve are skipped.
        """
        for entry in entries:
            if not entry:
                # placeholder for an unavailable playlist item
                continue

            if entry.get("_type") not in ("url", "url_transparent"):
                yield entry
                continue

            info_dict = ytdl.extract_info(
                entry["url"], False,
                ie_key=entry.get("ie_key"))

            if not info_dict:
                # extraction failed, but errors are being ignored
                continue

            if "entries" in info_dict:
                yield from self._process_entries(
                    ytdl, info_dict["entries"])
            else:
                yield info_dict
|
||||||
|
|
||||||
|
|
||||||
|
# With 'extractor.ytdl.enabled' set, the 'ytdl:' prefix becomes optional
# and the pattern matches every URL
if config.get(("extractor", "ytdl"), "enabled"):
    YoutubeDLExtractor.pattern = r"(?:ytdl:)?(.*)"
|
@ -6,4 +6,4 @@
|
|||||||
# it under the terms of the GNU General Public License version 2 as
|
# it under the terms of the GNU General Public License version 2 as
|
||||||
# published by the Free Software Foundation.
|
# published by the Free Software Foundation.
|
||||||
|
|
||||||
__version__ = "1.18.2-dev"
|
__version__ = "1.19.0-dev"
|
||||||
|
@ -254,6 +254,7 @@ IGNORE_LIST = (
|
|||||||
"oauth",
|
"oauth",
|
||||||
"recursive",
|
"recursive",
|
||||||
"test",
|
"test",
|
||||||
|
"ytdl",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -147,7 +147,7 @@ class TestExtractorModule(unittest.TestCase):
|
|||||||
return c.capitalize()
|
return c.capitalize()
|
||||||
|
|
||||||
for extr in extractor.extractors():
|
for extr in extractor.extractors():
|
||||||
if extr.category not in ("", "oauth"):
|
if extr.category not in ("", "oauth", "ytdl"):
|
||||||
expected = "{}{}Extractor".format(
|
expected = "{}{}Extractor".format(
|
||||||
capitalize(extr.category),
|
capitalize(extr.category),
|
||||||
capitalize(extr.subcategory),
|
capitalize(extr.subcategory),
|
||||||
|
Loading…
Reference in New Issue
Block a user