mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-16 16:13:35 +01:00
[cleanup] Misc (#10807)
Closes #10751, Closes #10769, Closes #10791 Authored by: bashonly, Codenade, pzhlkj6612, seproDev, coletdjnz, grqz, Grub4K Co-authored-by: Codenade <amadeus.dorian04@gmail.com> Co-authored-by: Mozi <29089388+pzhlkj6612@users.noreply.github.com> Co-authored-by: sepro <4618135+seproDev@users.noreply.github.com> Co-authored-by: coletdjnz <coletdjnz@protonmail.com> Co-authored-by: N/Ame <173015200+grqz@users.noreply.github.com> Co-authored-by: Simon Sawicki <contact@grub4k.xyz>
This commit is contained in:
parent
cca534cd9e
commit
c6387abc1a
2
.github/workflows/quick-test.yml
vendored
2
.github/workflows/quick-test.yml
vendored
@ -15,7 +15,7 @@ jobs:
|
|||||||
with:
|
with:
|
||||||
python-version: '3.8'
|
python-version: '3.8'
|
||||||
- name: Install test requirements
|
- name: Install test requirements
|
||||||
run: python3 ./devscripts/install_deps.py --include test
|
run: python3 ./devscripts/install_deps.py -o --include test
|
||||||
- name: Run tests
|
- name: Run tests
|
||||||
timeout-minutes: 15
|
timeout-minutes: 15
|
||||||
run: |
|
run: |
|
||||||
|
4
.github/workflows/release.yml
vendored
4
.github/workflows/release.yml
vendored
@ -204,7 +204,7 @@ jobs:
|
|||||||
git config --global user.email "41898282+github-actions[bot]@users.noreply.github.com"
|
git config --global user.email "41898282+github-actions[bot]@users.noreply.github.com"
|
||||||
git add -u
|
git add -u
|
||||||
git commit -m "Release ${{ env.version }}" \
|
git commit -m "Release ${{ env.version }}" \
|
||||||
-m "Created by: ${{ github.event.sender.login }}" -m ":ci skip all :ci run dl"
|
-m "Created by: ${{ github.event.sender.login }}" -m ":ci skip all"
|
||||||
git push origin --force ${{ github.event.ref }}:release
|
git push origin --force ${{ github.event.ref }}:release
|
||||||
|
|
||||||
- name: Get target commitish
|
- name: Get target commitish
|
||||||
@ -325,7 +325,7 @@ jobs:
|
|||||||
"(https://github.com/yt-dlp/yt-dlp-master-builds/releases/latest \"Master builds\")"' || '' }} > ./RELEASE_NOTES
|
"(https://github.com/yt-dlp/yt-dlp-master-builds/releases/latest \"Master builds\")"' || '' }} > ./RELEASE_NOTES
|
||||||
printf '\n\n' >> ./RELEASE_NOTES
|
printf '\n\n' >> ./RELEASE_NOTES
|
||||||
cat >> ./RELEASE_NOTES << EOF
|
cat >> ./RELEASE_NOTES << EOF
|
||||||
#### A description of the various files are in the [README](https://github.com/${{ github.repository }}#release-files)
|
#### A description of the various files is in the [README](https://github.com/${{ github.repository }}#release-files)
|
||||||
---
|
---
|
||||||
$(python ./devscripts/make_changelog.py -vv --collapsible)
|
$(python ./devscripts/make_changelog.py -vv --collapsible)
|
||||||
EOF
|
EOF
|
||||||
|
30
README.md
30
README.md
@ -200,7 +200,7 @@ #### Impersonation
|
|||||||
|
|
||||||
The following provide support for impersonating browser requests. This may be required for some sites that employ TLS fingerprinting.
|
The following provide support for impersonating browser requests. This may be required for some sites that employ TLS fingerprinting.
|
||||||
|
|
||||||
* [**curl_cffi**](https://github.com/yifeikong/curl_cffi) (recommended) - Python binding for [curl-impersonate](https://github.com/lwthiker/curl-impersonate). Provides impersonation targets for Chrome, Edge and Safari. Licensed under [MIT](https://github.com/yifeikong/curl_cffi/blob/main/LICENSE)
|
* [**curl_cffi**](https://github.com/lexiforest/curl_cffi) (recommended) - Python binding for [curl-impersonate](https://github.com/lexiforest/curl-impersonate). Provides impersonation targets for Chrome, Edge and Safari. Licensed under [MIT](https://github.com/lexiforest/curl_cffi/blob/main/LICENSE)
|
||||||
* Can be installed with the `curl-cffi` group, e.g. `pip install "yt-dlp[default,curl-cffi]"`
|
* Can be installed with the `curl-cffi` group, e.g. `pip install "yt-dlp[default,curl-cffi]"`
|
||||||
* Currently included in `yt-dlp.exe`, `yt-dlp_linux` and `yt-dlp_macos` builds
|
* Currently included in `yt-dlp.exe`, `yt-dlp_linux` and `yt-dlp_macos` builds
|
||||||
|
|
||||||
@ -459,17 +459,17 @@ ## Video Selection:
|
|||||||
conditions. Use a "\" to escape "&" or
|
conditions. Use a "\" to escape "&" or
|
||||||
quotes if needed. If used multiple times,
|
quotes if needed. If used multiple times,
|
||||||
the filter matches if at least one of the
|
the filter matches if at least one of the
|
||||||
conditions is met. E.g. --match-filter
|
conditions is met. E.g. --match-filters
|
||||||
!is_live --match-filter "like_count>?100 &
|
!is_live --match-filters "like_count>?100 &
|
||||||
description~='(?i)\bcats \& dogs\b'" matches
|
description~='(?i)\bcats \& dogs\b'" matches
|
||||||
only videos that are not live OR those that
|
only videos that are not live OR those that
|
||||||
have a like count more than 100 (or the like
|
have a like count more than 100 (or the like
|
||||||
field is not available) and also have a
|
field is not available) and also have a
|
||||||
description that contains the phrase "cats &
|
description that contains the phrase "cats &
|
||||||
dogs" (caseless). Use "--match-filter -" to
|
dogs" (caseless). Use "--match-filters -" to
|
||||||
interactively ask whether to download each
|
interactively ask whether to download each
|
||||||
video
|
video
|
||||||
--no-match-filters Do not use any --match-filter (default)
|
--no-match-filters Do not use any --match-filters (default)
|
||||||
--break-match-filters FILTER Same as "--match-filters" but stops the
|
--break-match-filters FILTER Same as "--match-filters" but stops the
|
||||||
download process when a video is rejected
|
download process when a video is rejected
|
||||||
--no-break-match-filters Do not use any --break-match-filters (default)
|
--no-break-match-filters Do not use any --break-match-filters (default)
|
||||||
@ -490,7 +490,7 @@ ## Video Selection:
|
|||||||
encountering a file that is in the archive
|
encountering a file that is in the archive
|
||||||
(default)
|
(default)
|
||||||
--break-per-input Alters --max-downloads, --break-on-existing,
|
--break-per-input Alters --max-downloads, --break-on-existing,
|
||||||
--break-match-filter, and autonumber to
|
--break-match-filters, and autonumber to
|
||||||
reset per input URL
|
reset per input URL
|
||||||
--no-break-per-input --break-on-existing and similar options
|
--no-break-per-input --break-on-existing and similar options
|
||||||
terminate the entire download queue
|
terminate the entire download queue
|
||||||
@ -1771,7 +1771,7 @@ # EXTRACTOR ARGUMENTS
|
|||||||
#### youtube
|
#### youtube
|
||||||
* `lang`: Prefer translated metadata (`title`, `description` etc) of this language code (case-sensitive). By default, the video primary language metadata is preferred, with a fallback to `en` translated. See [youtube.py](https://github.com/yt-dlp/yt-dlp/blob/c26f9b991a0681fd3ea548d535919cec1fbbd430/yt_dlp/extractor/youtube.py#L381-L390) for list of supported content language codes
|
* `lang`: Prefer translated metadata (`title`, `description` etc) of this language code (case-sensitive). By default, the video primary language metadata is preferred, with a fallback to `en` translated. See [youtube.py](https://github.com/yt-dlp/yt-dlp/blob/c26f9b991a0681fd3ea548d535919cec1fbbd430/yt_dlp/extractor/youtube.py#L381-L390) for list of supported content language codes
|
||||||
* `skip`: One or more of `hls`, `dash` or `translated_subs` to skip extraction of the m3u8 manifests, dash manifests and [auto-translated subtitles](https://github.com/yt-dlp/yt-dlp/issues/4090#issuecomment-1158102032) respectively
|
* `skip`: One or more of `hls`, `dash` or `translated_subs` to skip extraction of the m3u8 manifests, dash manifests and [auto-translated subtitles](https://github.com/yt-dlp/yt-dlp/issues/4090#issuecomment-1158102032) respectively
|
||||||
* `player_client`: Clients to extract video data from. The main clients are `web`, `ios` and `android`, with variants `_music` and `_creator` (e.g. `ios_creator`); and `mediaconnect`, `mweb`, `android_producer`, `android_testsuite`, `android_vr`, `web_safari`, `web_embedded`, `tv` and `tv_embedded` with no variants. By default, `ios,web_creator` is used, and `tv_embedded`, `web_creator` and `mediaconnect` are added as required for age-gated videos. Similarly, the music variants are added for `music.youtube.com` urls. Most `android` clients will be given lowest priority since their formats are broken. You can use `all` to use all the clients, and `default` for the default clients.
|
* `player_client`: Clients to extract video data from. The main clients are `web`, `ios` and `android`, with variants `_music` and `_creator` (e.g. `ios_creator`); and `mediaconnect`, `mweb`, `android_producer`, `android_testsuite`, `android_vr`, `web_safari`, `web_embedded`, `tv` and `tv_embedded` with no variants. By default, `ios,web_creator` is used, and `tv_embedded`, `web_creator` and `mediaconnect` are added as required for age-gated videos. Similarly, the music variants are added for `music.youtube.com` urls. Most `android` clients will be given lowest priority since their formats are broken. You can use `all` to use all the clients, and `default` for the default clients. You can prefix a client with `-` to exclude it, e.g. `youtube:player_client=all,-web`
|
||||||
* `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause some issues. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) for more details
|
* `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause some issues. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) for more details
|
||||||
* `player_params`: YouTube player parameters to use for player requests. Will overwrite any default ones set by yt-dlp.
|
* `player_params`: YouTube player parameters to use for player requests. Will overwrite any default ones set by yt-dlp.
|
||||||
* `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side)
|
* `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side)
|
||||||
@ -2184,9 +2184,9 @@ ### New features
|
|||||||
|
|
||||||
* **Output template improvements**: Output templates can now have date-time formatting, numeric offsets, object traversal etc. See [output template](#output-template) for details. Even more advanced operations can also be done with the help of `--parse-metadata` and `--replace-in-metadata`
|
* **Output template improvements**: Output templates can now have date-time formatting, numeric offsets, object traversal etc. See [output template](#output-template) for details. Even more advanced operations can also be done with the help of `--parse-metadata` and `--replace-in-metadata`
|
||||||
|
|
||||||
* **Other new options**: Many new options have been added such as `--alias`, `--print`, `--concat-playlist`, `--wait-for-video`, `--retry-sleep`, `--sleep-requests`, `--convert-thumbnails`, `--force-download-archive`, `--force-overwrites`, `--break-match-filter` etc
|
* **Other new options**: Many new options have been added such as `--alias`, `--print`, `--concat-playlist`, `--wait-for-video`, `--retry-sleep`, `--sleep-requests`, `--convert-thumbnails`, `--force-download-archive`, `--force-overwrites`, `--break-match-filters` etc
|
||||||
|
|
||||||
* **Improvements**: Regex and other operators in `--format`/`--match-filter`, multiple `--postprocessor-args` and `--downloader-args`, faster archive checking, more [format selection options](#format-selection), merge multi-video/audio, multiple `--config-locations`, `--exec` at different stages, etc
|
* **Improvements**: Regex and other operators in `--format`/`--match-filters`, multiple `--postprocessor-args` and `--downloader-args`, faster archive checking, more [format selection options](#format-selection), merge multi-video/audio, multiple `--config-locations`, `--exec` at different stages, etc
|
||||||
|
|
||||||
* **Plugins**: Extractors and PostProcessors can be loaded from an external file. See [plugins](#plugins) for details
|
* **Plugins**: Extractors and PostProcessors can be loaded from an external file. See [plugins](#plugins) for details
|
||||||
|
|
||||||
@ -2227,7 +2227,7 @@ ### Differences in default behavior
|
|||||||
* `certifi` will be used for SSL root certificates, if installed. If you want to use system certificates (e.g. self-signed), use `--compat-options no-certifi`
|
* `certifi` will be used for SSL root certificates, if installed. If you want to use system certificates (e.g. self-signed), use `--compat-options no-certifi`
|
||||||
* yt-dlp's sanitization of invalid characters in filenames is different/smarter than in youtube-dl. You can use `--compat-options filename-sanitization` to revert to youtube-dl's behavior
|
* yt-dlp's sanitization of invalid characters in filenames is different/smarter than in youtube-dl. You can use `--compat-options filename-sanitization` to revert to youtube-dl's behavior
|
||||||
* ~~yt-dlp tries to parse the external downloader outputs into the standard progress output if possible (Currently implemented: [aria2c](https://github.com/yt-dlp/yt-dlp/issues/5931)). You can use `--compat-options no-external-downloader-progress` to get the downloader output as-is~~
|
* ~~yt-dlp tries to parse the external downloader outputs into the standard progress output if possible (Currently implemented: [aria2c](https://github.com/yt-dlp/yt-dlp/issues/5931)). You can use `--compat-options no-external-downloader-progress` to get the downloader output as-is~~
|
||||||
* yt-dlp versions between 2021.09.01 and 2023.01.02 applied `--match-filter` to nested playlists. This was an unintentional side-effect of [8f18ac](https://github.com/yt-dlp/yt-dlp/commit/8f18aca8717bb0dd49054555af8d386e5eda3a88) and is fixed in [d7b460](https://github.com/yt-dlp/yt-dlp/commit/d7b460d0e5fc710950582baed2e3fc616ed98a80). Use `--compat-options playlist-match-filter` to revert this
|
* yt-dlp versions between 2021.09.01 and 2023.01.02 applied `--match-filters` to nested playlists. This was an unintentional side-effect of [8f18ac](https://github.com/yt-dlp/yt-dlp/commit/8f18aca8717bb0dd49054555af8d386e5eda3a88) and is fixed in [d7b460](https://github.com/yt-dlp/yt-dlp/commit/d7b460d0e5fc710950582baed2e3fc616ed98a80). Use `--compat-options playlist-match-filter` to revert this
|
||||||
* yt-dlp versions between 2021.11.10 and 2023.06.21 estimated `filesize_approx` values for fragmented/manifest formats. This was added for convenience in [f2fe69](https://github.com/yt-dlp/yt-dlp/commit/f2fe69c7b0d208bdb1f6292b4ae92bc1e1a7444a), but was reverted in [0dff8e](https://github.com/yt-dlp/yt-dlp/commit/0dff8e4d1e6e9fb938f4256ea9af7d81f42fd54f) due to the potentially extreme inaccuracy of the estimated values. Use `--compat-options manifest-filesize-approx` to keep extracting the estimated values
|
* yt-dlp versions between 2021.11.10 and 2023.06.21 estimated `filesize_approx` values for fragmented/manifest formats. This was added for convenience in [f2fe69](https://github.com/yt-dlp/yt-dlp/commit/f2fe69c7b0d208bdb1f6292b4ae92bc1e1a7444a), but was reverted in [0dff8e](https://github.com/yt-dlp/yt-dlp/commit/0dff8e4d1e6e9fb938f4256ea9af7d81f42fd54f) due to the potentially extreme inaccuracy of the estimated values. Use `--compat-options manifest-filesize-approx` to keep extracting the estimated values
|
||||||
* yt-dlp uses modern http client backends such as `requests`. Use `--compat-options prefer-legacy-http-handler` to prefer the legacy http handler (`urllib`) to be used for standard http requests.
|
* yt-dlp uses modern http client backends such as `requests`. Use `--compat-options prefer-legacy-http-handler` to prefer the legacy http handler (`urllib`) to be used for standard http requests.
|
||||||
* The sub-modules `swfinterp`, `casefold` are removed.
|
* The sub-modules `swfinterp`, `casefold` are removed.
|
||||||
@ -2273,11 +2273,11 @@ #### Redundant options
|
|||||||
--get-thumbnail --print thumbnail
|
--get-thumbnail --print thumbnail
|
||||||
-e, --get-title --print title
|
-e, --get-title --print title
|
||||||
-g, --get-url --print urls
|
-g, --get-url --print urls
|
||||||
--match-title REGEX --match-filter "title ~= (?i)REGEX"
|
--match-title REGEX --match-filters "title ~= (?i)REGEX"
|
||||||
--reject-title REGEX --match-filter "title !~= (?i)REGEX"
|
--reject-title REGEX --match-filters "title !~= (?i)REGEX"
|
||||||
--min-views COUNT --match-filter "view_count >=? COUNT"
|
--min-views COUNT --match-filters "view_count >=? COUNT"
|
||||||
--max-views COUNT --match-filter "view_count <=? COUNT"
|
--max-views COUNT --match-filters "view_count <=? COUNT"
|
||||||
--break-on-reject Use --break-match-filter
|
--break-on-reject Use --break-match-filters
|
||||||
--user-agent UA --add-header "User-Agent:UA"
|
--user-agent UA --add-header "User-Agent:UA"
|
||||||
--referer URL --add-header "Referer:URL"
|
--referer URL --add-header "Referer:URL"
|
||||||
--playlist-start NUMBER -I NUMBER:
|
--playlist-start NUMBER -I NUMBER:
|
||||||
|
@ -76,7 +76,7 @@ dev = [
|
|||||||
]
|
]
|
||||||
static-analysis = [
|
static-analysis = [
|
||||||
"autopep8~=2.0",
|
"autopep8~=2.0",
|
||||||
"ruff~=0.5.0",
|
"ruff~=0.6.0",
|
||||||
]
|
]
|
||||||
test = [
|
test = [
|
||||||
"pytest~=8.1",
|
"pytest~=8.1",
|
||||||
|
@ -508,7 +508,7 @@ def _call_downloader(self, tmpfilename, info_dict):
|
|||||||
env = None
|
env = None
|
||||||
proxy = self.params.get('proxy')
|
proxy = self.params.get('proxy')
|
||||||
if proxy:
|
if proxy:
|
||||||
if not re.match(r'^[\da-zA-Z]+://', proxy):
|
if not re.match(r'[\da-zA-Z]+://', proxy):
|
||||||
proxy = f'http://{proxy}'
|
proxy = f'http://{proxy}'
|
||||||
|
|
||||||
if proxy.startswith('socks'):
|
if proxy.startswith('socks'):
|
||||||
@ -559,7 +559,7 @@ def _call_downloader(self, tmpfilename, info_dict):
|
|||||||
|
|
||||||
selected_formats = info_dict.get('requested_formats') or [info_dict]
|
selected_formats = info_dict.get('requested_formats') or [info_dict]
|
||||||
for i, fmt in enumerate(selected_formats):
|
for i, fmt in enumerate(selected_formats):
|
||||||
is_http = re.match(r'^https?://', fmt['url'])
|
is_http = re.match(r'https?://', fmt['url'])
|
||||||
cookies = self.ydl.cookiejar.get_cookies_for_url(fmt['url']) if is_http else []
|
cookies = self.ydl.cookiejar.get_cookies_for_url(fmt['url']) if is_http else []
|
||||||
if cookies:
|
if cookies:
|
||||||
args.extend(['-cookies', ''.join(
|
args.extend(['-cookies', ''.join(
|
||||||
|
@ -4,7 +4,7 @@
|
|||||||
|
|
||||||
|
|
||||||
class AcademicEarthCourseIE(InfoExtractor):
|
class AcademicEarthCourseIE(InfoExtractor):
|
||||||
_VALID_URL = r'^https?://(?:www\.)?academicearth\.org/playlists/(?P<id>[^?#/]+)'
|
_VALID_URL = r'https?://(?:www\.)?academicearth\.org/playlists/(?P<id>[^?#/]+)'
|
||||||
IE_NAME = 'AcademicEarth:Course'
|
IE_NAME = 'AcademicEarth:Course'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://academicearth.org/playlists/laws-of-nature/',
|
'url': 'http://academicearth.org/playlists/laws-of-nature/',
|
||||||
|
@ -231,7 +231,7 @@ def _real_extract(self, url):
|
|||||||
|
|
||||||
class ARDBetaMediathekIE(InfoExtractor):
|
class ARDBetaMediathekIE(InfoExtractor):
|
||||||
IE_NAME = 'ARDMediathek'
|
IE_NAME = 'ARDMediathek'
|
||||||
_VALID_URL = r'''(?x)https://
|
_VALID_URL = r'''(?x)https?://
|
||||||
(?:(?:beta|www)\.)?ardmediathek\.de/
|
(?:(?:beta|www)\.)?ardmediathek\.de/
|
||||||
(?:[^/]+/)?
|
(?:[^/]+/)?
|
||||||
(?:player|live|video)/
|
(?:player|live|video)/
|
||||||
@ -470,7 +470,7 @@ def _real_extract(self, url):
|
|||||||
|
|
||||||
|
|
||||||
class ARDMediathekCollectionIE(InfoExtractor):
|
class ARDMediathekCollectionIE(InfoExtractor):
|
||||||
_VALID_URL = r'''(?x)https://
|
_VALID_URL = r'''(?x)https?://
|
||||||
(?:(?:beta|www)\.)?ardmediathek\.de/
|
(?:(?:beta|www)\.)?ardmediathek\.de/
|
||||||
(?:[^/?#]+/)?
|
(?:[^/?#]+/)?
|
||||||
(?P<playlist>sendung|serie|sammlung)/
|
(?P<playlist>sendung|serie|sammlung)/
|
||||||
|
@ -3,7 +3,7 @@
|
|||||||
|
|
||||||
|
|
||||||
class CallinIE(InfoExtractor):
|
class CallinIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?callin\.com/(episode)/(?P<id>[-a-zA-Z]+)'
|
_VALID_URL = r'https?://(?:www\.)?callin\.com/episode/(?P<id>[-a-zA-Z]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.callin.com/episode/the-title-ix-regime-and-the-long-march-through-EBfXYSrsjc',
|
'url': 'https://www.callin.com/episode/the-title-ix-regime-and-the-long-march-through-EBfXYSrsjc',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
@ -2077,7 +2077,7 @@ def _parse_m3u8_formats_and_subtitles(
|
|||||||
has_drm = HlsFD._has_drm(m3u8_doc)
|
has_drm = HlsFD._has_drm(m3u8_doc)
|
||||||
|
|
||||||
def format_url(url):
|
def format_url(url):
|
||||||
return url if re.match(r'^https?://', url) else urllib.parse.urljoin(m3u8_url, url)
|
return url if re.match(r'https?://', url) else urllib.parse.urljoin(m3u8_url, url)
|
||||||
|
|
||||||
if self.get_param('hls_split_discontinuity', False):
|
if self.get_param('hls_split_discontinuity', False):
|
||||||
def _extract_m3u8_playlist_indices(manifest_url=None, m3u8_doc=None):
|
def _extract_m3u8_playlist_indices(manifest_url=None, m3u8_doc=None):
|
||||||
@ -2812,11 +2812,11 @@ def extract_Initialization(source):
|
|||||||
base_url_e = element.find(_add_ns('BaseURL'))
|
base_url_e = element.find(_add_ns('BaseURL'))
|
||||||
if try_call(lambda: base_url_e.text) is not None:
|
if try_call(lambda: base_url_e.text) is not None:
|
||||||
base_url = base_url_e.text + base_url
|
base_url = base_url_e.text + base_url
|
||||||
if re.match(r'^https?://', base_url):
|
if re.match(r'https?://', base_url):
|
||||||
break
|
break
|
||||||
if mpd_base_url and base_url.startswith('/'):
|
if mpd_base_url and base_url.startswith('/'):
|
||||||
base_url = urllib.parse.urljoin(mpd_base_url, base_url)
|
base_url = urllib.parse.urljoin(mpd_base_url, base_url)
|
||||||
elif mpd_base_url and not re.match(r'^https?://', base_url):
|
elif mpd_base_url and not re.match(r'https?://', base_url):
|
||||||
if not mpd_base_url.endswith('/'):
|
if not mpd_base_url.endswith('/'):
|
||||||
mpd_base_url += '/'
|
mpd_base_url += '/'
|
||||||
base_url = mpd_base_url + base_url
|
base_url = mpd_base_url + base_url
|
||||||
@ -2906,7 +2906,7 @@ def prepare_template(template_name, identifiers):
|
|||||||
}
|
}
|
||||||
|
|
||||||
def location_key(location):
|
def location_key(location):
|
||||||
return 'url' if re.match(r'^https?://', location) else 'path'
|
return 'url' if re.match(r'https?://', location) else 'path'
|
||||||
|
|
||||||
if 'segment_urls' not in representation_ms_info and 'media' in representation_ms_info:
|
if 'segment_urls' not in representation_ms_info and 'media' in representation_ms_info:
|
||||||
|
|
||||||
|
@ -14,7 +14,7 @@
|
|||||||
|
|
||||||
|
|
||||||
class FC2IE(InfoExtractor):
|
class FC2IE(InfoExtractor):
|
||||||
_VALID_URL = r'^(?:https?://video\.fc2\.com/(?:[^/]+/)*content/|fc2:)(?P<id>[^/]+)'
|
_VALID_URL = r'(?:https?://video\.fc2\.com/(?:[^/]+/)*content/|fc2:)(?P<id>[^/]+)'
|
||||||
IE_NAME = 'fc2'
|
IE_NAME = 'fc2'
|
||||||
_NETRC_MACHINE = 'fc2'
|
_NETRC_MACHINE = 'fc2'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
|
@ -2340,7 +2340,7 @@ def _real_extract(self, url):
|
|||||||
default_search = 'fixup_error'
|
default_search = 'fixup_error'
|
||||||
|
|
||||||
if default_search in ('auto', 'auto_warning', 'fixup_error'):
|
if default_search in ('auto', 'auto_warning', 'fixup_error'):
|
||||||
if re.match(r'^[^\s/]+\.[^\s/]+/', url):
|
if re.match(r'[^\s/]+\.[^\s/]+/', url):
|
||||||
self.report_warning('The url doesn\'t specify the protocol, trying with http')
|
self.report_warning('The url doesn\'t specify the protocol, trying with http')
|
||||||
return self.url_result('http://' + url)
|
return self.url_result('http://' + url)
|
||||||
elif default_search != 'fixup_error':
|
elif default_search != 'fixup_error':
|
||||||
@ -2400,7 +2400,7 @@ def _real_extract(self, url):
|
|||||||
|
|
||||||
# Check for direct link to a video
|
# Check for direct link to a video
|
||||||
content_type = full_response.headers.get('Content-Type', '').lower()
|
content_type = full_response.headers.get('Content-Type', '').lower()
|
||||||
m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type)
|
m = re.match(r'(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type)
|
||||||
if m:
|
if m:
|
||||||
self.report_detected('direct video link')
|
self.report_detected('direct video link')
|
||||||
headers = filter_dict({'Referer': smuggled_data.get('referer')})
|
headers = filter_dict({'Referer': smuggled_data.get('referer')})
|
||||||
|
@ -52,7 +52,7 @@ class GetCourseRuIE(InfoExtractor):
|
|||||||
_BASE_URL_RE = rf'https?://(?:(?!player02\.)[^.]+\.getcourse\.(?:ru|io)|{"|".join(map(re.escape, _DOMAINS))})'
|
_BASE_URL_RE = rf'https?://(?:(?!player02\.)[^.]+\.getcourse\.(?:ru|io)|{"|".join(map(re.escape, _DOMAINS))})'
|
||||||
_VALID_URL = [
|
_VALID_URL = [
|
||||||
rf'{_BASE_URL_RE}/(?!pl/|teach/)(?P<id>[^?#]+)',
|
rf'{_BASE_URL_RE}/(?!pl/|teach/)(?P<id>[^?#]+)',
|
||||||
rf'{_BASE_URL_RE}/(:?pl/)?teach/control/lesson/view\?(?:[^#]+&)?id=(?P<id>\d+)',
|
rf'{_BASE_URL_RE}/(?:pl/)?teach/control/lesson/view\?(?:[^#]+&)?id=(?P<id>\d+)',
|
||||||
]
|
]
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://academymel.online/3video_1',
|
'url': 'http://academymel.online/3video_1',
|
||||||
|
@ -7,7 +7,7 @@
|
|||||||
|
|
||||||
|
|
||||||
class GolemIE(InfoExtractor):
|
class GolemIE(InfoExtractor):
|
||||||
_VALID_URL = r'^https?://video\.golem\.de/.+?/(?P<id>.+?)/'
|
_VALID_URL = r'https?://video\.golem\.de/.+?/(?P<id>.+?)/'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://video.golem.de/handy/14095/iphone-6-und-6-plus-test.html',
|
'url': 'http://video.golem.de/handy/14095/iphone-6-und-6-plus-test.html',
|
||||||
'md5': 'c1a2c0a3c863319651c7c992c5ee29bf',
|
'md5': 'c1a2c0a3c863319651c7c992c5ee29bf',
|
||||||
|
@ -13,7 +13,7 @@
|
|||||||
|
|
||||||
class HRFernsehenIE(InfoExtractor):
|
class HRFernsehenIE(InfoExtractor):
|
||||||
IE_NAME = 'hrfernsehen'
|
IE_NAME = 'hrfernsehen'
|
||||||
_VALID_URL = r'^https?://www\.(?:hr-fernsehen|hessenschau)\.de/.*,video-(?P<id>[0-9]{6})\.html'
|
_VALID_URL = r'https?://www\.(?:hr-fernsehen|hessenschau)\.de/.*,video-(?P<id>[0-9]{6})\.html'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.hessenschau.de/tv-sendung/hessenschau-vom-26082020,video-130546.html',
|
'url': 'https://www.hessenschau.de/tv-sendung/hessenschau-vom-26082020,video-130546.html',
|
||||||
'md5': '5c4e0ba94677c516a2f65a84110fc536',
|
'md5': '5c4e0ba94677c516a2f65a84110fc536',
|
||||||
|
@ -194,11 +194,14 @@ def _real_extract(self, url):
|
|||||||
|
|
||||||
|
|
||||||
class SangiinInstructionIE(InfoExtractor):
|
class SangiinInstructionIE(InfoExtractor):
|
||||||
_VALID_URL = r'^https?://www\.webtv\.sangiin\.go\.jp/webtv/index\.php'
|
_VALID_URL = r'https?://www\.webtv\.sangiin\.go\.jp/webtv/index\.php'
|
||||||
IE_DESC = False # this shouldn't be listed as a supported site
|
IE_DESC = False # this shouldn't be listed as a supported site
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
raise ExtractorError('Copy the link from the botton below the video description or player, and use the link to download. If there are no button in the frame, get the URL of the frame showing the video.', expected=True)
|
raise ExtractorError(
|
||||||
|
'Copy the link from the button below the video description/player '
|
||||||
|
'and use that link to download. If there is no button in the frame, '
|
||||||
|
'get the URL of the frame showing the video.', expected=True)
|
||||||
|
|
||||||
|
|
||||||
class SangiinIE(InfoExtractor):
|
class SangiinIE(InfoExtractor):
|
||||||
|
@ -22,7 +22,7 @@ class KalturaIE(InfoExtractor):
|
|||||||
(?:
|
(?:
|
||||||
kaltura:(?P<partner_id>\w+):(?P<id>\w+)(?::(?P<player_type>\w+))?|
|
kaltura:(?P<partner_id>\w+):(?P<id>\w+)(?::(?P<player_type>\w+))?|
|
||||||
https?://
|
https?://
|
||||||
(:?(?:www|cdnapi(?:sec)?)\.)?kaltura\.com(?::\d+)?/
|
(?:(?:www|cdnapi(?:sec)?)\.)?kaltura\.com(?::\d+)?/
|
||||||
(?:
|
(?:
|
||||||
(?:
|
(?:
|
||||||
# flash player
|
# flash player
|
||||||
|
@ -126,7 +126,7 @@ def _real_extract(self, url):
|
|||||||
video_data = None
|
video_data = None
|
||||||
|
|
||||||
# fix meta_url if missing the host address
|
# fix meta_url if missing the host address
|
||||||
if re.match(r'^\/\+\/', meta_url):
|
if re.match(r'\/\+\/', meta_url):
|
||||||
meta_url = urljoin('https://my.mail.ru', meta_url)
|
meta_url = urljoin('https://my.mail.ru', meta_url)
|
||||||
|
|
||||||
if meta_url:
|
if meta_url:
|
||||||
|
@ -16,7 +16,7 @@
|
|||||||
|
|
||||||
|
|
||||||
class MGTVIE(InfoExtractor):
|
class MGTVIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:w(?:ww)?\.)?mgtv\.com/(v|b)/(?:[^/]+/)*(?P<id>\d+)\.html'
|
_VALID_URL = r'https?://(?:w(?:ww)?\.)?mgtv\.com/[bv]/(?:[^/]+/)*(?P<id>\d+)\.html'
|
||||||
IE_DESC = '芒果TV'
|
IE_DESC = '芒果TV'
|
||||||
IE_NAME = 'MangoTV'
|
IE_NAME = 'MangoTV'
|
||||||
|
|
||||||
|
@ -65,7 +65,7 @@ def _real_extract(self, url):
|
|||||||
|
|
||||||
class OCWMITIE(InfoExtractor):
|
class OCWMITIE(InfoExtractor):
|
||||||
IE_NAME = 'ocw.mit.edu'
|
IE_NAME = 'ocw.mit.edu'
|
||||||
_VALID_URL = r'^https?://ocw\.mit\.edu/courses/(?P<topic>[a-z0-9\-]+)'
|
_VALID_URL = r'https?://ocw\.mit\.edu/courses/(?P<topic>[a-z0-9\-]+)'
|
||||||
_BASE_URL = 'http://ocw.mit.edu/'
|
_BASE_URL = 'http://ocw.mit.edu/'
|
||||||
|
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
|
@ -10,7 +10,7 @@
|
|||||||
|
|
||||||
|
|
||||||
class NZOnScreenIE(InfoExtractor):
|
class NZOnScreenIE(InfoExtractor):
|
||||||
_VALID_URL = r'^https?://www\.nzonscreen\.com/title/(?P<id>[^/?#]+)'
|
_VALID_URL = r'https?://www\.nzonscreen\.com/title/(?P<id>[^/?#]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.nzonscreen.com/title/shoop-shoop-diddy-wop-cumma-cumma-wang-dang-1982',
|
'url': 'https://www.nzonscreen.com/title/shoop-shoop-diddy-wop-cumma-cumma-wang-dang-1982',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
@ -628,8 +628,7 @@ def is_404(e):
|
|||||||
page_entries = self._extract_entries(webpage, host)
|
page_entries = self._extract_entries(webpage, host)
|
||||||
if not page_entries:
|
if not page_entries:
|
||||||
break
|
break
|
||||||
for e in page_entries:
|
yield from page_entries
|
||||||
yield e
|
|
||||||
if not self._has_more(webpage):
|
if not self._has_more(webpage):
|
||||||
break
|
break
|
||||||
|
|
||||||
|
@ -16,7 +16,7 @@
|
|||||||
|
|
||||||
|
|
||||||
class RadioFranceIE(InfoExtractor):
|
class RadioFranceIE(InfoExtractor):
|
||||||
_VALID_URL = r'^https?://maison\.radiofrance\.fr/radiovisions/(?P<id>[^?#]+)'
|
_VALID_URL = r'https?://maison\.radiofrance\.fr/radiovisions/(?P<id>[^?#]+)'
|
||||||
IE_NAME = 'radiofrance'
|
IE_NAME = 'radiofrance'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
|
@ -6,7 +6,7 @@
|
|||||||
|
|
||||||
|
|
||||||
class ReverbNationIE(InfoExtractor):
|
class ReverbNationIE(InfoExtractor):
|
||||||
_VALID_URL = r'^https?://(?:www\.)?reverbnation\.com/.*?/song/(?P<id>\d+).*?$'
|
_VALID_URL = r'https?://(?:www\.)?reverbnation\.com/.*?/song/(?P<id>\d+).*?$'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.reverbnation.com/alkilados/song/16965047-mona-lisa',
|
'url': 'http://www.reverbnation.com/alkilados/song/16965047-mona-lisa',
|
||||||
'md5': 'c0aaf339bcee189495fdf5a8c8ba8645',
|
'md5': 'c0aaf339bcee189495fdf5a8c8ba8645',
|
||||||
|
@ -8,7 +8,7 @@
|
|||||||
|
|
||||||
|
|
||||||
class Tele13IE(InfoExtractor):
|
class Tele13IE(InfoExtractor):
|
||||||
_VALID_URL = r'^https?://(?:www\.)?t13\.cl/videos(?:/[^/]+)+/(?P<id>[\w-]+)'
|
_VALID_URL = r'https?://(?:www\.)?t13\.cl/videos(?:/[^/]+)+/(?P<id>[\w-]+)'
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
'url': 'http://www.t13.cl/videos/actualidad/el-circulo-de-hierro-de-michelle-bachelet-en-su-regreso-a-la-moneda',
|
'url': 'http://www.t13.cl/videos/actualidad/el-circulo-de-hierro-de-michelle-bachelet-en-su-regreso-a-la-moneda',
|
||||||
|
@ -270,7 +270,7 @@ def _real_extract(self, url):
|
|||||||
|
|
||||||
|
|
||||||
class TwitCastingUserIE(InfoExtractor):
|
class TwitCastingUserIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:[^/?#]+\.)?twitcasting\.tv/(?P<id>[^/?#]+)/(:?show|archive)/?(?:[#?]|$)'
|
_VALID_URL = r'https?://(?:[^/?#]+\.)?twitcasting\.tv/(?P<id>[^/?#]+)/(?:show|archive)/?(?:[#?]|$)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://twitcasting.tv/natsuiromatsuri/archive/',
|
'url': 'https://twitcasting.tv/natsuiromatsuri/archive/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
@ -90,7 +90,7 @@ def _real_extract(self, url):
|
|||||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, 'mp4')
|
formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, 'mp4')
|
||||||
|
|
||||||
for key, value in video_data.items():
|
for key, value in video_data.items():
|
||||||
mobj = re.match(r'^subtitle_(?P<lang>[^_]+)_(?P<ext>(vtt|srt))', key)
|
mobj = re.match(r'subtitle_(?P<lang>[^_]+)_(?P<ext>(vtt|srt))', key)
|
||||||
if not mobj:
|
if not mobj:
|
||||||
continue
|
continue
|
||||||
subtitles.setdefault(mobj.group('lang'), []).append({
|
subtitles.setdefault(mobj.group('lang'), []).append({
|
||||||
|
@ -21,7 +21,7 @@ class XimalayaBaseIE(InfoExtractor):
|
|||||||
class XimalayaIE(XimalayaBaseIE):
|
class XimalayaIE(XimalayaBaseIE):
|
||||||
IE_NAME = 'ximalaya'
|
IE_NAME = 'ximalaya'
|
||||||
IE_DESC = '喜马拉雅FM'
|
IE_DESC = '喜马拉雅FM'
|
||||||
_VALID_URL = r'https?://(?:www\.|m\.)?ximalaya\.com/(:?(?P<uid>\d+)/)?sound/(?P<id>[0-9]+)'
|
_VALID_URL = r'https?://(?:www\.|m\.)?ximalaya\.com/(?:(?P<uid>\d+)/)?sound/(?P<id>[0-9]+)'
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
'url': 'http://www.ximalaya.com/sound/47740352/',
|
'url': 'http://www.ximalaya.com/sound/47740352/',
|
||||||
|
@ -33,8 +33,8 @@
|
|||||||
import websockets.version
|
import websockets.version
|
||||||
|
|
||||||
websockets_version = tuple(map(int_or_none, websockets.version.version.split('.')))
|
websockets_version = tuple(map(int_or_none, websockets.version.version.split('.')))
|
||||||
if websockets_version < (12, 0):
|
if websockets_version < (13, 0):
|
||||||
raise ImportError('Only websockets>=12.0 is supported')
|
raise ImportError('Only websockets>=13.0 is supported')
|
||||||
|
|
||||||
import websockets.sync.client
|
import websockets.sync.client
|
||||||
from websockets.uri import parse_uri
|
from websockets.uri import parse_uri
|
||||||
|
@ -647,16 +647,16 @@ def _alias_callback(option, opt_str, value, parser, opts, nargs):
|
|||||||
'You can also simply specify a field to match if the field is present, '
|
'You can also simply specify a field to match if the field is present, '
|
||||||
'use "!field" to check if the field is not present, and "&" to check multiple conditions. '
|
'use "!field" to check if the field is not present, and "&" to check multiple conditions. '
|
||||||
'Use a "\\" to escape "&" or quotes if needed. If used multiple times, '
|
'Use a "\\" to escape "&" or quotes if needed. If used multiple times, '
|
||||||
'the filter matches if at least one of the conditions is met. E.g. --match-filter '
|
'the filter matches if at least one of the conditions is met. E.g. --match-filters '
|
||||||
'!is_live --match-filter "like_count>?100 & description~=\'(?i)\\bcats \\& dogs\\b\'" '
|
'!is_live --match-filters "like_count>?100 & description~=\'(?i)\\bcats \\& dogs\\b\'" '
|
||||||
'matches only videos that are not live OR those that have a like count more than 100 '
|
'matches only videos that are not live OR those that have a like count more than 100 '
|
||||||
'(or the like field is not available) and also has a description '
|
'(or the like field is not available) and also has a description '
|
||||||
'that contains the phrase "cats & dogs" (caseless). '
|
'that contains the phrase "cats & dogs" (caseless). '
|
||||||
'Use "--match-filter -" to interactively ask whether to download each video'))
|
'Use "--match-filters -" to interactively ask whether to download each video'))
|
||||||
selection.add_option(
|
selection.add_option(
|
||||||
'--no-match-filters',
|
'--no-match-filters',
|
||||||
dest='match_filter', action='store_const', const=None,
|
dest='match_filter', action='store_const', const=None,
|
||||||
help='Do not use any --match-filter (default)')
|
help='Do not use any --match-filters (default)')
|
||||||
selection.add_option(
|
selection.add_option(
|
||||||
'--break-match-filters',
|
'--break-match-filters',
|
||||||
metavar='FILTER', dest='breaking_match_filter', action='append',
|
metavar='FILTER', dest='breaking_match_filter', action='append',
|
||||||
@ -704,7 +704,7 @@ def _alias_callback(option, opt_str, value, parser, opts, nargs):
|
|||||||
selection.add_option(
|
selection.add_option(
|
||||||
'--break-per-input',
|
'--break-per-input',
|
||||||
action='store_true', dest='break_per_url', default=False,
|
action='store_true', dest='break_per_url', default=False,
|
||||||
help='Alters --max-downloads, --break-on-existing, --break-match-filter, and autonumber to reset per input URL')
|
help='Alters --max-downloads, --break-on-existing, --break-match-filters, and autonumber to reset per input URL')
|
||||||
selection.add_option(
|
selection.add_option(
|
||||||
'--no-break-per-input',
|
'--no-break-per-input',
|
||||||
action='store_false', dest='break_per_url',
|
action='store_false', dest='break_per_url',
|
||||||
|
@ -33,7 +33,7 @@ class SponsorBlockPP(FFmpegPostProcessor):
|
|||||||
def __init__(self, downloader, categories=None, api='https://sponsor.ajay.app'):
|
def __init__(self, downloader, categories=None, api='https://sponsor.ajay.app'):
|
||||||
FFmpegPostProcessor.__init__(self, downloader)
|
FFmpegPostProcessor.__init__(self, downloader)
|
||||||
self._categories = tuple(categories or self.CATEGORIES.keys())
|
self._categories = tuple(categories or self.CATEGORIES.keys())
|
||||||
self._API_URL = api if re.match('^https?://', api) else 'https://' + api
|
self._API_URL = api if re.match('https?://', api) else 'https://' + api
|
||||||
|
|
||||||
def run(self, info):
|
def run(self, info):
|
||||||
extractor = info['extractor_key']
|
extractor = info['extractor_key']
|
||||||
|
@ -1954,7 +1954,7 @@ def urljoin(base, path):
|
|||||||
path = path.decode()
|
path = path.decode()
|
||||||
if not isinstance(path, str) or not path:
|
if not isinstance(path, str) or not path:
|
||||||
return None
|
return None
|
||||||
if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
|
if re.match(r'(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
|
||||||
return path
|
return path
|
||||||
if isinstance(base, bytes):
|
if isinstance(base, bytes):
|
||||||
base = base.decode()
|
base = base.decode()
|
||||||
@ -2007,7 +2007,7 @@ def url_or_none(url):
|
|||||||
if not url or not isinstance(url, str):
|
if not url or not isinstance(url, str):
|
||||||
return None
|
return None
|
||||||
url = url.strip()
|
url = url.strip()
|
||||||
return url if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url) else None
|
return url if re.match(r'(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url) else None
|
||||||
|
|
||||||
|
|
||||||
def strftime_or_none(timestamp, date_format='%Y%m%d', default=None):
|
def strftime_or_none(timestamp, date_format='%Y%m%d', default=None):
|
||||||
@ -3113,7 +3113,7 @@ def is_html(first_bytes):
|
|||||||
while first_bytes.startswith(bom):
|
while first_bytes.startswith(bom):
|
||||||
encoding, first_bytes = enc, first_bytes[len(bom):]
|
encoding, first_bytes = enc, first_bytes[len(bom):]
|
||||||
|
|
||||||
return re.match(r'^\s*<', first_bytes.decode(encoding, 'replace'))
|
return re.match(r'\s*<', first_bytes.decode(encoding, 'replace'))
|
||||||
|
|
||||||
|
|
||||||
def determine_protocol(info_dict):
|
def determine_protocol(info_dict):
|
||||||
|
Loading…
Reference in New Issue
Block a user