mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-02 17:22:31 +01:00
Merge branch 'master' into openload-phantomjs-method
This commit is contained in:
commit
c89267d31a
6
.github/ISSUE_TEMPLATE.md
vendored
6
.github/ISSUE_TEMPLATE.md
vendored
@ -6,8 +6,8 @@ ## Please follow the guide below
|
||||
|
||||
---
|
||||
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.04.15*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with an outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.04.15**
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.05.01*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with an outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.05.01**
|
||||
|
||||
### Before submitting an *issue* make sure you have:
|
||||
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
||||
@ -35,7 +35,7 @@ ### If the purpose of this *issue* is a *bug report*, *site support request* or
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] youtube-dl version 2017.04.15
|
||||
[debug] youtube-dl version 2017.05.01
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
2
.gitignore
vendored
2
.gitignore
vendored
@ -35,8 +35,8 @@ updates_key.pem
|
||||
*.mkv
|
||||
*.swf
|
||||
*.part
|
||||
*.ytdl
|
||||
*.swp
|
||||
test/testdata
|
||||
test/local_parameters.json
|
||||
.tox
|
||||
youtube-dl.zsh
|
||||
|
1
AUTHORS
1
AUTHORS
@ -211,3 +211,4 @@ Juanjo Benages
|
||||
Xiao Di Guan
|
||||
Thomas Winant
|
||||
Daniel Twardowski
|
||||
Jeremie Jarosh
|
||||
|
126
ChangeLog
126
ChangeLog
@ -1,3 +1,129 @@
|
||||
version <unreleased>
|
||||
|
||||
Extractors
|
||||
+ [cda] Support birthday verification (#12789)
|
||||
* [leeco] Fix extraction (#12974)
|
||||
|
||||
|
||||
version 2017.05.01
|
||||
|
||||
Core
|
||||
+ [extractor/common] Extract view count from JSON-LD
|
||||
* [utils] Improve unified_timestamp
|
||||
+ [utils] Add video/mp2t to mimetype2ext
|
||||
* [downloader/external] Properly handle live stream downloading cancellation
|
||||
(#8932)
|
||||
+ [utils] Add support for unicode whitespace in clean_html on python 2 (#12906)
|
||||
|
||||
Extractors
|
||||
* [infoq] Make audio format extraction non fatal (#12938)
|
||||
* [brightcove] Allow whitespace around attribute names in embedded code
|
||||
+ [zaq1] Add support for zaq1.pl (#12693)
|
||||
+ [xvideos] Extract duration (#12828)
|
||||
* [vevo] Fix extraction (#12879)
|
||||
+ [noovo] Add support for noovo.ca (#12792)
|
||||
+ [washingtonpost] Add support for embeds (#12699)
|
||||
* [yandexmusic:playlist] Fix extraction for python 3 (#12888)
|
||||
* [anvato] Improve extraction (#12913)
|
||||
* Promote to regular shortcut based extractor
|
||||
* Add mcp to access key mapping table
|
||||
* Add support for embeds extraction
|
||||
* Add support for anvato embeds in generic extractor
|
||||
* [xtube] Fix extraction for older FLV videos (#12734)
|
||||
* [tvplayer] Fix extraction (#12908)
|
||||
|
||||
|
||||
version 2017.04.28
|
||||
|
||||
Core
|
||||
+ [adobepass] Use geo verification headers for all requests
|
||||
- [downloader/fragment] Remove assert for resume_len when no fragments
|
||||
downloaded
|
||||
+ [extractor/common] Add manifest_url for explicit group rendition formats
|
||||
* [extractor/common] Fix manifest_url for m3u8 formats
|
||||
- [extractor/common] Don't list master m3u8 playlists in format list (#12832)
|
||||
|
||||
Extractors
|
||||
* [aenetworks] Fix extraction for shows with single season
|
||||
+ [go] Add support for Disney, DisneyJunior and DisneyXD show pages
|
||||
* [youtube] Recognize new locale-based player URLs (#12885)
|
||||
+ [streamable] Add support for new embedded URL schema (#12844)
|
||||
* [arte:+7] Relax URL regular expression (#12837)
|
||||
|
||||
|
||||
version 2017.04.26
|
||||
|
||||
Core
|
||||
* Introduce --keep-fragments for keeping fragments of fragmented download
|
||||
on disk after download is finished
|
||||
* [YoutubeDL] Fix output template for missing timestamp (#12796)
|
||||
* [socks] Handle cases where credentials are required but missing
|
||||
* [extractor/common] Improve HLS extraction (#12211)
|
||||
* Extract m3u8 parsing to separate method
|
||||
* Improve rendition groups extraction
|
||||
* Build stream name according to stream GROUP-ID
|
||||
* Ignore reference to AUDIO group without URI when stream has no CODECS
|
||||
* Use float for scaled tbr in _parse_m3u8_formats
|
||||
* [utils] Add support for TTML styles in dfxp2srt
|
||||
* [downloader/hls] No need to download keys for fragments that have
|
||||
already been downloaded
|
||||
* [downloader/fragment] Improve fragment downloading
|
||||
* Resume immediately
|
||||
* Don't concatenate fragments and decrypt them on every resume
|
||||
* Optimize disk storage usage, don't store intermediate fragments on disk
|
||||
* Store bookkeeping download state file
|
||||
+ [extractor/common] Add support for multiple getters in try_get
|
||||
+ [extractor/common] Add support for video of WebPage context in _json_ld
|
||||
(#12778)
|
||||
+ [extractor/common] Relax JWPlayer regular expression and remove
|
||||
duplicate URLs (#12768)
|
||||
|
||||
Extractors
|
||||
* [iqiyi] Fix extraction of Yule videos
|
||||
* [vidio] Improve extraction and sort formats
|
||||
+ [brightcove] Match only video elements with data-video-id attribute
|
||||
* [iqiyi] Fix playlist detection (#12504)
|
||||
- [azubu] Remove extractor (#12813)
|
||||
* [porn91] Fix extraction (#12814)
|
||||
* [vidzi] Fix extraction (#12793)
|
||||
+ [amp] Extract error message (#12795)
|
||||
+ [xfileshare] Add support for gorillavid.com and daclips.com (#12776)
|
||||
* [instagram] Fix extraction (#12777)
|
||||
+ [generic] Support Brightcove videos in <iframe> (#12482)
|
||||
+ [brightcove] Support URLs with bcpid instead of playerID (#12482)
|
||||
* [brightcove] Fix _extract_url (#12782)
|
||||
+ [odnoklassniki] Extract HLS formats
|
||||
|
||||
|
||||
version 2017.04.17
|
||||
|
||||
Extractors
|
||||
* [limelight] Improve extraction of LimelightEmbeddedPlayerFlash media embeds and
|
||||
add support for channel and channelList embeds
|
||||
* [generic] Extract multiple Limelight embeds (#12761)
|
||||
+ [itv] Extract series metadata
|
||||
* [itv] Fix RTMP formats downloading (#12759)
|
||||
* [itv] Use native HLS downloader by default
|
||||
+ [go90] Extract subtitles (#12752)
|
||||
+ [go90] Extract series metadata (#12752)
|
||||
|
||||
|
||||
version 2017.04.16
|
||||
|
||||
Core
|
||||
* [YoutubeDL] Apply expand_path after output template substitution
|
||||
+ [YoutubeDL] Propagate overridden meta fields to extraction results of type
|
||||
url (#11163)
|
||||
|
||||
Extractors
|
||||
+ [generic] Extract RSS entries as url_transparent (#11163)
|
||||
+ [streamango] Add support for streamango.com (#12643)
|
||||
+ [wsj:article] Add support for articles (#12558)
|
||||
* [brightcove] Relax video tag embeds extraction and validate ambiguous embeds'
|
||||
URLs (#9163, #12005, #12178, #12480)
|
||||
+ [udemy] Add support for react rendition (#12744)
|
||||
|
||||
|
||||
version 2017.04.15
|
||||
|
||||
Extractors
|
||||
|
2
Makefile
2
Makefile
@ -1,7 +1,7 @@
|
||||
all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites
|
||||
|
||||
clean:
|
||||
rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part* *.info.json *.mp4 *.m4a *.flv *.mp3 *.avi *.mkv *.webm *.3gp *.wav *.ape *.swf *.jpg *.png CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe
|
||||
rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part* *.ytdl *.info.json *.mp4 *.m4a *.flv *.mp3 *.avi *.mkv *.webm *.3gp *.wav *.ape *.swf *.jpg *.png CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe
|
||||
find . -name "*.pyc" -delete
|
||||
find . -name "*.class" -delete
|
||||
|
||||
|
@ -187,6 +187,9 @@ ## Download Options:
|
||||
and ISM)
|
||||
--abort-on-unavailable-fragment Abort downloading when some fragment is not
|
||||
available
|
||||
--keep-fragments Keep downloaded fragments on disk after
|
||||
downloading is finished; fragments are
|
||||
erased by default
|
||||
--buffer-size SIZE Size of download buffer (e.g. 1024 or 16K)
|
||||
(default is 1024)
|
||||
--no-resize-buffer Do not automatically adjust the buffer
|
||||
|
@ -45,6 +45,7 @@ # Supported sites
|
||||
- **anderetijden**: npo.nl and ntr.nl
|
||||
- **AnimeOnDemand**
|
||||
- **anitube.se**
|
||||
- **Anvato**
|
||||
- **AnySex**
|
||||
- **Aparat**
|
||||
- **AppleConnect**
|
||||
@ -81,8 +82,6 @@ # Supported sites
|
||||
- **AZMedien**: AZ Medien videos
|
||||
- **AZMedienPlaylist**: AZ Medien playlists
|
||||
- **AZMedienShowPlaylist**: AZ Medien show playlists
|
||||
- **Azubu**
|
||||
- **AzubuLive**
|
||||
- **BaiduVideo**: 百度视频
|
||||
- **bambuser**
|
||||
- **bambuser:channel**
|
||||
@ -531,6 +530,7 @@ # Supported sites
|
||||
- **NJPWWorld**: 新日本プロレスワールド
|
||||
- **NobelPrize**
|
||||
- **Noco**
|
||||
- **Noovo**
|
||||
- **Normalboots**
|
||||
- **NosVideo**
|
||||
- **Nova**: TN.cz, Prásk.tv, Nova.cz, Novaplus.cz, FANDA.tv, Krásná.cz and Doma.cz
|
||||
@ -745,6 +745,7 @@ # Supported sites
|
||||
- **Steam**
|
||||
- **Stitcher**
|
||||
- **Streamable**
|
||||
- **Streamango**
|
||||
- **streamcloud.eu**
|
||||
- **StreamCZ**
|
||||
- **StreetVoice**
|
||||
@ -966,6 +967,7 @@ # Supported sites
|
||||
- **wrzuta.pl**
|
||||
- **wrzuta.pl:playlist**
|
||||
- **WSJ**: Wall Street Journal
|
||||
- **WSJArticle**
|
||||
- **XBef**
|
||||
- **XboxClips**
|
||||
- **XFileShare**: XFileShare based sites: DaClips, FileHoot, GorillaVid, MovPod, PowerWatch, Rapidvideo.ws, TheVideoBee, Vidto, Streamin.To, XVIDSTAGE, Vid ABC, VidBom, vidlo
|
||||
@ -1013,6 +1015,7 @@ # Supported sites
|
||||
- **youtube:user**: YouTube.com user videos (URL or "ytuser" keyword)
|
||||
- **youtube:watchlater**: YouTube watch later list, ":ytwatchlater" for short (requires authentication)
|
||||
- **Zapiks**
|
||||
- **Zaq1**
|
||||
- **ZDF**
|
||||
- **ZDFChannel**
|
||||
- **zingmp3**: mp3.zing.vn
|
||||
|
@ -3,12 +3,13 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
# Allow direct execution
|
||||
import io
|
||||
import os
|
||||
import sys
|
||||
import unittest
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from test.helper import FakeYDL, expect_dict
|
||||
from test.helper import FakeYDL, expect_dict, expect_value
|
||||
from youtube_dl.extractor.common import InfoExtractor
|
||||
from youtube_dl.extractor import YoutubeIE, get_info_extractor
|
||||
from youtube_dl.utils import encode_data_uri, strip_jsonp, ExtractorError, RegexNotFoundError
|
||||
@ -175,6 +176,318 @@ def test_extract_jwplayer_data_realworld(self):
|
||||
}]
|
||||
})
|
||||
|
||||
def test_parse_m3u8_formats(self):
|
||||
_TEST_CASES = [
|
||||
(
|
||||
# https://github.com/rg3/youtube-dl/issues/11507
|
||||
# http://pluzz.francetv.fr/videos/le_ministere.html
|
||||
'pluzz_francetv_11507',
|
||||
'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/master.m3u8?caption=2017%2F16%2F156589847-1492488987.m3u8%3Afra%3AFrancais&audiotrack=0%3Afra%3AFrancais',
|
||||
[{
|
||||
'url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_0_av.m3u8?null=0',
|
||||
'manifest_url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/master.m3u8?caption=2017%2F16%2F156589847-1492488987.m3u8%3Afra%3AFrancais&audiotrack=0%3Afra%3AFrancais',
|
||||
'ext': 'mp4',
|
||||
'format_id': '180',
|
||||
'protocol': 'm3u8',
|
||||
'acodec': 'mp4a.40.2',
|
||||
'vcodec': 'avc1.66.30',
|
||||
'tbr': 180,
|
||||
'width': 256,
|
||||
'height': 144,
|
||||
}, {
|
||||
'url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_1_av.m3u8?null=0',
|
||||
'manifest_url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/master.m3u8?caption=2017%2F16%2F156589847-1492488987.m3u8%3Afra%3AFrancais&audiotrack=0%3Afra%3AFrancais',
|
||||
'ext': 'mp4',
|
||||
'format_id': '303',
|
||||
'protocol': 'm3u8',
|
||||
'acodec': 'mp4a.40.2',
|
||||
'vcodec': 'avc1.66.30',
|
||||
'tbr': 303,
|
||||
'width': 320,
|
||||
'height': 180,
|
||||
}, {
|
||||
'url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_2_av.m3u8?null=0',
|
||||
'manifest_url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/master.m3u8?caption=2017%2F16%2F156589847-1492488987.m3u8%3Afra%3AFrancais&audiotrack=0%3Afra%3AFrancais',
|
||||
'ext': 'mp4',
|
||||
'format_id': '575',
|
||||
'protocol': 'm3u8',
|
||||
'acodec': 'mp4a.40.2',
|
||||
'vcodec': 'avc1.66.30',
|
||||
'tbr': 575,
|
||||
'width': 512,
|
||||
'height': 288,
|
||||
}, {
|
||||
'url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_3_av.m3u8?null=0',
|
||||
'manifest_url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/master.m3u8?caption=2017%2F16%2F156589847-1492488987.m3u8%3Afra%3AFrancais&audiotrack=0%3Afra%3AFrancais',
|
||||
'ext': 'mp4',
|
||||
'format_id': '831',
|
||||
'protocol': 'm3u8',
|
||||
'acodec': 'mp4a.40.2',
|
||||
'vcodec': 'avc1.77.30',
|
||||
'tbr': 831,
|
||||
'width': 704,
|
||||
'height': 396,
|
||||
}, {
|
||||
'url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_4_av.m3u8?null=0',
|
||||
'manifest_url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/master.m3u8?caption=2017%2F16%2F156589847-1492488987.m3u8%3Afra%3AFrancais&audiotrack=0%3Afra%3AFrancais',
|
||||
'ext': 'mp4',
|
||||
'protocol': 'm3u8',
|
||||
'format_id': '1467',
|
||||
'acodec': 'mp4a.40.2',
|
||||
'vcodec': 'avc1.77.30',
|
||||
'tbr': 1467,
|
||||
'width': 1024,
|
||||
'height': 576,
|
||||
}]
|
||||
),
|
||||
(
|
||||
# https://github.com/rg3/youtube-dl/issues/11995
|
||||
# http://teamcoco.com/video/clueless-gamer-super-bowl-for-honor
|
||||
'teamcoco_11995',
|
||||
'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/main.m3u8',
|
||||
[{
|
||||
'url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/hls/CONAN_020217_Highlight_show-audio-160k_v4.m3u8',
|
||||
'manifest_url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/main.m3u8',
|
||||
'ext': 'mp4',
|
||||
'format_id': 'audio-0-Default',
|
||||
'protocol': 'm3u8',
|
||||
'vcodec': 'none',
|
||||
}, {
|
||||
'url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/hls/CONAN_020217_Highlight_show-audio-64k_v4.m3u8',
|
||||
'manifest_url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/main.m3u8',
|
||||
'ext': 'mp4',
|
||||
'format_id': 'audio-1-Default',
|
||||
'protocol': 'm3u8',
|
||||
'vcodec': 'none',
|
||||
}, {
|
||||
'url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/hls/CONAN_020217_Highlight_show-audio-64k_v4.m3u8',
|
||||
'manifest_url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/main.m3u8',
|
||||
'ext': 'mp4',
|
||||
'format_id': '71',
|
||||
'protocol': 'm3u8',
|
||||
'acodec': 'mp4a.40.5',
|
||||
'vcodec': 'none',
|
||||
'tbr': 71,
|
||||
}, {
|
||||
'url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/hls/CONAN_020217_Highlight_show-400k_v4.m3u8',
|
||||
'manifest_url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/main.m3u8',
|
||||
'ext': 'mp4',
|
||||
'format_id': '413',
|
||||
'protocol': 'm3u8',
|
||||
'acodec': 'none',
|
||||
'vcodec': 'avc1.42001e',
|
||||
'tbr': 413,
|
||||
'width': 400,
|
||||
'height': 224,
|
||||
}, {
|
||||
'url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/hls/CONAN_020217_Highlight_show-400k_v4.m3u8',
|
||||
'manifest_url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/main.m3u8',
|
||||
'ext': 'mp4',
|
||||
'format_id': '522',
|
||||
'protocol': 'm3u8',
|
||||
'acodec': 'none',
|
||||
'vcodec': 'avc1.42001e',
|
||||
'tbr': 522,
|
||||
'width': 400,
|
||||
'height': 224,
|
||||
}, {
|
||||
'url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/hls/CONAN_020217_Highlight_show-1m_v4.m3u8',
|
||||
'manifest_url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/main.m3u8',
|
||||
'ext': 'mp4',
|
||||
'format_id': '1205',
|
||||
'protocol': 'm3u8',
|
||||
'acodec': 'none',
|
||||
'vcodec': 'avc1.4d001e',
|
||||
'tbr': 1205,
|
||||
'width': 640,
|
||||
'height': 360,
|
||||
}, {
|
||||
'url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/hls/CONAN_020217_Highlight_show-2m_v4.m3u8',
|
||||
'manifest_url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/main.m3u8',
|
||||
'ext': 'mp4',
|
||||
'format_id': '2374',
|
||||
'protocol': 'm3u8',
|
||||
'acodec': 'none',
|
||||
'vcodec': 'avc1.4d001f',
|
||||
'tbr': 2374,
|
||||
'width': 1024,
|
||||
'height': 576,
|
||||
}]
|
||||
),
|
||||
(
|
||||
# https://github.com/rg3/youtube-dl/issues/12211
|
||||
# http://video.toggle.sg/en/series/whoopie-s-world/ep3/478601
|
||||
'toggle_mobile_12211',
|
||||
'http://cdnapi.kaltura.com/p/2082311/sp/208231100/playManifest/protocol/http/entryId/0_89q6e8ku/format/applehttp/tags/mobile_sd/f/a.m3u8',
|
||||
[{
|
||||
'url': 'http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/2/pv/1/flavorId/0_sa2ntrdg/name/a.mp4/index.m3u8',
|
||||
'manifest_url': 'http://cdnapi.kaltura.com/p/2082311/sp/208231100/playManifest/protocol/http/entryId/0_89q6e8ku/format/applehttp/tags/mobile_sd/f/a.m3u8',
|
||||
'ext': 'mp4',
|
||||
'format_id': 'audio-English',
|
||||
'protocol': 'm3u8',
|
||||
'language': 'eng',
|
||||
'vcodec': 'none',
|
||||
}, {
|
||||
'url': 'http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/2/pv/1/flavorId/0_r7y0nitg/name/a.mp4/index.m3u8',
|
||||
'manifest_url': 'http://cdnapi.kaltura.com/p/2082311/sp/208231100/playManifest/protocol/http/entryId/0_89q6e8ku/format/applehttp/tags/mobile_sd/f/a.m3u8',
|
||||
'ext': 'mp4',
|
||||
'format_id': 'audio-Undefined',
|
||||
'protocol': 'm3u8',
|
||||
'language': 'und',
|
||||
'vcodec': 'none',
|
||||
}, {
|
||||
'url': 'http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/2/pv/1/flavorId/0_qlk9hlzr/name/a.mp4/index.m3u8',
|
||||
'manifest_url': 'http://cdnapi.kaltura.com/p/2082311/sp/208231100/playManifest/protocol/http/entryId/0_89q6e8ku/format/applehttp/tags/mobile_sd/f/a.m3u8',
|
||||
'ext': 'mp4',
|
||||
'format_id': '155',
|
||||
'protocol': 'm3u8',
|
||||
'tbr': 155.648,
|
||||
'width': 320,
|
||||
'height': 180,
|
||||
}, {
|
||||
'url': 'http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/2/pv/1/flavorId/0_oefackmi/name/a.mp4/index.m3u8',
|
||||
'manifest_url': 'http://cdnapi.kaltura.com/p/2082311/sp/208231100/playManifest/protocol/http/entryId/0_89q6e8ku/format/applehttp/tags/mobile_sd/f/a.m3u8',
|
||||
'ext': 'mp4',
|
||||
'format_id': '502',
|
||||
'protocol': 'm3u8',
|
||||
'tbr': 502.784,
|
||||
'width': 480,
|
||||
'height': 270,
|
||||
}, {
|
||||
'url': 'http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/12/pv/1/flavorId/0_vyg9pj7k/name/a.mp4/index.m3u8',
|
||||
'manifest_url': 'http://cdnapi.kaltura.com/p/2082311/sp/208231100/playManifest/protocol/http/entryId/0_89q6e8ku/format/applehttp/tags/mobile_sd/f/a.m3u8',
|
||||
'ext': 'mp4',
|
||||
'format_id': '827',
|
||||
'protocol': 'm3u8',
|
||||
'tbr': 827.392,
|
||||
'width': 640,
|
||||
'height': 360,
|
||||
}, {
|
||||
'url': 'http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/12/pv/1/flavorId/0_50n4psvx/name/a.mp4/index.m3u8',
|
||||
'manifest_url': 'http://cdnapi.kaltura.com/p/2082311/sp/208231100/playManifest/protocol/http/entryId/0_89q6e8ku/format/applehttp/tags/mobile_sd/f/a.m3u8',
|
||||
'ext': 'mp4',
|
||||
'format_id': '1396',
|
||||
'protocol': 'm3u8',
|
||||
'tbr': 1396.736,
|
||||
'width': 854,
|
||||
'height': 480,
|
||||
}]
|
||||
),
|
||||
(
|
||||
# http://www.twitch.tv/riotgames/v/6528877
|
||||
'twitch_vod',
|
||||
'https://usher.ttvnw.net/vod/6528877?allow_source=true&allow_audio_only=true&allow_spectre=true&player=twitchweb&nauth=%7B%22user_id%22%3Anull%2C%22vod_id%22%3A6528877%2C%22expires%22%3A1492887874%2C%22chansub%22%3A%7B%22restricted_bitrates%22%3A%5B%5D%7D%2C%22privileged%22%3Afalse%2C%22https_required%22%3Afalse%7D&nauthsig=3e29296a6824a0f48f9e731383f77a614fc79bee',
|
||||
[{
|
||||
'url': 'https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/audio_only/index-muted-HM49I092CC.m3u8',
|
||||
'manifest_url': 'https://usher.ttvnw.net/vod/6528877?allow_source=true&allow_audio_only=true&allow_spectre=true&player=twitchweb&nauth=%7B%22user_id%22%3Anull%2C%22vod_id%22%3A6528877%2C%22expires%22%3A1492887874%2C%22chansub%22%3A%7B%22restricted_bitrates%22%3A%5B%5D%7D%2C%22privileged%22%3Afalse%2C%22https_required%22%3Afalse%7D&nauthsig=3e29296a6824a0f48f9e731383f77a614fc79bee',
|
||||
'ext': 'mp4',
|
||||
'format_id': 'Audio Only',
|
||||
'protocol': 'm3u8',
|
||||
'acodec': 'mp4a.40.2',
|
||||
'vcodec': 'none',
|
||||
'tbr': 182.725,
|
||||
}, {
|
||||
'url': 'https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/mobile/index-muted-HM49I092CC.m3u8',
|
||||
'manifest_url': 'https://usher.ttvnw.net/vod/6528877?allow_source=true&allow_audio_only=true&allow_spectre=true&player=twitchweb&nauth=%7B%22user_id%22%3Anull%2C%22vod_id%22%3A6528877%2C%22expires%22%3A1492887874%2C%22chansub%22%3A%7B%22restricted_bitrates%22%3A%5B%5D%7D%2C%22privileged%22%3Afalse%2C%22https_required%22%3Afalse%7D&nauthsig=3e29296a6824a0f48f9e731383f77a614fc79bee',
|
||||
'ext': 'mp4',
|
||||
'format_id': 'Mobile',
|
||||
'protocol': 'm3u8',
|
||||
'acodec': 'mp4a.40.2',
|
||||
'vcodec': 'avc1.42C00D',
|
||||
'tbr': 280.474,
|
||||
'width': 400,
|
||||
'height': 226,
|
||||
}, {
|
||||
'url': 'https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/low/index-muted-HM49I092CC.m3u8',
|
||||
'manifest_url': 'https://usher.ttvnw.net/vod/6528877?allow_source=true&allow_audio_only=true&allow_spectre=true&player=twitchweb&nauth=%7B%22user_id%22%3Anull%2C%22vod_id%22%3A6528877%2C%22expires%22%3A1492887874%2C%22chansub%22%3A%7B%22restricted_bitrates%22%3A%5B%5D%7D%2C%22privileged%22%3Afalse%2C%22https_required%22%3Afalse%7D&nauthsig=3e29296a6824a0f48f9e731383f77a614fc79bee',
|
||||
'ext': 'mp4',
|
||||
'format_id': 'Low',
|
||||
'protocol': 'm3u8',
|
||||
'acodec': 'mp4a.40.2',
|
||||
'vcodec': 'avc1.42C01E',
|
||||
'tbr': 628.347,
|
||||
'width': 640,
|
||||
'height': 360,
|
||||
}, {
|
||||
'url': 'https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/medium/index-muted-HM49I092CC.m3u8',
|
||||
'manifest_url': 'https://usher.ttvnw.net/vod/6528877?allow_source=true&allow_audio_only=true&allow_spectre=true&player=twitchweb&nauth=%7B%22user_id%22%3Anull%2C%22vod_id%22%3A6528877%2C%22expires%22%3A1492887874%2C%22chansub%22%3A%7B%22restricted_bitrates%22%3A%5B%5D%7D%2C%22privileged%22%3Afalse%2C%22https_required%22%3Afalse%7D&nauthsig=3e29296a6824a0f48f9e731383f77a614fc79bee',
|
||||
'ext': 'mp4',
|
||||
'format_id': 'Medium',
|
||||
'protocol': 'm3u8',
|
||||
'acodec': 'mp4a.40.2',
|
||||
'vcodec': 'avc1.42C01E',
|
||||
'tbr': 893.387,
|
||||
'width': 852,
|
||||
'height': 480,
|
||||
}, {
|
||||
'url': 'https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/high/index-muted-HM49I092CC.m3u8',
|
||||
'manifest_url': 'https://usher.ttvnw.net/vod/6528877?allow_source=true&allow_audio_only=true&allow_spectre=true&player=twitchweb&nauth=%7B%22user_id%22%3Anull%2C%22vod_id%22%3A6528877%2C%22expires%22%3A1492887874%2C%22chansub%22%3A%7B%22restricted_bitrates%22%3A%5B%5D%7D%2C%22privileged%22%3Afalse%2C%22https_required%22%3Afalse%7D&nauthsig=3e29296a6824a0f48f9e731383f77a614fc79bee',
|
||||
'ext': 'mp4',
|
||||
'format_id': 'High',
|
||||
'protocol': 'm3u8',
|
||||
'acodec': 'mp4a.40.2',
|
||||
'vcodec': 'avc1.42C01F',
|
||||
'tbr': 1603.789,
|
||||
'width': 1280,
|
||||
'height': 720,
|
||||
}, {
|
||||
'url': 'https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/chunked/index-muted-HM49I092CC.m3u8',
|
||||
'manifest_url': 'https://usher.ttvnw.net/vod/6528877?allow_source=true&allow_audio_only=true&allow_spectre=true&player=twitchweb&nauth=%7B%22user_id%22%3Anull%2C%22vod_id%22%3A6528877%2C%22expires%22%3A1492887874%2C%22chansub%22%3A%7B%22restricted_bitrates%22%3A%5B%5D%7D%2C%22privileged%22%3Afalse%2C%22https_required%22%3Afalse%7D&nauthsig=3e29296a6824a0f48f9e731383f77a614fc79bee',
|
||||
'ext': 'mp4',
|
||||
'format_id': 'Source',
|
||||
'protocol': 'm3u8',
|
||||
'acodec': 'mp4a.40.2',
|
||||
'vcodec': 'avc1.100.31',
|
||||
'tbr': 3214.134,
|
||||
'width': 1280,
|
||||
'height': 720,
|
||||
}]
|
||||
),
|
||||
(
|
||||
# http://www.vidio.com/watch/165683-dj_ambred-booyah-live-2015
|
||||
# EXT-X-STREAM-INF tag with NAME attribute that is not defined
|
||||
# in HLS specification
|
||||
'vidio',
|
||||
'https://www.vidio.com/videos/165683/playlist.m3u8',
|
||||
[{
|
||||
'url': 'https://cdn1-a.production.vidio.static6.com/uploads/165683/dj_ambred-4383-b300.mp4.m3u8',
|
||||
'manifest_url': 'https://www.vidio.com/videos/165683/playlist.m3u8',
|
||||
'ext': 'mp4',
|
||||
'format_id': '270p 3G',
|
||||
'protocol': 'm3u8',
|
||||
'tbr': 300,
|
||||
'width': 480,
|
||||
'height': 270,
|
||||
}, {
|
||||
'url': 'https://cdn1-a.production.vidio.static6.com/uploads/165683/dj_ambred-4383-b600.mp4.m3u8',
|
||||
'manifest_url': 'https://www.vidio.com/videos/165683/playlist.m3u8',
|
||||
'ext': 'mp4',
|
||||
'format_id': '360p SD',
|
||||
'protocol': 'm3u8',
|
||||
'tbr': 600,
|
||||
'width': 640,
|
||||
'height': 360,
|
||||
}, {
|
||||
'url': 'https://cdn1-a.production.vidio.static6.com/uploads/165683/dj_ambred-4383-b1200.mp4.m3u8',
|
||||
'manifest_url': 'https://www.vidio.com/videos/165683/playlist.m3u8',
|
||||
'ext': 'mp4',
|
||||
'format_id': '720p HD',
|
||||
'protocol': 'm3u8',
|
||||
'tbr': 1200,
|
||||
'width': 1280,
|
||||
'height': 720,
|
||||
}]
|
||||
)
|
||||
]
|
||||
|
||||
for m3u8_file, m3u8_url, expected_formats in _TEST_CASES:
|
||||
with io.open('./test/testdata/m3u8/%s.m3u8' % m3u8_file,
|
||||
mode='r', encoding='utf-8') as f:
|
||||
formats = self.ie._parse_m3u8_formats(
|
||||
f.read(), m3u8_url, ext='mp4')
|
||||
self.ie._sort_formats(formats)
|
||||
expect_value(self, formats, expected_formats, None)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@ -755,6 +755,7 @@ def _real_extract(self, url):
|
||||
'_type': 'url_transparent',
|
||||
'url': 'foo2:',
|
||||
'ie_key': 'Foo2',
|
||||
'title': 'foo1 title'
|
||||
}
|
||||
|
||||
class Foo2IE(InfoExtractor):
|
||||
@ -771,7 +772,7 @@ class Foo3IE(InfoExtractor):
|
||||
_VALID_URL = r'foo3:'
|
||||
|
||||
def _real_extract(self, url):
|
||||
return _make_result([{'url': TEST_URL}])
|
||||
return _make_result([{'url': TEST_URL}], title='foo3 title')
|
||||
|
||||
ydl.add_info_extractor(Foo1IE(ydl))
|
||||
ydl.add_info_extractor(Foo2IE(ydl))
|
||||
@ -779,6 +780,7 @@ def _real_extract(self, url):
|
||||
ydl.extract_info('foo1:')
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded['url'], TEST_URL)
|
||||
self.assertEqual(downloaded['title'], 'foo1 title')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
@ -225,7 +225,7 @@ def try_rm_tcs_files(tcs=None):
|
||||
format_bytes(got_fsize)))
|
||||
if 'md5' in tc:
|
||||
md5_for_file = _file_md5(tc_filename)
|
||||
self.assertEqual(md5_for_file, tc['md5'])
|
||||
self.assertEqual(tc['md5'], md5_for_file)
|
||||
# Finally, check test cases' data again but this time against
|
||||
# extracted data from info JSON file written during processing
|
||||
info_json_fn = os.path.splitext(tc_filename)[0] + '.info.json'
|
||||
|
@ -44,6 +44,7 @@
|
||||
limit_length,
|
||||
mimetype2ext,
|
||||
month_by_name,
|
||||
multipart_encode,
|
||||
ohdave_rsa_encrypt,
|
||||
OnDemandPagedList,
|
||||
orderedSet,
|
||||
@ -338,6 +339,7 @@ def test_unified_timestamps(self):
|
||||
self.assertEqual(unified_timestamp('UNKNOWN DATE FORMAT'), None)
|
||||
self.assertEqual(unified_timestamp('May 16, 2016 11:15 PM'), 1463440500)
|
||||
self.assertEqual(unified_timestamp('Feb 7, 2016 at 6:35 pm'), 1454870100)
|
||||
self.assertEqual(unified_timestamp('2017-03-30T17:52:41Q'), 1490896361)
|
||||
|
||||
def test_determine_ext(self):
|
||||
self.assertEqual(determine_ext('http://example.com/foo/bar.mp4/?download'), 'mp4')
|
||||
@ -619,6 +621,16 @@ def query_dict(url):
|
||||
'http://example.com/path', {'test': '第二行тест'})),
|
||||
query_dict('http://example.com/path?test=%E7%AC%AC%E4%BA%8C%E8%A1%8C%D1%82%D0%B5%D1%81%D1%82'))
|
||||
|
||||
def test_multipart_encode(self):
|
||||
self.assertEqual(
|
||||
multipart_encode({b'field': b'value'}, boundary='AAAAAA')[0],
|
||||
b'--AAAAAA\r\nContent-Disposition: form-data; name="field"\r\n\r\nvalue\r\n--AAAAAA--\r\n')
|
||||
self.assertEqual(
|
||||
multipart_encode({'欄位'.encode('utf-8'): '值'.encode('utf-8')}, boundary='AAAAAA')[0],
|
||||
b'--AAAAAA\r\nContent-Disposition: form-data; name="\xe6\xac\x84\xe4\xbd\x8d"\r\n\r\n\xe5\x80\xbc\r\n--AAAAAA--\r\n')
|
||||
self.assertRaises(
|
||||
ValueError, multipart_encode, {b'field': b'value'}, boundary='value')
|
||||
|
||||
def test_dict_get(self):
|
||||
FALSE_VALUES = {
|
||||
'none': None,
|
||||
@ -899,6 +911,7 @@ def test_extract_attributes(self):
|
||||
def test_clean_html(self):
|
||||
self.assertEqual(clean_html('a:\nb'), 'a: b')
|
||||
self.assertEqual(clean_html('a:\n "b"'), 'a: "b"')
|
||||
self.assertEqual(clean_html('a<br>\xa0b'), 'a\nb')
|
||||
|
||||
def test_intlist_to_bytes(self):
|
||||
self.assertEqual(
|
||||
@ -1069,6 +1082,47 @@ def test_dfxp2srt(self):
|
||||
'''
|
||||
self.assertEqual(dfxp2srt(dfxp_data_no_default_namespace), srt_data)
|
||||
|
||||
dfxp_data_with_style = '''<?xml version="1.0" encoding="utf-8"?>
|
||||
<tt xmlns="http://www.w3.org/2006/10/ttaf1" xmlns:ttp="http://www.w3.org/2006/10/ttaf1#parameter" ttp:timeBase="media" xmlns:tts="http://www.w3.org/2006/10/ttaf1#style" xml:lang="en" xmlns:ttm="http://www.w3.org/2006/10/ttaf1#metadata">
|
||||
<head>
|
||||
<styling>
|
||||
<style id="s2" style="s0" tts:color="cyan" tts:fontWeight="bold" />
|
||||
<style id="s1" style="s0" tts:color="yellow" tts:fontStyle="italic" />
|
||||
<style id="s3" style="s0" tts:color="lime" tts:textDecoration="underline" />
|
||||
<style id="s0" tts:backgroundColor="black" tts:fontStyle="normal" tts:fontSize="16" tts:fontFamily="sansSerif" tts:color="white" />
|
||||
</styling>
|
||||
</head>
|
||||
<body tts:textAlign="center" style="s0">
|
||||
<div>
|
||||
<p begin="00:00:02.08" id="p0" end="00:00:05.84">default style<span tts:color="red">custom style</span></p>
|
||||
<p style="s2" begin="00:00:02.08" id="p0" end="00:00:05.84"><span tts:color="lime">part 1<br /></span><span tts:color="cyan">part 2</span></p>
|
||||
<p style="s3" begin="00:00:05.84" id="p1" end="00:00:09.56">line 3<br />part 3</p>
|
||||
<p style="s1" tts:textDecoration="underline" begin="00:00:09.56" id="p2" end="00:00:12.36"><span style="s2" tts:color="lime">inner<br /> </span>style</p>
|
||||
</div>
|
||||
</body>
|
||||
</tt>'''
|
||||
srt_data = '''1
|
||||
00:00:02,080 --> 00:00:05,839
|
||||
<font color="white" face="sansSerif" size="16">default style<font color="red">custom style</font></font>
|
||||
|
||||
2
|
||||
00:00:02,080 --> 00:00:05,839
|
||||
<b><font color="cyan" face="sansSerif" size="16"><font color="lime">part 1
|
||||
</font>part 2</font></b>
|
||||
|
||||
3
|
||||
00:00:05,839 --> 00:00:09,560
|
||||
<u><font color="lime">line 3
|
||||
part 3</font></u>
|
||||
|
||||
4
|
||||
00:00:09,560 --> 00:00:12,359
|
||||
<i><u><font color="yellow"><font color="lime">inner
|
||||
</font>style</font></u></i>
|
||||
|
||||
'''
|
||||
self.assertEqual(dfxp2srt(dfxp_data_with_style), srt_data)
|
||||
|
||||
def test_cli_option(self):
|
||||
self.assertEqual(cli_option({'proxy': '127.0.0.1:3128'}, '--proxy', 'proxy'), ['--proxy', '127.0.0.1:3128'])
|
||||
self.assertEqual(cli_option({'proxy': None}, '--proxy', 'proxy'), [])
|
||||
|
14
test/testdata/m3u8/pluzz_francetv_11507.m3u8
vendored
Normal file
14
test/testdata/m3u8/pluzz_francetv_11507.m3u8
vendored
Normal file
@ -0,0 +1,14 @@
|
||||
#EXTM3U
|
||||
#EXT-X-VERSION:5
|
||||
#EXT-X-MEDIA:TYPE=SUBTITLES,GROUP-ID="subs",NAME="Francais",DEFAULT=NO,FORCED=NO,URI="http://replayftv-pmd.francetv.fr/subtitles/2017/16/156589847-1492488987.m3u8",LANGUAGE="fra"
|
||||
#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="aac",LANGUAGE="fra",NAME="Francais",DEFAULT=YES, AUTOSELECT=YES
|
||||
#EXT-X-STREAM-INF:SUBTITLES="subs",AUDIO="aac",PROGRAM-ID=1,BANDWIDTH=180000,RESOLUTION=256x144,CODECS="avc1.66.30, mp4a.40.2"
|
||||
http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_0_av.m3u8?null=0
|
||||
#EXT-X-STREAM-INF:SUBTITLES="subs",AUDIO="aac",PROGRAM-ID=1,BANDWIDTH=303000,RESOLUTION=320x180,CODECS="avc1.66.30, mp4a.40.2"
|
||||
http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_1_av.m3u8?null=0
|
||||
#EXT-X-STREAM-INF:SUBTITLES="subs",AUDIO="aac",PROGRAM-ID=1,BANDWIDTH=575000,RESOLUTION=512x288,CODECS="avc1.66.30, mp4a.40.2"
|
||||
http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_2_av.m3u8?null=0
|
||||
#EXT-X-STREAM-INF:SUBTITLES="subs",AUDIO="aac",PROGRAM-ID=1,BANDWIDTH=831000,RESOLUTION=704x396,CODECS="avc1.77.30, mp4a.40.2"
|
||||
http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_3_av.m3u8?null=0
|
||||
#EXT-X-STREAM-INF:SUBTITLES="subs",AUDIO="aac",PROGRAM-ID=1,BANDWIDTH=1467000,RESOLUTION=1024x576,CODECS="avc1.77.30, mp4a.40.2"
|
||||
http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_4_av.m3u8?null=0
|
16
test/testdata/m3u8/teamcoco_11995.m3u8
vendored
Normal file
16
test/testdata/m3u8/teamcoco_11995.m3u8
vendored
Normal file
@ -0,0 +1,16 @@
|
||||
#EXTM3U
|
||||
#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="audio-0",NAME="Default",AUTOSELECT=YES,DEFAULT=YES,URI="hls/CONAN_020217_Highlight_show-audio-160k_v4.m3u8"
|
||||
#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="audio-1",NAME="Default",AUTOSELECT=YES,DEFAULT=YES,URI="hls/CONAN_020217_Highlight_show-audio-64k_v4.m3u8"
|
||||
#EXT-X-I-FRAME-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=37862000,CODECS="avc1.4d001f",URI="hls/CONAN_020217_Highlight_show-2m_iframe.m3u8"
|
||||
#EXT-X-I-FRAME-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=18750000,CODECS="avc1.4d001e",URI="hls/CONAN_020217_Highlight_show-1m_iframe.m3u8"
|
||||
#EXT-X-I-FRAME-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=6535000,CODECS="avc1.42001e",URI="hls/CONAN_020217_Highlight_show-400k_iframe.m3u8"
|
||||
#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=2374000,RESOLUTION=1024x576,CODECS="avc1.4d001f,mp4a.40.2",AUDIO="audio-0"
|
||||
hls/CONAN_020217_Highlight_show-2m_v4.m3u8
|
||||
#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=1205000,RESOLUTION=640x360,CODECS="avc1.4d001e,mp4a.40.2",AUDIO="audio-0"
|
||||
hls/CONAN_020217_Highlight_show-1m_v4.m3u8
|
||||
#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=522000,RESOLUTION=400x224,CODECS="avc1.42001e,mp4a.40.2",AUDIO="audio-0"
|
||||
hls/CONAN_020217_Highlight_show-400k_v4.m3u8
|
||||
#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=413000,RESOLUTION=400x224,CODECS="avc1.42001e,mp4a.40.5",AUDIO="audio-1"
|
||||
hls/CONAN_020217_Highlight_show-400k_v4.m3u8
|
||||
#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=71000,CODECS="mp4a.40.5",AUDIO="audio-1"
|
||||
hls/CONAN_020217_Highlight_show-audio-64k_v4.m3u8
|
13
test/testdata/m3u8/toggle_mobile_12211.m3u8
vendored
Normal file
13
test/testdata/m3u8/toggle_mobile_12211.m3u8
vendored
Normal file
@ -0,0 +1,13 @@
|
||||
#EXTM3U
|
||||
#EXT-X-VERSION:4
|
||||
#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="audio",LANGUAGE="eng",NAME="English",URI="http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/2/pv/1/flavorId/0_sa2ntrdg/name/a.mp4/index.m3u8"
|
||||
#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="audio",LANGUAGE="und",NAME="Undefined",URI="http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/2/pv/1/flavorId/0_r7y0nitg/name/a.mp4/index.m3u8"
|
||||
|
||||
#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=155648,RESOLUTION=320x180,AUDIO="audio"
|
||||
http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/2/pv/1/flavorId/0_qlk9hlzr/name/a.mp4/index.m3u8
|
||||
#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=502784,RESOLUTION=480x270,AUDIO="audio"
|
||||
http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/2/pv/1/flavorId/0_oefackmi/name/a.mp4/index.m3u8
|
||||
#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=827392,RESOLUTION=640x360,AUDIO="audio"
|
||||
http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/12/pv/1/flavorId/0_vyg9pj7k/name/a.mp4/index.m3u8
|
||||
#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=1396736,RESOLUTION=854x480,AUDIO="audio"
|
||||
http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/12/pv/1/flavorId/0_50n4psvx/name/a.mp4/index.m3u8
|
20
test/testdata/m3u8/twitch_vod.m3u8
vendored
Normal file
20
test/testdata/m3u8/twitch_vod.m3u8
vendored
Normal file
@ -0,0 +1,20 @@
|
||||
#EXTM3U
|
||||
#EXT-X-TWITCH-INFO:ORIGIN="s3",CLUSTER="edgecast_vod",REGION="EU",MANIFEST-CLUSTER="edgecast_vod",USER-IP="109.171.17.81"
|
||||
#EXT-X-MEDIA:TYPE=VIDEO,GROUP-ID="chunked",NAME="Source",AUTOSELECT=YES,DEFAULT=YES
|
||||
#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=3214134,CODECS="avc1.100.31,mp4a.40.2",RESOLUTION="1280x720",VIDEO="chunked"
|
||||
https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/chunked/index-muted-HM49I092CC.m3u8
|
||||
#EXT-X-MEDIA:TYPE=VIDEO,GROUP-ID="high",NAME="High",AUTOSELECT=YES,DEFAULT=YES
|
||||
#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=1603789,CODECS="avc1.42C01F,mp4a.40.2",RESOLUTION="1280x720",VIDEO="high"
|
||||
https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/high/index-muted-HM49I092CC.m3u8
|
||||
#EXT-X-MEDIA:TYPE=VIDEO,GROUP-ID="medium",NAME="Medium",AUTOSELECT=YES,DEFAULT=YES
|
||||
#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=893387,CODECS="avc1.42C01E,mp4a.40.2",RESOLUTION="852x480",VIDEO="medium"
|
||||
https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/medium/index-muted-HM49I092CC.m3u8
|
||||
#EXT-X-MEDIA:TYPE=VIDEO,GROUP-ID="low",NAME="Low",AUTOSELECT=YES,DEFAULT=YES
|
||||
#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=628347,CODECS="avc1.42C01E,mp4a.40.2",RESOLUTION="640x360",VIDEO="low"
|
||||
https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/low/index-muted-HM49I092CC.m3u8
|
||||
#EXT-X-MEDIA:TYPE=VIDEO,GROUP-ID="mobile",NAME="Mobile",AUTOSELECT=YES,DEFAULT=YES
|
||||
#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=280474,CODECS="avc1.42C00D,mp4a.40.2",RESOLUTION="400x226",VIDEO="mobile"
|
||||
https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/mobile/index-muted-HM49I092CC.m3u8
|
||||
#EXT-X-MEDIA:TYPE=VIDEO,GROUP-ID="audio_only",NAME="Audio Only",AUTOSELECT=NO,DEFAULT=NO
|
||||
#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=182725,CODECS="mp4a.40.2",VIDEO="audio_only"
|
||||
https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/audio_only/index-muted-HM49I092CC.m3u8
|
10
test/testdata/m3u8/vidio.m3u8
vendored
Normal file
10
test/testdata/m3u8/vidio.m3u8
vendored
Normal file
@ -0,0 +1,10 @@
|
||||
#EXTM3U
|
||||
|
||||
#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=300000,RESOLUTION=480x270,NAME="270p 3G"
|
||||
https://cdn1-a.production.vidio.static6.com/uploads/165683/dj_ambred-4383-b300.mp4.m3u8
|
||||
|
||||
#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=600000,RESOLUTION=640x360,NAME="360p SD"
|
||||
https://cdn1-a.production.vidio.static6.com/uploads/165683/dj_ambred-4383-b600.mp4.m3u8
|
||||
|
||||
#EXT-X-STREAM-INF:PROGRAM-ID=1,BANDWIDTH=1200000,RESOLUTION=1280x720,NAME="720p HD"
|
||||
https://cdn1-a.production.vidio.static6.com/uploads/165683/dj_ambred-4383-b1200.mp4.m3u8
|
@ -640,7 +640,7 @@ def prepare_filename(self, info_dict):
|
||||
|
||||
NUMERIC_FIELDS = set((
|
||||
'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
|
||||
'upload_year', 'upload_month', 'upload_day',
|
||||
'timestamp', 'upload_year', 'upload_month', 'upload_day',
|
||||
'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
|
||||
'average_rating', 'comment_count', 'age_limit',
|
||||
'start_time', 'end_time',
|
||||
@ -672,8 +672,7 @@ def prepare_filename(self, info_dict):
|
||||
FORMAT_RE.format(numeric_field),
|
||||
r'%({0})s'.format(numeric_field), outtmpl)
|
||||
|
||||
tmpl = expand_path(outtmpl)
|
||||
filename = tmpl % template_dict
|
||||
filename = expand_path(outtmpl % template_dict)
|
||||
# Temporary fix for #4787
|
||||
# 'Treat' all problem characters by passing filename through preferredencoding
|
||||
# to workaround encoding issues with subprocess on python2 @ Windows
|
||||
@ -851,7 +850,14 @@ def process_ie_result(self, ie_result, download=True, extra_info={}):
|
||||
new_result = info.copy()
|
||||
new_result.update(force_properties)
|
||||
|
||||
assert new_result.get('_type') != 'url_transparent'
|
||||
# Extracted info may not be a video result (i.e.
|
||||
# info.get('_type', 'video') != video) but rather an url or
|
||||
# url_transparent. In such cases outer metadata (from ie_result)
|
||||
# should be propagated to inner one (info). For this to happen
|
||||
# _type of info should be overridden with url_transparent. This
|
||||
# fixes issue from https://github.com/rg3/youtube-dl/pull/11163.
|
||||
if new_result.get('_type') == 'url':
|
||||
new_result['_type'] = 'url_transparent'
|
||||
|
||||
return self.process_ie_result(
|
||||
new_result, download=download, extra_info=extra_info)
|
||||
|
@ -343,6 +343,7 @@ def parse_retries(retries):
|
||||
'retries': opts.retries,
|
||||
'fragment_retries': opts.fragment_retries,
|
||||
'skip_unavailable_fragments': opts.skip_unavailable_fragments,
|
||||
'keep_fragments': opts.keep_fragments,
|
||||
'buffersize': opts.buffersize,
|
||||
'noresizebuffer': opts.noresizebuffer,
|
||||
'continuedl': opts.continue_dl,
|
||||
|
@ -187,6 +187,9 @@ def undo_temp_name(self, filename):
|
||||
return filename[:-len('.part')]
|
||||
return filename
|
||||
|
||||
def ytdl_filename(self, filename):
    """Return the name of the .ytdl file that accompanies *filename*."""
    ytdl_suffix = '.ytdl'
    return filename + ytdl_suffix
|
||||
|
||||
def try_rename(self, old_filename, new_filename):
|
||||
try:
|
||||
if old_filename == new_filename:
|
||||
@ -327,6 +330,7 @@ def download(self, filename, info_dict):
|
||||
os.path.exists(encodeFilename(filename))
|
||||
)
|
||||
|
||||
if not hasattr(filename, 'write'):
|
||||
continuedl_and_exists = (
|
||||
self.params.get('continuedl', True) and
|
||||
os.path.isfile(encodeFilename(filename)) and
|
||||
|
@ -1,13 +1,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import os
|
||||
|
||||
from .fragment import FragmentFD
|
||||
from ..compat import compat_urllib_error
|
||||
from ..utils import (
|
||||
sanitize_open,
|
||||
encodeFilename,
|
||||
)
|
||||
|
||||
|
||||
class DashSegmentsFD(FragmentFD):
|
||||
@ -28,31 +22,24 @@ def real_download(self, filename, info_dict):
|
||||
|
||||
self._prepare_and_start_frag_download(ctx)
|
||||
|
||||
segments_filenames = []
|
||||
|
||||
fragment_retries = self.params.get('fragment_retries', 0)
|
||||
skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
|
||||
|
||||
def process_segment(segment, tmp_filename, num):
|
||||
segment_url = segment['url']
|
||||
segment_name = 'Frag%d' % num
|
||||
target_filename = '%s-%s' % (tmp_filename, segment_name)
|
||||
frag_index = 0
|
||||
for i, segment in enumerate(segments):
|
||||
frag_index += 1
|
||||
if frag_index <= ctx['fragment_index']:
|
||||
continue
|
||||
# In DASH, the first segment contains necessary headers to
|
||||
# generate a valid MP4 file, so always abort for the first segment
|
||||
fatal = num == 0 or not skip_unavailable_fragments
|
||||
fatal = i == 0 or not skip_unavailable_fragments
|
||||
count = 0
|
||||
while count <= fragment_retries:
|
||||
try:
|
||||
success = ctx['dl'].download(target_filename, {
|
||||
'url': segment_url,
|
||||
'http_headers': info_dict.get('http_headers'),
|
||||
})
|
||||
success, frag_content = self._download_fragment(ctx, segment['url'], info_dict)
|
||||
if not success:
|
||||
return False
|
||||
down, target_sanitized = sanitize_open(target_filename, 'rb')
|
||||
ctx['dest_stream'].write(down.read())
|
||||
down.close()
|
||||
segments_filenames.append(target_sanitized)
|
||||
self._append_fragment(ctx, frag_content)
|
||||
break
|
||||
except compat_urllib_error.HTTPError as err:
|
||||
# YouTube may often return 404 HTTP error for a fragment causing the
|
||||
@ -63,22 +50,14 @@ def process_segment(segment, tmp_filename, num):
|
||||
# HTTP error.
|
||||
count += 1
|
||||
if count <= fragment_retries:
|
||||
self.report_retry_fragment(err, segment_name, count, fragment_retries)
|
||||
self.report_retry_fragment(err, frag_index, count, fragment_retries)
|
||||
if count > fragment_retries:
|
||||
if not fatal:
|
||||
self.report_skip_fragment(segment_name)
|
||||
return True
|
||||
self.report_skip_fragment(frag_index)
|
||||
continue
|
||||
self.report_error('giving up after %s fragment retries' % fragment_retries)
|
||||
return False
|
||||
return True
|
||||
|
||||
for i, segment in enumerate(segments):
|
||||
if not process_segment(segment, ctx['tmpfilename'], i):
|
||||
return False
|
||||
|
||||
self._finish_frag_download(ctx)
|
||||
|
||||
for segment_file in segments_filenames:
|
||||
os.remove(encodeFilename(segment_file))
|
||||
|
||||
return True
|
||||
|
@ -29,7 +29,17 @@ def real_download(self, filename, info_dict):
|
||||
self.report_destination(filename)
|
||||
tmpfilename = self.temp_name(filename)
|
||||
|
||||
try:
|
||||
retval = self._call_downloader(tmpfilename, info_dict)
|
||||
except KeyboardInterrupt:
|
||||
if not info_dict.get('is_live'):
|
||||
raise
|
||||
# Live stream downloading cancellation should be considered as
|
||||
# correct and expected termination thus all postprocessing
|
||||
# should take place
|
||||
retval = 0
|
||||
self.to_screen('[%s] Interrupted by user' % self.get_basename())
|
||||
|
||||
if retval == 0:
|
||||
fsize = os.path.getsize(encodeFilename(tmpfilename))
|
||||
self.to_screen('\r[%s] Downloaded %s bytes' % (self.get_basename(), fsize))
|
||||
|
@ -3,7 +3,6 @@
|
||||
import base64
|
||||
import io
|
||||
import itertools
|
||||
import os
|
||||
import time
|
||||
|
||||
from .fragment import FragmentFD
|
||||
@ -16,9 +15,7 @@
|
||||
compat_struct_unpack,
|
||||
)
|
||||
from ..utils import (
|
||||
encodeFilename,
|
||||
fix_xml_ampersands,
|
||||
sanitize_open,
|
||||
xpath_text,
|
||||
)
|
||||
|
||||
@ -366,6 +363,7 @@ def real_download(self, filename, info_dict):
|
||||
|
||||
dest_stream = ctx['dest_stream']
|
||||
|
||||
if ctx['complete_frags_downloaded_bytes'] == 0:
|
||||
write_flv_header(dest_stream)
|
||||
if not live:
|
||||
write_metadata_tag(dest_stream, metadata)
|
||||
@ -374,9 +372,12 @@ def real_download(self, filename, info_dict):
|
||||
|
||||
self._start_frag_download(ctx)
|
||||
|
||||
frags_filenames = []
|
||||
frag_index = 0
|
||||
while fragments_list:
|
||||
seg_i, frag_i = fragments_list.pop(0)
|
||||
frag_index += 1
|
||||
if frag_index <= ctx['fragment_index']:
|
||||
continue
|
||||
name = 'Seg%d-Frag%d' % (seg_i, frag_i)
|
||||
query = []
|
||||
if base_url_parsed.query:
|
||||
@ -386,17 +387,10 @@ def real_download(self, filename, info_dict):
|
||||
if info_dict.get('extra_param_to_segment_url'):
|
||||
query.append(info_dict['extra_param_to_segment_url'])
|
||||
url_parsed = base_url_parsed._replace(path=base_url_parsed.path + name, query='&'.join(query))
|
||||
frag_filename = '%s-%s' % (ctx['tmpfilename'], name)
|
||||
try:
|
||||
success = ctx['dl'].download(frag_filename, {
|
||||
'url': url_parsed.geturl(),
|
||||
'http_headers': info_dict.get('http_headers'),
|
||||
})
|
||||
success, down_data = self._download_fragment(ctx, url_parsed.geturl(), info_dict)
|
||||
if not success:
|
||||
return False
|
||||
(down, frag_sanitized) = sanitize_open(frag_filename, 'rb')
|
||||
down_data = down.read()
|
||||
down.close()
|
||||
reader = FlvReader(down_data)
|
||||
while True:
|
||||
try:
|
||||
@ -411,12 +405,8 @@ def real_download(self, filename, info_dict):
|
||||
break
|
||||
raise
|
||||
if box_type == b'mdat':
|
||||
dest_stream.write(box_data)
|
||||
self._append_fragment(ctx, box_data)
|
||||
break
|
||||
if live:
|
||||
os.remove(encodeFilename(frag_sanitized))
|
||||
else:
|
||||
frags_filenames.append(frag_sanitized)
|
||||
except (compat_urllib_error.HTTPError, ) as err:
|
||||
if live and (err.code == 404 or err.code == 410):
|
||||
# We didn't keep up with the live window. Continue
|
||||
@ -436,7 +426,4 @@ def real_download(self, filename, info_dict):
|
||||
|
||||
self._finish_frag_download(ctx)
|
||||
|
||||
for frag_file in frags_filenames:
|
||||
os.remove(encodeFilename(frag_file))
|
||||
|
||||
return True
|
||||
|
@ -2,6 +2,7 @@
|
||||
|
||||
import os
|
||||
import time
|
||||
import json
|
||||
|
||||
from .common import FileDownloader
|
||||
from .http import HttpFD
|
||||
@ -28,15 +29,37 @@ class FragmentFD(FileDownloader):
|
||||
and hlsnative only)
|
||||
skip_unavailable_fragments:
|
||||
Skip unavailable fragments (DASH and hlsnative only)
|
||||
keep_fragments: Keep downloaded fragments on disk after downloading is
|
||||
finished
|
||||
|
||||
For each incomplete fragment download youtube-dl keeps on disk a special
|
||||
bookkeeping file with download state and metadata (in future such files will
|
||||
be used for any incomplete download handled by youtube-dl). This file is
|
||||
used to properly handle resuming, check download file consistency and detect
|
||||
potential errors. The file has a .ytdl extension and represents a standard
|
||||
JSON file of the following format:
|
||||
|
||||
extractor:
|
||||
Dictionary of extractor related data. TBD.
|
||||
|
||||
downloader:
|
||||
Dictionary of downloader related data. May contain following data:
|
||||
current_fragment:
|
||||
Dictionary with current (being downloaded) fragment data:
|
||||
index: 0-based index of current fragment among all fragments
|
||||
fragment_count:
|
||||
Total count of fragments
|
||||
|
||||
This feature is experimental and file format may change in future.
|
||||
"""
|
||||
|
||||
def report_retry_fragment(self, err, fragment_name, count, retries):
|
||||
def report_retry_fragment(self, err, frag_index, count, retries):
|
||||
self.to_screen(
|
||||
'[download] Got server HTTP error: %s. Retrying fragment %s (attempt %d of %s)...'
|
||||
% (error_to_compat_str(err), fragment_name, count, self.format_retries(retries)))
|
||||
'[download] Got server HTTP error: %s. Retrying fragment %d (attempt %d of %s)...'
|
||||
% (error_to_compat_str(err), frag_index, count, self.format_retries(retries)))
|
||||
|
||||
def report_skip_fragment(self, fragment_name):
|
||||
self.to_screen('[download] Skipping fragment %s...' % fragment_name)
|
||||
def report_skip_fragment(self, frag_index):
|
||||
self.to_screen('[download] Skipping fragment %d...' % frag_index)
|
||||
|
||||
def _prepare_url(self, info_dict, url):
|
||||
headers = info_dict.get('http_headers')
|
||||
@ -46,6 +69,51 @@ def _prepare_and_start_frag_download(self, ctx):
|
||||
self._prepare_frag_download(ctx)
|
||||
self._start_frag_download(ctx)
|
||||
|
||||
@staticmethod
def __do_ytdl_file(ctx):
    """Tell whether a .ytdl bookkeeping file should be used for this download.

    Returns False for live downloads and when the temporary file name
    is '-'; True otherwise.
    """
    if ctx['live']:
        return False
    return ctx['tmpfilename'] != '-'
|
||||
|
||||
def _read_ytdl_file(self, ctx):
    """Load the resume position from the .ytdl bookkeeping file into *ctx*.

    Parses the JSON document stored under self.ytdl_filename(ctx['filename'])
    and copies its downloader -> current_fragment -> index value into
    ctx['fragment_index'].
    """
    stream, _ = sanitize_open(self.ytdl_filename(ctx['filename']), 'r')
    try:
        # Parsing may fail on a truncated or malformed file; the stream
        # must be closed in that case as well
        ctx['fragment_index'] = json.loads(stream.read())['downloader']['current_fragment']['index']
    finally:
        stream.close()
|
||||
|
||||
def _write_ytdl_file(self, ctx):
    """Persist the current download position to the .ytdl bookkeeping file.

    Writes a JSON object of the form
    {'downloader': {'current_fragment': {'index': ...}, 'fragment_count': ...}}
    where 'fragment_count' is only included when ctx provides a
    non-None value for it.
    """
    downloader = {
        'current_fragment': {
            'index': ctx['fragment_index'],
        },
    }
    if ctx.get('fragment_count') is not None:
        downloader['fragment_count'] = ctx['fragment_count']
    # Serialize before the file is opened in 'w' mode so that a failure
    # while building the payload does not touch the existing file
    payload = json.dumps({'downloader': downloader})
    frag_index_stream, _ = sanitize_open(self.ytdl_filename(ctx['filename']), 'w')
    try:
        frag_index_stream.write(payload)
    finally:
        # Release the handle even if the write fails
        frag_index_stream.close()
|
||||
|
||||
def _download_fragment(self, ctx, frag_url, info_dict, headers=None):
    """Download a single fragment and return its content.

    The fragment is fetched with ctx['dl'] into a file named
    '<tmpfilename>-Frag<fragment_index>' and then read back.
    *headers*, when given, take precedence over
    info_dict['http_headers'].

    Returns a (success, content) tuple; content is None when the
    download did not succeed. On success the sanitized fragment file
    name is stored in ctx['fragment_filename_sanitized'].
    """
    fragment_filename = '%s-Frag%d' % (ctx['tmpfilename'], ctx['fragment_index'])
    success = ctx['dl'].download(fragment_filename, {
        'url': frag_url,
        'http_headers': headers or info_dict.get('http_headers'),
    })
    if not success:
        return False, None
    down, frag_sanitized = sanitize_open(fragment_filename, 'rb')
    ctx['fragment_filename_sanitized'] = frag_sanitized
    try:
        frag_content = down.read()
    finally:
        # Do not leak the handle if reading the fragment fails
        down.close()
    return True, frag_content
|
||||
|
||||
def _append_fragment(self, ctx, frag_content):
    """Append *frag_content* to the destination stream and clean up.

    Whether or not the write succeeds, the .ytdl bookkeeping file is
    updated (when one is in use for this download), the fragment file
    is removed unless the 'keep_fragments' param is set, and
    ctx['fragment_filename_sanitized'] is dropped.
    """
    try:
        ctx['dest_stream'].write(frag_content)
    finally:
        if self.__do_ytdl_file(ctx):
            self._write_ytdl_file(ctx)
        if not self.params.get('keep_fragments', False):
            # Pass the name through encodeFilename like the other
            # filesystem calls in this downloader do
            os.remove(encodeFilename(ctx['fragment_filename_sanitized']))
        del ctx['fragment_filename_sanitized']
|
||||
|
||||
def _prepare_frag_download(self, ctx):
|
||||
if 'live' not in ctx:
|
||||
ctx['live'] = False
|
||||
@ -66,11 +134,36 @@ def _prepare_frag_download(self, ctx):
|
||||
}
|
||||
)
|
||||
tmpfilename = self.temp_name(ctx['filename'])
|
||||
dest_stream, tmpfilename = sanitize_open(tmpfilename, 'wb')
|
||||
open_mode = 'wb'
|
||||
resume_len = 0
|
||||
|
||||
# Establish possible resume length
|
||||
if os.path.isfile(encodeFilename(tmpfilename)):
|
||||
open_mode = 'ab'
|
||||
resume_len = os.path.getsize(encodeFilename(tmpfilename))
|
||||
|
||||
# Should be initialized before ytdl file check
|
||||
ctx.update({
|
||||
'tmpfilename': tmpfilename,
|
||||
'fragment_index': 0,
|
||||
})
|
||||
|
||||
if self.__do_ytdl_file(ctx):
|
||||
if os.path.isfile(encodeFilename(self.ytdl_filename(ctx['filename']))):
|
||||
self._read_ytdl_file(ctx)
|
||||
else:
|
||||
self._write_ytdl_file(ctx)
|
||||
if ctx['fragment_index'] > 0:
|
||||
assert resume_len > 0
|
||||
|
||||
dest_stream, tmpfilename = sanitize_open(tmpfilename, open_mode)
|
||||
|
||||
ctx.update({
|
||||
'dl': dl,
|
||||
'dest_stream': dest_stream,
|
||||
'tmpfilename': tmpfilename,
|
||||
# Total complete fragments downloaded so far in bytes
|
||||
'complete_frags_downloaded_bytes': resume_len,
|
||||
})
|
||||
|
||||
def _start_frag_download(self, ctx):
|
||||
@ -79,9 +172,9 @@ def _start_frag_download(self, ctx):
|
||||
# hook
|
||||
state = {
|
||||
'status': 'downloading',
|
||||
'downloaded_bytes': 0,
|
||||
'frag_index': 0,
|
||||
'frag_count': total_frags,
|
||||
'downloaded_bytes': ctx['complete_frags_downloaded_bytes'],
|
||||
'fragment_index': ctx['fragment_index'],
|
||||
'fragment_count': total_frags,
|
||||
'filename': ctx['filename'],
|
||||
'tmpfilename': ctx['tmpfilename'],
|
||||
}
|
||||
@ -89,8 +182,6 @@ def _start_frag_download(self, ctx):
|
||||
start = time.time()
|
||||
ctx.update({
|
||||
'started': start,
|
||||
# Total complete fragments downloaded so far in bytes
|
||||
'complete_frags_downloaded_bytes': 0,
|
||||
# Amount of fragment's bytes downloaded by the time of the previous
|
||||
# frag progress hook invocation
|
||||
'prev_frag_downloaded_bytes': 0,
|
||||
@ -106,11 +197,12 @@ def frag_progress_hook(s):
|
||||
if not ctx['live']:
|
||||
estimated_size = (
|
||||
(ctx['complete_frags_downloaded_bytes'] + frag_total_bytes) /
|
||||
(state['frag_index'] + 1) * total_frags)
|
||||
(state['fragment_index'] + 1) * total_frags)
|
||||
state['total_bytes_estimate'] = estimated_size
|
||||
|
||||
if s['status'] == 'finished':
|
||||
state['frag_index'] += 1
|
||||
state['fragment_index'] += 1
|
||||
ctx['fragment_index'] = state['fragment_index']
|
||||
state['downloaded_bytes'] += frag_total_bytes - ctx['prev_frag_downloaded_bytes']
|
||||
ctx['complete_frags_downloaded_bytes'] = state['downloaded_bytes']
|
||||
ctx['prev_frag_downloaded_bytes'] = 0
|
||||
@ -132,6 +224,10 @@ def frag_progress_hook(s):
|
||||
|
||||
def _finish_frag_download(self, ctx):
|
||||
ctx['dest_stream'].close()
|
||||
if self.__do_ytdl_file(ctx):
|
||||
ytdl_filename = encodeFilename(self.ytdl_filename(ctx['filename']))
|
||||
if os.path.isfile(ytdl_filename):
|
||||
os.remove(ytdl_filename)
|
||||
elapsed = time.time() - ctx['started']
|
||||
self.try_rename(ctx['tmpfilename'], ctx['filename'])
|
||||
fsize = os.path.getsize(encodeFilename(ctx['filename']))
|
||||
|
@ -1,6 +1,5 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import os.path
|
||||
import re
|
||||
import binascii
|
||||
try:
|
||||
@ -18,8 +17,6 @@
|
||||
compat_struct_pack,
|
||||
)
|
||||
from ..utils import (
|
||||
encodeFilename,
|
||||
sanitize_open,
|
||||
parse_m3u8_attributes,
|
||||
update_url_query,
|
||||
)
|
||||
@ -103,17 +100,18 @@ def real_download(self, filename, info_dict):
|
||||
media_sequence = 0
|
||||
decrypt_info = {'METHOD': 'NONE'}
|
||||
byte_range = {}
|
||||
frags_filenames = []
|
||||
frag_index = 0
|
||||
for line in s.splitlines():
|
||||
line = line.strip()
|
||||
if line:
|
||||
if not line.startswith('#'):
|
||||
frag_index += 1
|
||||
if frag_index <= ctx['fragment_index']:
|
||||
continue
|
||||
frag_url = (
|
||||
line
|
||||
if re.match(r'^https?://', line)
|
||||
else compat_urlparse.urljoin(man_url, line))
|
||||
frag_name = 'Frag%d' % i
|
||||
frag_filename = '%s-%s' % (ctx['tmpfilename'], frag_name)
|
||||
if extra_query:
|
||||
frag_url = update_url_query(frag_url, extra_query)
|
||||
count = 0
|
||||
@ -122,15 +120,10 @@ def real_download(self, filename, info_dict):
|
||||
headers['Range'] = 'bytes=%d-%d' % (byte_range['start'], byte_range['end'])
|
||||
while count <= fragment_retries:
|
||||
try:
|
||||
success = ctx['dl'].download(frag_filename, {
|
||||
'url': frag_url,
|
||||
'http_headers': headers,
|
||||
})
|
||||
success, frag_content = self._download_fragment(
|
||||
ctx, frag_url, info_dict, headers)
|
||||
if not success:
|
||||
return False
|
||||
down, frag_sanitized = sanitize_open(frag_filename, 'rb')
|
||||
frag_content = down.read()
|
||||
down.close()
|
||||
break
|
||||
except compat_urllib_error.HTTPError as err:
|
||||
# Unavailable (possibly temporary) fragments may be served.
|
||||
@ -139,28 +132,29 @@ def real_download(self, filename, info_dict):
|
||||
# https://github.com/rg3/youtube-dl/issues/10448).
|
||||
count += 1
|
||||
if count <= fragment_retries:
|
||||
self.report_retry_fragment(err, frag_name, count, fragment_retries)
|
||||
self.report_retry_fragment(err, frag_index, count, fragment_retries)
|
||||
if count > fragment_retries:
|
||||
if skip_unavailable_fragments:
|
||||
i += 1
|
||||
media_sequence += 1
|
||||
self.report_skip_fragment(frag_name)
|
||||
self.report_skip_fragment(frag_index)
|
||||
continue
|
||||
self.report_error(
|
||||
'giving up after %s fragment retries' % fragment_retries)
|
||||
return False
|
||||
if decrypt_info['METHOD'] == 'AES-128':
|
||||
iv = decrypt_info.get('IV') or compat_struct_pack('>8xq', media_sequence)
|
||||
decrypt_info['KEY'] = decrypt_info.get('KEY') or self.ydl.urlopen(decrypt_info['URI']).read()
|
||||
frag_content = AES.new(
|
||||
decrypt_info['KEY'], AES.MODE_CBC, iv).decrypt(frag_content)
|
||||
ctx['dest_stream'].write(frag_content)
|
||||
frags_filenames.append(frag_sanitized)
|
||||
self._append_fragment(ctx, frag_content)
|
||||
# We only download the first fragment during the test
|
||||
if test:
|
||||
break
|
||||
i += 1
|
||||
media_sequence += 1
|
||||
elif line.startswith('#EXT-X-KEY'):
|
||||
decrypt_url = decrypt_info.get('URI')
|
||||
decrypt_info = parse_m3u8_attributes(line[11:])
|
||||
if decrypt_info['METHOD'] == 'AES-128':
|
||||
if 'IV' in decrypt_info:
|
||||
@ -170,7 +164,8 @@ def real_download(self, filename, info_dict):
|
||||
man_url, decrypt_info['URI'])
|
||||
if extra_query:
|
||||
decrypt_info['URI'] = update_url_query(decrypt_info['URI'], extra_query)
|
||||
decrypt_info['KEY'] = self.ydl.urlopen(decrypt_info['URI']).read()
|
||||
if decrypt_url != decrypt_info['URI']:
|
||||
decrypt_info['KEY'] = None
|
||||
elif line.startswith('#EXT-X-MEDIA-SEQUENCE'):
|
||||
media_sequence = int(line[22:])
|
||||
elif line.startswith('#EXT-X-BYTERANGE'):
|
||||
@ -183,7 +178,4 @@ def real_download(self, filename, info_dict):
|
||||
|
||||
self._finish_frag_download(ctx)
|
||||
|
||||
for frag_file in frags_filenames:
|
||||
os.remove(encodeFilename(frag_file))
|
||||
|
||||
return True
|
||||
|
@ -1,6 +1,5 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import os
|
||||
import time
|
||||
import struct
|
||||
import binascii
|
||||
@ -8,10 +7,6 @@
|
||||
|
||||
from .fragment import FragmentFD
|
||||
from ..compat import compat_urllib_error
|
||||
from ..utils import (
|
||||
sanitize_open,
|
||||
encodeFilename,
|
||||
)
|
||||
|
||||
|
||||
u8 = struct.Struct(b'>B')
|
||||
@ -225,50 +220,39 @@ def real_download(self, filename, info_dict):
|
||||
|
||||
self._prepare_and_start_frag_download(ctx)
|
||||
|
||||
segments_filenames = []
|
||||
|
||||
fragment_retries = self.params.get('fragment_retries', 0)
|
||||
skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
|
||||
|
||||
track_written = False
|
||||
frag_index = 0
|
||||
for i, segment in enumerate(segments):
|
||||
segment_url = segment['url']
|
||||
segment_name = 'Frag%d' % i
|
||||
target_filename = '%s-%s' % (ctx['tmpfilename'], segment_name)
|
||||
frag_index += 1
|
||||
if frag_index <= ctx['fragment_index']:
|
||||
continue
|
||||
count = 0
|
||||
while count <= fragment_retries:
|
||||
try:
|
||||
success = ctx['dl'].download(target_filename, {
|
||||
'url': segment_url,
|
||||
'http_headers': info_dict.get('http_headers'),
|
||||
})
|
||||
success, frag_content = self._download_fragment(ctx, segment['url'], info_dict)
|
||||
if not success:
|
||||
return False
|
||||
down, target_sanitized = sanitize_open(target_filename, 'rb')
|
||||
down_data = down.read()
|
||||
if not track_written:
|
||||
tfhd_data = extract_box_data(down_data, [b'moof', b'traf', b'tfhd'])
|
||||
tfhd_data = extract_box_data(frag_content, [b'moof', b'traf', b'tfhd'])
|
||||
info_dict['_download_params']['track_id'] = u32.unpack(tfhd_data[4:8])[0]
|
||||
write_piff_header(ctx['dest_stream'], info_dict['_download_params'])
|
||||
track_written = True
|
||||
ctx['dest_stream'].write(down_data)
|
||||
down.close()
|
||||
segments_filenames.append(target_sanitized)
|
||||
self._append_fragment(ctx, frag_content)
|
||||
break
|
||||
except compat_urllib_error.HTTPError as err:
|
||||
count += 1
|
||||
if count <= fragment_retries:
|
||||
self.report_retry_fragment(err, segment_name, count, fragment_retries)
|
||||
self.report_retry_fragment(err, frag_index, count, fragment_retries)
|
||||
if count > fragment_retries:
|
||||
if skip_unavailable_fragments:
|
||||
self.report_skip_fragment(segment_name)
|
||||
self.report_skip_fragment(frag_index)
|
||||
continue
|
||||
self.report_error('giving up after %s fragment retries' % fragment_retries)
|
||||
return False
|
||||
|
||||
self._finish_frag_download(ctx)
|
||||
|
||||
for segment_file in segments_filenames:
|
||||
os.remove(encodeFilename(segment_file))
|
||||
|
||||
return True
|
||||
|
@ -1308,6 +1308,12 @@ class AdobePassIE(InfoExtractor):
|
||||
_USER_AGENT = 'Mozilla/5.0 (X11; Linux i686; rv:47.0) Gecko/20100101 Firefox/47.0'
|
||||
_MVPD_CACHE = 'ap-mvpd'
|
||||
|
||||
def _download_webpage_handle(self, *args, **kwargs):
    """Download a webpage with the geo-verification headers attached.

    Thin wrapper around the parent class's ``_download_webpage_handle``:
    it accepts the same positional and keyword arguments and returns the
    parent's result unchanged. The only difference is that whatever
    ``self.geo_verification_headers()`` returns is merged into the
    ``headers`` keyword argument before delegating.
    """
    # Build a fresh dict instead of updating the caller's object in place:
    # the caller may be passing a shared/constant headers dict, and it must
    # not silently gain the geo-verification headers. ``or {}`` also
    # tolerates an explicit ``headers=None``.
    headers = dict(kwargs.get('headers') or {})
    # Applied last, so geo-verification headers win over caller-supplied
    # headers of the same name (same precedence as before).
    headers.update(self.geo_verification_headers())
    kwargs['headers'] = headers
    return super(AdobePassIE, self)._download_webpage_handle(*args, **kwargs)
|
||||
|
||||
@staticmethod
|
||||
def _get_mvpd_resource(provider_id, title, guid, rating):
|
||||
channel = etree.Element('channel')
|
||||
|
@ -101,10 +101,14 @@ def _real_extract(self, url):
|
||||
for season_url_path in re.findall(r'(?s)<li[^>]+data-href="(/shows/%s/season-\d+)"' % url_parts[0], webpage):
|
||||
entries.append(self.url_result(
|
||||
compat_urlparse.urljoin(url, season_url_path), 'AENetworks'))
|
||||
if entries:
|
||||
return self.playlist_result(
|
||||
entries, self._html_search_meta('aetn:SeriesId', webpage),
|
||||
self._html_search_meta('aetn:SeriesTitle', webpage))
|
||||
elif url_parts_len == 2:
|
||||
else:
|
||||
# single season
|
||||
url_parts_len = 2
|
||||
if url_parts_len == 2:
|
||||
entries = []
|
||||
for episode_item in re.findall(r'(?s)<[^>]+class="[^"]*(?:episode|program)-item[^"]*"[^>]*>', webpage):
|
||||
episode_attributes = extract_attributes(episode_item)
|
||||
@ -112,7 +116,7 @@ def _real_extract(self, url):
|
||||
url, episode_attributes['data-canonical'])
|
||||
entries.append(self.url_result(
|
||||
episode_url, 'AENetworks',
|
||||
episode_attributes['data-videoid']))
|
||||
episode_attributes.get('data-videoid') or episode_attributes.get('data-video-id')))
|
||||
return self.playlist_result(
|
||||
entries, self._html_search_meta('aetn:SeasonId', webpage))
|
||||
|
||||
|
@ -207,11 +207,10 @@ def _real_extract(self, url):
|
||||
file_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls',
|
||||
note='Downloading part %d m3u8 information' % file_num)
|
||||
title = title if one else '%s (part %d)' % (title, file_num)
|
||||
file_info = common_entry.copy()
|
||||
file_info.update({
|
||||
'id': format_id,
|
||||
'title': title,
|
||||
'title': title if one else '%s (part %d)' % (title, file_num),
|
||||
'upload_date': upload_date,
|
||||
'duration': file_duration,
|
||||
'formats': formats,
|
||||
|
@ -7,15 +7,19 @@
|
||||
parse_iso8601,
|
||||
mimetype2ext,
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
class AMPIE(InfoExtractor):
|
||||
# parse Akamai Adaptive Media Player feed
|
||||
def _extract_feed_info(self, url):
|
||||
item = self._download_json(
|
||||
feed = self._download_json(
|
||||
url, None, 'Downloading Akamai AMP feed',
|
||||
'Unable to download Akamai AMP feed')['channel']['item']
|
||||
'Unable to download Akamai AMP feed')
|
||||
item = feed.get('channel', {}).get('item')
|
||||
if not item:
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, feed['error']))
|
||||
|
||||
video_id = item['guid']
|
||||
|
||||
@ -30,9 +34,12 @@ def get_media_node(name, default=None):
|
||||
if isinstance(media_thumbnail, dict):
|
||||
media_thumbnail = [media_thumbnail]
|
||||
for thumbnail_data in media_thumbnail:
|
||||
thumbnail = thumbnail_data['@attributes']
|
||||
thumbnail = thumbnail_data.get('@attributes', {})
|
||||
thumbnail_url = thumbnail.get('url')
|
||||
if not thumbnail_url:
|
||||
continue
|
||||
thumbnails.append({
|
||||
'url': self._proto_relative_url(thumbnail['url'], 'http:'),
|
||||
'url': self._proto_relative_url(thumbnail_url, 'http:'),
|
||||
'width': int_or_none(thumbnail.get('width')),
|
||||
'height': int_or_none(thumbnail.get('height')),
|
||||
})
|
||||
@ -43,9 +50,14 @@ def get_media_node(name, default=None):
|
||||
if isinstance(media_subtitle, dict):
|
||||
media_subtitle = [media_subtitle]
|
||||
for subtitle_data in media_subtitle:
|
||||
subtitle = subtitle_data['@attributes']
|
||||
lang = subtitle.get('lang') or 'en'
|
||||
subtitles[lang] = [{'url': subtitle['href']}]
|
||||
subtitle = subtitle_data.get('@attributes', {})
|
||||
subtitle_href = subtitle.get('href')
|
||||
if not subtitle_href:
|
||||
continue
|
||||
subtitles.setdefault(subtitle.get('lang') or 'en', []).append({
|
||||
'url': subtitle_href,
|
||||
'ext': mimetype2ext(subtitle.get('type')) or determine_ext(subtitle_href),
|
||||
})
|
||||
|
||||
formats = []
|
||||
media_content = get_media_node('content')
|
||||
|
@ -5,6 +5,7 @@
|
||||
import hashlib
|
||||
import json
|
||||
import random
|
||||
import re
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
@ -16,6 +17,7 @@
|
||||
intlist_to_bytes,
|
||||
int_or_none,
|
||||
strip_jsonp,
|
||||
unescapeHTML,
|
||||
)
|
||||
|
||||
|
||||
@ -26,6 +28,8 @@ def md5_text(s):
|
||||
|
||||
|
||||
class AnvatoIE(InfoExtractor):
|
||||
_VALID_URL = r'anvato:(?P<access_key_or_mcp>[^:]+):(?P<id>\d+)'
|
||||
|
||||
# Copied from anvplayer.min.js
|
||||
_ANVACK_TABLE = {
|
||||
'nbcu_nbcd_desktop_web_prod_93d8ead38ce2024f8f544b78306fbd15895ae5e6': 'NNemUkySjxLyPTKvZRiGntBIjEyK8uqicjMakIaQ',
|
||||
@ -114,6 +118,22 @@ class AnvatoIE(InfoExtractor):
|
||||
'nbcu_nbcd_desktop_web_prod_93d8ead38ce2024f8f544b78306fbd15895ae5e6_secure': 'NNemUkySjxLyPTKvZRiGntBIjEyK8uqicjMakIaQ'
|
||||
}
|
||||
|
||||
_MCP_TO_ACCESS_KEY_TABLE = {
|
||||
'qa': 'anvato_mcpqa_demo_web_stage_18b55e00db5a13faa8d03ae6e41f6f5bcb15b922',
|
||||
'lin': 'anvato_mcp_lin_web_prod_4c36fbfd4d8d8ecae6488656e21ac6d1ac972749',
|
||||
'univison': 'anvato_mcp_univision_web_prod_37fe34850c99a3b5cdb71dab10a417dd5cdecafa',
|
||||
'uni': 'anvato_mcp_univision_web_prod_37fe34850c99a3b5cdb71dab10a417dd5cdecafa',
|
||||
'dev': 'anvato_mcp_fs2go_web_prod_c7b90a93e171469cdca00a931211a2f556370d0a',
|
||||
'sps': 'anvato_mcp_sps_web_prod_54bdc90dd6ba21710e9f7074338365bba28da336',
|
||||
'spsstg': 'anvato_mcp_sps_web_prod_54bdc90dd6ba21710e9f7074338365bba28da336',
|
||||
'anv': 'anvato_mcp_anv_web_prod_791407490f4c1ef2a4bcb21103e0cb1bcb3352b3',
|
||||
'gray': 'anvato_mcp_gray_web_prod_4c10f067c393ed8fc453d3930f8ab2b159973900',
|
||||
'hearst': 'anvato_mcp_hearst_web_prod_5356c3de0fc7c90a3727b4863ca7fec3a4524a99',
|
||||
'cbs': 'anvato_mcp_cbs_web_prod_02f26581ff80e5bda7aad28226a8d369037f2cbe',
|
||||
'telemundo': 'anvato_mcp_telemundo_web_prod_c5278d51ad46fda4b6ca3d0ea44a7846a054f582'
|
||||
}
|
||||
|
||||
_ANVP_RE = r'<script[^>]+\bdata-anvp\s*=\s*(["\'])(?P<anvp>(?:(?!\1).)+)\1'
|
||||
_AUTH_KEY = b'\x31\xc2\x42\x84\x9e\x73\xa0\xce'
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
@ -178,12 +198,7 @@ def _get_anvato_videos(self, access_key, video_id):
|
||||
}
|
||||
|
||||
if ext == 'm3u8' or media_format in ('m3u8', 'm3u8-variant'):
|
||||
# Not using _extract_m3u8_formats here as individual media
|
||||
# playlists are also included in published_urls.
|
||||
if tbr is None:
|
||||
formats.append(self._m3u8_meta_format(video_url, ext='mp4', m3u8_id='hls'))
|
||||
continue
|
||||
else:
|
||||
if tbr is not None:
|
||||
a_format.update({
|
||||
'format_id': '-'.join(filter(None, ['hls', compat_str(tbr)])),
|
||||
'ext': 'mp4',
|
||||
@ -222,9 +237,42 @@ def _get_anvato_videos(self, access_key, video_id):
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
@staticmethod
def _extract_urls(ie, webpage, video_id):
    """Collect ``anvato:`` url_result entries for players embedded in a page.

    Scans ``webpage`` for every match of ``AnvatoIE._ANVP_RE`` (a
    ``<script ... data-anvp=...>`` tag), parses the attribute value as JSON
    through ``ie._parse_json`` (non-fatal, HTML-unescaped first) and turns
    each usable player config into ``ie.url_result(...)``.

    A player config is skipped when:
    * it cannot be parsed or is empty,
    * its ``video`` value is not an all-digit string,
    * it has no ``accessKey`` and its ``mcp`` value (lower-cased) is not in
      ``AnvatoIE._MCP_TO_ACCESS_KEY_TABLE``.

    Returns the list of entries (possibly empty).
    """
    results = []
    for match in re.finditer(AnvatoIE._ANVP_RE, webpage):
        player = ie._parse_json(
            match.group('anvp'), video_id,
            transform_source=unescapeHTML, fatal=False)
        if not player:
            continue

        video = player.get('video')
        if not (isinstance(video, compat_str) and video.isdigit()):
            continue

        key = player.get('accessKey')
        if not key:
            # No explicit access key: try to derive one from the MCP id.
            mcp = player.get('mcp')
            key = (
                AnvatoIE._MCP_TO_ACCESS_KEY_TABLE.get(mcp.lower())
                if mcp else None)
        if not key:
            continue

        anvato_url = 'anvato:%s:%s' % (key, video)
        results.append(ie.url_result(
            anvato_url, ie=AnvatoIE.ie_key(), video_id=video))
    return results
|
||||
|
||||
def _extract_anvato_videos(self, webpage, video_id):
|
||||
anvplayer_data = self._parse_json(self._html_search_regex(
|
||||
r'<script[^>]+data-anvp=\'([^\']+)\'', webpage,
|
||||
'Anvato player data'), video_id)
|
||||
anvplayer_data = self._parse_json(
|
||||
self._html_search_regex(
|
||||
self._ANVP_RE, webpage, 'Anvato player data', group='anvp'),
|
||||
video_id)
|
||||
return self._get_anvato_videos(
|
||||
anvplayer_data['accessKey'], anvplayer_data['video'])
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
access_key, video_id = mobj.group('access_key_or_mcp', 'id')
|
||||
if access_key not in self._ANVACK_TABLE:
|
||||
access_key = self._MCP_TO_ACCESS_KEY_TABLE[access_key]
|
||||
return self._get_anvato_videos(access_key, video_id)
|
||||
|
@ -12,13 +12,13 @@ class AppleConnectIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://itunes\.apple\.com/\w{0,2}/?post/idsa\.(?P<id>[\w-]+)'
|
||||
_TEST = {
|
||||
'url': 'https://itunes.apple.com/us/post/idsa.4ab17a39-2720-11e5-96c5-a5b38f6c42d3',
|
||||
'md5': '10d0f2799111df4cb1c924520ca78f98',
|
||||
'md5': 'e7c38568a01ea45402570e6029206723',
|
||||
'info_dict': {
|
||||
'id': '4ab17a39-2720-11e5-96c5-a5b38f6c42d3',
|
||||
'ext': 'm4v',
|
||||
'title': 'Energy',
|
||||
'uploader': 'Drake',
|
||||
'thumbnail': 'http://is5.mzstatic.com/image/thumb/Video5/v4/78/61/c5/7861c5fa-ad6d-294b-1464-cf7605b911d6/source/1920x1080sr.jpg',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'upload_date': '20150710',
|
||||
'timestamp': 1436545535,
|
||||
},
|
||||
|
@ -70,7 +70,8 @@ class AppleTrailersIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://trailers.apple.com/trailers/magnolia/blackthorn/',
|
||||
'info_dict': {
|
||||
'id': 'blackthorn',
|
||||
'id': '4489',
|
||||
'title': 'Blackthorn',
|
||||
},
|
||||
'playlist_mincount': 2,
|
||||
'expected_warnings': ['Unable to download JSON metadata'],
|
||||
@ -261,7 +262,7 @@ class AppleTrailersSectionIE(InfoExtractor):
|
||||
'title': 'Most Popular',
|
||||
'id': 'mostpopular',
|
||||
},
|
||||
'playlist_mincount': 80,
|
||||
'playlist_mincount': 30,
|
||||
}, {
|
||||
'url': 'http://trailers.apple.com/#section=moviestudios',
|
||||
'info_dict': {
|
||||
|
@ -24,12 +24,12 @@ class ArchiveOrgIE(InfoExtractor):
|
||||
}
|
||||
}, {
|
||||
'url': 'https://archive.org/details/Cops1922',
|
||||
'md5': 'bc73c8ab3838b5a8fc6c6651fa7b58ba',
|
||||
'md5': '0869000b4ce265e8ca62738b336b268a',
|
||||
'info_dict': {
|
||||
'id': 'Cops1922',
|
||||
'ext': 'mp4',
|
||||
'title': 'Buster Keaton\'s "Cops" (1922)',
|
||||
'description': 'md5:b4544662605877edd99df22f9620d858',
|
||||
'description': 'md5:89e7c77bf5d965dd5c0372cfb49470f6',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://archive.org/embed/XD300-23_68HighlightsAResearchCntAugHumanIntellect',
|
||||
|
@ -180,7 +180,7 @@ def _extract_from_json_url(self, json_url, video_id, lang, title=None):
|
||||
|
||||
class ArteTVPlus7IE(ArteTVBaseIE):
|
||||
IE_NAME = 'arte.tv:+7'
|
||||
_VALID_URL = r'https?://(?:(?:www|sites)\.)?arte\.tv/[^/]+/(?P<lang>fr|de|en|es)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||
_VALID_URL = r'https?://(?:(?:www|sites)\.)?arte\.tv/(?:[^/]+/)?(?P<lang>fr|de|en|es)/(?:videos/)?(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.arte.tv/guide/de/sendungen/XEN/xenius/?vid=055918-015_PLUS7-D',
|
||||
@ -188,6 +188,9 @@ class ArteTVPlus7IE(ArteTVBaseIE):
|
||||
}, {
|
||||
'url': 'http://sites.arte.tv/karambolage/de/video/karambolage-22',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.arte.tv/de/videos/048696-000-A/der-kluge-bauch-unser-zweites-gehirn',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
|
@ -36,7 +36,7 @@ class AtresPlayerIE(InfoExtractor):
|
||||
},
|
||||
{
|
||||
'url': 'http://www.atresplayer.com/television/especial/videoencuentros/temporada-1/capitulo-112-david-bustamante_2014121600375.html',
|
||||
'md5': '0d0e918533bbd4b263f2de4d197d4aac',
|
||||
'md5': '6e52cbb513c405e403dbacb7aacf8747',
|
||||
'info_dict': {
|
||||
'id': 'capitulo-112-david-bustamante',
|
||||
'ext': 'flv',
|
||||
|
@ -16,7 +16,7 @@ class AudioBoomIE(InfoExtractor):
|
||||
'title': '3/09/2016 Czaban Hour 3',
|
||||
'description': 'Guest: Nate Davis - NFL free agency, Guest: Stan Gans',
|
||||
'duration': 2245.72,
|
||||
'uploader': 'Steve Czaban',
|
||||
'uploader': 'SB Nation A.M.',
|
||||
'uploader_url': r're:https?://(?:www\.)?audioboom\.com/channel/steveczabanyahoosportsradio',
|
||||
}
|
||||
}, {
|
||||
|
@ -1,140 +0,0 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
sanitized_Request,
|
||||
)
|
||||
|
||||
|
||||
class AzubuIE(InfoExtractor):
    """Extractor for recorded Azubu videos (``/<channel>#!/play/<id>`` URLs)."""

    # The dots of "uol.com.br" are escaped so that only the literal host
    # matches (an unescaped "." would accept any character there).
    _VALID_URL = r'https?://(?:www\.)?azubu\.(?:tv|uol\.com\.br)/[^/]+#!/play/(?P<id>\d+)'
    _TESTS = [
        {
            'url': 'http://www.azubu.tv/GSL#!/play/15575/2014-hot6-cup-last-big-match-ro8-day-1',
            'md5': 'a88b42fcf844f29ad6035054bd9ecaf4',
            'info_dict': {
                'id': '15575',
                'ext': 'mp4',
                'title': '2014 HOT6 CUP LAST BIG MATCH Ro8 Day 1',
                'description': 'md5:d06bdea27b8cc4388a90ad35b5c66c01',
                'thumbnail': r're:^https?://.*\.jpe?g',
                'timestamp': 1417523507.334,
                'upload_date': '20141202',
                'duration': 9988.7,
                'uploader': 'GSL',
                'uploader_id': 414310,
                'view_count': int,
            },
        },
        {
            'url': 'http://www.azubu.tv/FnaticTV#!/play/9344/-fnatic-at-worlds-2014:-toyz---%22i-love-rekkles,-he-has-amazing-mechanics%22-',
            'md5': 'b72a871fe1d9f70bd7673769cdb3b925',
            'info_dict': {
                'id': '9344',
                'ext': 'mp4',
                'title': 'Fnatic at Worlds 2014: Toyz - "I love Rekkles, he has amazing mechanics"',
                'description': 'md5:4a649737b5f6c8b5c5be543e88dc62af',
                'thumbnail': r're:^https?://.*\.jpe?g',
                'timestamp': 1410530893.320,
                'upload_date': '20140912',
                'duration': 172.385,
                'uploader': 'FnaticTV',
                'uploader_id': 272749,
                'view_count': int,
            },
            'skip': 'Channel offline',
        },
    ]

    def _real_extract(self, url):
        """Build the info dict for a single recorded video.

        Downloads ``http://www.azubu.tv/api/video/<id>`` and reads the
        metadata from its ``data`` object. ``title`` and ``stream_params``
        are required; every other field is optional.

        Raises ExtractorError (expected) when there are no renditions and
        the API reports the channel as not live.
        """
        video_id = self._match_id(url)

        data = self._download_json(
            'http://www.azubu.tv/api/video/%s' % video_id, video_id)['data']

        title = data['title'].strip()
        # ``or {}`` rather than a .get() default: a JSON null for "user"
        # must not break the optional uploader lookups below.
        user = data.get('user') or {}

        # stream_params arrives as a JSON document encoded in a string.
        stream_params = json.loads(data['stream_params'])

        renditions = stream_params.get('renditions') or []
        # The full-length file, when present, is offered as one more format.
        video = stream_params.get('FLVFullLength') or stream_params.get('videoFullLength')
        if video:
            renditions.append(video)

        if not renditions and not (user.get('channel') or {}).get('is_live', True):
            raise ExtractorError('%s said: channel is offline.' % self.IE_NAME, expected=True)

        # Only the URL is essential for a format; the remaining rendition
        # fields are optional metadata, so a missing one must not abort the
        # whole extraction with a KeyError.
        formats = [{
            'url': fmt['url'],
            'width': fmt.get('frameWidth'),
            'height': fmt.get('frameHeight'),
            'vbr': float_or_none(fmt.get('encodingRate'), 1000),
            'filesize': fmt.get('size'),
            'vcodec': fmt.get('videoCodec'),
            'container': fmt.get('videoContainer'),
        } for fmt in renditions if fmt.get('url')]
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'description': data.get('description'),
            'thumbnail': data.get('thumbnail'),
            # creationDate/length are scaled down by 1000
            # (presumably milliseconds -> seconds; confirm against the API).
            'timestamp': float_or_none(stream_params.get('creationDate'), 1000),
            'duration': float_or_none(stream_params.get('length'), 1000),
            'uploader': user.get('username'),
            'uploader_id': user.get('id'),
            'view_count': data.get('view_count'),
            'formats': formats,
        }
|
||||
|
||||
|
||||
class AzubuLiveIE(InfoExtractor):
    """Extractor for Azubu live channels (``azubu.tv/<channel>`` URLs)."""

    # The dots of "uol.com.br" are escaped so that only the literal host
    # matches (an unescaped "." would accept any character there).
    _VALID_URL = r'https?://(?:www\.)?azubu\.(?:tv|uol\.com\.br)/(?P<id>[^/]+)$'

    _TESTS = [{
        'url': 'http://www.azubu.tv/MarsTVMDLen',
        'only_matching': True,
    }, {
        'url': 'http://azubu.uol.com.br/adolfz',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for a channel's current live stream.

        Looks up the channel's last video through the Azubu API, then asks
        the Brightcove playback API for its sources and uses the first
        source whose container is ``M2TS`` as the m3u8 URL.

        Raises ExtractorError when the channel is not streaming (expected)
        or when Brightcove returns no usable M2TS source.
        """
        user = self._match_id(url)

        info = self._download_json(
            'http://api.azubu.tv/public/modules/last-video/{0}/info'.format(user),
            user)['data']
        if info['type'] != 'STREAM':
            raise ExtractorError('{0} is not streaming live'.format(user), expected=True)

        req = sanitized_Request(
            'https://edge-elb.api.brightcove.com/playback/v1/accounts/3361910549001/videos/ref:' + info['reference_id'])
        # The Brightcove policy key is passed through the Accept header.
        req.add_header('Accept', 'application/json;pk=BCpkADawqM1gvI0oGWg8dxQHlgT8HkdE2LnAlWAZkOlznO39bSZX726u4JqnDsK3MDXcO01JxXK2tZtJbgQChxgaFzEVdHRjaDoxaOu8hHOO8NYhwdxw9BzvgkvLUlpbDNUuDoc4E4wxDToV')
        bc_info = self._download_json(req, user)

        # next() gets an explicit default: without it a response that has no
        # M2TS source would escape as a bare StopIteration instead of a
        # proper extractor error.
        m3u8_url = next(
            (source.get('src') for source in bc_info.get('sources') or []
             if source.get('container') == 'M2TS'),
            None)
        if not m3u8_url:
            raise ExtractorError('Unable to find an M2TS source for {0}'.format(user))

        formats = self._extract_m3u8_formats(m3u8_url, user, ext='mp4')
        self._sort_formats(formats)

        return {
            'id': info['id'],
            'title': self._live_title(info['title']),
            'uploader_id': user,
            'formats': formats,
            'is_live': True,
            # The poster is optional metadata; its absence must not fail
            # the extraction.
            'thumbnail': bc_info.get('poster'),
        }
|
@ -34,12 +34,12 @@ class BandcampIE(InfoExtractor):
|
||||
'_skip': 'There is a limit of 200 free downloads / month for the test song'
|
||||
}, {
|
||||
'url': 'http://benprunty.bandcamp.com/track/lanius-battle',
|
||||
'md5': '73d0b3171568232574e45652f8720b5c',
|
||||
'md5': '0369ace6b939f0927e62c67a1a8d9fa7',
|
||||
'info_dict': {
|
||||
'id': '2650410135',
|
||||
'ext': 'mp3',
|
||||
'title': 'Lanius (Battle)',
|
||||
'uploader': 'Ben Prunty Music',
|
||||
'ext': 'aiff',
|
||||
'title': 'Ben Prunty - Lanius (Battle)',
|
||||
'uploader': 'Ben Prunty',
|
||||
},
|
||||
}]
|
||||
|
||||
|
@ -16,7 +16,7 @@ class BeegIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?beeg\.com/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'http://beeg.com/5416503',
|
||||
'md5': '46c384def73b33dbc581262e5ee67cef',
|
||||
'md5': 'a1a1b1a8bc70a89e49ccfd113aed0820',
|
||||
'info_dict': {
|
||||
'id': '5416503',
|
||||
'ext': 'mp4',
|
||||
|
@ -35,7 +35,7 @@ class BleacherReportIE(InfoExtractor):
|
||||
'title': 'Aussie Golfers Get Fright of Their Lives After Being Chased by Angry Kangaroo',
|
||||
'timestamp': 1446839961,
|
||||
'uploader': 'Sean Fay',
|
||||
'description': 'md5:825e94e0f3521df52fa83b2ed198fa20',
|
||||
'description': 'md5:b1601e2314c4d8eec23b6eafe086a757',
|
||||
'uploader_id': 6466954,
|
||||
'upload_date': '20151011',
|
||||
},
|
||||
@ -90,17 +90,13 @@ class BleacherReportCMSIE(AMPIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?bleacherreport\.com/video_embed\?id=(?P<id>[0-9a-f-]{36})'
|
||||
_TESTS = [{
|
||||
'url': 'http://bleacherreport.com/video_embed?id=8fd44c2f-3dc5-4821-9118-2c825a98c0e1',
|
||||
'md5': '8c2c12e3af7805152675446c905d159b',
|
||||
'md5': '2e4b0a997f9228ffa31fada5c53d1ed1',
|
||||
'info_dict': {
|
||||
'id': '8fd44c2f-3dc5-4821-9118-2c825a98c0e1',
|
||||
'ext': 'mp4',
|
||||
'ext': 'flv',
|
||||
'title': 'Cena vs. Rollins Would Expose the Heavyweight Division',
|
||||
'description': 'md5:984afb4ade2f9c0db35f3267ed88b36e',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@ -77,7 +77,7 @@ class BRIE(InfoExtractor):
|
||||
'description': 'md5:bb659990e9e59905c3d41e369db1fbe3',
|
||||
'duration': 893,
|
||||
'uploader': 'Eva Maria Steimle',
|
||||
'upload_date': '20140117',
|
||||
'upload_date': '20170208',
|
||||
}
|
||||
},
|
||||
]
|
||||
|
@ -131,6 +131,12 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
},
|
||||
'playlist_mincount': 10,
|
||||
},
|
||||
{
|
||||
# playerID inferred from bcpid
|
||||
# from http://www.un.org/chinese/News/story.asp?NewsID=27724
|
||||
'url': 'https://link.brightcove.com/services/player/bcpid1722935254001/?bctid=5360463607001&autoStart=false&secureConnections=true&width=650&height=350',
|
||||
'only_matching': True, # Tested in GenericIE
|
||||
}
|
||||
]
|
||||
FLV_VCODECS = {
|
||||
1: 'SORENSON',
|
||||
@ -266,9 +272,13 @@ def _extract_brightcove_urls(cls, webpage):
|
||||
if matches:
|
||||
return list(filter(None, [cls._build_brighcove_url(m) for m in matches]))
|
||||
|
||||
matches = re.findall(r'(customBC\.createVideo\(.+?\);)', webpage)
|
||||
if matches:
|
||||
return list(filter(None, [
|
||||
cls._build_brighcove_url_from_js(custom_bc)
|
||||
for custom_bc in re.findall(r'(customBC\.createVideo\(.+?\);)', webpage)]))
|
||||
for custom_bc in matches]))
|
||||
return [src for _, src in re.findall(
|
||||
r'<iframe[^>]+src=([\'"])((?:https?:)?//link\.brightcove\.com/services/player/(?!\1).+)\1', webpage)]
|
||||
|
||||
def _real_extract(self, url):
|
||||
url, smuggled_data = unsmuggle_url(url, {})
|
||||
@ -285,6 +295,10 @@ def _real_extract(self, url):
|
||||
if videoPlayer:
|
||||
# We set the original url as the default 'Referer' header
|
||||
referer = smuggled_data.get('Referer', url)
|
||||
if 'playerID' not in query:
|
||||
mobj = re.search(r'/bcpid(\d+)', url)
|
||||
if mobj is not None:
|
||||
query['playerID'] = [mobj.group(1)]
|
||||
return self._get_video_info(
|
||||
videoPlayer[0], query, referer=referer)
|
||||
elif 'playerKey' in query:
|
||||
@ -484,8 +498,8 @@ class BrightcoveNewIE(InfoExtractor):
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def _extract_url(webpage):
|
||||
urls = BrightcoveNewIE._extract_urls(webpage)
|
||||
def _extract_url(ie, webpage):
|
||||
urls = BrightcoveNewIE._extract_urls(ie, webpage)
|
||||
return urls[0] if urls else None
|
||||
|
||||
@staticmethod
|
||||
@ -508,7 +522,7 @@ def _extract_urls(ie, webpage):
|
||||
# [2] looks like:
|
||||
for video, script_tag, account_id, player_id, embed in re.findall(
|
||||
r'''(?isx)
|
||||
(<video\s+[^>]+>)
|
||||
(<video\s+[^>]*\bdata-video-id\s*=\s*['"]?[^>]+>)
|
||||
(?:.*?
|
||||
(<script[^>]+
|
||||
src=["\'](?:https?:)?//players\.brightcove\.net/
|
||||
|
@ -16,13 +16,10 @@ class Canalc2IE(InfoExtractor):
|
||||
'md5': '060158428b650f896c542dfbb3d6487f',
|
||||
'info_dict': {
|
||||
'id': '12163',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Terrasses du Numérique',
|
||||
'duration': 122,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True, # Requires rtmpdump
|
||||
}
|
||||
}, {
|
||||
'url': 'http://archives-canalc2.u-strasbg.fr/video.asp?idVideo=11427&voir=oui',
|
||||
'only_matching': True,
|
||||
|
@ -96,6 +96,7 @@ class CBCIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'title': 'Keep Rover active during the deep freeze with doggie pushups and other fun indoor tasks',
|
||||
'id': 'dog-indoor-exercise-winter-1.3928238',
|
||||
'description': 'md5:c18552e41726ee95bd75210d1ca9194c',
|
||||
},
|
||||
'playlist_mincount': 6,
|
||||
}]
|
||||
@ -165,12 +166,11 @@ class CBCPlayerIE(InfoExtractor):
|
||||
'uploader': 'CBCC-NEW',
|
||||
},
|
||||
}, {
|
||||
# available only when we add `formats=MPEG4,FLV,MP3` to theplatform url
|
||||
'url': 'http://www.cbc.ca/player/play/2164402062',
|
||||
'md5': '17a61eb813539abea40618d6323a7f82',
|
||||
'md5': '33fcd8f6719b9dd60a5e73adcb83b9f6',
|
||||
'info_dict': {
|
||||
'id': '2164402062',
|
||||
'ext': 'flv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Cancer survivor four times over',
|
||||
'description': 'Tim Mayer has beaten three different forms of cancer four times in five years.',
|
||||
'timestamp': 1320410746,
|
||||
|
@ -60,8 +60,8 @@ class CBSLocalIE(AnvatoIE):
|
||||
'title': 'A Very Blue Anniversary',
|
||||
'description': 'CBS2’s Cindy Hsu has more.',
|
||||
'thumbnail': 're:^https?://.*',
|
||||
'timestamp': 1479962220,
|
||||
'upload_date': '20161124',
|
||||
'timestamp': int,
|
||||
'upload_date': r're:^\d{8}$',
|
||||
'uploader': 'CBS',
|
||||
'subtitles': {
|
||||
'en': 'mincount:5',
|
||||
|
@ -9,7 +9,10 @@
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
multipart_encode,
|
||||
parse_duration,
|
||||
random_birthday,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
@ -27,7 +30,8 @@ class CDAIE(InfoExtractor):
|
||||
'description': 'md5:269ccd135d550da90d1662651fcb9772',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'average_rating': float,
|
||||
'duration': 39
|
||||
'duration': 39,
|
||||
'age_limit': 0,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.cda.pl/video/57413289',
|
||||
@ -41,13 +45,41 @@ class CDAIE(InfoExtractor):
|
||||
'uploader': 'crash404',
|
||||
'view_count': int,
|
||||
'average_rating': float,
|
||||
'duration': 137
|
||||
'duration': 137,
|
||||
'age_limit': 0,
|
||||
}
|
||||
}, {
|
||||
# Age-restricted
|
||||
'url': 'http://www.cda.pl/video/1273454c4',
|
||||
'info_dict': {
|
||||
'id': '1273454c4',
|
||||
'ext': 'mp4',
|
||||
'title': 'Bronson (2008) napisy HD 1080p',
|
||||
'description': 'md5:1b6cb18508daf2dc4e0fa4db77fec24c',
|
||||
'height': 1080,
|
||||
'uploader': 'boniek61',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 5554,
|
||||
'age_limit': 18,
|
||||
'view_count': int,
|
||||
'average_rating': float,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://ebd.cda.pl/0x0/5749950c',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _download_age_confirm_page(self, url, video_id, *args, **kwargs):
    """Submit the birthday-validation form and return the resulting page.

    Builds a multipart form from ``random_birthday('rok', 'miesiac',
    'dzien')`` plus the ``return``/``module``/``module_id`` fields and sends
    it to ``/a/validatebirth`` (resolved against ``url``) with ``url`` as
    the Referer. Extra positional and keyword arguments are forwarded to
    ``self._download_webpage``, whose result is returned.
    """
    birthday_form = random_birthday('rok', 'miesiac', 'dzien')
    birthday_form['return'] = url
    birthday_form['module'] = 'video'
    birthday_form['module_id'] = video_id

    payload, content_type = multipart_encode(birthday_form)

    validate_url = urljoin(url, '/a/validatebirth')
    request_headers = {
        'Referer': url,
        'Content-Type': content_type,
    }
    return self._download_webpage(
        validate_url, video_id, *args,
        data=payload, headers=request_headers, **kwargs)
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
self._set_cookie('cda.pl', 'cda.player', 'html5')
|
||||
@ -57,6 +89,13 @@ def _real_extract(self, url):
|
||||
if 'Ten film jest dostępny dla użytkowników premium' in webpage:
|
||||
raise ExtractorError('This video is only available for premium users.', expected=True)
|
||||
|
||||
need_confirm_age = False
|
||||
if self._html_search_regex(r'(<form[^>]+action="/a/validatebirth")',
|
||||
webpage, 'birthday validate form', default=None):
|
||||
webpage = self._download_age_confirm_page(
|
||||
url, video_id, note='Confirming age')
|
||||
need_confirm_age = True
|
||||
|
||||
formats = []
|
||||
|
||||
uploader = self._search_regex(r'''(?x)
|
||||
@ -81,6 +120,7 @@ def _real_extract(self, url):
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'formats': formats,
|
||||
'duration': None,
|
||||
'age_limit': 18 if need_confirm_age else 0,
|
||||
}
|
||||
|
||||
def extract_format(page, version):
|
||||
@ -121,7 +161,12 @@ def extract_format(page, version):
|
||||
for href, resolution in re.findall(
|
||||
r'<a[^>]+data-quality="[^"]+"[^>]+href="([^"]+)"[^>]+class="quality-btn"[^>]*>([0-9]+p)',
|
||||
webpage):
|
||||
webpage = self._download_webpage(
|
||||
if need_confirm_age:
|
||||
handler = self._download_age_confirm_page
|
||||
else:
|
||||
handler = self._download_webpage
|
||||
|
||||
webpage = handler(
|
||||
self._BASE_URL + href, video_id,
|
||||
'Downloading %s version information' % resolution, fatal=False)
|
||||
if not webpage:
|
||||
@ -129,6 +174,7 @@ def extract_format(page, version):
|
||||
# invalid version is requested.
|
||||
self.report_warning('Unable to download %s version information' % resolution)
|
||||
continue
|
||||
|
||||
extract_format(webpage, resolution)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
@ -12,7 +12,7 @@ class ClipfishIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?clipfish\.de/(?:[^/]+/)+video/(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.clipfish.de/special/ugly-americans/video/4343170/s01-e01-ugly-americans-date-in-der-hoelle/',
|
||||
'md5': '720563e467b86374c194bdead08d207d',
|
||||
'md5': 'b9a5dc46294154c1193e2d10e0c95693',
|
||||
'info_dict': {
|
||||
'id': '4343170',
|
||||
'ext': 'mp4',
|
||||
|
@ -21,7 +21,7 @@ class CollegeRamaIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'Een nieuwe wereld: waarden, bewustzijn en techniek van de mensheid 2.0.',
|
||||
'description': '',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'thumbnail': r're:^https?://.*\.jpg(?:\?.*?)?$',
|
||||
'duration': 7713.088,
|
||||
'timestamp': 1413309600,
|
||||
'upload_date': '20141014',
|
||||
@ -35,6 +35,7 @@ class CollegeRamaIE(InfoExtractor):
|
||||
'ext': 'wmv',
|
||||
'title': '64ste Vakantiecursus: Afvalwater',
|
||||
'description': 'md5:7fd774865cc69d972f542b157c328305',
|
||||
'thumbnail': r're:^https?://.*\.jpg(?:\?.*?)?$',
|
||||
'duration': 10853,
|
||||
'timestamp': 1326446400,
|
||||
'upload_date': '20120113',
|
||||
|
@ -245,6 +245,10 @@ class InfoExtractor(object):
|
||||
specified in the URL.
|
||||
end_time: Time in seconds where the reproduction should end, as
|
||||
specified in the URL.
|
||||
chapters: A list of dictionaries, with the following entries:
|
||||
* "start_time" - The start time of the chapter in seconds
|
||||
* "end_time" - The end time of the chapter in seconds
|
||||
* "title" (optional, string)
|
||||
|
||||
The following fields should only be used when the video belongs to some logical
|
||||
chapter or section:
|
||||
@ -976,6 +980,23 @@ def _json_ld(self, json_ld, video_id, fatal=True, expected_type=None):
|
||||
return info
|
||||
if isinstance(json_ld, dict):
|
||||
json_ld = [json_ld]
|
||||
|
||||
# Merge the metadata of one JSON-LD entry `e` into the enclosing `info` dict.
# `e` must be a dict whose '@type' is 'VideoObject' (enforced by the assert
# below). Missing keys come back as None from e.get().
def extract_video_object(e):
|
||||
assert e['@type'] == 'VideoObject'
|
||||
info.update({
|
||||
'url': e.get('contentUrl'),
|
||||
'title': unescapeHTML(e.get('name')),
|
||||
'description': unescapeHTML(e.get('description')),
|
||||
# Both spellings of the thumbnail key are accepted.
'thumbnail': e.get('thumbnailUrl') or e.get('thumbnailURL'),
|
||||
'duration': parse_duration(e.get('duration')),
|
||||
'timestamp': unified_timestamp(e.get('uploadDate')),
|
||||
'filesize': float_or_none(e.get('contentSize')),
|
||||
'tbr': int_or_none(e.get('bitrate')),
|
||||
'width': int_or_none(e.get('width')),
|
||||
'height': int_or_none(e.get('height')),
|
||||
'view_count': int_or_none(e.get('interactionCount')),
|
||||
})
|
||||
|
||||
for e in json_ld:
|
||||
if e.get('@context') == 'http://schema.org':
|
||||
item_type = e.get('@type')
|
||||
@ -1000,18 +1021,11 @@ def _json_ld(self, json_ld, video_id, fatal=True, expected_type=None):
|
||||
'description': unescapeHTML(e.get('articleBody')),
|
||||
})
|
||||
elif item_type == 'VideoObject':
|
||||
info.update({
|
||||
'url': e.get('contentUrl'),
|
||||
'title': unescapeHTML(e.get('name')),
|
||||
'description': unescapeHTML(e.get('description')),
|
||||
'thumbnail': e.get('thumbnailUrl') or e.get('thumbnailURL'),
|
||||
'duration': parse_duration(e.get('duration')),
|
||||
'timestamp': unified_timestamp(e.get('uploadDate')),
|
||||
'filesize': float_or_none(e.get('contentSize')),
|
||||
'tbr': int_or_none(e.get('bitrate')),
|
||||
'width': int_or_none(e.get('width')),
|
||||
'height': int_or_none(e.get('height')),
|
||||
})
|
||||
extract_video_object(e)
|
||||
elif item_type == 'WebPage':
|
||||
video = e.get('video')
|
||||
if isinstance(video, dict) and video.get('@type') == 'VideoObject':
|
||||
extract_video_object(video)
|
||||
break
|
||||
return dict((k, v) for k, v in info.items() if v is not None)
|
||||
|
||||
@ -1303,40 +1317,50 @@ def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None,
|
||||
entry_protocol='m3u8', preference=None,
|
||||
m3u8_id=None, note=None, errnote=None,
|
||||
fatal=True, live=False):
|
||||
|
||||
res = self._download_webpage_handle(
|
||||
m3u8_url, video_id,
|
||||
note=note or 'Downloading m3u8 information',
|
||||
errnote=errnote or 'Failed to download m3u8 information',
|
||||
fatal=fatal)
|
||||
|
||||
if res is False:
|
||||
return []
|
||||
|
||||
m3u8_doc, urlh = res
|
||||
m3u8_url = urlh.geturl()
|
||||
|
||||
return self._parse_m3u8_formats(
|
||||
m3u8_doc, m3u8_url, ext=ext, entry_protocol=entry_protocol,
|
||||
preference=preference, m3u8_id=m3u8_id, live=live)
|
||||
|
||||
def _parse_m3u8_formats(self, m3u8_doc, m3u8_url, ext=None,
|
||||
entry_protocol='m3u8', preference=None,
|
||||
m3u8_id=None, live=False):
|
||||
if '#EXT-X-FAXS-CM:' in m3u8_doc: # Adobe Flash Access
|
||||
return []
|
||||
|
||||
formats = [self._m3u8_meta_format(m3u8_url, ext, preference, m3u8_id)]
|
||||
formats = []
|
||||
|
||||
format_url = lambda u: (
|
||||
u
|
||||
if re.match(r'^https?://', u)
|
||||
else compat_urlparse.urljoin(m3u8_url, u))
|
||||
|
||||
# We should try extracting formats only from master playlists [1], i.e.
|
||||
# playlists that describe available qualities. On the other hand media
|
||||
# playlists [2] should be returned as is since they contain just the media
|
||||
# without qualities renditions.
|
||||
# References:
|
||||
# 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-21
|
||||
# 2. https://github.com/rg3/youtube-dl/issues/12211
|
||||
|
||||
# We should try extracting formats only from master playlists [1, 4.3.4],
|
||||
# i.e. playlists that describe available qualities. On the other hand
|
||||
# media playlists [1, 4.3.3] should be returned as is since they contain
|
||||
# just the media without qualities renditions.
|
||||
# Fortunately, master playlist can be easily distinguished from media
|
||||
# playlist based on particular tags availability. As of [1, 2] master
|
||||
# playlist tags MUST NOT appear in a media playlist and vice versa.
|
||||
# As of [3] #EXT-X-TARGETDURATION tag is REQUIRED for every media playlist
|
||||
# and MUST NOT appear in master playlist thus we can clearly detect media
|
||||
# playlist with this criterion.
|
||||
# 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.4
|
||||
# 2. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3
|
||||
# 3. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.1
|
||||
# playlist based on particular tags availability. As of [1, 4.3.3, 4.3.4]
|
||||
# master playlist tags MUST NOT appear in a media playlist and vice versa.
|
||||
# As of [1, 4.3.3.1] #EXT-X-TARGETDURATION tag is REQUIRED for every
|
||||
# media playlist and MUST NOT appear in master playlist thus we can
|
||||
# clearly detect media playlist with this criterion.
|
||||
|
||||
if '#EXT-X-TARGETDURATION' in m3u8_doc: # media playlist, return as is
|
||||
return [{
|
||||
'url': m3u8_url,
|
||||
@ -1345,26 +1369,29 @@ def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None,
|
||||
'protocol': entry_protocol,
|
||||
'preference': preference,
|
||||
}]
|
||||
audio_in_video_stream = {}
|
||||
last_info = {}
|
||||
last_media = {}
|
||||
for line in m3u8_doc.splitlines():
|
||||
if line.startswith('#EXT-X-STREAM-INF:'):
|
||||
last_info = parse_m3u8_attributes(line)
|
||||
elif line.startswith('#EXT-X-MEDIA:'):
|
||||
media = parse_m3u8_attributes(line)
|
||||
media_type = media.get('TYPE')
|
||||
if media_type in ('VIDEO', 'AUDIO'):
|
||||
group_id = media.get('GROUP-ID')
|
||||
|
||||
groups = {}
|
||||
last_stream_inf = {}
|
||||
|
||||
def extract_media(x_media_line):
|
||||
media = parse_m3u8_attributes(x_media_line)
|
||||
# As per [1, 4.3.4.1] TYPE, GROUP-ID and NAME are REQUIRED
|
||||
media_type, group_id, name = media.get('TYPE'), media.get('GROUP-ID'), media.get('NAME')
|
||||
if not (media_type and group_id and name):
|
||||
return
|
||||
groups.setdefault(group_id, []).append(media)
|
||||
if media_type not in ('VIDEO', 'AUDIO'):
|
||||
return
|
||||
media_url = media.get('URI')
|
||||
if media_url:
|
||||
format_id = []
|
||||
for v in (group_id, media.get('NAME')):
|
||||
for v in (group_id, name):
|
||||
if v:
|
||||
format_id.append(v)
|
||||
f = {
|
||||
'format_id': '-'.join(format_id),
|
||||
'url': format_url(media_url),
|
||||
'manifest_url': m3u8_url,
|
||||
'language': media.get('LANGUAGE'),
|
||||
'ext': ext,
|
||||
'protocol': entry_protocol,
|
||||
@ -1372,25 +1399,42 @@ def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None,
|
||||
}
|
||||
if media_type == 'AUDIO':
|
||||
f['vcodec'] = 'none'
|
||||
if group_id and not audio_in_video_stream.get(group_id):
|
||||
audio_in_video_stream[group_id] = False
|
||||
formats.append(f)
|
||||
else:
|
||||
# When there is no URI in EXT-X-MEDIA let this tag's
|
||||
# data be used by regular URI lines below
|
||||
last_media = media
|
||||
if media_type == 'AUDIO' and group_id:
|
||||
audio_in_video_stream[group_id] = True
|
||||
|
||||
# Choose a name for the variant stream described by the most recently parsed
# EXT-X-STREAM-INF attributes (`last_stream_inf`). Returns None when the tag
# carries neither a NAME nor a VIDEO group id.
def build_stream_name():
|
||||
# Although the specification does not mention a NAME attribute for
|
||||
# the EXT-X-STREAM-INF tag, it may still sometimes be present (see [1]
|
||||
# or vidio test in TestInfoExtractor.test_parse_m3u8_formats)
|
||||
# 1. http://www.vidio.com/watch/165683-dj_ambred-booyah-live-2015
|
||||
stream_name = last_stream_inf.get('NAME')
|
||||
if stream_name:
|
||||
return stream_name
|
||||
# If there is no NAME in EXT-X-STREAM-INF, it will be obtained
|
||||
# from the corresponding rendition group
|
||||
stream_group_id = last_stream_inf.get('VIDEO')
|
||||
if not stream_group_id:
|
||||
return
|
||||
stream_group = groups.get(stream_group_id)
|
||||
if not stream_group:
|
||||
# No renditions were collected for this group id: fall back to the id.
return stream_group_id
|
||||
# Only the first rendition collected for the group is consulted.
rendition = stream_group[0]
|
||||
return rendition.get('NAME') or stream_group_id
|
||||
|
||||
for line in m3u8_doc.splitlines():
|
||||
if line.startswith('#EXT-X-STREAM-INF:'):
|
||||
last_stream_inf = parse_m3u8_attributes(line)
|
||||
elif line.startswith('#EXT-X-MEDIA:'):
|
||||
extract_media(line)
|
||||
elif line.startswith('#') or not line.strip():
|
||||
continue
|
||||
else:
|
||||
tbr = int_or_none(last_info.get('AVERAGE-BANDWIDTH') or last_info.get('BANDWIDTH'), scale=1000)
|
||||
tbr = float_or_none(
|
||||
last_stream_inf.get('AVERAGE-BANDWIDTH') or
|
||||
last_stream_inf.get('BANDWIDTH'), scale=1000)
|
||||
format_id = []
|
||||
if m3u8_id:
|
||||
format_id.append(m3u8_id)
|
||||
# Despite specification does not mention NAME attribute for
|
||||
# EXT-X-STREAM-INF it still sometimes may be present
|
||||
stream_name = last_info.get('NAME') or last_media.get('NAME')
|
||||
stream_name = build_stream_name()
|
||||
# Bandwidth of live streams may differ over time thus making
|
||||
# format_id unpredictable. So it's better to keep provided
|
||||
# format_id intact.
|
||||
@ -1400,14 +1444,14 @@ def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None,
|
||||
f = {
|
||||
'format_id': '-'.join(format_id),
|
||||
'url': manifest_url,
|
||||
'manifest_url': manifest_url,
|
||||
'manifest_url': m3u8_url,
|
||||
'tbr': tbr,
|
||||
'ext': ext,
|
||||
'fps': float_or_none(last_info.get('FRAME-RATE')),
|
||||
'fps': float_or_none(last_stream_inf.get('FRAME-RATE')),
|
||||
'protocol': entry_protocol,
|
||||
'preference': preference,
|
||||
}
|
||||
resolution = last_info.get('RESOLUTION')
|
||||
resolution = last_stream_inf.get('RESOLUTION')
|
||||
if resolution:
|
||||
mobj = re.search(r'(?P<width>\d+)[xX](?P<height>\d+)', resolution)
|
||||
if mobj:
|
||||
@ -1423,13 +1467,26 @@ def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None,
|
||||
'vbr': vbr,
|
||||
'abr': abr,
|
||||
})
|
||||
f.update(parse_codecs(last_info.get('CODECS')))
|
||||
if audio_in_video_stream.get(last_info.get('AUDIO')) is False and f['vcodec'] != 'none':
|
||||
# TODO: update acodec for audio only formats with the same GROUP-ID
|
||||
codecs = parse_codecs(last_stream_inf.get('CODECS'))
|
||||
f.update(codecs)
|
||||
audio_group_id = last_stream_inf.get('AUDIO')
|
||||
# As per [1, 4.3.4.1.1] any EXT-X-STREAM-INF tag which
|
||||
# references a rendition group MUST have a CODECS attribute.
|
||||
# However, this is not always respected, for example, [2]
|
||||
# contains EXT-X-STREAM-INF tag which references AUDIO
|
||||
# rendition group but does not have CODECS and despite
|
||||
# referencing an audio group, it represents
|
||||
# a complete (with audio and video) format. So, for such cases
|
||||
# we will ignore references to rendition groups and treat them
|
||||
# as complete formats.
|
||||
if audio_group_id and codecs and f.get('vcodec') != 'none':
|
||||
audio_group = groups.get(audio_group_id)
|
||||
if audio_group and audio_group[0].get('URI'):
|
||||
# TODO: update acodec for audio only formats with
|
||||
# the same GROUP-ID
|
||||
f['acodec'] = 'none'
|
||||
formats.append(f)
|
||||
last_info = {}
|
||||
last_media = {}
|
||||
last_stream_inf = {}
|
||||
return formats
|
||||
|
||||
@staticmethod
|
||||
@ -1803,7 +1860,7 @@ def extract_Initialization(source):
|
||||
'ext': mimetype2ext(mime_type),
|
||||
'width': int_or_none(representation_attrib.get('width')),
|
||||
'height': int_or_none(representation_attrib.get('height')),
|
||||
'tbr': int_or_none(bandwidth, 1000),
|
||||
'tbr': float_or_none(bandwidth, 1000),
|
||||
'asr': int_or_none(representation_attrib.get('audioSamplingRate')),
|
||||
'fps': int_or_none(representation_attrib.get('frameRate')),
|
||||
'language': lang if lang not in ('mul', 'und', 'zxx', 'mis') else None,
|
||||
@ -2182,7 +2239,7 @@ def _extract_wowza_formats(self, url, video_id, m3u8_entry_protocol='m3u8_native
|
||||
|
||||
def _find_jwplayer_data(self, webpage, video_id=None, transform_source=js_to_json):
|
||||
mobj = re.search(
|
||||
r'jwplayer\((?P<quote>[\'"])[^\'" ]+(?P=quote)\)\.setup\s*\((?P<options>[^)]+)\)',
|
||||
r'(?s)jwplayer\((?P<quote>[\'"])[^\'" ]+(?P=quote)\)(?!</script>).*?\.setup\s*\((?P<options>[^)]+)\)',
|
||||
webpage)
|
||||
if mobj:
|
||||
try:
|
||||
@ -2258,11 +2315,17 @@ def _parse_jwplayer_data(self, jwplayer_data, video_id=None, require_title=True,
|
||||
|
||||
def _parse_jwplayer_formats(self, jwplayer_sources_data, video_id=None,
|
||||
m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None):
|
||||
urls = []
|
||||
formats = []
|
||||
for source in jwplayer_sources_data:
|
||||
source_url = self._proto_relative_url(source['file'])
|
||||
source_url = self._proto_relative_url(source.get('file'))
|
||||
if not source_url:
|
||||
continue
|
||||
if base_url:
|
||||
source_url = compat_urlparse.urljoin(base_url, source_url)
|
||||
if source_url in urls:
|
||||
continue
|
||||
urls.append(source_url)
|
||||
source_type = source.get('type') or ''
|
||||
ext = mimetype2ext(source_type) or determine_ext(source_url)
|
||||
if source_type == 'hls' or ext == 'm3u8':
|
||||
|
@ -24,12 +24,11 @@ class CoubIE(InfoExtractor):
|
||||
'duration': 4.6,
|
||||
'timestamp': 1428527772,
|
||||
'upload_date': '20150408',
|
||||
'uploader': 'Артём Лоскутников',
|
||||
'uploader': 'Artyom Loskutnikov',
|
||||
'uploader_id': 'artyom.loskutnikov',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'repost_count': int,
|
||||
'comment_count': int,
|
||||
'age_limit': 0,
|
||||
},
|
||||
}, {
|
||||
@ -118,7 +117,6 @@ def _real_extract(self, url):
|
||||
view_count = int_or_none(coub.get('views_count') or coub.get('views_increase_count'))
|
||||
like_count = int_or_none(coub.get('likes_count'))
|
||||
repost_count = int_or_none(coub.get('recoubs_count'))
|
||||
comment_count = int_or_none(coub.get('comments_count'))
|
||||
|
||||
age_restricted = coub.get('age_restricted', coub.get('age_restricted_by_admin'))
|
||||
if age_restricted is not None:
|
||||
@ -137,7 +135,6 @@ def _real_extract(self, url):
|
||||
'view_count': view_count,
|
||||
'like_count': like_count,
|
||||
'repost_count': repost_count,
|
||||
'comment_count': comment_count,
|
||||
'age_limit': age_limit,
|
||||
'formats': formats,
|
||||
}
|
||||
|
@ -171,7 +171,7 @@ class CrunchyrollIE(CrunchyrollBaseIE):
|
||||
'info_dict': {
|
||||
'id': '727589',
|
||||
'ext': 'mp4',
|
||||
'title': "KONOSUBA -God's blessing on this wonderful world! 2 Episode 1 – Give Me Deliverance from this Judicial Injustice!",
|
||||
'title': "KONOSUBA -God's blessing on this wonderful world! 2 Episode 1 – Give Me Deliverance From This Judicial Injustice!",
|
||||
'description': 'md5:cbcf05e528124b0f3a0a419fc805ea7d',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'uploader': 'Kadokawa Pictures Inc.',
|
||||
@ -179,7 +179,7 @@ class CrunchyrollIE(CrunchyrollBaseIE):
|
||||
'series': "KONOSUBA -God's blessing on this wonderful world!",
|
||||
'season': "KONOSUBA -God's blessing on this wonderful world! 2",
|
||||
'season_number': 2,
|
||||
'episode': 'Give Me Deliverance from this Judicial Injustice!',
|
||||
'episode': 'Give Me Deliverance From This Judicial Injustice!',
|
||||
'episode_number': 1,
|
||||
},
|
||||
'params': {
|
||||
|
@ -50,6 +50,24 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
]
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.dailymotion.com/video/x5kesuj_office-christmas-party-review-jason-bateman-olivia-munn-t-j-miller_news',
|
||||
'md5': '074b95bdee76b9e3654137aee9c79dfe',
|
||||
'info_dict': {
|
||||
'id': 'x5kesuj',
|
||||
'ext': 'mp4',
|
||||
'title': 'Office Christmas Party Review – Jason Bateman, Olivia Munn, T.J. Miller',
|
||||
'description': 'Office Christmas Party Review - Jason Bateman, Olivia Munn, T.J. Miller',
|
||||
'thumbnail': r're:^https?:.*\.(?:jpg|png)$',
|
||||
'duration': 187,
|
||||
'timestamp': 1493651285,
|
||||
'upload_date': '20170501',
|
||||
'uploader': 'Deadline',
|
||||
'uploader_id': 'x1xm8ri',
|
||||
'age_limit': 0,
|
||||
'view_count': int,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'https://www.dailymotion.com/video/x2iuewm_steam-machine-models-pricing-listed-on-steam-store-ign-news_videogames',
|
||||
'md5': '2137c41a8e78554bb09225b8eb322406',
|
||||
@ -66,7 +84,8 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
'uploader_id': 'xijv66',
|
||||
'age_limit': 0,
|
||||
'view_count': int,
|
||||
}
|
||||
},
|
||||
'skip': 'video gone',
|
||||
},
|
||||
# Vevo video
|
||||
{
|
||||
|
@ -21,7 +21,8 @@ class DemocracynowIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '2015-0703-001',
|
||||
'ext': 'mp4',
|
||||
'title': 'Daily Show',
|
||||
'title': 'Daily Show for July 03, 2015',
|
||||
'description': 'md5:80eb927244d6749900de6072c7cc2c86',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.democracynow.org/2015/7/3/this_flag_comes_down_today_bree',
|
||||
|
@ -35,7 +35,7 @@ class DotsubIE(InfoExtractor):
|
||||
'thumbnail': 're:^https?://dotsub.com/media/747bcf58-bd59-45b7-8c8c-ac312d084ee6/p',
|
||||
'duration': 290,
|
||||
'timestamp': 1476767794.2809999,
|
||||
'upload_date': '20160525',
|
||||
'upload_date': '20161018',
|
||||
'uploader': 'parthivi001',
|
||||
'uploader_id': 'user52596202',
|
||||
'view_count': int,
|
||||
|
@ -20,7 +20,7 @@ class DouyuTVIE(InfoExtractor):
|
||||
'id': '17732',
|
||||
'display_id': 'iseven',
|
||||
'ext': 'flv',
|
||||
'title': 're:^清晨醒脑!T-ARA根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||
'title': 're:^清晨醒脑!根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||
'description': r're:.*m7show@163\.com.*',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'uploader': '7师傅',
|
||||
@ -51,7 +51,7 @@ class DouyuTVIE(InfoExtractor):
|
||||
'id': '17732',
|
||||
'display_id': '17732',
|
||||
'ext': 'flv',
|
||||
'title': 're:^清晨醒脑!T-ARA根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||
'title': 're:^清晨醒脑!根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||
'description': r're:.*m7show@163\.com.*',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'uploader': '7师傅',
|
||||
|
@ -41,6 +41,7 @@
|
||||
from .amcnetworks import AMCNetworksIE
|
||||
from .animeondemand import AnimeOnDemandIE
|
||||
from .anitube import AnitubeIE
|
||||
from .anvato import AnvatoIE
|
||||
from .anysex import AnySexIE
|
||||
from .aol import AolIE
|
||||
from .allocine import AllocineIE
|
||||
@ -87,7 +88,6 @@
|
||||
AZMedienPlaylistIE,
|
||||
AZMedienShowPlaylistIE,
|
||||
)
|
||||
from .azubu import AzubuIE, AzubuLiveIE
|
||||
from .baidu import BaiduVideoIE
|
||||
from .bambuser import BambuserIE, BambuserChannelIE
|
||||
from .bandcamp import BandcampIE, BandcampAlbumIE
|
||||
@ -663,6 +663,7 @@
|
||||
from .njpwworld import NJPWWorldIE
|
||||
from .nobelprize import NobelPrizeIE
|
||||
from .noco import NocoIE
|
||||
from .noovo import NoovoIE
|
||||
from .normalboots import NormalbootsIE
|
||||
from .nosvideo import NosVideoIE
|
||||
from .nova import NovaIE
|
||||
@ -939,6 +940,7 @@
|
||||
from .stanfordoc import StanfordOpenClassroomIE
|
||||
from .steam import SteamIE
|
||||
from .streamable import StreamableIE
|
||||
from .streamango import StreamangoIE
|
||||
from .streamcloud import StreamcloudIE
|
||||
from .streamcz import StreamCZIE
|
||||
from .streetvoice import StreetVoiceIE
|
||||
@ -1233,7 +1235,10 @@
|
||||
WrzutaIE,
|
||||
WrzutaPlaylistIE,
|
||||
)
|
||||
from .wsj import WSJIE
|
||||
from .wsj import (
|
||||
WSJIE,
|
||||
WSJArticleIE,
|
||||
)
|
||||
from .xbef import XBefIE
|
||||
from .xboxclips import XboxClipsIE
|
||||
from .xfileshare import XFileShareIE
|
||||
@ -1295,5 +1300,6 @@
|
||||
YoutubeWatchLaterIE,
|
||||
)
|
||||
from .zapiks import ZapiksIE
|
||||
from .zaq1 import Zaq1IE
|
||||
from .zdf import ZDFIE, ZDFChannelIE
|
||||
from .zingmp3 import ZingMp3IE
|
||||
|
@ -11,10 +11,10 @@ class FoxSportsIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?foxsports\.com/(?:[^/]+/)*(?P<id>[^/]+)'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.foxsports.com/video?vid=432609859715',
|
||||
'url': 'http://www.foxsports.com/tennessee/video/432609859715',
|
||||
'md5': 'b49050e955bebe32c301972e4012ac17',
|
||||
'info_dict': {
|
||||
'id': 'i0qKWsk3qJaM',
|
||||
'id': 'bwduI3X_TgUB',
|
||||
'ext': 'mp4',
|
||||
'title': 'Courtney Lee on going up 2-0 in series vs. Blazers',
|
||||
'description': 'Courtney Lee talks about Memphis being focused.',
|
||||
@ -31,8 +31,9 @@ def _real_extract(self, url):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
config = self._parse_json(
|
||||
self._search_regex(
|
||||
r"data-player-config='([^']+)'", webpage, 'data player config'),
|
||||
self._html_search_regex(
|
||||
r"""class="[^"]*(?:fs-player|platformPlayer-wrapper)[^"]*".+?data-player-config='([^']+)'""",
|
||||
webpage, 'data player config'),
|
||||
video_id)
|
||||
|
||||
return self.url_result(smuggle_url(update_url_query(
|
||||
|
@ -58,8 +58,7 @@ def _real_extract(self, url):
|
||||
m3u8_url, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False)
|
||||
source_formats = list(filter(
|
||||
lambda f: f.get('vcodec') != 'none' and f.get('resolution') != 'multiple',
|
||||
m3u8_formats))
|
||||
lambda f: f.get('vcodec') != 'none', m3u8_formats))
|
||||
|
||||
bitrates = [int(bitrate) for bitrate in re.findall(r'[,/]v(\d+)(?=[,/])', m3u8_url)]
|
||||
bitrates.sort()
|
||||
|
@ -78,8 +78,7 @@ def _real_extract(self, url):
|
||||
if m3u8_formats:
|
||||
self._sort_formats(m3u8_formats)
|
||||
m3u8_formats = list(filter(
|
||||
lambda f: f.get('vcodec') != 'none' and f.get('resolution') != 'multiple',
|
||||
m3u8_formats))
|
||||
lambda f: f.get('vcodec') != 'none', m3u8_formats))
|
||||
if len(qualities) == len(m3u8_formats):
|
||||
for q, m3u8_format in zip(qualities, m3u8_formats):
|
||||
f = m3u8_format.copy()
|
||||
|
@ -85,6 +85,9 @@
|
||||
from .openload import OpenloadIE
|
||||
from .videopress import VideoPressIE
|
||||
from .rutube import RutubeIE
|
||||
from .limelight import LimelightBaseIE
|
||||
from .anvato import AnvatoIE
|
||||
from .washingtonpost import WashingtonPostIE
|
||||
|
||||
|
||||
class GenericIE(InfoExtractor):
|
||||
@ -429,6 +432,22 @@ class GenericIE(InfoExtractor):
|
||||
'skip_download': True, # m3u8 download
|
||||
},
|
||||
},
|
||||
{
|
||||
# Brightcove video in <iframe>
|
||||
'url': 'http://www.un.org/chinese/News/story.asp?NewsID=27724',
|
||||
'md5': '36d74ef5e37c8b4a2ce92880d208b968',
|
||||
'info_dict': {
|
||||
'id': '5360463607001',
|
||||
'ext': 'mp4',
|
||||
'title': '叙利亚失明儿童在废墟上演唱《心跳》 呼吁获得正常童年生活',
|
||||
'description': '联合国儿童基金会中东和北非区域大使、作曲家扎德·迪拉尼(Zade Dirani)在3月15日叙利亚冲突爆发7周年纪念日之际发布了为叙利亚谱写的歌曲《心跳》(HEARTBEAT),为受到六年冲突影响的叙利亚儿童发出强烈呐喊,呼吁世界做出共同努力,使叙利亚儿童重新获得享有正常童年生活的权利。',
|
||||
'uploader': 'United Nations',
|
||||
'uploader_id': '1362235914001',
|
||||
'timestamp': 1489593889,
|
||||
'upload_date': '20170315',
|
||||
},
|
||||
'add_ie': ['BrightcoveLegacy'],
|
||||
},
|
||||
{
|
||||
# Brightcove with alternative playerID key
|
||||
'url': 'http://www.nature.com/nmeth/journal/v9/n7/fig_tab/nmeth.2062_SV1.html',
|
||||
@ -1410,6 +1429,22 @@ class GenericIE(InfoExtractor):
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
# Brightcove embed with whitespace around attribute names
|
||||
'url': 'http://www.stack.com/video/3167554373001/learn-to-hit-open-three-pointers-with-damian-lillard-s-baseline-drift-drill',
|
||||
'info_dict': {
|
||||
'id': '3167554373001',
|
||||
'ext': 'mp4',
|
||||
'title': "Learn to Hit Open Three-Pointers With Damian Lillard's Baseline Drift Drill",
|
||||
'description': 'md5:57bacb0e0f29349de4972bfda3191713',
|
||||
'uploader_id': '1079349493',
|
||||
'upload_date': '20140207',
|
||||
'timestamp': 1391810548,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
# Another form of arte.tv embed
|
||||
{
|
||||
'url': 'http://www.tv-replay.fr/redirection/09-04-16/arte-reportage-arte-11508975.html',
|
||||
@ -1651,6 +1686,38 @@ class GenericIE(InfoExtractor):
|
||||
},
|
||||
'add_ie': [SenateISVPIE.ie_key()],
|
||||
},
|
||||
{
|
||||
# Limelight embeds (1 channel embed + 4 media embeds)
|
||||
'url': 'http://www.sedona.com/FacilitatorTraining2017',
|
||||
'info_dict': {
|
||||
'id': 'FacilitatorTraining2017',
|
||||
'title': 'Facilitator Training 2017',
|
||||
},
|
||||
'playlist_mincount': 5,
|
||||
},
|
||||
{
|
||||
'url': 'http://kron4.com/2017/04/28/standoff-with-walnut-creek-murder-suspect-ends-with-arrest/',
|
||||
'info_dict': {
|
||||
'id': 'standoff-with-walnut-creek-murder-suspect-ends-with-arrest',
|
||||
'title': 'Standoff with Walnut Creek murder suspect ends',
|
||||
'description': 'md5:3ccc48a60fc9441eeccfc9c469ebf788',
|
||||
},
|
||||
'playlist_mincount': 4,
|
||||
},
|
||||
{
|
||||
# WashingtonPost embed
|
||||
'url': 'http://www.vanityfair.com/hollywood/2017/04/donald-trump-tv-pitches',
|
||||
'info_dict': {
|
||||
'id': '8caf6e88-d0ec-11e5-90d3-34c2c42653ac',
|
||||
'ext': 'mp4',
|
||||
'title': "No one has seen the drama series based on Trump's life \u2014 until now",
|
||||
'description': 'Donald Trump wanted a weekly TV drama based on his life. It never aired. But The Washington Post recently obtained a scene from the pilot script — and enlisted actors.',
|
||||
'timestamp': 1455216756,
|
||||
'uploader': 'The Washington Post',
|
||||
'upload_date': '20160211',
|
||||
},
|
||||
'add_ie': [WashingtonPostIE.ie_key()],
|
||||
},
|
||||
# {
|
||||
# # TODO: find another test
|
||||
# # http://schema.org/VideoObject
|
||||
@ -1693,7 +1760,7 @@ def _extract_rss(self, url, video_id, doc):
|
||||
continue
|
||||
|
||||
entries.append({
|
||||
'_type': 'url',
|
||||
'_type': 'url_transparent',
|
||||
'url': next_url,
|
||||
'title': it.find('title').text,
|
||||
})
|
||||
@ -2483,6 +2550,11 @@ def _real_extract(self, url):
|
||||
return self.url_result(piksel_url, PikselIE.ie_key())
|
||||
|
||||
# Look for Limelight embeds
|
||||
limelight_urls = LimelightBaseIE._extract_urls(webpage, url)
|
||||
if limelight_urls:
|
||||
return self.playlist_result(
|
||||
limelight_urls, video_id, video_title, video_description)
|
||||
|
||||
mobj = re.search(r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P<id>[a-z0-9]{32})', webpage)
|
||||
if mobj:
|
||||
lm = {
|
||||
@ -2506,6 +2578,12 @@ def _real_extract(self, url):
|
||||
'limelight:media:%s' % mobj.group('id'),
|
||||
{'source_url': url}), 'LimelightMedia', mobj.group('id'))
|
||||
|
||||
# Look for Anvato embeds
|
||||
anvato_urls = AnvatoIE._extract_urls(self, webpage, video_id)
|
||||
if anvato_urls:
|
||||
return self.playlist_result(
|
||||
anvato_urls, video_id, video_title, video_description)
|
||||
|
||||
# Look for AdobeTVVideo embeds
|
||||
mobj = re.search(
|
||||
r'<iframe[^>]+src=[\'"]((?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]',
|
||||
@ -2623,6 +2701,12 @@ def _real_extract(self, url):
|
||||
return self.playlist_from_matches(
|
||||
rutube_urls, ie=RutubeIE.ie_key())
|
||||
|
||||
# Look for WashingtonPost embeds
|
||||
wapo_urls = WashingtonPostIE._extract_urls(webpage)
|
||||
if wapo_urls:
|
||||
return self.playlist_from_matches(
|
||||
wapo_urls, video_id, video_title, ie=WashingtonPostIE.ie_key())
|
||||
|
||||
# Looking for http://schema.org/VideoObject
|
||||
json_ld = self._search_json_ld(
|
||||
webpage, video_id, default={}, expected_type='VideoObject')
|
||||
|
@ -36,22 +36,26 @@ class GoIE(AdobePassIE):
|
||||
'requestor_id': 'DisneyXD',
|
||||
}
|
||||
}
|
||||
_VALID_URL = r'https?://(?:(?P<sub_domain>%s)\.)?go\.com/(?:[^/]+/)*(?:vdka(?P<id>\w+)|(?:[^/]+/)*(?P<display_id>[^/?#]+))' % '|'.join(_SITE_INFO.keys())
|
||||
_VALID_URL = r'https?://(?:(?P<sub_domain>%s)\.)?go\.com/(?:(?:[^/]+/)*(?P<id>vdka\w+)|(?:[^/]+/)*(?P<display_id>[^/?#]+))' % '|'.join(_SITE_INFO.keys())
|
||||
_TESTS = [{
|
||||
'url': 'http://abc.go.com/shows/castle/video/most-recent/vdka0_g86w5onx',
|
||||
'url': 'http://abc.go.com/shows/designated-survivor/video/most-recent/VDKA3807643',
|
||||
'info_dict': {
|
||||
'id': '0_g86w5onx',
|
||||
'id': 'VDKA3807643',
|
||||
'ext': 'mp4',
|
||||
'title': 'Sneak Peek: Language Arts',
|
||||
'description': 'md5:7dcdab3b2d17e5217c953256af964e9c',
|
||||
'title': 'The Traitor in the White House',
|
||||
'description': 'md5:05b009d2d145a1e85d25111bd37222e8',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://abc.go.com/shows/after-paradise/video/most-recent/vdka3335601',
|
||||
'only_matching': True,
|
||||
'url': 'http://watchdisneyxd.go.com/doraemon',
|
||||
'info_dict': {
|
||||
'title': 'Doraemon',
|
||||
'id': 'SH55574025',
|
||||
},
|
||||
'playlist_mincount': 51,
|
||||
}, {
|
||||
'url': 'http://abc.go.com/shows/the-catch/episode-guide/season-01/10-the-wedding',
|
||||
'only_matching': True,
|
||||
@ -60,19 +64,36 @@ class GoIE(AdobePassIE):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
# Download the contents API JSON for `brand` and return its 'video' entry.
# `show_id` and `video_id` are substituted into the request URL; both default
# to '-1', the same placeholder used in the other URL path slots
# (presumably meaning "not specified" -- confirm against the API).
def _extract_videos(self, brand, video_id='-1', show_id='-1'):
|
||||
# Label the download with the video id when given, otherwise the show id.
display_id = video_id if video_id != '-1' else show_id
|
||||
return self._download_json(
|
||||
'http://api.contents.watchabc.go.com/vp2/ws/contents/3000/videos/%s/001/-1/%s/-1/%s/-1/-1.json' % (brand, show_id, video_id),
|
||||
display_id)['video']
|
||||
|
||||
def _real_extract(self, url):
|
||||
sub_domain, video_id, display_id = re.match(self._VALID_URL, url).groups()
|
||||
site_info = self._SITE_INFO[sub_domain]
|
||||
brand = site_info['brand']
|
||||
if not video_id:
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
video_id = self._search_regex(
|
||||
# There may be inner quotes, e.g. data-video-id="'VDKA3609139'"
|
||||
# from http://freeform.go.com/shows/shadowhunters/episodes/season-2/1-this-guilty-blood
|
||||
r'data-video-id=["\']*VDKA(\w+)', webpage, 'video id')
|
||||
site_info = self._SITE_INFO[sub_domain]
|
||||
brand = site_info['brand']
|
||||
video_data = self._download_json(
|
||||
'http://api.contents.watchabc.go.com/vp2/ws/contents/3000/videos/%s/001/-1/-1/-1/%s/-1/-1.json' % (brand, video_id),
|
||||
video_id)['video'][0]
|
||||
r'data-video-id=["\']*(VDKA\w+)', webpage, 'video id', default=None)
|
||||
if not video_id:
|
||||
# show extraction works for Disney, DisneyJunior and DisneyXD
|
||||
# ABC and Freeform have a different layout
|
||||
show_id = self._search_regex(r'data-show-id=["\']*(SH\d+)', webpage, 'show id')
|
||||
videos = self._extract_videos(brand, show_id=show_id)
|
||||
show_title = self._search_regex(r'data-show-title="([^"]+)"', webpage, 'show title', fatal=False)
|
||||
entries = []
|
||||
for video in videos:
|
||||
entries.append(self.url_result(
|
||||
video['url'], 'Go', video.get('id'), video.get('title')))
|
||||
entries.reverse()
|
||||
return self.playlist_result(entries, show_id, show_title)
|
||||
video_data = self._extract_videos(brand, video_id)[0]
|
||||
video_id = video_data['id']
|
||||
title = video_data['title']
|
||||
|
||||
formats = []
|
||||
@ -105,7 +126,7 @@ def _real_extract(self, url):
|
||||
self._initialize_geo_bypass(['US'])
|
||||
entitlement = self._download_json(
|
||||
'https://api.entitlement.watchabc.go.com/vp2/ws-secure/entitlement/2020/authorize.json',
|
||||
video_id, data=urlencode_postdata(data), headers=self.geo_verification_headers())
|
||||
video_id, data=urlencode_postdata(data))
|
||||
errors = entitlement.get('errors', {}).get('errors', [])
|
||||
if errors:
|
||||
for error in errors:
|
||||
|
@ -5,6 +5,7 @@
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
)
|
||||
@ -18,7 +19,7 @@ class Go90IE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '84BUqjLpf9D',
|
||||
'ext': 'mp4',
|
||||
'title': 'Inside The Utah Coalition Against Pornography Convention',
|
||||
'title': 'Daily VICE - Inside The Utah Coalition Against Pornography Convention',
|
||||
'description': 'VICE\'s Karley Sciortino meets with activists who discuss the state\'s strong anti-porn stance. Then, VICE Sports explains NFL contracts.',
|
||||
'timestamp': 1491868800,
|
||||
'upload_date': '20170411',
|
||||
@ -32,11 +33,28 @@ def _real_extract(self, url):
|
||||
video_id, headers={
|
||||
'Content-Type': 'application/json; charset=utf-8',
|
||||
}, data=b'{"client":"web","device_type":"pc"}')
|
||||
title = video_data['title']
|
||||
main_video_asset = video_data['main_video_asset']
|
||||
|
||||
episode_number = int_or_none(video_data.get('episode_number'))
|
||||
series = None
|
||||
season = None
|
||||
season_id = None
|
||||
season_number = None
|
||||
for metadata in video_data.get('__children', {}).get('Item', {}).values():
|
||||
if metadata.get('type') == 'show':
|
||||
series = metadata.get('title')
|
||||
elif metadata.get('type') == 'season':
|
||||
season = metadata.get('title')
|
||||
season_id = metadata.get('id')
|
||||
season_number = int_or_none(metadata.get('season_number'))
|
||||
|
||||
title = episode = video_data.get('title') or series
|
||||
if series and series != title:
|
||||
title = '%s - %s' % (series, title)
|
||||
|
||||
thumbnails = []
|
||||
formats = []
|
||||
subtitles = {}
|
||||
for asset in video_data.get('assets'):
|
||||
if asset.get('id') == main_video_asset:
|
||||
for source in asset.get('sources', []):
|
||||
@ -70,6 +88,15 @@ def _real_extract(self, url):
|
||||
'height': int_or_none(source.get('height')),
|
||||
'tbr': int_or_none(source.get('bitrate')),
|
||||
})
|
||||
|
||||
for caption in asset.get('caption_metadata', []):
|
||||
caption_url = caption.get('source_url')
|
||||
if not caption_url:
|
||||
continue
|
||||
subtitles.setdefault(caption.get('language', 'en'), []).append({
|
||||
'url': caption_url,
|
||||
'ext': determine_ext(caption_url, 'vtt'),
|
||||
})
|
||||
elif asset.get('type') == 'image':
|
||||
asset_location = asset.get('location')
|
||||
if not asset_location:
|
||||
@ -89,4 +116,11 @@ def _real_extract(self, url):
|
||||
'description': video_data.get('short_description'),
|
||||
'like_count': int_or_none(video_data.get('like_count')),
|
||||
'timestamp': parse_iso8601(video_data.get('released_at')),
|
||||
'series': series,
|
||||
'episode': episode,
|
||||
'season': season,
|
||||
'season_id': season_id,
|
||||
'season_number': season_number,
|
||||
'episode_number': episode_number,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
@ -87,8 +87,8 @@ def _extract_http_video(self, webpage):
|
||||
|
||||
def _extract_http_audio(self, webpage, video_id):
|
||||
fields = self._hidden_inputs(webpage)
|
||||
http_audio_url = fields['filename']
|
||||
if http_audio_url is None:
|
||||
http_audio_url = fields.get('filename')
|
||||
if not http_audio_url:
|
||||
return []
|
||||
|
||||
cookies_header = {'Cookie': self._extract_cookies(webpage)}
|
||||
|
@ -112,7 +112,8 @@ def _real_extract(self, url):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
(video_url, description, thumbnail, timestamp, uploader,
|
||||
uploader_id, like_count, comment_count, height, width) = [None] * 10
|
||||
uploader_id, like_count, comment_count, comments, height,
|
||||
width) = [None] * 11
|
||||
|
||||
shared_data = self._parse_json(
|
||||
self._search_regex(
|
||||
@ -121,7 +122,10 @@ def _real_extract(self, url):
|
||||
video_id, fatal=False)
|
||||
if shared_data:
|
||||
media = try_get(
|
||||
shared_data, lambda x: x['entry_data']['PostPage'][0]['media'], dict)
|
||||
shared_data,
|
||||
(lambda x: x['entry_data']['PostPage'][0]['graphql']['shortcode_media'],
|
||||
lambda x: x['entry_data']['PostPage'][0]['media']),
|
||||
dict)
|
||||
if media:
|
||||
video_url = media.get('video_url')
|
||||
height = int_or_none(media.get('dimensions', {}).get('height'))
|
||||
|
@ -189,7 +189,11 @@ class IqiyiIE(InfoExtractor):
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://yule.iqiyi.com/pcb.html',
|
||||
'only_matching': True,
|
||||
'info_dict': {
|
||||
'id': '4a0af228fddb55ec96398a364248ed7f',
|
||||
'ext': 'mp4',
|
||||
'title': '第2017-04-21期 女艺人频遭极端粉丝骚扰',
|
||||
},
|
||||
}, {
|
||||
# VIP-only video. The first 2 parts (6 minutes) are available without login
|
||||
# MD5 sums omitted as values are different on Travis CI and my machine
|
||||
@ -337,15 +341,18 @@ def _real_extract(self, url):
|
||||
url, 'temp_id', note='download video page')
|
||||
|
||||
# There's no simple way to determine whether a URL is a playlist or not
|
||||
# So detect it
|
||||
# Sometimes there are playlist links in individual videos, so treat it
|
||||
# as a single video first
|
||||
tvid = self._search_regex(
|
||||
r'data-(?:player|shareplattrigger)-tvid\s*=\s*[\'"](\d+)', webpage, 'tvid', default=None)
|
||||
if tvid is None:
|
||||
playlist_result = self._extract_playlist(webpage)
|
||||
if playlist_result:
|
||||
return playlist_result
|
||||
raise ExtractorError('Can\'t find any video')
|
||||
|
||||
tvid = self._search_regex(
|
||||
r'data-player-tvid\s*=\s*[\'"](\d+)', webpage, 'tvid')
|
||||
video_id = self._search_regex(
|
||||
r'data-player-videoid\s*=\s*[\'"]([a-f\d]+)', webpage, 'video_id')
|
||||
r'data-(?:player|shareplattrigger)-videoid\s*=\s*[\'"]([a-f\d]+)', webpage, 'video_id')
|
||||
|
||||
formats = []
|
||||
for _ in range(5):
|
||||
@ -377,7 +384,8 @@ def _real_extract(self, url):
|
||||
|
||||
self._sort_formats(formats)
|
||||
title = (get_element_by_id('widget-videotitle', webpage) or
|
||||
clean_html(get_element_by_attribute('class', 'mod-play-tit', webpage)))
|
||||
clean_html(get_element_by_attribute('class', 'mod-play-tit', webpage)) or
|
||||
self._html_search_regex(r'<span[^>]+data-videochanged-title="word"[^>]*>([^<]+)</span>', webpage, 'title'))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
@ -116,13 +116,25 @@ def _add_sub_element(element, name):
|
||||
if not play_path:
|
||||
continue
|
||||
tbr = int_or_none(media_file.get('bitrate'), 1000)
|
||||
formats.append({
|
||||
f = {
|
||||
'format_id': 'rtmp' + ('-%d' % tbr if tbr else ''),
|
||||
'url': rtmp_url,
|
||||
'play_path': play_path,
|
||||
# Providing this swfVfy avoids truncated downloads
|
||||
'player_url': 'http://www.itv.com/mercury/Mercury_VideoPlayer.swf',
|
||||
'page_url': url,
|
||||
'tbr': tbr,
|
||||
'ext': 'flv',
|
||||
}
|
||||
app = self._search_regex(
|
||||
'rtmpe?://[^/]+/(.+)$', rtmp_url, 'app', default=None)
|
||||
if app:
|
||||
f.update({
|
||||
'url': rtmp_url.split('?', 1)[0],
|
||||
'app': app,
|
||||
})
|
||||
else:
|
||||
f['url'] = rtmp_url
|
||||
formats.append(f)
|
||||
|
||||
ios_playlist_url = params.get('data-video-playlist')
|
||||
hmac = params.get('data-video-hmac')
|
||||
@ -172,7 +184,9 @@ def _add_sub_element(element, name):
|
||||
href = ios_base_url + href
|
||||
ext = determine_ext(href)
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(href, video_id, 'mp4', m3u8_id='hls', fatal=False))
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
href, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'url': href,
|
||||
@ -189,7 +203,8 @@ def _add_sub_element(element, name):
|
||||
'ext': 'ttml' if ext == 'xml' else ext,
|
||||
})
|
||||
|
||||
return {
|
||||
info = self._search_json_ld(webpage, video_id, default={})
|
||||
info.update({
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
@ -198,4 +213,5 @@ def _add_sub_element(element, name):
|
||||
'episode_number': int_or_none(xpath_text(playlist, 'EpisodeNumber')),
|
||||
'series': xpath_text(playlist, 'ProgrammeTitle'),
|
||||
'duartion': parse_duration(xpath_text(playlist, 'Duration')),
|
||||
}
|
||||
})
|
||||
return info
|
||||
|
@ -23,7 +23,6 @@
|
||||
str_or_none,
|
||||
url_basename,
|
||||
urshift,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
||||
@ -51,7 +50,7 @@ class LeIE(InfoExtractor):
|
||||
'id': '1415246',
|
||||
'ext': 'mp4',
|
||||
'title': '美人天下01',
|
||||
'description': 'md5:f88573d9d7225ada1359eaf0dbf8bcda',
|
||||
'description': 'md5:28942e650e82ed4fcc8e4de919ee854d',
|
||||
},
|
||||
'params': {
|
||||
'hls_prefer_native': True,
|
||||
@ -69,7 +68,6 @@ class LeIE(InfoExtractor):
|
||||
'params': {
|
||||
'hls_prefer_native': True,
|
||||
},
|
||||
'skip': 'Only available in China',
|
||||
}, {
|
||||
'url': 'http://sports.le.com/video/25737697.html',
|
||||
'only_matching': True,
|
||||
@ -81,7 +79,7 @@ class LeIE(InfoExtractor):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
# ror() and calc_time_key() are reversed from a embedded swf file in KLetvPlayer.swf
|
||||
# ror() and calc_time_key() are reversed from a embedded swf file in LetvPlayer.swf
|
||||
def ror(self, param1, param2):
|
||||
_loc3_ = 0
|
||||
while _loc3_ < param2:
|
||||
@ -90,15 +88,8 @@ def ror(self, param1, param2):
|
||||
return param1
|
||||
|
||||
def calc_time_key(self, param1):
|
||||
_loc2_ = 773625421
|
||||
_loc3_ = self.ror(param1, _loc2_ % 13)
|
||||
_loc3_ = _loc3_ ^ _loc2_
|
||||
_loc3_ = self.ror(_loc3_, _loc2_ % 17)
|
||||
return _loc3_
|
||||
|
||||
# reversed from http://jstatic.letvcdn.com/sdk/player.js
|
||||
def get_mms_key(self, time):
|
||||
return self.ror(time, 8) ^ 185025305
|
||||
_loc2_ = 185025305
|
||||
return self.ror(param1, _loc2_ % 17) ^ _loc2_
|
||||
|
||||
# see M3U8Encryption class in KLetvPlayer.swf
|
||||
@staticmethod
|
||||
@ -122,7 +113,7 @@ def decrypt_m3u8(encrypted_data):
|
||||
|
||||
def _check_errors(self, play_json):
|
||||
# Check for errors
|
||||
playstatus = play_json['playstatus']
|
||||
playstatus = play_json['msgs']['playstatus']
|
||||
if playstatus['status'] == 0:
|
||||
flag = playstatus['flag']
|
||||
if flag == 1:
|
||||
@ -134,59 +125,32 @@ def _real_extract(self, url):
|
||||
media_id = self._match_id(url)
|
||||
page = self._download_webpage(url, media_id)
|
||||
|
||||
play_json_h5 = self._download_json(
|
||||
'http://api.le.com/mms/out/video/playJsonH5',
|
||||
media_id, 'Downloading html5 playJson data', query={
|
||||
'id': media_id,
|
||||
'platid': 3,
|
||||
'splatid': 304,
|
||||
'format': 1,
|
||||
'tkey': self.get_mms_key(int(time.time())),
|
||||
'domain': 'www.le.com',
|
||||
'tss': 'no',
|
||||
},
|
||||
headers=self.geo_verification_headers())
|
||||
self._check_errors(play_json_h5)
|
||||
|
||||
play_json_flash = self._download_json(
|
||||
'http://api.le.com/mms/out/video/playJson',
|
||||
'http://player-pc.le.com/mms/out/video/playJson',
|
||||
media_id, 'Downloading flash playJson data', query={
|
||||
'id': media_id,
|
||||
'platid': 1,
|
||||
'splatid': 101,
|
||||
'format': 1,
|
||||
'source': 1000,
|
||||
'tkey': self.calc_time_key(int(time.time())),
|
||||
'domain': 'www.le.com',
|
||||
'region': 'cn',
|
||||
},
|
||||
headers=self.geo_verification_headers())
|
||||
self._check_errors(play_json_flash)
|
||||
|
||||
def get_h5_urls(media_url, format_id):
|
||||
location = self._download_json(
|
||||
media_url, media_id,
|
||||
'Download JSON metadata for format %s' % format_id, query={
|
||||
'format': 1,
|
||||
'expect': 3,
|
||||
'tss': 'no',
|
||||
})['location']
|
||||
|
||||
return {
|
||||
'http': update_url_query(location, {'tss': 'no'}),
|
||||
'hls': update_url_query(location, {'tss': 'ios'}),
|
||||
}
|
||||
|
||||
def get_flash_urls(media_url, format_id):
|
||||
media_url += '&' + compat_urllib_parse_urlencode({
|
||||
nodes_data = self._download_json(
|
||||
media_url, media_id,
|
||||
'Download JSON metadata for format %s' % format_id,
|
||||
query={
|
||||
'm3v': 1,
|
||||
'format': 1,
|
||||
'expect': 3,
|
||||
'rateid': format_id,
|
||||
'tss': 'ios',
|
||||
})
|
||||
|
||||
nodes_data = self._download_json(
|
||||
media_url, media_id,
|
||||
'Download JSON metadata for format %s' % format_id)
|
||||
|
||||
req = self._request_webpage(
|
||||
nodes_data['nodelist'][0]['location'], media_id,
|
||||
note='Downloading m3u8 information for format %s' % format_id)
|
||||
@ -199,8 +163,7 @@ def get_flash_urls(media_url, format_id):
|
||||
|
||||
extracted_formats = []
|
||||
formats = []
|
||||
for play_json, get_urls in ((play_json_h5, get_h5_urls), (play_json_flash, get_flash_urls)):
|
||||
playurl = play_json['playurl']
|
||||
playurl = play_json_flash['msgs']['playurl']
|
||||
play_domain = playurl['domain'][0]
|
||||
|
||||
for format_id, format_data in playurl.get('dispatch', []).items():
|
||||
@ -209,7 +172,7 @@ def get_flash_urls(media_url, format_id):
|
||||
extracted_formats.append(format_id)
|
||||
|
||||
media_url = play_domain + format_data[0]
|
||||
for protocol, format_url in get_urls(media_url, format_id).items():
|
||||
for protocol, format_url in get_flash_urls(media_url, format_id).items():
|
||||
f = {
|
||||
'url': format_url,
|
||||
'ext': determine_ext(format_data[1]),
|
||||
|
@ -86,7 +86,7 @@ def _real_extract(self, url):
|
||||
formats = self._extract_akamai_formats(
|
||||
'%si/s/public/%s_,%s,.mp4.csmil/master.m3u8' % (streaming_base, path, streaming_path), video_id)
|
||||
m3u8_formats = list(filter(
|
||||
lambda f: f.get('protocol') == 'm3u8_native' and f.get('vcodec') != 'none' and f.get('resolution') != 'multiple',
|
||||
lambda f: f.get('protocol') == 'm3u8_native' and f.get('vcodec') != 'none',
|
||||
formats))
|
||||
if len(m3u8_formats) == len(self._BITRATES):
|
||||
self._sort_formats(m3u8_formats)
|
||||
|
@ -9,6 +9,7 @@
|
||||
determine_ext,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
smuggle_url,
|
||||
unsmuggle_url,
|
||||
ExtractorError,
|
||||
)
|
||||
@ -18,6 +19,42 @@ class LimelightBaseIE(InfoExtractor):
|
||||
_PLAYLIST_SERVICE_URL = 'http://production-ps.lvp.llnw.net/r/PlaylistService/%s/%s/%s'
|
||||
_API_URL = 'http://api.video.limelight.com/rest/organizations/%s/%s/%s/%s.json'
|
||||
|
||||
@classmethod
|
||||
def _extract_urls(cls, webpage, source_url):
|
||||
lm = {
|
||||
'Media': 'media',
|
||||
'Channel': 'channel',
|
||||
'ChannelList': 'channel_list',
|
||||
}
|
||||
entries = []
|
||||
for kind, video_id in re.findall(
|
||||
r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P<id>[a-z0-9]{32})',
|
||||
webpage):
|
||||
entries.append(cls.url_result(
|
||||
smuggle_url(
|
||||
'limelight:%s:%s' % (lm[kind], video_id),
|
||||
{'source_url': source_url}),
|
||||
'Limelight%s' % kind, video_id))
|
||||
for mobj in re.finditer(
|
||||
# As per [1] class attribute should be exactly equal to
|
||||
# LimelightEmbeddedPlayerFlash but numerous examples seen
|
||||
# that don't exactly match it (e.g. [2]).
|
||||
# 1. http://support.3playmedia.com/hc/en-us/articles/227732408-Limelight-Embedding-the-Captions-Plugin-with-the-Limelight-Player-on-Your-Webpage
|
||||
# 2. http://www.sedona.com/FacilitatorTraining2017
|
||||
r'''(?sx)
|
||||
<object[^>]+class=(["\'])(?:(?!\1).)*\bLimelightEmbeddedPlayerFlash\b(?:(?!\1).)*\1[^>]*>.*?
|
||||
<param[^>]+
|
||||
name=(["\'])flashVars\2[^>]+
|
||||
value=(["\'])(?:(?!\3).)*(?P<kind>media|channel(?:List)?)Id=(?P<id>[a-z0-9]{32})
|
||||
''', webpage):
|
||||
kind, video_id = mobj.group('kind'), mobj.group('id')
|
||||
entries.append(cls.url_result(
|
||||
smuggle_url(
|
||||
'limelight:%s:%s' % (kind, video_id),
|
||||
{'source_url': source_url}),
|
||||
'Limelight%s' % kind.capitalize(), video_id))
|
||||
return entries
|
||||
|
||||
def _call_playlist_service(self, item_id, method, fatal=True, referer=None):
|
||||
headers = {}
|
||||
if referer:
|
||||
|
97
youtube_dl/extractor/noovo.py
Normal file
97
youtube_dl/extractor/noovo.py
Normal file
@ -0,0 +1,97 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .brightcove import BrightcoveNewIE
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
smuggle_url,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
class NoovoIE(InfoExtractor):
    # Handles noovo.ca video pages. The page metadata comes from the
    # api.noovo.ca JSON endpoint; the result is handed over to the
    # Brightcove extractor as a 'url_transparent' entry.
    _VALID_URL = r'https?://(?:[^/]+\.)?noovo\.ca/videos/(?P<id>[^/]+/[^/?#&]+)'
    _TESTS = [
        {
            # clip
            'url': 'http://noovo.ca/videos/rpm-plus/chrysler-imperial',
            'info_dict': {
                'id': '5386045029001',
                'ext': 'mp4',
                'title': 'Chrysler Imperial',
                'description': 'md5:de3c898d1eb810f3e6243e08c8b4a056',
                'timestamp': 1491399228,
                'upload_date': '20170405',
                'uploader_id': '618566855001',
                'creator': 'vtele',
                'view_count': int,
                'series': 'RPM+',
            },
            'params': {
                'skip_download': True,
            },
        },
        {
            # episode
            'url': 'http://noovo.ca/videos/l-amour-est-dans-le-pre/episode-13-8',
            'info_dict': {
                'id': '5395865725001',
                'title': 'Épisode 13 : Les retrouvailles',
                'description': 'md5:336d5ebc5436534e61d16e63ddfca327',
                'ext': 'mp4',
                'timestamp': 1492019320,
                'upload_date': '20170412',
                'uploader_id': '618566855001',
                'creator': 'vtele',
                'view_count': int,
                'series': "L'amour est dans le pré",
                'season_number': 5,
                'episode': 'Épisode 13',
                'episode_number': 13,
            },
            'params': {
                'skip_download': True,
            },
        },
    ]
    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/618566855001/default_default/index.html?videoId=%s'

    def _real_extract(self, url):
        """Build a url_transparent result pointing at the Brightcove player.

        The id captured from the URL (two path components after /videos/)
        is used both as the API path suffix and as the download label.
        """
        page_path = self._match_id(url)

        api_url = 'http://api.noovo.ca/api/v1/pages/single-episode/%s' % page_path
        data = self._download_json(api_url, page_path)['data']

        # First entry of data['contents'], or None when it cannot be read.
        content = try_get(data, lambda x: x['contents'][0])

        # Top-level id wins; otherwise the id of the first content entry.
        brightcove_id = data.get('brightcoveId') or content['brightcoveId']

        title_getters = (
            lambda x: x['show']['title'],
            lambda x: x['season']['show']['title'],
        )
        series = try_get(data, title_getters, compat_str)

        # The OpenGraph title is used as the episode name only when the
        # page declares itself a 'video.episode'.
        og = data.get('og')
        is_episode = isinstance(og, dict) and og.get('type') == 'video.episode'
        episode = og.get('title') if is_episode else None

        # Per-video fields are read from the content entry when there is
        # one, and from the top-level data otherwise.
        video = content or data

        season_number = int_or_none(
            try_get(data, lambda x: x['season']['seasonNumber']))
        player_url = smuggle_url(
            self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id,
            {'geo_countries': ['CA']})

        info = {
            '_type': 'url_transparent',
            'ie_key': BrightcoveNewIE.ie_key(),
            'url': player_url,
            'id': brightcove_id,
        }
        info['title'] = video.get('title')
        info['creator'] = video.get('source')
        info['view_count'] = int_or_none(video.get('viewsCount'))
        info['series'] = series
        info['season_number'] = season_number
        info['episode'] = episode
        info['episode_number'] = int_or_none(data.get('episodeNumber'))
        return info
|
@ -28,7 +28,7 @@ def _extract_url_result(self, post):
|
||||
bc_url = BrightcoveLegacyIE._extract_brightcove_url(player_code)
|
||||
if bc_url:
|
||||
return self.url_result(bc_url, BrightcoveLegacyIE.ie_key())
|
||||
bc_url = BrightcoveNewIE._extract_url(player_code)
|
||||
bc_url = BrightcoveNewIE._extract_url(self, player_code)
|
||||
if bc_url:
|
||||
return self.url_result(bc_url, BrightcoveNewIE.ie_key())
|
||||
raise ExtractorError('Could not find player definition')
|
||||
|
@ -3,6 +3,7 @@
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_etree_fromstring,
|
||||
compat_parse_qs,
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urllib_parse_urlparse,
|
||||
@ -37,7 +38,7 @@ class OdnoklassnikiIE(InfoExtractor):
|
||||
}, {
|
||||
# metadataUrl
|
||||
'url': 'http://ok.ru/video/63567059965189-0?fromTime=5',
|
||||
'md5': '9676cf86eff5391d35dea675d224e131',
|
||||
'md5': '6ff470ea2dd51d5d18c295a355b0b6bc',
|
||||
'info_dict': {
|
||||
'id': '63567059965189-0',
|
||||
'ext': 'mp4',
|
||||
@ -53,7 +54,7 @@ class OdnoklassnikiIE(InfoExtractor):
|
||||
}, {
|
||||
# YouTube embed (metadataUrl, provider == USER_YOUTUBE)
|
||||
'url': 'http://ok.ru/video/64211978996595-1',
|
||||
'md5': '5d7475d428845cd2e13bae6f1a992278',
|
||||
'md5': '2f206894ffb5dbfcce2c5a14b909eea5',
|
||||
'info_dict': {
|
||||
'id': '64211978996595-1',
|
||||
'ext': 'mp4',
|
||||
@ -61,8 +62,8 @@ class OdnoklassnikiIE(InfoExtractor):
|
||||
'description': 'md5:848eb8b85e5e3471a3a803dae1343ed0',
|
||||
'duration': 440,
|
||||
'upload_date': '20150826',
|
||||
'uploader_id': '750099571',
|
||||
'uploader': 'Алина П',
|
||||
'uploader_id': 'tvroscosmos',
|
||||
'uploader': 'Телестудия Роскосмоса',
|
||||
'age_limit': 0,
|
||||
},
|
||||
}, {
|
||||
@ -81,6 +82,7 @@ class OdnoklassnikiIE(InfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Video has not been found',
|
||||
}, {
|
||||
'url': 'http://ok.ru/web-api/video/moviePlayer/20079905452',
|
||||
'only_matching': True,
|
||||
@ -176,14 +178,32 @@ def _real_extract(self, url):
|
||||
})
|
||||
return info
|
||||
|
||||
quality = qualities(('mobile', 'lowest', 'low', 'sd', 'hd', 'full'))
|
||||
quality = qualities(('4', '0', '1', '2', '3', '5'))
|
||||
|
||||
formats = [{
|
||||
'url': f['url'],
|
||||
'ext': 'mp4',
|
||||
'format_id': f['name'],
|
||||
'quality': quality(f['name']),
|
||||
} for f in metadata['videos']]
|
||||
|
||||
m3u8_url = metadata.get('hlsManifestUrl')
|
||||
if m3u8_url:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
|
||||
dash_manifest = metadata.get('metadataEmbedded')
|
||||
if dash_manifest:
|
||||
formats.extend(self._parse_mpd_formats(
|
||||
compat_etree_fromstring(dash_manifest), 'mpd'))
|
||||
|
||||
for fmt in formats:
|
||||
fmt_type = self._search_regex(
|
||||
r'\btype[/=](\d)', fmt['url'],
|
||||
'format type', default=None)
|
||||
if fmt_type:
|
||||
fmt['quality'] = quality(fmt_type)
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
info['formats'] = formats
|
||||
|
@ -8,6 +8,7 @@
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
js_to_json,
|
||||
strip_jsonp,
|
||||
strip_or_none,
|
||||
@ -464,6 +465,7 @@ def extract_redirect_urls(info):
|
||||
redirects.append(redirect)
|
||||
redirect_urls.add(redirect_url)
|
||||
|
||||
chapters = []
|
||||
# Player pages may also serve different qualities
|
||||
for page in ('widget/partnerplayer', 'portalplayer'):
|
||||
player = self._download_webpage(
|
||||
@ -479,6 +481,20 @@ def extract_redirect_urls(info):
|
||||
extract_redirect_urls(video_info)
|
||||
if not info:
|
||||
info = video_info
|
||||
if not chapters:
|
||||
for chapter_data in re.findall(r'(?s)chapters\.push\(({.*?})\)', player):
|
||||
chapter = self._parse_json(chapter_data, video_id, js_to_json, fatal=False)
|
||||
if not chapter:
|
||||
continue
|
||||
start_time = float_or_none(chapter.get('start_time'), 1000)
|
||||
duration = float_or_none(chapter.get('duration'), 1000)
|
||||
if start_time is None or duration is None:
|
||||
continue
|
||||
chapters.append({
|
||||
'start_time': start_time,
|
||||
'end_time': start_time + duration,
|
||||
'title': chapter.get('title'),
|
||||
})
|
||||
|
||||
formats = []
|
||||
http_url = None
|
||||
@ -515,7 +531,7 @@ def extract_redirect_urls(info):
|
||||
http_url = format_url
|
||||
self._remove_duplicate_formats(formats)
|
||||
m3u8_formats = list(filter(
|
||||
lambda f: f.get('protocol') == 'm3u8' and f.get('vcodec') != 'none' and f.get('resolution') != 'multiple',
|
||||
lambda f: f.get('protocol') == 'm3u8' and f.get('vcodec') != 'none',
|
||||
formats))
|
||||
if http_url:
|
||||
for m3u8_format in m3u8_formats:
|
||||
@ -588,4 +604,5 @@ def extract_redirect_urls(info):
|
||||
'upload_date': upload_date,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'chapters': chapters,
|
||||
}
|
||||
|
@ -1,10 +1,6 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from ..compat import (
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urllib_parse_urlencode,
|
||||
)
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
parse_duration,
|
||||
@ -19,7 +15,7 @@ class Porn91IE(InfoExtractor):
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://91porn.com/view_video.php?viewkey=7e42283b4f5ab36da134',
|
||||
'md5': '6df8f6d028bc8b14f5dbd73af742fb20',
|
||||
'md5': '7fcdb5349354f40d41689bd0fa8db05a',
|
||||
'info_dict': {
|
||||
'id': '7e42283b4f5ab36da134',
|
||||
'title': '18岁大一漂亮学妹,水嫩性感,再爽一次!',
|
||||
@ -43,24 +39,7 @@ def _real_extract(self, url):
|
||||
r'<div id="viewvideo-title">([^<]+)</div>', webpage, 'title')
|
||||
title = title.replace('\n', '')
|
||||
|
||||
# get real url
|
||||
file_id = self._search_regex(
|
||||
r'so.addVariable\(\'file\',\'(\d+)\'', webpage, 'file id')
|
||||
sec_code = self._search_regex(
|
||||
r'so.addVariable\(\'seccode\',\'([^\']+)\'', webpage, 'sec code')
|
||||
max_vid = self._search_regex(
|
||||
r'so.addVariable\(\'max_vid\',\'(\d+)\'', webpage, 'max vid')
|
||||
url_params = compat_urllib_parse_urlencode({
|
||||
'VID': file_id,
|
||||
'mp4': '1',
|
||||
'seccode': sec_code,
|
||||
'max_vid': max_vid,
|
||||
})
|
||||
info_cn = self._download_webpage(
|
||||
'http://91porn.com/getfile.php?' + url_params, video_id,
|
||||
'Downloading real video url')
|
||||
video_url = compat_urllib_parse_unquote(self._search_regex(
|
||||
r'file=([^&]+)&', info_cn, 'url'))
|
||||
info_dict = self._parse_html5_media_entries(url, webpage, video_id)[0]
|
||||
|
||||
duration = parse_duration(self._search_regex(
|
||||
r'时长:\s*</span>\s*(\d+:\d+)', webpage, 'duration', fatal=False))
|
||||
@ -68,11 +47,12 @@ def _real_extract(self, url):
|
||||
comment_count = int_or_none(self._search_regex(
|
||||
r'留言:\s*</span>\s*(\d+)', webpage, 'comment count', fatal=False))
|
||||
|
||||
return {
|
||||
info_dict.update({
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'url': video_url,
|
||||
'duration': duration,
|
||||
'comment_count': comment_count,
|
||||
'age_limit': self._rta_search(webpage),
|
||||
}
|
||||
})
|
||||
|
||||
return info_dict
|
||||
|
@ -62,8 +62,7 @@ def _real_extract(self, url):
|
||||
# m3u8 format always matches the http format, let's copy metadata from
|
||||
# one to another
|
||||
m3u8_formats = list(filter(
|
||||
lambda f: f.get('vcodec') != 'none' and f.get('resolution') != 'multiple',
|
||||
formats))
|
||||
lambda f: f.get('vcodec') != 'none', formats))
|
||||
if len(m3u8_formats) == 1:
|
||||
f_copy = m3u8_formats[0].copy()
|
||||
f_copy.update(f)
|
||||
|
@ -12,7 +12,7 @@
|
||||
|
||||
|
||||
class StreamableIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://streamable\.com/(?:e/)?(?P<id>\w+)'
|
||||
_VALID_URL = r'https?://streamable\.com/(?:[es]/)?(?P<id>\w+)'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'https://streamable.com/dnd1',
|
||||
@ -47,6 +47,10 @@ class StreamableIE(InfoExtractor):
|
||||
{
|
||||
'url': 'https://streamable.com/e/dnd1',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'https://streamable.com/s/okkqk/drxjds',
|
||||
'only_matching': True,
|
||||
}
|
||||
]
|
||||
|
||||
|
64
youtube_dl/extractor/streamango.py
Normal file
64
youtube_dl/extractor/streamango.py
Normal file
@ -0,0 +1,64 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
)
|
||||
|
||||
|
||||
class StreamangoIE(InfoExtractor):
    # Handles streamango.com /f/ and /embed/ pages. Formats are read from
    # brace-delimited JS object literals on the page that contain a
    # `src:` key.
    _VALID_URL = r'https?://(?:www\.)?streamango\.com/(?:f|embed)/(?P<id>[^/?#&]+)'
    _TESTS = [
        {
            'url': 'https://streamango.com/f/clapasobsptpkdfe/20170315_150006_mp4',
            'md5': 'e992787515a182f55e38fc97588d802a',
            'info_dict': {
                'id': 'clapasobsptpkdfe',
                'ext': 'mp4',
                'title': '20170315_150006.mp4',
            },
        },
        {
            'url': 'https://streamango.com/embed/clapasobsptpkdfe/20170315_150006_mp4',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Download the page and return its id, title and format list."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # Resolved before any manifest download, as in the original flow.
        title = self._og_search_title(webpage)

        formats = []
        candidates = re.findall(r'({[^}]*\bsrc\s*:\s*[^}]*})', webpage)
        for raw_object in candidates:
            # Non-fatal parse: an unparsable literal is simply skipped.
            source = self._parse_json(
                raw_object, video_id,
                transform_source=js_to_json, fatal=False) or {}
            media_url = source.get('src')
            if not media_url:
                continue

            ext = determine_ext(media_url, default_ext=None)
            is_dash = (
                source.get('type') == 'application/dash+xml' or ext == 'mpd')
            if not is_dash:
                # Direct media URL: take the dimensions and bitrate from
                # the literal, defaulting the extension to mp4.
                formats.append({
                    'url': media_url,
                    'ext': ext or 'mp4',
                    'width': int_or_none(source.get('width')),
                    'height': int_or_none(source.get('height')),
                    'tbr': int_or_none(source.get('bitrate')),
                })
                continue

            dash_formats = self._extract_mpd_formats(
                media_url, video_id, mpd_id='dash', fatal=False)
            formats.extend(dash_formats)

        self._sort_formats(formats)

        return {
            'id': video_id,
            'url': url,
            'title': title,
            'formats': formats,
        }
|
@ -210,7 +210,7 @@ def _talk_info(self, url, video_name):
|
||||
resources.get('stream'), video_name, 'mp4', m3u8_id=format_id, fatal=False))
|
||||
|
||||
m3u8_formats = list(filter(
|
||||
lambda f: f.get('protocol') == 'm3u8' and f.get('vcodec') != 'none' and f.get('resolution') != 'multiple',
|
||||
lambda f: f.get('protocol') == 'm3u8' and f.get('vcodec') != 'none',
|
||||
formats))
|
||||
if http_url:
|
||||
for m3u8_format in m3u8_formats:
|
||||
|
@ -150,8 +150,7 @@ def _real_extract(self, url):
|
||||
'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)
|
||||
self._sort_formats(m3u8_formats)
|
||||
m3u8_formats = list(filter(
|
||||
lambda f: f.get('vcodec') != 'none' and f.get('resolution') != 'multiple',
|
||||
m3u8_formats))
|
||||
lambda f: f.get('vcodec') != 'none', m3u8_formats))
|
||||
formats.extend(m3u8_formats)
|
||||
for i, m3u8_format in enumerate(m3u8_formats, 2):
|
||||
http_url = '%s-%d.mp4' % (video_url_base, i)
|
||||
|
@ -2,9 +2,13 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_HTTPError
|
||||
from ..compat import (
|
||||
compat_HTTPError,
|
||||
compat_str,
|
||||
)
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
try_get,
|
||||
urlencode_postdata,
|
||||
ExtractorError,
|
||||
)
|
||||
@ -34,25 +38,32 @@ def _real_extract(self, url):
|
||||
webpage, 'channel element'))
|
||||
title = current_channel['data-name']
|
||||
|
||||
resource_id = self._search_regex(
|
||||
r'resourceId\s*=\s*"(\d+)"', webpage, 'resource id')
|
||||
platform = self._search_regex(
|
||||
r'platform\s*=\s*"([^"]+)"', webpage, 'platform')
|
||||
resource_id = current_channel['data-id']
|
||||
|
||||
token = self._search_regex(
|
||||
r'token\s*=\s*"([^"]+)"', webpage, 'token', default='null')
|
||||
validate = self._search_regex(
|
||||
r'validate\s*=\s*"([^"]+)"', webpage, 'validate', default='null')
|
||||
r'data-token=(["\'])(?P<token>(?!\1).+)\1', webpage,
|
||||
'token', group='token')
|
||||
|
||||
context = self._download_json(
|
||||
'https://tvplayer.com/watch/context', display_id,
|
||||
'Downloading JSON context', query={
|
||||
'resource': resource_id,
|
||||
'nonce': token,
|
||||
})
|
||||
|
||||
validate = context['validate']
|
||||
platform = try_get(
|
||||
context, lambda x: x['platform']['key'], compat_str) or 'firefox'
|
||||
|
||||
try:
|
||||
response = self._download_json(
|
||||
'http://api.tvplayer.com/api/v2/stream/live',
|
||||
resource_id, headers={
|
||||
display_id, 'Downloading JSON stream', headers={
|
||||
'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
|
||||
}, data=urlencode_postdata({
|
||||
'id': resource_id,
|
||||
'service': 1,
|
||||
'platform': platform,
|
||||
'id': resource_id,
|
||||
'token': token,
|
||||
'validate': validate,
|
||||
}))['tvplayer']['response']
|
||||
except ExtractorError as e:
|
||||
@ -63,7 +74,7 @@ def _real_extract(self, url):
|
||||
'%s said: %s' % (self.IE_NAME, response['error']), expected=True)
|
||||
raise
|
||||
|
||||
formats = self._extract_m3u8_formats(response['stream'], resource_id, 'mp4')
|
||||
formats = self._extract_m3u8_formats(response['stream'], display_id, 'mp4')
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
|
@ -1,6 +1,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
@ -11,7 +12,6 @@
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
sanitized_Request,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
@ -154,19 +154,24 @@ class VevoIE(VevoBaseIE):
|
||||
}
|
||||
|
||||
def _initialize_api(self, video_id):
|
||||
req = sanitized_Request(
|
||||
'http://www.vevo.com/auth', data=b'')
|
||||
webpage = self._download_webpage(
|
||||
req, None,
|
||||
'https://accounts.vevo.com/token', None,
|
||||
note='Retrieving oauth token',
|
||||
errnote='Unable to retrieve oauth token')
|
||||
errnote='Unable to retrieve oauth token',
|
||||
data=json.dumps({
|
||||
'client_id': 'SPupX1tvqFEopQ1YS6SS',
|
||||
'grant_type': 'urn:vevo:params:oauth:grant-type:anonymous',
|
||||
}).encode('utf-8'),
|
||||
headers={
|
||||
'Content-Type': 'application/json',
|
||||
})
|
||||
|
||||
if re.search(r'(?i)THIS PAGE IS CURRENTLY UNAVAILABLE IN YOUR REGION', webpage):
|
||||
self.raise_geo_restricted(
|
||||
'%s said: This page is currently unavailable in your region' % self.IE_NAME)
|
||||
|
||||
auth_info = self._parse_json(webpage, video_id)
|
||||
self._api_url_template = self.http_scheme() + '//apiv2.vevo.com/%s?token=' + auth_info['access_token']
|
||||
self._api_url_template = self.http_scheme() + '//apiv2.vevo.com/%s?token=' + auth_info['legacy_token']
|
||||
|
||||
def _call_api(self, path, *args, **kwargs):
|
||||
try:
|
||||
|
@ -1,7 +1,6 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import random
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
@ -11,6 +10,7 @@
|
||||
float_or_none,
|
||||
parse_age_limit,
|
||||
qualities,
|
||||
random_birthday,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
urljoin,
|
||||
@ -47,13 +47,10 @@ def _extract_urls(webpage):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
query = random_birthday('birth_year', 'birth_month', 'birth_day')
|
||||
video = self._download_json(
|
||||
'https://public-api.wordpress.com/rest/v1.1/videos/%s' % video_id,
|
||||
video_id, query={
|
||||
'birth_month': random.randint(1, 12),
|
||||
'birth_day': random.randint(1, 31),
|
||||
'birth_year': random.randint(1950, 1995),
|
||||
})
|
||||
video_id, query=query)
|
||||
|
||||
title = video['title']
|
||||
|
||||
|
@ -49,8 +49,11 @@ def _real_extract(self, url):
|
||||
thumbnail = clip.get('image')
|
||||
|
||||
m3u8_url = m3u8_url or self._search_regex(
|
||||
r'data(?:-vjs)?-clip-hls-url=(["\'])(?P<url>.+?)\1', webpage, 'hls url')
|
||||
formats = self._extract_m3u8_formats(m3u8_url, display_id, 'mp4', entry_protocol='m3u8_native')
|
||||
r'data(?:-vjs)?-clip-hls-url=(["\'])(?P<url>(?!\1).+)\1',
|
||||
webpage, 'hls url')
|
||||
formats = self._extract_m3u8_formats(
|
||||
m3u8_url, display_id, 'mp4', entry_protocol='m3u8_native')
|
||||
self._sort_formats(formats)
|
||||
|
||||
duration = int_or_none(duration or self._search_regex(
|
||||
r'data-video-duration=(["\'])(?P<duartion>\d+)\1', webpage, 'duration'))
|
||||
|
@ -42,14 +42,15 @@ def _real_extract(self, url):
|
||||
title = self._html_search_regex(
|
||||
r'(?s)<h2 class="video-title">(.*?)</h2>', webpage, 'title')
|
||||
|
||||
packed_codes = [mobj.group(0) for mobj in re.finditer(
|
||||
PACKED_CODES_RE, webpage)]
|
||||
for num, pc in enumerate(packed_codes, 1):
|
||||
code = decode_packed_codes(pc).replace('\\\'', '\'')
|
||||
codes = [webpage]
|
||||
codes.extend([
|
||||
decode_packed_codes(mobj.group(0)).replace('\\\'', '\'')
|
||||
for mobj in re.finditer(PACKED_CODES_RE, webpage)])
|
||||
for num, code in enumerate(codes, 1):
|
||||
jwplayer_data = self._parse_json(
|
||||
self._search_regex(
|
||||
r'setup\(([^)]+)\)', code, 'jwplayer data',
|
||||
default=NO_DEFAULT if num == len(packed_codes) else '{}'),
|
||||
default=NO_DEFAULT if num == len(codes) else '{}'),
|
||||
video_id, transform_source=js_to_json)
|
||||
if jwplayer_data:
|
||||
break
|
||||
|
@ -176,8 +176,7 @@ def concat(suffix, sep='-'):
|
||||
if m3u8_formats:
|
||||
self._sort_formats(m3u8_formats)
|
||||
m3u8_formats = list(filter(
|
||||
lambda f: f.get('vcodec') != 'none' and f.get('resolution') != 'multiple',
|
||||
m3u8_formats))
|
||||
lambda f: f.get('vcodec') != 'none', m3u8_formats))
|
||||
if len(qualities) == len(m3u8_formats):
|
||||
for q, m3u8_format in zip(qualities, m3u8_formats):
|
||||
f = m3u8_format.copy()
|
||||
|
@ -13,6 +13,7 @@
|
||||
class WashingtonPostIE(InfoExtractor):
|
||||
IE_NAME = 'washingtonpost'
|
||||
_VALID_URL = r'(?:washingtonpost:|https?://(?:www\.)?washingtonpost\.com/video/(?:[^/]+/)*)(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
|
||||
_EMBED_URL = r'https?://(?:www\.)?washingtonpost\.com/video/c/embed/[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}'
|
||||
_TEST = {
|
||||
'url': 'https://www.washingtonpost.com/video/c/video/480ba4ee-1ec7-11e6-82c2-a7dcb313287d',
|
||||
'md5': '6f537e1334b714eb15f9563bd4b9cdfa',
|
||||
@ -27,6 +28,11 @@ class WashingtonPostIE(InfoExtractor):
|
||||
},
|
||||
}
|
||||
|
||||
@classmethod
def _extract_urls(cls, webpage):
    """Return all Washington Post embed URLs used as an <iframe> src in webpage."""
    # The embed URL pattern is wrapped in the single capturing group of this
    # regex, so group 1 of every match is the embed URL itself.
    iframe_re = r'<iframe[^>]+\bsrc=["\'](%s)' % cls._EMBED_URL
    return [mobj.group(1) for mobj in re.finditer(iframe_re, webpage)]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
video_data = self._download_json(
|
||||
|
@ -10,12 +10,14 @@
|
||||
|
||||
|
||||
class WSJIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)https?://
|
||||
_VALID_URL = r'''(?x)
|
||||
(?:
|
||||
video-api\.wsj\.com/api-video/player/iframe\.html\?guid=|
|
||||
(?:www\.)?wsj\.com/video/[^/]+/
|
||||
https?://video-api\.wsj\.com/api-video/player/iframe\.html\?.*?\bguid=|
|
||||
https?://(?:www\.)?wsj\.com/video/[^/]+/|
|
||||
wsj:
|
||||
)
|
||||
(?P<id>[a-zA-Z0-9-]+)'''
|
||||
(?P<id>[a-fA-F0-9-]{36})
|
||||
'''
|
||||
IE_DESC = 'Wall Street Journal'
|
||||
_TESTS = [{
|
||||
'url': 'http://video-api.wsj.com/api-video/player/iframe.html?guid=1BD01A4C-BFE8-40A5-A42F-8A8AF9898B1A',
|
||||
@ -38,12 +40,17 @@ class WSJIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
api_url = (
|
||||
'http://video-api.wsj.com/api-video/find_all_videos.asp?'
|
||||
'type=guid&count=1&query=%s&fields=type,hls,videoMP4List,'
|
||||
'thumbnailList,author,description,name,duration,videoURL,'
|
||||
'titletag,formattedCreationDate,keywords,editor' % video_id)
|
||||
info = self._download_json(api_url, video_id)['items'][0]
|
||||
info = self._download_json(
|
||||
'http://video-api.wsj.com/api-video/find_all_videos.asp', video_id,
|
||||
query={
|
||||
'type': 'guid',
|
||||
'count': 1,
|
||||
'query': video_id,
|
||||
'fields': ','.join((
|
||||
'type', 'hls', 'videoMP4List', 'thumbnailList', 'author',
|
||||
'description', 'name', 'duration', 'videoURL', 'titletag',
|
||||
'formattedCreationDate', 'keywords', 'editor')),
|
||||
})['items'][0]
|
||||
title = info.get('name', info.get('titletag'))
|
||||
|
||||
formats = []
|
||||
@ -87,3 +94,24 @@ def _real_extract(self, url):
|
||||
'title': title,
|
||||
'categories': info.get('keywords'),
|
||||
}
|
||||
|
||||
|
||||
class WSJArticleIE(InfoExtractor):
    # Article pages under wsj.com/articles/; the trailing slug is the id.
    _VALID_URL = r'(?i)https?://(?:www\.)?wsj\.com/articles/(?P<id>[^/?#&]+)'
    _TEST = {
        'url': 'https://www.wsj.com/articles/dont-like-china-no-pandas-for-you-1490366939?',
        'info_dict': {
            'id': '4B13FA62-1D8C-45DB-8EA1-4105CB20B362',
            'ext': 'mp4',
            'upload_date': '20170221',
            'uploader_id': 'ralcaraz',
            'title': 'Bao Bao the Panda Leaves for China',
        }
    }

    def _real_extract(self, url):
        """Locate the video id in the article page and hand off to WSJIE."""
        slug = self._match_id(url)
        page = self._download_webpage(url, slug)
        # The id is a 36-character hex-and-dash value found in a data-src
        # attribute; it is passed on through the internal "wsj:" URL scheme.
        guid = self._search_regex(
            r'data-src=["\']([a-fA-F0-9-]{36})', page, 'video id')
        internal_url = 'wsj:%s' % guid
        return self.url_result(internal_url, WSJIE.ie_key(), guid)
|
||||
|
@ -17,24 +17,24 @@
|
||||
|
||||
class XFileShareIE(InfoExtractor):
|
||||
_SITES = (
|
||||
('daclips.in', 'DaClips'),
|
||||
('filehoot.com', 'FileHoot'),
|
||||
('gorillavid.in', 'GorillaVid'),
|
||||
('movpod.in', 'MovPod'),
|
||||
('powerwatch.pw', 'PowerWatch'),
|
||||
('rapidvideo.ws', 'Rapidvideo.ws'),
|
||||
('thevideobee.to', 'TheVideoBee'),
|
||||
('vidto.me', 'Vidto'),
|
||||
('streamin.to', 'Streamin.To'),
|
||||
('xvidstage.com', 'XVIDSTAGE'),
|
||||
('vidabc.com', 'Vid ABC'),
|
||||
('vidbom.com', 'VidBom'),
|
||||
('vidlo.us', 'vidlo'),
|
||||
(r'daclips\.(?:in|com)', 'DaClips'),
|
||||
(r'filehoot\.com', 'FileHoot'),
|
||||
(r'gorillavid\.(?:in|com)', 'GorillaVid'),
|
||||
(r'movpod\.in', 'MovPod'),
|
||||
(r'powerwatch\.pw', 'PowerWatch'),
|
||||
(r'rapidvideo\.ws', 'Rapidvideo.ws'),
|
||||
(r'thevideobee\.to', 'TheVideoBee'),
|
||||
(r'vidto\.me', 'Vidto'),
|
||||
(r'streamin\.to', 'Streamin.To'),
|
||||
(r'xvidstage\.com', 'XVIDSTAGE'),
|
||||
(r'vidabc\.com', 'Vid ABC'),
|
||||
(r'vidbom\.com', 'VidBom'),
|
||||
(r'vidlo\.us', 'vidlo'),
|
||||
)
|
||||
|
||||
IE_DESC = 'XFileShare based sites: %s' % ', '.join(list(zip(*_SITES))[1])
|
||||
_VALID_URL = (r'https?://(?P<host>(?:www\.)?(?:%s))/(?:embed-)?(?P<id>[0-9a-zA-Z]+)'
|
||||
% '|'.join(re.escape(site) for site in list(zip(*_SITES))[0]))
|
||||
% '|'.join(site for site in list(zip(*_SITES))[0]))
|
||||
|
||||
_FILE_NOT_FOUND_REGEXES = (
|
||||
r'>(?:404 - )?File Not Found<',
|
||||
|
@ -6,6 +6,7 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
orderedSet,
|
||||
parse_duration,
|
||||
sanitized_Request,
|
||||
@ -37,6 +38,22 @@ class XTubeIE(InfoExtractor):
|
||||
'comment_count': int,
|
||||
'age_limit': 18,
|
||||
}
|
||||
}, {
|
||||
# FLV videos with duplicated formats
|
||||
'url': 'http://www.xtube.com/video-watch/A-Super-Run-Part-1-YT-9299752',
|
||||
'md5': 'a406963eb349dd43692ec54631efd88b',
|
||||
'info_dict': {
|
||||
'id': '9299752',
|
||||
'display_id': 'A-Super-Run-Part-1-YT',
|
||||
'ext': 'flv',
|
||||
'title': 'A Super Run - Part 1 (YT)',
|
||||
'description': 'md5:ca0d47afff4a9b2942e4b41aa970fd93',
|
||||
'uploader': 'tshirtguy59',
|
||||
'duration': 579,
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'age_limit': 18,
|
||||
},
|
||||
}, {
|
||||
# new URL schema
|
||||
'url': 'http://www.xtube.com/video-watch/strange-erotica-625837',
|
||||
@ -68,8 +85,9 @@ def _real_extract(self, url):
|
||||
})
|
||||
|
||||
sources = self._parse_json(self._search_regex(
|
||||
r'(["\'])sources\1\s*:\s*(?P<sources>{.+?}),',
|
||||
webpage, 'sources', group='sources'), video_id)
|
||||
r'(["\'])?sources\1?\s*:\s*(?P<sources>{.+?}),',
|
||||
webpage, 'sources', group='sources'), video_id,
|
||||
transform_source=js_to_json)
|
||||
|
||||
formats = []
|
||||
for format_id, format_url in sources.items():
|
||||
@ -78,6 +96,7 @@ def _real_extract(self, url):
|
||||
'format_id': format_id,
|
||||
'height': int_or_none(format_id),
|
||||
})
|
||||
self._remove_duplicate_formats(formats)
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = self._search_regex(
|
||||
|
@ -6,8 +6,10 @@
|
||||
from ..compat import compat_urllib_parse_unquote
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
)
|
||||
|
||||
|
||||
@ -20,6 +22,7 @@ class XVideosIE(InfoExtractor):
|
||||
'id': '4588838',
|
||||
'ext': 'mp4',
|
||||
'title': 'Biker Takes his Girl',
|
||||
'duration': 108,
|
||||
'age_limit': 18,
|
||||
}
|
||||
}
|
||||
@ -36,6 +39,11 @@ def _real_extract(self, url):
|
||||
r'<title>(.*?)\s+-\s+XVID', webpage, 'title')
|
||||
video_thumbnail = self._search_regex(
|
||||
r'url_bigthumb=(.+?)&', webpage, 'thumbnail', fatal=False)
|
||||
video_duration = int_or_none(self._og_search_property(
|
||||
'duration', webpage, default=None)) or parse_duration(
|
||||
self._search_regex(
|
||||
r'<span[^>]+class=["\']duration["\'][^>]*>.*?(\d[^<]+)',
|
||||
webpage, 'duration', fatal=False))
|
||||
|
||||
formats = []
|
||||
|
||||
@ -67,6 +75,7 @@ def _real_extract(self, url):
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'title': video_title,
|
||||
'duration': video_duration,
|
||||
'thumbnail': video_thumbnail,
|
||||
'age_limit': 18,
|
||||
}
|
||||
|
@ -258,7 +258,7 @@ def _real_extract(self, url):
|
||||
return self.url_result(bc_url, BrightcoveLegacyIE.ie_key())
|
||||
|
||||
# Look for Brightcove New Studio embeds
|
||||
bc_url = BrightcoveNewIE._extract_url(webpage)
|
||||
bc_url = BrightcoveNewIE._extract_url(self, webpage)
|
||||
if bc_url:
|
||||
return self.url_result(bc_url, BrightcoveNewIE.ie_key())
|
||||
|
||||
|
@ -234,7 +234,8 @@ def _real_extract(self, url):
|
||||
'overembed': 'false',
|
||||
})['playlist']
|
||||
|
||||
tracks, track_ids = playlist['tracks'], map(compat_str, playlist['trackIds'])
|
||||
tracks = playlist['tracks']
|
||||
track_ids = [compat_str(track_id) for track_id in playlist['trackIds']]
|
||||
|
||||
# tracks dictionary shipped with playlist.jsx API is limited to 150 tracks,
|
||||
# missing tracks should be retrieved manually.
|
||||
|
@ -963,7 +963,7 @@ def _signature_cache_id(self, example_sig):
|
||||
|
||||
def _extract_signature_function(self, video_id, player_url, example_sig):
|
||||
id_m = re.match(
|
||||
r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|/base)?\.(?P<ext>[a-z]+)$',
|
||||
r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|(?:/[a-z]{2}_[A-Z]{2})?/base)?\.(?P<ext>[a-z]+)$',
|
||||
player_url)
|
||||
if not id_m:
|
||||
raise ExtractorError('Cannot identify player %r' % player_url)
|
||||
@ -1629,7 +1629,8 @@ def _extract_count(count_name):
|
||||
player_desc = 'flash player %s' % player_version
|
||||
else:
|
||||
player_version = self._search_regex(
|
||||
[r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js', r'(?:www|player)-([^/]+)/base\.js'],
|
||||
[r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js',
|
||||
r'(?:www|player)-([^/]+)(?:/[a-z]{2}_[A-Z]{2})?/base\.js'],
|
||||
player_url,
|
||||
'html5 player', fatal=False)
|
||||
player_desc = 'html5 player %s' % player_version
|
||||
|
101
youtube_dl/extractor/zaq1.py
Normal file
101
youtube_dl/extractor/zaq1.py
Normal file
@ -0,0 +1,101 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
unified_timestamp,
|
||||
)
|
||||
|
||||
|
||||
class Zaq1IE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?zaq1\.pl/video/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'http://zaq1.pl/video/xev0e',
        'md5': '24a5eb3f052e604ae597c4d0d19b351e',
        'info_dict': {
            'id': 'xev0e',
            'title': 'DJ NA WESELE. TANIEC Z FIGURAMI.węgrów/sokołów podlaski/siedlce/mińsk mazowiecki/warszawa',
            'description': 'www.facebook.com/weseledjKontakt: 728 448 199 / 505 419 147',
            'ext': 'mp4',
            'duration': 511,
            'timestamp': 1490896361,
            'uploader': 'Anonim',
            'upload_date': '20170330',
            'view_count': int,
        }
    }, {
        # malformed JSON-LD
        'url': 'http://zaq1.pl/video/x81vn',
        'info_dict': {
            'id': 'x81vn',
            'title': 'SEKRETNE ŻYCIE WALTERA MITTY',
            'ext': 'mp4',
            'duration': 6234,
            'timestamp': 1493494860,
            'uploader': 'Anonim',
            'upload_date': '20170429',
            'view_count': int,
        },
        'params': {
            'skip_download': True,
        },
        'expected_warnings': ['Failed to parse JSON'],
    }]

    def _real_extract(self, url):
        """Extract a zaq1.pl video.

        Metadata is taken from the page's JSON-LD first; every field that
        JSON-LD did not supply is then filled from data-* attributes or
        <meta> tags of the same page.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        # Mandatory: the direct media URL lives in a data-video-url attribute.
        video_url = self._search_regex(
            r'data-video-url=(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
            'video url', group='url')

        # Non-fatal because the JSON-LD may be malformed (see second test).
        info = self._search_json_ld(webpage, video_id, fatal=False)

        def extract_data(field, name, fatal=False):
            # `field` selects the data-<field> attribute, `name` is the
            # human readable label used in warnings/errors.
            return self._search_regex(
                r'data-%s=(["\'])(?P<field>(?:(?!\1).)+)\1' % field,
                webpage, name, fatal=fatal, group='field')

        if not info.get('title'):
            info['title'] = extract_data('file-name', 'title', fatal=True)

        if not info.get('duration'):
            info['duration'] = int_or_none(extract_data('duration', 'duration'))

        if not info.get('thumbnail'):
            info['thumbnail'] = extract_data('photo-url', 'thumbnail')

        if not info.get('timestamp'):
            info['timestamp'] = unified_timestamp(self._html_search_meta(
                'uploadDate', webpage, 'timestamp'))

        # Guard on the key that is actually filled below, like the other
        # fallbacks; 'interactionCount' is only the name of the <meta> tag.
        if not info.get('view_count'):
            info['view_count'] = int_or_none(self._html_search_meta(
                'interactionCount', webpage, 'view count'))

        uploader = self._html_search_regex(
            r'Wideo dodał:\s*<a[^>]*>([^<]+)</a>', webpage, 'uploader',
            fatal=False)

        width = int_or_none(self._html_search_meta(
            'width', webpage, fatal=False))
        height = int_or_none(self._html_search_meta(
            'height', webpage, fatal=False))

        info.update({
            'id': video_id,
            'formats': [{
                'url': video_url,
                'width': width,
                'height': height,
                # The page URL is sent as Referer when fetching the media.
                'http_headers': {
                    'Referer': url,
                },
            }],
            'uploader': uploader,
        })

        return info
|
@ -468,6 +468,10 @@ def _scrub_eq(o):
|
||||
'--abort-on-unavailable-fragment',
|
||||
action='store_false', dest='skip_unavailable_fragments',
|
||||
help='Abort downloading when some fragment is not available')
|
||||
downloader.add_option(
|
||||
'--keep-fragments',
|
||||
action='store_true', dest='keep_fragments', default=False,
|
||||
help='Keep downloaded fragments on disk after downloading is finished; fragments are erased by default')
|
||||
downloader.add_option(
|
||||
'--buffer-size',
|
||||
dest='buffersize', metavar='SIZE', default='1024',
|
||||
|
@ -4,6 +4,7 @@
|
||||
import os
|
||||
import subprocess
|
||||
import time
|
||||
import re
|
||||
|
||||
|
||||
from .common import AudioConversionError, PostProcessor
|
||||
@ -22,6 +23,7 @@
|
||||
subtitles_filename,
|
||||
dfxp2srt,
|
||||
ISO639Utils,
|
||||
replace_extension,
|
||||
)
|
||||
|
||||
|
||||
@ -429,17 +431,40 @@ def add(meta_list, info_list=None):
|
||||
|
||||
filename = info['filepath']
|
||||
temp_filename = prepend_extension(filename, 'temp')
|
||||
in_filenames = [filename]
|
||||
options = []
|
||||
|
||||
if info['ext'] == 'm4a':
|
||||
options = ['-vn', '-acodec', 'copy']
|
||||
options.extend(['-vn', '-acodec', 'copy'])
|
||||
else:
|
||||
options = ['-c', 'copy']
|
||||
options.extend(['-c', 'copy'])
|
||||
|
||||
for (name, value) in metadata.items():
|
||||
options.extend(['-metadata', '%s=%s' % (name, value)])
|
||||
|
||||
chapters = info.get('chapters', [])
|
||||
if chapters:
|
||||
metadata_filename = encodeFilename(replace_extension(filename, 'meta'))
|
||||
with io.open(metadata_filename, 'wt', encoding='utf-8') as f:
|
||||
def ffmpeg_escape(text):
|
||||
return re.sub(r'(=|;|#|\\|\n)', r'\\\1', text)
|
||||
|
||||
metadata_file_content = ';FFMETADATA1\n'
|
||||
for chapter in chapters:
|
||||
metadata_file_content += '[CHAPTER]\nTIMEBASE=1/1000\n'
|
||||
metadata_file_content += 'START=%d\n' % (chapter['start_time'] * 1000)
|
||||
metadata_file_content += 'END=%d\n' % (chapter['end_time'] * 1000)
|
||||
chapter_title = chapter.get('title')
|
||||
if chapter_title:
|
||||
metadata_file_content += 'title=%s\n' % ffmpeg_escape(chapter_title)
|
||||
f.write(metadata_file_content)
|
||||
in_filenames.append(metadata_filename)
|
||||
options.extend(['-map_metadata', '1'])
|
||||
|
||||
self._downloader.to_screen('[ffmpeg] Adding metadata to \'%s\'' % filename)
|
||||
self.run_ffmpeg(filename, temp_filename, options)
|
||||
self.run_ffmpeg_multiple_files(in_filenames, temp_filename, options)
|
||||
if chapters:
|
||||
os.remove(metadata_filename)
|
||||
os.remove(encodeFilename(filename))
|
||||
os.rename(encodeFilename(temp_filename), encodeFilename(filename))
|
||||
return [], info
|
||||
|
@ -193,9 +193,10 @@ def _socks5_auth(self):
|
||||
|
||||
self._check_response_version(SOCKS5_VERSION, version)
|
||||
|
||||
if method == Socks5Auth.AUTH_NO_ACCEPTABLE:
|
||||
if method == Socks5Auth.AUTH_NO_ACCEPTABLE or (
|
||||
method == Socks5Auth.AUTH_USER_PASS and (not self._proxy.username or not self._proxy.password)):
|
||||
self.close()
|
||||
raise Socks5Error(method)
|
||||
raise Socks5Error(Socks5Auth.AUTH_NO_ACCEPTABLE)
|
||||
|
||||
if method == Socks5Auth.AUTH_USER_PASS:
|
||||
username = self._proxy.username.encode('utf-8')
|
||||
|
@ -11,6 +11,7 @@
|
||||
import ctypes
|
||||
import datetime
|
||||
import email.utils
|
||||
import email.header
|
||||
import errno
|
||||
import functools
|
||||
import gzip
|
||||
@ -421,8 +422,8 @@ def clean_html(html):
|
||||
|
||||
# Newline vs <br />
|
||||
html = html.replace('\n', ' ')
|
||||
html = re.sub(r'\s*<\s*br\s*/?\s*>\s*', '\n', html)
|
||||
html = re.sub(r'<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
|
||||
html = re.sub(r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n', html)
|
||||
html = re.sub(r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
|
||||
# Strip html tags
|
||||
html = re.sub('<.*?>', '', html)
|
||||
# Replace html entities
|
||||
@ -1194,6 +1195,11 @@ def unified_timestamp(date_str, day_first=True):
|
||||
# Remove AM/PM + timezone
|
||||
date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
|
||||
|
||||
# Remove unrecognized timezones from ISO 8601 alike timestamps
|
||||
m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
|
||||
if m:
|
||||
date_str = date_str[:-len(m.group('tz'))]
|
||||
|
||||
for expression in date_formats(day_first):
|
||||
try:
|
||||
dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
|
||||
@ -2092,6 +2098,58 @@ def update_Request(req, url=None, data=None, headers={}, query={}):
|
||||
return new_req
|
||||
|
||||
|
||||
def try_multipart_encode(data, boundary):
    '''
    Encode a dict as a multipart/form-data body using the given boundary

    data:
        A dict where keys and values can be either Unicode or bytes-like
        objects. Unicode objects are encoded as UTF-8.
    boundary:
        A Unicode object used as the part delimiter; it must be
        ASCII-encodable.

    Returns a (body, content_type) tuple where body is bytes.
    Raises ValueError if the boundary occurs inside any encoded field.
    '''
    content_type = 'multipart/form-data; boundary=%s' % boundary
    boundary_bytes = boundary.encode('ascii')

    chunks = []
    for k, v in data.items():
        if isinstance(k, compat_str):
            k = k.encode('utf-8')
        if isinstance(v, compat_str):
            v = v.encode('utf-8')
        # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
        # suggests sending UTF-8 directly. Firefox sends UTF-8, too
        # Plain concatenation is used instead of bytes %-formatting because
        # the latter is not available on Python 3.0-3.4 (added by PEP 461).
        content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
        if boundary_bytes in content:
            raise ValueError('Boundary overlaps with data')
        chunks.extend((b'--', boundary_bytes, b'\r\n', content))

    # Closing delimiter
    chunks.extend((b'--', boundary_bytes, b'--\r\n'))

    return b''.join(chunks), content_type
|
||||
|
||||
|
||||
def multipart_encode(data, boundary=None):
    '''
    Encode a dict to RFC 7578-compliant form-data

    data:
        A dict where keys and values can be either Unicode or bytes-like
        objects.
    boundary:
        An optional Unicode object to use as the boundary. When omitted,
        random boundaries are generated until one does not clash with the
        data.

    Returns an (encoded_body, content_type) tuple. If an explicitly given
    boundary clashes with the data, the ValueError is propagated.

    Reference: https://tools.ietf.org/html/rfc7578
    '''
    if boundary is not None:
        # Caller-chosen boundary: a single attempt, no retry on clash.
        out, content_type = try_multipart_encode(data, boundary)
        return out, content_type

    while True:
        candidate = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
        try:
            out, content_type = try_multipart_encode(data, candidate)
        except ValueError:
            # Generated boundary occurs in the data; draw another one.
            continue
        return out, content_type
|
||||
|
||||
|
||||
def dict_get(d, key_or_keys, default=None, skip_false_values=True):
|
||||
if isinstance(key_or_keys, (list, tuple)):
|
||||
for key in key_or_keys:
|
||||
@ -2103,8 +2161,11 @@ def dict_get(d, key_or_keys, default=None, skip_false_values=True):
|
||||
|
||||
|
||||
def try_get(src, getter, expected_type=None):
|
||||
if not isinstance(getter, (list, tuple)):
|
||||
getter = [getter]
|
||||
for get in getter:
|
||||
try:
|
||||
v = getter(src)
|
||||
v = get(src)
|
||||
except (AttributeError, KeyError, TypeError, IndexError):
|
||||
pass
|
||||
else:
|
||||
@ -2270,10 +2331,8 @@ def mimetype2ext(mt):
|
||||
return {
|
||||
'3gpp': '3gp',
|
||||
'smptett+xml': 'tt',
|
||||
'srt': 'srt',
|
||||
'ttaf+xml': 'dfxp',
|
||||
'ttml+xml': 'ttml',
|
||||
'vtt': 'vtt',
|
||||
'x-flv': 'flv',
|
||||
'x-mp4-fragmented': 'mp4',
|
||||
'x-ms-wmv': 'wmv',
|
||||
@ -2281,11 +2340,11 @@ def mimetype2ext(mt):
|
||||
'x-mpegurl': 'm3u8',
|
||||
'vnd.apple.mpegurl': 'm3u8',
|
||||
'dash+xml': 'mpd',
|
||||
'f4m': 'f4m',
|
||||
'f4m+xml': 'f4m',
|
||||
'hds+xml': 'f4m',
|
||||
'vnd.ms-sstr+xml': 'ism',
|
||||
'quicktime': 'mov',
|
||||
'mp2t': 'ts',
|
||||
}.get(res, res)
|
||||
|
||||
|
||||
@ -2508,27 +2567,97 @@ def srt_subtitles_timecode(seconds):
|
||||
|
||||
|
||||
def dfxp2srt(dfxp_data):
|
||||
LEGACY_NAMESPACES = (
|
||||
('http://www.w3.org/ns/ttml', [
|
||||
'http://www.w3.org/2004/11/ttaf1',
|
||||
'http://www.w3.org/2006/04/ttaf1',
|
||||
'http://www.w3.org/2006/10/ttaf1',
|
||||
]),
|
||||
('http://www.w3.org/ns/ttml#styling', [
|
||||
'http://www.w3.org/ns/ttml#style',
|
||||
]),
|
||||
)
|
||||
|
||||
SUPPORTED_STYLING = [
|
||||
'color',
|
||||
'fontFamily',
|
||||
'fontSize',
|
||||
'fontStyle',
|
||||
'fontWeight',
|
||||
'textDecoration'
|
||||
]
|
||||
|
||||
_x = functools.partial(xpath_with_ns, ns_map={
|
||||
'ttml': 'http://www.w3.org/ns/ttml',
|
||||
'ttaf1': 'http://www.w3.org/2006/10/ttaf1',
|
||||
'ttaf1_0604': 'http://www.w3.org/2006/04/ttaf1',
|
||||
'tts': 'http://www.w3.org/ns/ttml#styling',
|
||||
})
|
||||
|
||||
styles = {}
|
||||
default_style = {}
|
||||
|
||||
class TTMLPElementParser(object):
|
||||
out = ''
|
||||
_out = ''
|
||||
_unclosed_elements = []
|
||||
_applied_styles = []
|
||||
|
||||
def start(self, tag, attrib):
|
||||
if tag in (_x('ttml:br'), _x('ttaf1:br'), 'br'):
|
||||
self.out += '\n'
|
||||
if tag in (_x('ttml:br'), 'br'):
|
||||
self._out += '\n'
|
||||
else:
|
||||
unclosed_elements = []
|
||||
style = {}
|
||||
element_style_id = attrib.get('style')
|
||||
if default_style:
|
||||
style.update(default_style)
|
||||
if element_style_id:
|
||||
style.update(styles.get(element_style_id, {}))
|
||||
for prop in SUPPORTED_STYLING:
|
||||
prop_val = attrib.get(_x('tts:' + prop))
|
||||
if prop_val:
|
||||
style[prop] = prop_val
|
||||
if style:
|
||||
font = ''
|
||||
for k, v in sorted(style.items()):
|
||||
if self._applied_styles and self._applied_styles[-1].get(k) == v:
|
||||
continue
|
||||
if k == 'color':
|
||||
font += ' color="%s"' % v
|
||||
elif k == 'fontSize':
|
||||
font += ' size="%s"' % v
|
||||
elif k == 'fontFamily':
|
||||
font += ' face="%s"' % v
|
||||
elif k == 'fontWeight' and v == 'bold':
|
||||
self._out += '<b>'
|
||||
unclosed_elements.append('b')
|
||||
elif k == 'fontStyle' and v == 'italic':
|
||||
self._out += '<i>'
|
||||
unclosed_elements.append('i')
|
||||
elif k == 'textDecoration' and v == 'underline':
|
||||
self._out += '<u>'
|
||||
unclosed_elements.append('u')
|
||||
if font:
|
||||
self._out += '<font' + font + '>'
|
||||
unclosed_elements.append('font')
|
||||
applied_style = {}
|
||||
if self._applied_styles:
|
||||
applied_style.update(self._applied_styles[-1])
|
||||
applied_style.update(style)
|
||||
self._applied_styles.append(applied_style)
|
||||
self._unclosed_elements.append(unclosed_elements)
|
||||
|
||||
def end(self, tag):
|
||||
pass
|
||||
if tag not in (_x('ttml:br'), 'br'):
|
||||
unclosed_elements = self._unclosed_elements.pop()
|
||||
for element in reversed(unclosed_elements):
|
||||
self._out += '</%s>' % element
|
||||
if unclosed_elements and self._applied_styles:
|
||||
self._applied_styles.pop()
|
||||
|
||||
def data(self, data):
|
||||
self.out += data
|
||||
self._out += data
|
||||
|
||||
def close(self):
|
||||
return self.out.strip()
|
||||
return self._out.strip()
|
||||
|
||||
def parse_node(node):
|
||||
target = TTMLPElementParser()
|
||||
@ -2536,13 +2665,45 @@ def parse_node(node):
|
||||
parser.feed(xml.etree.ElementTree.tostring(node))
|
||||
return parser.close()
|
||||
|
||||
for k, v in LEGACY_NAMESPACES:
|
||||
for ns in v:
|
||||
dfxp_data = dfxp_data.replace(ns, k)
|
||||
|
||||
dfxp = compat_etree_fromstring(dfxp_data.encode('utf-8'))
|
||||
out = []
|
||||
paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall(_x('.//ttaf1:p')) or dfxp.findall(_x('.//ttaf1_0604:p')) or dfxp.findall('.//p')
|
||||
paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')
|
||||
|
||||
if not paras:
|
||||
raise ValueError('Invalid dfxp/TTML subtitle')
|
||||
|
||||
repeat = False
|
||||
while True:
|
||||
for style in dfxp.findall(_x('.//ttml:style')):
|
||||
style_id = style.get('id')
|
||||
parent_style_id = style.get('style')
|
||||
if parent_style_id:
|
||||
if parent_style_id not in styles:
|
||||
repeat = True
|
||||
continue
|
||||
styles[style_id] = styles[parent_style_id].copy()
|
||||
for prop in SUPPORTED_STYLING:
|
||||
prop_val = style.get(_x('tts:' + prop))
|
||||
if prop_val:
|
||||
styles.setdefault(style_id, {})[prop] = prop_val
|
||||
if repeat:
|
||||
repeat = False
|
||||
else:
|
||||
break
|
||||
|
||||
for p in ('body', 'div'):
|
||||
ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
|
||||
if ele is None:
|
||||
continue
|
||||
style = styles.get(ele.get('style'))
|
||||
if not style:
|
||||
continue
|
||||
default_style.update(style)
|
||||
|
||||
for para, index in zip(paras, itertools.count(1)):
|
||||
begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
|
||||
end_time = parse_dfxp_time_expr(para.attrib.get('end'))
|
||||
@ -3862,3 +4023,10 @@ def get(self, url, html=None, video_id=None, note=None, note2='Executing JS on w
|
||||
|
||||
return (html, encodeArgument(out))
|
||||
|
||||
|
||||
def random_birthday(year_field, month_field, day_field):
    '''
    Return a dict with a random birth date stored under the given field names

    The year is drawn from 1950-1995, the month from 1-12 and the day from
    1-31 (independently of the month); every value is returned as a string.
    '''
    birthday = {}
    # Drawn in year, month, day order
    for field, low, high in (
            (year_field, 1950, 1995),
            (month_field, 1, 12),
            (day_field, 1, 31)):
        birthday[field] = str(random.randint(low, high))
    return birthday
|
||||
|
@ -1,3 +1,3 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
__version__ = '2017.04.15'
|
||||
__version__ = '2017.05.01'
|
||||
|
Loading…
Reference in New Issue
Block a user