mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-02 17:22:31 +01:00
[bbc] Extract article JSON and actualize tests
This commit is contained in:
parent
6a74719060
commit
ae8bdfd1a1
@ -11,6 +11,7 @@
|
|||||||
int_or_none,
|
int_or_none,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
|
remove_end,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
)
|
)
|
||||||
from ..compat import compat_HTTPError
|
from ..compat import compat_HTTPError
|
||||||
@ -533,7 +534,7 @@ class BBCIE(BBCCoUkIE):
|
|||||||
'url': 'http://www.bbc.com/news/world-europe-32041533',
|
'url': 'http://www.bbc.com/news/world-europe-32041533',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'p02mprgb',
|
'id': 'p02mprgb',
|
||||||
'ext': 'mp4',
|
'ext': 'flv',
|
||||||
'title': 'Aerial footage showed the site of the crash in the Alps - courtesy BFM TV',
|
'title': 'Aerial footage showed the site of the crash in the Alps - courtesy BFM TV',
|
||||||
'duration': 47,
|
'duration': 47,
|
||||||
'timestamp': 1427219242,
|
'timestamp': 1427219242,
|
||||||
@ -552,7 +553,6 @@ class BBCIE(BBCCoUkIE):
|
|||||||
'id': '150615_telabyad_kentin_cogu',
|
'id': '150615_telabyad_kentin_cogu',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': "YPG: Tel Abyad'ın tamamı kontrolümüzde",
|
'title': "YPG: Tel Abyad'ın tamamı kontrolümüzde",
|
||||||
'duration': 47,
|
|
||||||
'timestamp': 1434397334,
|
'timestamp': 1434397334,
|
||||||
'upload_date': '20150615',
|
'upload_date': '20150615',
|
||||||
},
|
},
|
||||||
@ -566,7 +566,6 @@ class BBCIE(BBCCoUkIE):
|
|||||||
'id': '150619_video_honduras_militares_hospitales_corrupcion_aw',
|
'id': '150619_video_honduras_militares_hospitales_corrupcion_aw',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Honduras militariza sus hospitales por nuevo escándalo de corrupción',
|
'title': 'Honduras militariza sus hospitales por nuevo escándalo de corrupción',
|
||||||
'duration': 87,
|
|
||||||
'timestamp': 1434713142,
|
'timestamp': 1434713142,
|
||||||
'upload_date': '20150619',
|
'upload_date': '20150619',
|
||||||
},
|
},
|
||||||
@ -578,7 +577,7 @@ class BBCIE(BBCCoUkIE):
|
|||||||
'url': 'http://www.bbc.com/news/video_and_audio/must_see/33376376',
|
'url': 'http://www.bbc.com/news/video_and_audio/must_see/33376376',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'p02w6qjc',
|
'id': 'p02w6qjc',
|
||||||
'ext': 'mp4',
|
'ext': 'flv',
|
||||||
'title': '''Judge Mindy Glazer: "I'm sorry to see you here... I always wondered what happened to you"''',
|
'title': '''Judge Mindy Glazer: "I'm sorry to see you here... I always wondered what happened to you"''',
|
||||||
'duration': 56,
|
'duration': 56,
|
||||||
},
|
},
|
||||||
@ -605,11 +604,11 @@ class BBCIE(BBCCoUkIE):
|
|||||||
'url': 'http://www.bbc.com/autos/story/20130513-hyundais-rock-star',
|
'url': 'http://www.bbc.com/autos/story/20130513-hyundais-rock-star',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'p018zqqg',
|
'id': 'p018zqqg',
|
||||||
'ext': 'mp4',
|
'ext': 'flv',
|
||||||
'title': 'Hyundai Santa Fe Sport: Rock star',
|
'title': 'Hyundai Santa Fe Sport: Rock star',
|
||||||
'description': 'md5:b042a26142c4154a6e472933cf20793d',
|
'description': 'md5:b042a26142c4154a6e472933cf20793d',
|
||||||
'timestamp': 1368473503,
|
'timestamp': 1415867444,
|
||||||
'upload_date': '20130513',
|
'upload_date': '20141113',
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
# rtmp download
|
# rtmp download
|
||||||
@ -620,9 +619,8 @@ class BBCIE(BBCCoUkIE):
|
|||||||
'url': 'http://www.bbc.com/sport/0/football/33653409',
|
'url': 'http://www.bbc.com/sport/0/football/33653409',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'p02xycnp',
|
'id': 'p02xycnp',
|
||||||
'ext': 'mp4',
|
'ext': 'flv',
|
||||||
'title': 'Transfers: Cristiano Ronaldo to Man Utd, Arsenal to spend?',
|
'title': 'Transfers: Cristiano Ronaldo to Man Utd, Arsenal to spend?',
|
||||||
'description': 'md5:398fca0e2e701c609d726e034fa1fc89',
|
|
||||||
'duration': 140,
|
'duration': 140,
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
@ -697,11 +695,26 @@ def _real_extract(self, url):
|
|||||||
|
|
||||||
webpage = self._download_webpage(url, playlist_id)
|
webpage = self._download_webpage(url, playlist_id)
|
||||||
|
|
||||||
timestamp = parse_iso8601(self._search_regex(
|
timestamp = None
|
||||||
[r'"datePublished":\s*"([^"]+)',
|
playlist_title = None
|
||||||
r'<meta[^>]+property="article:published_time"[^>]+content="([^"]+)"',
|
playlist_description = None
|
||||||
r'itemprop="datePublished"[^>]+datetime="([^"]+)"'],
|
|
||||||
webpage, 'date', default=None))
|
ld = self._parse_json(
|
||||||
|
self._search_regex(
|
||||||
|
r'(?s)<script type="application/ld\+json">(.+?)</script>',
|
||||||
|
webpage, 'ld json', default='{}'),
|
||||||
|
playlist_id, fatal=False)
|
||||||
|
if ld:
|
||||||
|
timestamp = parse_iso8601(ld.get('datePublished'))
|
||||||
|
playlist_title = ld.get('headline')
|
||||||
|
playlist_description = ld.get('articleBody')
|
||||||
|
|
||||||
|
if not timestamp:
|
||||||
|
timestamp = parse_iso8601(self._search_regex(
|
||||||
|
[r'<meta[^>]+property="article:published_time"[^>]+content="([^"]+)"',
|
||||||
|
r'itemprop="datePublished"[^>]+datetime="([^"]+)"',
|
||||||
|
r'"datePublished":\s*"([^"]+)',],
|
||||||
|
webpage, 'date', default=None))
|
||||||
|
|
||||||
entries = []
|
entries = []
|
||||||
|
|
||||||
@ -754,8 +767,8 @@ def _real_extract(self, url):
|
|||||||
playlist.get('progressiveDownloadUrl'), playlist_id, timestamp))
|
playlist.get('progressiveDownloadUrl'), playlist_id, timestamp))
|
||||||
|
|
||||||
if entries:
|
if entries:
|
||||||
playlist_title = self._og_search_title(webpage)
|
playlist_title = playlist_title or remove_end(self._og_search_title(webpage), ' - BBC News')
|
||||||
playlist_description = self._og_search_description(webpage, default=None)
|
playlist_description = playlist_description or self._og_search_description(webpage, default=None)
|
||||||
return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
|
return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
|
||||||
|
|
||||||
# single video story (e.g. http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret)
|
# single video story (e.g. http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret)
|
||||||
|
Loading…
Reference in New Issue
Block a user