From 2acf2ce5cb2114ae938ac72dba48b3e060fd077f Mon Sep 17 00:00:00 2001 From: Ashish <39122144+Ashish0804@users.noreply.github.com> Date: Mon, 23 Aug 2021 21:30:39 +0530 Subject: [PATCH] [GabTV] Add extractor (#768) Closes #499 Authored by: Ashish0804 --- yt_dlp/extractor/extractors.py | 1 + yt_dlp/extractor/gab.py | 64 ++++++++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+) create mode 100644 yt_dlp/extractor/gab.py diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index cbebb8bf0..60ba6ccb2 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -480,6 +480,7 @@ ) from .funk import FunkIE from .fusion import FusionIE +from .gab import GabTVIE from .gaia import GaiaIE from .gameinformer import GameInformerIE from .gamespot import GameSpotIE diff --git a/yt_dlp/extractor/gab.py b/yt_dlp/extractor/gab.py new file mode 100644 index 000000000..25b5cb066 --- /dev/null +++ b/yt_dlp/extractor/gab.py @@ -0,0 +1,64 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + clean_html, + str_to_int, +) + + +class GabTVIE(InfoExtractor): + _VALID_URL = r'(?:https?://)tv.gab.com/channel/[^/]+/view/(?P[a-z0-9-]+)' + _TESTS = [{ + 'url': 'https://tv.gab.com/channel/wurzelroot/view/why-was-america-in-afghanistan-61217eacea5665de450d0488', + 'info_dict': { + 'id': '61217eacea5665de450d0488', + 'ext': 'mp4', + 'title': 'WHY WAS AMERICA IN AFGHANISTAN - AMERICA FIRST AGAINST AMERICAN OLIGARCHY', + 'description': None, + 'uploader': 'Wurzelroot', + 'uploader_id': '608fb0a85738fd1974984f7d', + 'thumbnail': 'https://tv.gab.com/image/61217eacea5665de450d0488', + } + }] + + def _real_extract(self, url): + id = self._match_id(url).split('-')[-1] + webpage = self._download_webpage(url, id) + channel_id = self._search_regex(r'data-channel-id=\"(?P[^\"]+)', webpage, 'channel_id') + channel_name = self._search_regex(r'data-channel-name=\"(?P[^\"]+)', webpage, 'channel_name') + title = self._search_regex(r'data-episode-title=\"(?P[^\"]+)', webpage, 'title') + view_key = self._search_regex(r'data-view-key=\"(?P[^\"]+)', webpage, 'view_key') + description = clean_html(self._html_search_regex(self._meta_regex('description'), webpage, 'description', group='content')) or None + available_resolutions = re.findall(r'[^\"]+)' % id, webpage) + + formats = [] + for resolution in available_resolutions: + frmt = { + 'url': f'https://tv.gab.com/media/{id}?viewKey={view_key}&r={resolution}', + 'format_id': resolution, + 'vcodec': 'h264', + 'acodec': 'aac', + 'ext': 'mp4' + } + if 'audio-' in resolution: + frmt['abr'] = str_to_int(resolution.replace('audio-', '')) + frmt['height'] = 144 + frmt['quality'] = -10 + else: + frmt['height'] = str_to_int(resolution.replace('p', '')) + formats.append(frmt) + self._sort_formats(formats) + + return { + 'id': id, + 'title': title, + 'formats': formats, + 'description': description, + 'uploader': channel_name, + 'uploader_id': channel_id, + 'thumbnail': f'https://tv.gab.com/image/{id}', + }