From 320724f964f09a5e1f08edd246464db4f0d297f9 Mon Sep 17 00:00:00 2001 From: Unknown Date: Mon, 12 Oct 2020 23:46:02 +0200 Subject: [PATCH] [SouthparkDE/MTV] another mgid extraction (mtv_base) feed url updated --- youtube_dlc/extractor/mtv.py | 21 +++++++++++++++------ youtube_dlc/extractor/southpark.py | 10 ++++++++-- 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/youtube_dlc/extractor/mtv.py b/youtube_dlc/extractor/mtv.py index e545a9ef3..cc4b0114a 100644 --- a/youtube_dlc/extractor/mtv.py +++ b/youtube_dlc/extractor/mtv.py @@ -45,7 +45,7 @@ def _remove_template_parameter(url): # Remove the templates, like &device={device} return re.sub(r'&[^=]*?={.*?}(?=(&|$))', '', url) - def _get_feed_url(self, uri): + def _get_feed_url(self, uri, url=None): return self._FEED_URL def _get_thumbnail_url(self, uri, itemdoc): @@ -211,9 +211,9 @@ def _get_feed_query(self, uri): data['lang'] = self._LANG return data - def _get_videos_info(self, uri, use_hls=True): + def _get_videos_info(self, uri, use_hls=True, url=None): video_id = self._id_from_uri(uri) - feed_url = self._get_feed_url(uri) + feed_url = self._get_feed_url(uri, url) info_url = update_url_query(feed_url, self._get_feed_query(uri)) return self._get_videos_info_from_url(info_url, video_id, use_hls) @@ -256,7 +256,6 @@ def _extract_triforce_mgid(self, webpage, data_zone=None, video_id=None): return try_get(feed, lambda x: x['result']['data']['id'], compat_str) def _extract_new_triforce_mgid(self, webpage, url='', video_id=None): - # print(compat_urlparse.urlparse(url).netloc) if url == '': return domain = get_domain(url) @@ -281,7 +280,7 @@ def _extract_new_triforce_mgid(self, webpage, url='', video_id=None): item_id = try_get(manifest, lambda x: x['manifest']['reporting']['itemId'], compat_str) if not item_id: - self.to_screen('Found no id!') + self.to_screen('No id found!') return # 'episode' can be anything. 
'content' is used often as well @@ -301,6 +300,16 @@ def _extract_mgid(self, webpage, url, data_zone=None): except RegexNotFoundError: mgid = None + title = self._match_id(url) + + try: + window_data = self._parse_json(self._search_regex( + r'(?s)window.__DATA__ = (?P<json>{.+});', webpage, + 'JSON Window Data', default=None, fatal=False, group='json'), title, fatal=False) + mgid = window_data['children'][4]['children'][0]['props']['media']['video']['config']['uri'] + except (KeyError, IndexError, TypeError): + pass + if mgid is None or ':' not in mgid: mgid = self._search_regex( [r'data-mgid="(.*?)"', r'swfobject\.embedSWF\(".*?(mgid:.*?)"'], @@ -324,7 +333,7 @@ def _real_extract(self, url): title = url_basename(url) webpage = self._download_webpage(url, title) mgid = self._extract_mgid(webpage, url) - videos_info = self._get_videos_info(mgid) + videos_info = self._get_videos_info(mgid, url=url) return videos_info diff --git a/youtube_dlc/extractor/southpark.py b/youtube_dlc/extractor/southpark.py index da75a43a7..24209c36c 100644 --- a/youtube_dlc/extractor/southpark.py +++ b/youtube_dlc/extractor/southpark.py @@ -44,8 +44,8 @@ class SouthParkEsIE(SouthParkIE): class SouthParkDeIE(SouthParkIE): IE_NAME = 'southpark.de' - _VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.de/(?:clips|alle-episoden|collections)/(?P<id>.+?)(\?|#|$))' - _FEED_URL = 'http://www.southpark.de/feeds/video-player/mrss/' + _VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.de/(?:clips|alle-episoden|collections|folgen)/(?P<id>(?P<unique_id>.+?)/.+?)(?:\?|#|$))' + # _FEED_URL = 'http://feeds.mtvnservices.com/od/feed/intl-mrss-player-feed' _TESTS = [{ 'url': 'http://www.southpark.de/clips/uygssh/the-government-wont-respect-my-privacy#tab=featured', @@ -78,6 +78,12 @@ class SouthParkDeIE(SouthParkIE): 'only_matching': True, }] + def _get_feed_url(self, uri, url=None): + video_id = self._id_from_uri(uri) + config = self._download_json( + 'http://media.mtvnservices.com/pmt/e1/access/index.html?uri=%s&configtype=edge&ref=%s' %
(uri, url), video_id) + return self._remove_template_parameter(config['feedWithQueryParams']) + class SouthParkNlIE(SouthParkIE): IE_NAME = 'southpark.nl'