Remove _sort_formats from _extract_*_formats and _parse_*_formats methods

The format extraction helpers no longer sort their results, so _sort_formats must now be called explicitly by each extractor. Explicit _sort_formats calls have been added in all the necessary places in the code. Closes #8051
2024-11-05 02:32:44 +01:00 · 2016-03-27 07:03:08 +06:00 · 2016-03-27 07:03:08 +06:00 · 19dbaeece3
commit 19dbaeece3
parent 395fd4b08a
29 changed files with 56 additions and 21 deletions
--- a/youtube_dl/extractor/abc7news.py
+++ b/youtube_dl/extractor/abc7news.py
@ -44,6 +44,7 @@ def _real_extract(self, url):
            'contentURL', webpage, 'm3u8 url', fatal=True)

        formats = self._extract_m3u8_formats(m3u8, display_id, 'mp4')
+        self._sort_formats(formats)

        title = self._og_search_title(webpage).strip()
        description = self._og_search_description(webpage).strip()
--- a/youtube_dl/extractor/azubu.py
+++ b/youtube_dl/extractor/azubu.py
@ -120,6 +120,7 @@ def _real_extract(self, url):
        bc_info = self._download_json(req, user)
        m3u8_url = next(source['src'] for source in bc_info['sources'] if source['container'] == 'M2TS')
        formats = self._extract_m3u8_formats(m3u8_url, user, ext='mp4')
+        self._sort_formats(formats)

        return {
            'id': info['id'],
--- a/youtube_dl/extractor/bet.py
+++ b/youtube_dl/extractor/bet.py
@ -94,6 +94,7 @@ def _real_extract(self, url):
            xpath_with_ns('./media:thumbnail', NS_MAP)).get('url')

        formats = self._extract_smil_formats(smil_url, display_id)
+        self._sort_formats(formats)

        return {
            'id': video_id,
--- a/youtube_dl/extractor/cbsnews.py
+++ b/youtube_dl/extractor/cbsnews.py
@ -122,6 +122,7 @@ def _real_extract(self, url):
            for entry in f4m_formats:
                # URLs without the extra param induce an 404 error
                entry.update({'extra_param_to_segment_url': hdcore_sign})
+        self._sort_formats(f4m_formats)

        return {
            'id': video_id,
--- a/youtube_dl/extractor/chaturbate.py
+++ b/youtube_dl/extractor/chaturbate.py
@ -48,6 +48,7 @@ def _real_extract(self, url):
            raise ExtractorError('Unable to find stream URL')

        formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4')
+        self._sort_formats(formats)

        return {
            'id': video_id,
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@ -1021,8 +1021,6 @@ def _parse_f4m_formats(self, manifest, manifest_url, video_id, preference=None,
                'height': int_or_none(media_el.attrib.get('height')),
                'preference': preference,
            })
-        self._sort_formats(formats)
-
        return formats

    def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None,
@ -1143,7 +1141,6 @@ def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None,
                    last_media = None
                formats.append(f)
                last_info = {}
-        self._sort_formats(formats)
        return formats

    @staticmethod
@ -1317,8 +1314,6 @@ def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_para
                })
                continue

-        self._sort_formats(formats)
-
        return formats

    def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'):
@ -1536,7 +1531,6 @@ def extract_multisegment_info(element, ms_parent_info):
                            existing_format.update(f)
                    else:
                        self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
-        self._sort_formats(formats)
        return formats

    def _live_title(self, name):
--- a/youtube_dl/extractor/cwtv.py
+++ b/youtube_dl/extractor/cwtv.py
@ -57,6 +57,7 @@ def _real_extract(self, url):

        formats = self._extract_m3u8_formats(
            video_data['videos']['variantplaylist']['uri'], video_id, 'mp4')
+        self._sort_formats(formats)

        thumbnails = [{
            'url': image['uri'],
--- a/youtube_dl/extractor/dfb.py
+++ b/youtube_dl/extractor/dfb.py
@ -38,6 +38,7 @@ def _real_extract(self, url):
        token_el = f4m_info.find('token')
        manifest_url = token_el.attrib['url'] + '?' + 'hdnea=' + token_el.attrib['auth'] + '&hdcore=3.2.0'
        formats = self._extract_f4m_formats(manifest_url, display_id)
+        self._sort_formats(formats)

        return {
            'id': video_id,
--- a/youtube_dl/extractor/discovery.py
+++ b/youtube_dl/extractor/discovery.py
@ -63,18 +63,23 @@ def _real_extract(self, url):

        video_title = info.get('playlist_title') or info.get('video_title')

-        entries = [{
-            'id': compat_str(video_info['id']),
-            'formats': self._extract_m3u8_formats(
+        entries = []
+
+        for idx, video_info in enumerate(info['playlist']):
+            formats = self._extract_m3u8_formats(
                video_info['src'], display_id, 'mp4', 'm3u8_native', m3u8_id='hls',
-                note='Download m3u8 information for video %d' % (idx + 1)),
-            'title': video_info['title'],
-            'description': video_info.get('description'),
-            'duration': parse_duration(video_info.get('video_length')),
-            'webpage_url': video_info.get('href') or video_info.get('url'),
-            'thumbnail': video_info.get('thumbnailURL'),
-            'alt_title': video_info.get('secondary_title'),
-            'timestamp': parse_iso8601(video_info.get('publishedDate')),
-        } for idx, video_info in enumerate(info['playlist'])]
+                note='Download m3u8 information for video %d' % (idx + 1))
+            self._sort_formats(formats)
+            entries.append({
+                'id': compat_str(video_info['id']),
+                'formats': formats,
+                'title': video_info['title'],
+                'description': video_info.get('description'),
+                'duration': parse_duration(video_info.get('video_length')),
+                'webpage_url': video_info.get('href') or video_info.get('url'),
+                'thumbnail': video_info.get('thumbnailURL'),
+                'alt_title': video_info.get('secondary_title'),
+                'timestamp': parse_iso8601(video_info.get('publishedDate')),
+            })

        return self.playlist_result(entries, display_id, video_title)
--- a/youtube_dl/extractor/dplay.py
+++ b/youtube_dl/extractor/dplay.py
@ -118,6 +118,8 @@ def extract_formats(protocol, manifest_url):
                if info.get(protocol):
                    extract_formats(protocol, info[protocol])

+        self._sort_formats(formats)
+
        return {
            'id': video_id,
            'display_id': display_id,
--- a/youtube_dl/extractor/dw.py
+++ b/youtube_dl/extractor/dw.py
@ -39,13 +39,13 @@ def _real_extract(self, url):
        hidden_inputs = self._hidden_inputs(webpage)
        title = hidden_inputs['media_title']

-        formats = []
        if hidden_inputs.get('player_type') == 'video' and hidden_inputs.get('stream_file') == '1':
            formats = self._extract_smil_formats(
                'http://www.dw.com/smil/v-%s' % media_id, media_id,
                transform_source=lambda s: s.replace(
                    'rtmp://tv-od.dw.de/flash/',
                    'http://tv-download.dw.de/dwtv_video/flv/'))
+            self._sort_formats(formats)
        else:
            formats = [{'url': hidden_inputs['file_name']}]

--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@ -1310,6 +1310,7 @@ def _real_extract(self, url):
                    'vcodec': 'none' if m.group('type') == 'audio' else None
                }]
                info_dict['direct'] = True
+            self._sort_formats(formats)
            info_dict['formats'] = formats
            return info_dict

@ -1336,6 +1337,7 @@ def _real_extract(self, url):
        # Is it an M3U playlist?
        if first_bytes.startswith(b'#EXTM3U'):
            info_dict['formats'] = self._extract_m3u8_formats(url, video_id, 'mp4')
+            self._sort_formats(info_dict['formats'])
            return info_dict

        # Maybe it's a direct link to a video?
@ -1360,15 +1362,19 @@ def _real_extract(self, url):
            if doc.tag == 'rss':
                return self._extract_rss(url, video_id, doc)
            elif re.match(r'^(?:{[^}]+})?smil$', doc.tag):
-                return self._parse_smil(doc, url, video_id)
+                smil = self._parse_smil(doc, url, video_id)
+                self._sort_formats(smil['formats'])
+                return smil
            elif doc.tag == '{http://xspf.org/ns/0/}playlist':
                return self.playlist_result(self._parse_xspf(doc, video_id), video_id)
            elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
                info_dict['formats'] = self._parse_mpd_formats(
                    doc, video_id, mpd_base_url=url.rpartition('/')[0])
+                self._sort_formats(info_dict['formats'])
                return info_dict
            elif re.match(r'^{http://ns\.adobe\.com/f4m/[12]\.0}manifest$', doc.tag):
                info_dict['formats'] = self._parse_f4m_formats(doc, url, video_id)
+                self._sort_formats(info_dict['formats'])
                return info_dict
        except compat_xml_parse_error:
            pass
@ -2053,6 +2059,9 @@ def filter_video(urls):
            else:
                entry_info_dict['url'] = video_url

+            if entry_info_dict.get('formats'):
+                self._sort_formats(entry_info_dict['formats'])
+
            entries.append(entry_info_dict)

        if len(entries) == 1:
--- a/youtube_dl/extractor/laola1tv.py
+++ b/youtube_dl/extractor/laola1tv.py
@ -130,6 +130,7 @@ def _real_extract(self, url):
        formats = self._extract_f4m_formats(
            '%s?hdnea=%s&hdcore=3.2.0' % (token_attrib['url'], token_auth),
            video_id, f4m_id='hds')
+        self._sort_formats(formats)

        categories_str = _v('meta_sports')
        categories = categories_str.split(',') if categories_str else []
--- a/youtube_dl/extractor/lrt.py
+++ b/youtube_dl/extractor/lrt.py
@ -37,6 +37,7 @@ def _real_extract(self, url):
            r'file\s*:\s*(["\'])(?P<url>.+?)\1\s*\+\s*location\.hash\.substring\(1\)',
            webpage, 'm3u8 url', group='url')
        formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4')
+        self._sort_formats(formats)

        thumbnail = self._og_search_thumbnail(webpage)
        description = self._og_search_description(webpage)
--- a/youtube_dl/extractor/matchtv.py
+++ b/youtube_dl/extractor/matchtv.py
@ -47,6 +47,7 @@ def _real_extract(self, url):
        video_url = self._download_json(request, video_id)['data']['videoUrl']
        f4m_url = xpath_text(self._download_xml(video_url, video_id), './to')
        formats = self._extract_f4m_formats(f4m_url, video_id)
+        self._sort_formats(formats)
        return {
            'id': video_id,
            'title': self._live_title('Матч ТВ - Прямой эфир'),
--- a/youtube_dl/extractor/mitele.py
+++ b/youtube_dl/extractor/mitele.py
@ -67,6 +67,7 @@ def _real_extract(self, url):
            formats.extend(self._extract_f4m_formats(
                file_ + '&hdcore=3.2.0&plugin=aasp-3.2.0.77.18',
                display_id, f4m_id=loc))
+        self._sort_formats(formats)

        title = self._search_regex(
            r'class="Destacado-text"[^>]*>\s*<strong>([^<]+)</strong>', webpage, 'title')
--- a/youtube_dl/extractor/nrk.py
+++ b/youtube_dl/extractor/nrk.py
@ -63,6 +63,7 @@ def _real_extract(self, url):
        if determine_ext(media_url) == 'f4m':
            formats = self._extract_f4m_formats(
                media_url + '?hdcore=3.5.0&plugin=aasp-3.5.0.151.81', video_id, f4m_id='hds')
+            self._sort_formats(formats)
        else:
            formats = [{
                'url': media_url,
--- a/youtube_dl/extractor/restudy.py
+++ b/youtube_dl/extractor/restudy.py
@ -31,6 +31,7 @@ def _real_extract(self, url):
        formats = self._extract_smil_formats(
            'https://www.restudy.dk/awsmedia/SmilDirectory/video_%s.xml' % video_id,
            video_id)
+        self._sort_formats(formats)

        return {
            'id': video_id,
--- a/youtube_dl/extractor/rte.py
+++ b/youtube_dl/extractor/rte.py
@ -49,6 +49,7 @@ def _real_extract(self, url):
        # f4m_url = server + relative_url
        f4m_url = json_string['shows'][0]['media:group'][0]['rte:server'] + json_string['shows'][0]['media:group'][0]['url']
        f4m_formats = self._extract_f4m_formats(f4m_url, video_id)
+        self._sort_formats(f4m_formats)

        return {
            'id': video_id,
--- a/youtube_dl/extractor/rtve.py
+++ b/youtube_dl/extractor/rtve.py
@ -209,6 +209,7 @@ def _real_extract(self, url):
        png = self._download_webpage(png_url, video_id, 'Downloading url information')
        m3u8_url = _decrypt_url(png)
        formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4')
+        self._sort_formats(formats)

        return {
            'id': video_id,
--- a/youtube_dl/extractor/rtvnh.py
+++ b/youtube_dl/extractor/rtvnh.py
@ -38,6 +38,7 @@ def _real_extract(self, url):
                    item['file'], video_id, ext='mp4', entry_protocol='m3u8_native'))
            elif item.get('type') == '':
                formats.append({'url': item['file']})
+        self._sort_formats(formats)

        return {
            'id': video_id,
--- a/youtube_dl/extractor/shahid.py
+++ b/youtube_dl/extractor/shahid.py
@ -77,6 +77,7 @@ def _real_extract(self, url):
            raise ExtractorError('This video is DRM protected.', expected=True)

        formats = self._extract_m3u8_formats(player['url'], video_id, 'mp4')
+        self._sort_formats(formats)

        video = self._download_json(
            '%s/%s/%s?%s' % (
--- a/youtube_dl/extractor/sportbox.py
+++ b/youtube_dl/extractor/sportbox.py
@ -99,6 +99,7 @@ def _real_extract(self, url):
            webpage, 'hls file')

        formats = self._extract_m3u8_formats(hls, video_id, 'mp4')
+        self._sort_formats(formats)

        title = self._search_regex(
            r'sportboxPlayer\.node_title\s*=\s*"([^"]+)"', webpage, 'title')
--- a/youtube_dl/extractor/telecinco.py
+++ b/youtube_dl/extractor/telecinco.py
@ -82,6 +82,7 @@ def _real_extract(self, url):
        )
        formats = self._extract_m3u8_formats(
            token_info['tokenizedUrl'], episode, ext='mp4', entry_protocol='m3u8_native')
+        self._sort_formats(formats)

        return {
            'id': embed_data['videoId'],
--- a/youtube_dl/extractor/tubitv.py
+++ b/youtube_dl/extractor/tubitv.py
@ -69,6 +69,7 @@ def _real_extract(self, url):
        apu = self._search_regex(r"apu='([^']+)'", webpage, 'apu')
        m3u8_url = codecs.decode(apu, 'rot_13')[::-1]
        formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4')
+        self._sort_formats(formats)

        return {
            'id': video_id,
--- a/youtube_dl/extractor/videomore.py
+++ b/youtube_dl/extractor/videomore.py
@ -111,6 +111,7 @@ def _real_extract(self, url):

        video_url = xpath_text(video, './/video_url', 'video url', fatal=True)
        formats = self._extract_f4m_formats(video_url, video_id, f4m_id='hds')
+        self._sort_formats(formats)

        data = self._download_json(
            'http://videomore.ru/video/tracks/%s.json' % video_id,
--- a/youtube_dl/extractor/vier.py
+++ b/youtube_dl/extractor/vier.py
@ -50,6 +50,7 @@ def _real_extract(self, url):

        playlist_url = 'http://vod.streamcloud.be/%s/mp4:_definst_/%s.mp4/playlist.m3u8' % (application, filename)
        formats = self._extract_m3u8_formats(playlist_url, display_id, 'mp4')
+        self._sort_formats(formats)

        title = self._og_search_title(webpage, default=display_id)
        description = self._og_search_description(webpage, default=None)
--- a/youtube_dl/extractor/viidea.py
+++ b/youtube_dl/extractor/viidea.py
@ -151,6 +151,7 @@ def extract_part(part_id):
                smil_url = '%s/%s/video/%s/smil.xml' % (base_url, lecture_slug, part_id)
                smil = self._download_smil(smil_url, lecture_id)
                info = self._parse_smil(smil, smil_url, lecture_id)
+                self._sort_formats(info['formats'])
                info['id'] = lecture_id if not multipart else '%s_part%s' % (lecture_id, part_id)
                info['display_id'] = lecture_slug if not multipart else '%s_part%s' % (lecture_slug, part_id)
                if multipart:
--- a/youtube_dl/extractor/ynet.py
+++ b/youtube_dl/extractor/ynet.py
@ -41,10 +41,12 @@ def _real_extract(self, url):
        m = re.search(r'ynet - HOT -- (["\']+)(?P<title>.+?)\1', title)
        if m:
            title = m.group('title')
+        formats = self._extract_f4m_formats(f4m_url, video_id)
+        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
-            'formats': self._extract_f4m_formats(f4m_url, video_id),
+            'formats': formats,
            'thumbnail': self._og_search_thumbnail(webpage),
        }