[extractor/odnoklassniki] Support boosty.to embeds (#5105)

Closes #4212
Authored by: megapro17, Lesmiscore, pukkandan
2024-11-05 10:42:37 +01:00 · 2022-11-07 19:02:42 +03:00 · 2022-11-07 19:02:42 +03:00 · 8196182a12
commit 8196182a12
parent 9b383177c9
1 changed files with 75 additions and 19 deletions
--- a/yt_dlp/extractor/odnoklassniki.py
+++ b/yt_dlp/extractor/odnoklassniki.py
@@ -8,10 +8,12 @@
 from ..utils import (
    ExtractorError,
    float_or_none,
-    unified_strdate,
    int_or_none,
    qualities,
+    smuggle_url,
    unescapeHTML,
+    unified_strdate,
+    unsmuggle_url,
    urlencode_postdata,
 )

@@ -22,7 +24,7 @@ class OdnoklassnikiIE(InfoExtractor):
                    (?:(?:www|m|mobile)\.)?
                    (?:odnoklassniki|ok)\.ru/
                    (?:
-                        video(?:embed)?/|
+                        video(?P<embed>embed)?/|
                        web-api/video/moviePlayer/|
                        live/|
                        dk\?.*?st\.mvId=
@@ -38,7 +40,7 @@ class OdnoklassnikiIE(InfoExtractor):
            'ext': 'mp4',
            'timestamp': 1545580896,
            'view_count': int,
-            'thumbnail': 'https://coub-anubis-a.akamaized.net/coub_storage/coub/simple/cw_image/c5ac87553bd/608e806a1239c210ab692/1545580913_00026.jpg',
+            'thumbnail': 'https://coub-attachments.akamaized.net/coub_storage/coub/simple/cw_image/c5ac87553bd/608e806a1239c210ab692/1545580913_00026.jpg',
            'title': 'Народная забава',
            'uploader': 'Nevata',
            'upload_date': '20181223',
@@ -65,11 +67,12 @@ class OdnoklassnikiIE(InfoExtractor):
    }, {
        # metadata in JSON
        'url': 'http://ok.ru/video/20079905452',
-        'md5': '0b62089b479e06681abaaca9d204f152',
+        'md5': '5d2b64756e2af296e3b383a0bc02a6aa',
        'info_dict': {
            'id': '20079905452',
            'ext': 'mp4',
            'title': 'Культура меняет нас (прекрасный ролик!))',
+            'thumbnail': str,
            'duration': 100,
            'upload_date': '20141207',
            'uploader_id': '330537914540',
@@ -80,11 +83,12 @@ class OdnoklassnikiIE(InfoExtractor):
    }, {
        # metadataUrl
        'url': 'http://ok.ru/video/63567059965189-0?fromTime=5',
-        'md5': '6ff470ea2dd51d5d18c295a355b0b6bc',
+        'md5': 'f8c951122516af72e6e6ffdd3c41103b',
        'info_dict': {
            'id': '63567059965189-0',
            'ext': 'mp4',
            'title': 'Девушка без комплексов ...',
+            'thumbnail': str,
            'duration': 191,
            'upload_date': '20150518',
            'uploader_id': '534380003155',
@@ -95,18 +99,32 @@ class OdnoklassnikiIE(InfoExtractor):
        },
    }, {
        # YouTube embed (metadataUrl, provider == USER_YOUTUBE)
-        'url': 'http://ok.ru/video/64211978996595-1',
-        'md5': '2f206894ffb5dbfcce2c5a14b909eea5',
+        'url': 'https://ok.ru/video/3952212382174',
+        'md5': '91749d0bd20763a28d083fa335bbd37a',
        'info_dict': {
-            'id': 'V_VztHT5BzY',
+            'id': '5axVgHHDBvU',
            'ext': 'mp4',
-            'title': 'Космическая среда от 26 августа 2015',
-            'description': 'md5:848eb8b85e5e3471a3a803dae1343ed0',
-            'duration': 440,
-            'upload_date': '20150826',
-            'uploader_id': 'tvroscosmos',
-            'uploader': 'Телестудия Роскосмоса',
+            'title': 'Youtube-dl 101: What is it and HOW to use it! Full Download Walkthrough and Guide',
+            'description': 'md5:b57209eeb9d5c2f20c984dfb58862097',
+            'uploader': 'Lod Mer',
+            'uploader_id': '575186401502',
+            'duration': 1529,
            'age_limit': 0,
+            'upload_date': '20210405',
+            'comment_count': int,
+            'live_status': 'not_live',
+            'view_count': int,
+            'thumbnail': 'https://i.mycdn.me/i?r=AEHujHvw2RjEbemUCNEorZbxYpb_p_9AcN2FmGik64Krkcmz37YtlY093oAM5-HIEAt7Zi9s0CiBOSDmbngC-I-k&fn=external_8',
+            'uploader_url': 'http://www.youtube.com/user/MrKewlkid94',
+            'channel_follower_count': int,
+            'tags': ['youtube-dl', 'youtube playlists', 'download videos', 'download audio'],
+            'channel_id': 'UCVGtvURtEURYHtJFUegdSug',
+            'like_count': int,
+            'availability': 'public',
+            'channel_url': 'https://www.youtube.com/channel/UCVGtvURtEURYHtJFUegdSug',
+            'categories': ['Education'],
+            'playable_in_embed': True,
+            'channel': 'BornToReact',
        },
    }, {
        # YouTube embed (metadata, provider == USER_YOUTUBE, no metadata.movie.title field)
@@ -126,10 +144,12 @@ class OdnoklassnikiIE(InfoExtractor):
        },
        'skip': 'Video has not been found',
    }, {
+        # TODO: HTTP Error 400: Bad Request, it only works if there are no cookies when downloading
        'note': 'Only available in mobile webpage',
        'url': 'https://m.ok.ru/video/2361249957145',
        'info_dict': {
            'id': '2361249957145',
+            'ext': 'mp4',
            'title': 'Быковское крещение',
            'duration': 3038.181,
        },
@@ -158,8 +178,37 @@ class OdnoklassnikiIE(InfoExtractor):
        # Paid video
        'url': 'https://ok.ru/video/954886983203',
        'only_matching': True,
+    }, {
+        'url': 'https://ok.ru/videoembed/2932705602075',
+        'info_dict': {
+            'id': '2932705602075',
+            'ext': 'mp4',
+            'thumbnail': 'https://i.mycdn.me/videoPreview?id=1369902483995&type=37&idx=2&tkn=fqlnoQD_xwq5ovIlKfgNyU08qmM&fn=external_8',
+            'title': 'Boosty для тебя!',
+            'uploader_id': '597811038747',
+            'like_count': 0,
+            'duration': 35,
+        },
    }]

+    _WEBPAGE_TESTS = [{
+        'url': 'https://boosty.to/ikakprosto/posts/56cedaca-b56a-4dfd-b3ed-98c79cfa0167',
+        'info_dict': {
+            'id': '3950343629563',
+            'ext': 'mp4',
+            'thumbnail': 'https://i.mycdn.me/videoPreview?id=2776238394107&type=37&idx=11&tkn=F3ejkUFcpuI4DnMRxrDGcH5YcmM&fn=external_8',
+            'title': 'Заяц Бусти.mp4',
+            'uploader_id': '571368965883',
+            'like_count': 0,
+            'duration': 10444,
+        },
+    }]
+
+    @classmethod
+    def _extract_embed_urls(cls, url, webpage):
+        for x in super()._extract_embed_urls(url, webpage):
+            yield smuggle_url(x, {'referrer': url})
+
    def _real_extract(self, url):
        try:
            return self._extract_desktop(url)
@@ -174,16 +223,23 @@ def _extract_desktop(self, url):
        start_time = int_or_none(compat_parse_qs(
            compat_urllib_parse_urlparse(url).query).get('fromTime', [None])[0])

-        video_id = self._match_id(url)
+        url, smuggled = unsmuggle_url(url, {})
+        video_id, is_embed = self._match_valid_url(url).group('id', 'embed')
+        mode = 'videoembed' if is_embed else 'video'

        webpage = self._download_webpage(
-            'http://ok.ru/video/%s' % video_id, video_id,
-            note='Downloading desktop webpage')
+            f'https://ok.ru/{mode}/{video_id}', video_id,
+            note='Downloading desktop webpage',
+            headers={'Referer': smuggled['referrer']} if smuggled.get('referrer') else {})

        error = self._search_regex(
            r'[^>]+class="vp_video_stub_txt"[^>]*>([^<]+)<',
            webpage, 'error', default=None)
-        if error:
+        # Direct link from boosty
+        if (error == 'The author of this video has not been found or is blocked'
+                and not smuggled.get('referrer') and mode == 'videoembed'):
+            return self._extract_desktop(smuggle_url(url, {'referrer': 'https://boosty.to'}))
+        elif error:
            raise ExtractorError(error, expected=True)

        player = self._parse_json(
@@ -270,7 +326,7 @@ def _extract_desktop(self, url):
        if provider == 'LIVE_TV_APP':
            info['title'] = title

-        quality = qualities(('4', '0', '1', '2', '3', '5'))
+        quality = qualities(('4', '0', '1', '2', '3', '5', '6', '7'))

        formats = [{
            'url': f['url'],