From 78ef214d2d8010f2fc7ab451c9b4ae137c2569dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 9 Feb 2017 23:42:40 +0700 Subject: [PATCH] [facebook] Improve JS data regex (closes #12042) --- youtube_dl/extractor/facebook.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index b325c8200..4a3c839f4 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -134,6 +134,20 @@ class FacebookIE(InfoExtractor): 'upload_date': '20161030', 'uploader': 'CNN', }, + }, { + # bigPipe.onPageletArrive ... onPageletArrive pagelet_group_mall + 'url': 'https://www.facebook.com/yaroslav.korpan/videos/1417995061575415/', + 'info_dict': { + 'id': '1417995061575415', + 'ext': 'mp4', + 'title': 'md5:a7b86ca673f51800cd54687b7f4012fe', + 'timestamp': 1486648217, + 'upload_date': '20170209', + 'uploader': 'Yaroslav Korpan', + }, + 'params': { + 'skip_download': True, + }, }, { 'url': 'https://www.facebook.com/video.php?v=10204634152394104', 'only_matching': True, @@ -262,7 +276,7 @@ def extract_video_data(instances): if not video_data: server_js_data = self._parse_json( self._search_regex( - r'bigPipe\.onPageletArrive\(({.+?})\)\s*;\s*}\s*\)\s*,\s*["\']onPageletArrive\s+stream_pagelet', + r'bigPipe\.onPageletArrive\(({.+?})\)\s*;\s*}\s*\)\s*,\s*["\']onPageletArrive\s+(?:stream_pagelet|pagelet_group_mall)', webpage, 'js data', default='{}'), video_id, transform_source=js_to_json, fatal=False) if server_js_data: