From a6fed628dde288cdd680de426fa3c6ea094196cc Mon Sep 17 00:00:00 2001
From: Wiiplay123 <9746793+Wiiplay123@users.noreply.github.com>
Date: Sat, 20 Jan 2024 15:07:52 -0600
Subject: [PATCH 1/2] [blogger] Fix lh*.googleusercontent.com forward slash bug, add support for lh*-**.googleusercontent.com

Some URLs use "lh(number)-(locale).googleusercontent.com" format, so I added support for those.
Also, "lh(number).googleusercontent.com" formats were broken because the regex was looking for a second forward slash.
Examples:
lh7.googleusercontent.com
lh7-us.googleusercontent.com
---
 gallery_dl/extractor/blogger.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gallery_dl/extractor/blogger.py b/gallery_dl/extractor/blogger.py
index 58ae59db..b3b405de 100644
--- a/gallery_dl/extractor/blogger.py
+++ b/gallery_dl/extractor/blogger.py
@@ -37,7 +37,8 @@ class BloggerExtractor(BaseExtractor):
         findall_image = re.compile(
             r'src="(https?://(?:'
             r'blogger\.googleusercontent\.com/img|'
-            r'lh\d+\.googleusercontent\.com/|'
+            r'lh\d+\.googleusercontent\.com|'
+            r'lh\d+-\w+\.googleusercontent\.com|'
             r'\d+\.bp\.blogspot\.com)/[^"]+)').findall
         findall_video = re.compile(
             r'src="(https?://www\.blogger\.com/video\.g\?token=[^"]+)').findall

From 6eb62f21401ad112aaf23734201f3908eabb0316 Mon Sep 17 00:00:00 2001
From: Wiiplay123 <9746793+Wiiplay123@users.noreply.github.com>
Date: Sat, 20 Jan 2024 15:53:11 -0600
Subject: [PATCH 2/2] Combine lh*(-**).googleusercontent.com URL regex into one line.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Mike Fährmann
---
 gallery_dl/extractor/blogger.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/gallery_dl/extractor/blogger.py b/gallery_dl/extractor/blogger.py
index b3b405de..402408e6 100644
--- a/gallery_dl/extractor/blogger.py
+++ b/gallery_dl/extractor/blogger.py
@@ -37,8 +37,7 @@ class BloggerExtractor(BaseExtractor):
         findall_image = re.compile(
             r'src="(https?://(?:'
             r'blogger\.googleusercontent\.com/img|'
-            r'lh\d+\.googleusercontent\.com|'
-            r'lh\d+-\w+\.googleusercontent\.com|'
+            r'lh\d+(?:-\w+)?\.googleusercontent\.com|'
             r'\d+\.bp\.blogspot\.com)/[^"]+)').findall
         findall_video = re.compile(
             r'src="(https?://www\.blogger\.com/video\.g\?token=[^"]+)').findall