1
0
mirror of https://github.com/mikf/gallery-dl.git synced 2024-11-25 04:02:32 +01:00

[pinterest] support 'story' pins (#6188, #6078, #4229)

This commit is contained in:
Mike Fährmann 2024-10-19 17:42:01 +02:00
parent 5477ed181d
commit 5d984f35aa
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88
3 changed files with 150 additions and 33 deletions

View File

@ -3400,6 +3400,16 @@ Description
Include pins from board sections.
extractor.pinterest.stories
---------------------------
Type
``bool``
Default
``true``
Description
Extract files from story pins.
extractor.pinterest.videos
--------------------------
Type
@ -4447,7 +4457,14 @@ Description
`fallback <extractor.*.fallback_>`_ URLs.
Known available sizes are
* ``orig``
* ``large``
* ``medium``
* ``small``
* ``4096x4096``
* ``900x900``
* ``360x360``
extractor.twitter.logout extractor.twitter.logout

View File

@ -18,8 +18,8 @@ BASE_PATTERN = r"(?:https?://)?(?:\w+\.)?pinterest\.[\w.]+"
class PinterestExtractor(Extractor): class PinterestExtractor(Extractor):
"""Base class for pinterest extractors""" """Base class for pinterest extractors"""
category = "pinterest" category = "pinterest"
filename_fmt = "{category}_{id}{media_id:?_//}.{extension}" filename_fmt = "{category}_{id}{media_id|page_id:?_//}.{extension}"
archive_fmt = "{id}{media_id}" archive_fmt = "{id}{media_id|page_id}"
root = "https://www.pinterest.com" root = "https://www.pinterest.com"
def _init(self): def _init(self):
@ -30,6 +30,7 @@ class PinterestExtractor(Extractor):
self.root = text.ensure_http_scheme(domain) self.root = text.ensure_http_scheme(domain)
self.api = PinterestAPI(self) self.api = PinterestAPI(self)
self.stories = self.config("stories", True)
self.videos = self.config("videos", True) self.videos = self.config("videos", True)
def items(self): def items(self):
@ -62,6 +63,8 @@ class PinterestExtractor(Extractor):
if "media_id" not in file: if "media_id" not in file:
pin["media_id"] = "" pin["media_id"] = ""
if "page_id" not in file:
pin["page_id"] = ""
if pin["extension"] == "m3u8": if pin["extension"] == "m3u8":
url = "ytdl:" + url url = "ytdl:" + url
@ -77,37 +80,107 @@ class PinterestExtractor(Extractor):
"""Return all relevant pin objects""" """Return all relevant pin objects"""
def _extract_files(self, pin): def _extract_files(self, pin):
story_pin_data = pin.get("story_pin_data")
if story_pin_data and self.stories:
return self._extract_story(pin, story_pin_data)
carousel_data = pin.get("carousel_data") carousel_data = pin.get("carousel_data")
if carousel_data: if carousel_data:
files = [] return self._extract_carousel(pin, carousel_data)
for slot in carousel_data["carousel_slots"]:
size, image = next(iter(slot["images"].items()))
slot["media_id"] = slot.pop("id")
slot["url"] = image["url"].replace(
"/" + size + "/", "/originals/", 1)
files.append(slot)
return files
videos = pin.get("videos") videos = pin.get("videos")
if videos: if videos and self.videos:
if not self.videos: return (self._extract_video(videos),)
return ()
pass
video_formats = videos["video_list"] try:
for fmt in ("V_HLSV4", "V_HLSV3_WEB", "V_HLSV3_MOBILE"): return (pin["images"]["orig"],)
if fmt in video_formats: except Exception:
file = video_formats[fmt] self.log.debug("%s: No files found", pin.get("id"))
break return ()
else:
file = max(video_formats.values(),
key=lambda x: x.get("width", 0))
if "V_720P" in video_formats: def _extract_story(self, pin, story):
file["_fallback"] = (video_formats["V_720P"]["url"],) files = []
return (file,) story_id = story.get("id")
return (pin["images"]["orig"],) for page in story["pages"]:
page_id = page.get("id")
for block in page["blocks"]:
type = block.get("type")
if type == "story_pin_image_block":
if 1 == len(page["blocks"]) == len(story["pages"]):
try:
media = pin["images"]["orig"]
except Exception:
media = self._extract_image(page, block)
else:
media = self._extract_image(page, block)
elif type == "story_pin_video_block":
video = block["video"]
media = self._extract_video(video)
media["media_id"] = video.get("id") or ""
elif type == "story_pin_paragraph_block":
media = {"url": "text:" + block["text"],
"extension": "txt",
"media_id": block.get("id")}
else:
self.log.warning("%s: Unsupported story block '%s'",
pin.get("id"), type)
continue
media["story_id"] = story_id
media["page_id"] = page_id
files.append(media)
return files
def _extract_carousel(self, pin, carousel_data):
    """Return the slots of a carousel pin as file objects.

    Every slot dict is updated in place: its "id" entry is moved to
    "media_id", and a "url" entry is added.  The URL is derived from the
    first image variant listed for the slot by replacing the first
    "/<variant key>/" path segment with "/originals/".

    'pin' is accepted for signature parity with the other extraction
    helpers and is not used here.
    """
    def as_original(entry):
        # only the first listed image variant is consulted
        variant, info = next(iter(entry["images"].items()))
        entry["media_id"] = entry.pop("id")
        size_segment = "/" + variant + "/"
        entry["url"] = info["url"].replace(size_segment, "/originals/", 1)
        return entry

    return [as_original(entry) for entry in carousel_data["carousel_slots"]]
def _extract_image(self, page, block):
    """Return the media object for a story image block.

    The image signature is read from 'block', falling back to 'page',
    and is used to build the "originals" URL in its jpg, png and webp
    variants.  When 'block' carries an ``image.images.originals`` dict,
    that dict is updated in place and returned; otherwise a new dict is
    created.  A missing or empty "url" is filled with the jpg variant.

    The returned dict always contains
      "url"       -- primary download URL
      "_fallback" -- the remaining candidate URLs, never repeating "url"
      "media_id"  -- the image signature

    Raises KeyError when 'block' has no signature and 'page' has no
    "image_signature" entry.
    """
    sig = block.get("image_signature") or page["image_signature"]
    url_base = "https://i.pinimg.com/originals/{}/{}/{}/{}.".format(
        sig[0:2], sig[2:4], sig[4:6], sig)
    candidates = (url_base + "jpg", url_base + "png", url_base + "webp")

    try:
        media = block["image"]["images"]["originals"]
    except (LookupError, TypeError):
        # image data is missing or one of the levels is not a mapping
        media = None

    if not isinstance(media, dict):
        media = {}
    if not media.get("url"):
        media["url"] = candidates[0]

    # offer every other candidate as fallback, but never retry the
    # primary URL itself
    primary = media["url"]
    media["_fallback"] = tuple(url for url in candidates if url != primary)
    media["media_id"] = sig
    return media
def _extract_video(self, video):
    """Return the preferred format entry of a video object.

    HLS formats are preferred in the order V_HLSV4, V_HLSV3_WEB,
    V_HLSV3_MOBILE.  When none of them is listed, the format with the
    largest "width" is chosen; a missing or null width counts as 0.

    If a V_720P format exists and is not the chosen entry itself, its
    URL is stored as "_fallback" on the returned dict, which is the
    entry from 'video' modified in place.

    Raises KeyError when 'video' has no "video_list" entry and
    ValueError when the format list is empty.
    """
    video_formats = video["video_list"]

    for fmt in ("V_HLSV4", "V_HLSV3_WEB", "V_HLSV3_MOBILE"):
        if fmt in video_formats:
            media = video_formats[fmt]
            break
    else:
        # 'or 0' also covers an explicit null width, which would
        # otherwise make the comparison inside max() fail
        media = max(video_formats.values(),
                    key=lambda x: x.get("width") or 0)

    fallback = video_formats.get("V_720P")
    # a format is no useful fallback for itself
    if fallback is not None and fallback is not media:
        media["_fallback"] = (fallback["url"],)
    return media
class PinterestPinExtractor(PinterestExtractor): class PinterestPinExtractor(PinterestExtractor):

View File

@ -23,9 +23,38 @@ __tests__ = (
{ {
"#url" : "https://www.pinterest.com/pin/422564377542934214/", "#url" : "https://www.pinterest.com/pin/422564377542934214/",
"#comment" : "video pin (#1189)", "#comment" : "video pin (#1189)",
"#category": ("", "pinterest", "pin"),
"#class" : pinterest.PinterestPinExtractor, "#class" : pinterest.PinterestPinExtractor,
"#pattern" : r"https://v\d*\.pinimg\.com/videos/mc/hls/d7/22/ff/d722ff00ab2352981b89974b37909de8.m3u8", "#pattern" : r"https://v\d*\.pinimg\.com/videos/mc/hls/d7/22/ff/d722ff00ab2352981b89974b37909de8.m3u8",
"#exception": exception.NotFoundError,
},
{
"#url" : "https://jp.pinterest.com/pin/858146904010573850/",
"#comment" : "story pin with images",
"#class" : pinterest.PinterestPinExtractor,
"#urls" : (
"https://i.pinimg.com/originals/0f/b0/8c/0fb08c519067dd263a1fcfecea775450.jpg",
"https://i.pinimg.com/originals/2f/27/f3/2f27f3eb781b107ce58bf588c12a12b7.jpg",
"https://i.pinimg.com/originals/55/fd/df/55fddf8d26aa0d96071af52ac6a0c25f.jpg",
),
},
{
"#url" : "https://www.pinterest.com/pin/63824519713049795/",
"#comment" : "story pin with video (#6188)",
"#class" : pinterest.PinterestPinExtractor,
"#urls" : "ytdl:https://v1.pinimg.com/videos/iht/hls/7a/b0/cc/7ab0cc56dcbfc1508b8d650af7b0a593.m3u8",
"extension" : "mp4",
"_ytdl_manifest": "hls",
},
{
"#url" : "https://jp.pinterest.com/pin/851532242064221228/",
"#comment" : "story pin with text",
"#class" : pinterest.PinterestPinExtractor,
"#range" : "2",
"#urls" : "text:Everskies character+outfits i made",
}, },
{ {
@ -37,10 +66,8 @@ __tests__ = (
{ {
"#url" : "https://www.pinterest.com/g1952849/test-/", "#url" : "https://www.pinterest.com/g1952849/test-/",
"#category": ("", "pinterest", "board"),
"#class" : pinterest.PinterestBoardExtractor, "#class" : pinterest.PinterestBoardExtractor,
"#pattern" : r"https://i\.pinimg\.com/originals/", "#urls" : "https://i.pinimg.com/originals/d4/f4/7f/d4f47fa2fce4c4c28475af5d94972904.jpg",
"#count" : 2,
}, },
{ {
@ -103,14 +130,14 @@ __tests__ = (
"#category": ("", "pinterest", "allpins"), "#category": ("", "pinterest", "allpins"),
"#class" : pinterest.PinterestAllpinsExtractor, "#class" : pinterest.PinterestAllpinsExtractor,
"#pattern" : r"https://i\.pinimg\.com/originals/[0-9a-f]{2}/[0-9a-f]{2}/[0-9a-f]{2}/[0-9a-f]{32}\.\w{3}", "#pattern" : r"https://i\.pinimg\.com/originals/[0-9a-f]{2}/[0-9a-f]{2}/[0-9a-f]{2}/[0-9a-f]{32}\.\w{3}",
"#count" : 7, "#count" : 9,
}, },
{ {
"#url" : "https://www.pinterest.de/digitalmomblog/_created/", "#url" : "https://www.pinterest.de/digitalmomblog/_created/",
"#category": ("", "pinterest", "created"), "#category": ("", "pinterest", "created"),
"#class" : pinterest.PinterestCreatedExtractor, "#class" : pinterest.PinterestCreatedExtractor,
"#pattern" : r"ytdl:|https://i\.pinimg\.com/originals/[0-9a-f]{2}/[0-9a-f]{2}/[0-9a-f]{2}/[0-9a-f]{32}\.(jpg|png)", "#pattern" : r"ytdl:|https://i\.pinimg\.com/originals/[0-9a-f]{2}/[0-9a-f]{2}/[0-9a-f]{2}/[0-9a-f]{32}\.(jpg|png|webp)",
"#range" : "1-10", "#range" : "1-10",
"#count" : 10, "#count" : 10,
}, },