diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 2e29e525..1cb97d48 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -685,6 +685,12 @@ Consider all sites to be NSFW unless otherwise known. individual Images + + VK + https://vk.com/ + Albums + + VSCO https://vsco.co/ diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 9cea8510..3e85cef7 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -115,6 +115,7 @@ modules = [ "twitter", "unsplash", "vanillarock", + "vk", "vsco", "wallhaven", "warosu", diff --git a/gallery_dl/extractor/vk.py b/gallery_dl/extractor/vk.py new file mode 100644 index 00000000..22c840e8 --- /dev/null +++ b/gallery_dl/extractor/vk.py @@ -0,0 +1,55 @@ +# -*- coding: utf-8 -*- + +# Copyright 2021 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for https://vk.com/""" + +from .common import GalleryExtractor +from .. import text +import re + + +class VkAlbumExtractor(GalleryExtractor): + """Extractor for vkontakte albums""" + category = "vk" + subcategory = "album" + directory_fmt = ("{category}", "{album_id}") + filename_fmt = "{id}.{extension}" + archive_fmt = "{id}" + root = "https://vk.com/" + pattern = r"(?:https://)?(?:www\.|m\.)?vk\.com/(?:albums|id)(\d+)" + test = ( + ("https://vk.com/id398982326", { + "pattern": r"https://sun\d+-\d+\.userapi\.com/c\d+/v\d+" + r"/[0-9a-f]+/[\w-]+\.jpg", + "count": ">= 35", + }), + ("https://m.vk.com/albums398982326"), + ("https://www.vk.com/id398982326"), + ) + + def __init__(self, match): + self.album_id = match.group(1) + url = "{}/albums{}".format(self.root, self.album_id) + GalleryExtractor.__init__(self, match, url) + + def metadata(self, page): + return { + "album_id": self.album_id, + } + + def images(self, page): + results = [] + sub = re.compile(r"/imp[fg]/").sub + needle = 'data-id="{}_'.format(self.album_id) + + for photo in text.extract_iter(page, needle, '?'): + photo_id = photo.partition('"')[0] + url = sub("/", photo.rpartition("(")[2]) + results.append((url, {"id": photo_id})) + + return results diff --git a/scripts/supportedsites.py b/scripts/supportedsites.py index 48aff4f6..174f71da 100755 --- a/scripts/supportedsites.py +++ b/scripts/supportedsites.py @@ -100,6 +100,7 @@ CATEGORY_MAP = { "tumblrgallery" : "TumblrGallery", "vanillarock" : "もえぴりあ", "vidyart" : "/v/idyart", + "vk" : "VK", "vsco" : "VSCO", "webtoons" : "Webtoon", "wikiart" : "WikiArt.org",