[vk] initial support for albums (#474)

2024-11-22 10:42:34 +01:00 · 2021-03-23 18:48:01 +01:00 · 2021-03-23 18:48:01 +01:00 · 62cfee4d28
commit 62cfee4d28
parent 0e601de67b
4 changed files with 63 additions and 0 deletions
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@ -685,6 +685,12 @@ Consider all sites to be NSFW unless otherwise known.
    <td>individual Images</td>
    <td></td>
 </tr>
 <tr>
    <td>VK</td>
    <td>https://vk.com/</td>
    <td>Albums</td>
    <td></td>
 </tr>
 <tr>
    <td>VSCO</td>
    <td>https://vsco.co/</td>
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@ -115,6 +115,7 @@ modules = [
    "twitter",
    "unsplash",
    "vanillarock",
    "vk",
    "vsco",
    "wallhaven",
    "warosu",
--- a/gallery_dl/extractor/vk.py
+++ b/gallery_dl/extractor/vk.py
@ -0,0 +1,55 @@
 # -*- coding: utf-8 -*-
 # Copyright 2021 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
 # published by the Free Software Foundation.
 """Extractors for https://vk.com/"""
 from .common import GalleryExtractor
 from .. import text
 import re
 class VkAlbumExtractor(GalleryExtractor):
    """Extractor for vkontakte albums

    Accepts 'vk.com/albums<ID>' and 'vk.com/id<ID>' URLs (optionally with
    a 'www.' or 'm.' subdomain) and hands the corresponding
    '<root>/albums<ID>' URL to GalleryExtractor.
    """
    category = "vk"
    subcategory = "album"
    directory_fmt = ("{category}", "{album_id}")
    filename_fmt = "{id}.{extension}"
    archive_fmt = "{id}"
    # No trailing slash: __init__ builds its URL as root + "/albums<ID>",
    # so a trailing slash here would yield "https://vk.com//albums<ID>".
    root = "https://vk.com"
    pattern = r"(?:https://)?(?:www\.|m\.)?vk\.com/(?:albums|id)(\d+)"
    test = (
        ("https://vk.com/id398982326", {
            "pattern": r"https://sun\d+-\d+\.userapi\.com/c\d+/v\d+"
                       r"/[0-9a-f]+/[\w-]+\.jpg",
            "count": ">= 35",
        }),
        ("https://m.vk.com/albums398982326"),
        ("https://www.vk.com/id398982326"),
    )

    def __init__(self, match):
        """Store the numeric ID captured by 'pattern' and set the page URL

        match: regex match object for 'pattern'; group 1 is the numeric ID.
        """
        self.album_id = match.group(1)
        url = "{}/albums{}".format(self.root, self.album_id)
        GalleryExtractor.__init__(self, match, url)

    def metadata(self, page):
        """Return the metadata dict for this album ('album_id' only)

        'page' is not inspected.
        """
        return {
            "album_id": self.album_id,
        }

    def images(self, page):
        """Return a list of (url, {"id": photo_id}) tuples found in 'page'

        page: text that is searched for 'data-id="<album_id>_' markers.
        """
        results = []
        # collapse an "/impf/" or "/impg/" path component into a single "/"
        sub = re.compile(r"/imp[fg]/").sub
        needle = 'data-id="{}_'.format(self.album_id)

        # NOTE(review): text.extract_iter presumably yields each substring
        # found between 'needle' and the next '?' -- confirm against the
        # helper's definition.
        for photo in text.extract_iter(page, needle, '?'):
            # everything before the first double quote is the photo ID
            photo_id = photo.partition('"')[0]
            # everything after the last "(" is taken as the image URL
            url = sub("/", photo.rpartition("(")[2])
            results.append((url, {"id": photo_id}))
        return results
--- a/scripts/supportedsites.py
+++ b/scripts/supportedsites.py
@ -100,6 +100,7 @@ CATEGORY_MAP = {
    "tumblrgallery"  : "TumblrGallery",
    "vanillarock"    : "もえぴりあ",
    "vidyart"        : "/v/idyart",
    "vk"             : "VK",
    "vsco"           : "VSCO",
    "webtoons"       : "Webtoon",
    "wikiart"        : "WikiArt.org",