[vk] initial support for albums (#474)

2024-11-25 12:12:34 +01:00 · 2021-03-23 18:48:01 +01:00 · 2021-03-23 18:48:01 +01:00 · 62cfee4d28
commit 62cfee4d28
parent 0e601de67b
4 changed files with 63 additions and 0 deletions
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -685,6 +685,12 @@ Consider all sites to be NSFW unless otherwise known.
    <td>individual Images</td>
    <td></td>
 </tr>
+<tr>
+    <td>VK</td>
+    <td>https://vk.com/</td>
+    <td>Albums</td>
+    <td></td>
+</tr>
 <tr>
    <td>VSCO</td>
    <td>https://vsco.co/</td>
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -115,6 +115,7 @@ modules = [
    "twitter",
    "unsplash",
    "vanillarock",
+    "vk",
    "vsco",
    "wallhaven",
    "warosu",
--- a/gallery_dl/extractor/vk.py
+++ b/gallery_dl/extractor/vk.py
@@ -0,0 +1,55 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2021 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://vk.com/"""
+
+from .common import GalleryExtractor
+from .. import text
+import re
+
+
+class VkAlbumExtractor(GalleryExtractor):
+    """Extractor for vkontakte albums
+
+    Accepts 'albums<ID>' and 'id<ID>' URLs on vk.com, optionally with a
+    'www.' or 'm.' subdomain, and always works on the 'albums<ID>' page
+    for the captured numeric ID.
+    """
+    category = "vk"
+    subcategory = "album"
+    directory_fmt = ("{category}", "{album_id}")
+    filename_fmt = "{id}.{extension}"
+    archive_fmt = "{id}"
+    root = "https://vk.com/"
+    # group 1 captures the numeric ID following 'albums' or 'id'
+    pattern = r"(?:https://)?(?:www\.|m\.)?vk\.com/(?:albums|id)(\d+)"
+    test = (
+        ("https://vk.com/id398982326", {
+            "pattern": r"https://sun\d+-\d+\.userapi\.com/c\d+/v\d+"
+                       r"/[0-9a-f]+/[\w-]+\.jpg",
+            "count": ">= 35",
+        }),
+        ("https://m.vk.com/albums398982326"),
+        ("https://www.vk.com/id398982326"),
+    )
+
+    def __init__(self, match):
+        """Remember the matched ID and build the albums page URL"""
+        self.album_id = match.group(1)
+        # 'root' ends with a slash; strip it so the page URL becomes
+        # 'https://vk.com/albums<ID>' and not 'https://vk.com//albums<ID>'
+        url = "{}/albums{}".format(self.root.rstrip("/"), self.album_id)
+        GalleryExtractor.__init__(self, match, url)
+
+    def metadata(self, page):
+        """Return gallery metadata; only 'album_id' is provided
+
+        'page' is accepted but not used.
+        """
+        return {
+            "album_id": self.album_id,
+        }
+
+    def images(self, page):
+        """Return a list of (url, {"id": photo_id}) tuples found in 'page'"""
+        results = []
+        # drops an '/impf/' or '/impg/' path segment from image URLs
+        sub = re.compile(r"/imp[fg]/").sub
+        # photo entries are located by their 'data-id="<album_id>_' marker
+        needle = 'data-id="{}_'.format(self.album_id)
+
+        # presumably each item is the text between 'needle' and the next
+        # '?' -- verify against text.extract_iter()
+        for photo in text.extract_iter(page, needle, '?'):
+            # everything up to the first double quote is used as photo ID
+            photo_id = photo.partition('"')[0]
+            # the image URL is whatever follows the last '(' in the item
+            url = sub("/", photo.rpartition("(")[2])
+            results.append((url, {"id": photo_id}))
+
+        return results
--- a/scripts/supportedsites.py
+++ b/scripts/supportedsites.py
@@ -100,6 +100,7 @@ CATEGORY_MAP = {
    "tumblrgallery"  : "TumblrGallery",
    "vanillarock"    : "もえぴりあ",
    "vidyart"        : "/v/idyart",
+    "vk"             : "VK",
    "vsco"           : "VSCO",
    "webtoons"       : "Webtoon",
    "wikiart"        : "WikiArt.org",