[scrolller] add initial support (#295, #3418, #5051)

2024-11-25 04:02:32 +01:00 · 2024-10-21 09:42:42 +02:00 · 2024-10-21 09:42:42 +02:00 · 66aa514c25
commit 66aa514c25
parent eacd4479a2
6 changed files with 320 additions and 0 deletions
--- a/docs/configuration.rst
+++ b/docs/configuration.rst
@@ -443,6 +443,7 @@ Description
    * ``nijie`` (R)
    * ``pillowfort``
    * ``sankaku``
+    * ``scrolller``
    * ``seiga``
    * ``subscribestar``
    * ``tapas``
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -799,6 +799,12 @@ Consider all listed sites to potentially be NSFW.
    <td>Articles, Tag Searches</td>
    <td></td>
 </tr>
+<tr>
+    <td>Scrolller</td>
+    <td>https://scrolller.com/</td>
+    <td>Followed Subreddits, Posts, Subreddits</td>
+    <td>Supported</td>
+</tr>
 <tr>
    <td>Sen Manga</td>
    <td>https://raw.senmanga.com/</td>
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -141,6 +141,7 @@ modules = [
    "rule34us",
    "sankaku",
    "sankakucomplex",
+    "scrolller",
    "seiga",
    "senmanga",
    "sexcom",
--- a/gallery_dl/extractor/scrolller.py
+++ b/gallery_dl/extractor/scrolller.py
@@ -0,0 +1,227 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2024 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://scrolller.com/"""
+
+from .common import Extractor, Message
+from .. import text, util, exception
+from ..cache import cache
+
+# Common URL prefix of all scrolller patterns: the scheme and a leading
+# "www." are both optional in front of the "scrolller.com" host name.
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?scrolller\.com"
+
+
+class ScrolllerExtractor(Extractor):
+    """Base class for scrolller extractors"""
+    category = "scrolller"
+    root = "https://scrolller.com"
+    directory_fmt = ("{category}", "{subredditTitle}")
+    filename_fmt = "{id}{title:? //}.{extension}"
+    archive_fmt = "{id}"
+    request_interval = (0.5, 1.5)
+
+    def _init(self):
+        # Stays None unless login() obtains a token; it is sent as the
+        # "authorization" member of every GraphQL request body.
+        self.auth_token = None
+
+    def items(self):
+        """Yield a Directory and a Url message for each post
+
+        The media source selected by _sort_key() is merged into the
+        post's metadata and its URL becomes the download URL.
+        Posts without any media source are skipped with a warning.
+        """
+        self.login()
+
+        for post in self.posts():
+            sources = post.get("mediaSources")
+            if not sources:
+                # max() raises ValueError for an empty sequence; do not
+                # let a single media-less post abort the whole run
+                self.log.warning("%s: No media files", post.get("id"))
+                continue
+
+            src = max(sources, key=self._sort_key)
+            post.update(src)
+            url = src["url"]
+            text.nameext_from_url(url, post)
+
+            yield Message.Directory, post
+            yield Message.Url, url, post
+
+    def posts(self):
+        """Return an iterable of post objects (overridden by subclasses)"""
+        return ()
+
+    def login(self):
+        """Set 'auth_token' if a username is configured; else do nothing"""
+        username, password = self._get_auth_info()
+        if username:
+            self.auth_token = self._login_impl(username, password)
+
+    # NOTE(review): maxage looks like seconds (28 days) and keyarg=1
+    # presumably selects 'username' as cache key - confirm against the
+    # 'cache' decorator.
+    @cache(maxage=28*86400, keyarg=1)
+    def _login_impl(self, username, password):
+        """Run 'LoginQuery' and return the token from its response
+
+        An HTTP 403 error is raised as AuthenticationError;
+        any other HttpError is re-raised unchanged.
+        """
+        self.log.info("Logging in as %s", username)
+
+        variables = {
+            "username": username,
+            "password": password,
+        }
+
+        try:
+            data = self._request_graphql("LoginQuery", variables)
+        except exception.HttpError as exc:
+            if exc.status == 403:
+                raise exception.AuthenticationError()
+            raise
+
+        return data["login"]["token"]
+
+    def _request_graphql(self, opname, variables):
+        """POST the query named 'opname' and return the response's "data"
+
+        'opname' must be a key of QUERIES. The request body is a JSON
+        object holding the query text, its variables, and the current
+        auth token (None when not logged in).
+        """
+        url = "https://api.scrolller.com/api/v2/graphql"
+        headers = {
+            "Content-Type"  : "text/plain;charset=UTF-8",
+            "Origin"        : self.root,
+            "Sec-Fetch-Dest": "empty",
+            "Sec-Fetch-Mode": "cors",
+            "Sec-Fetch-Site": "same-site",
+        }
+        data = {
+            "query"        : QUERIES[opname],
+            "variables"    : variables,
+            "authorization": self.auth_token,
+        }
+        return self.request(
+            url, method="POST", headers=headers, data=util.json_dumps(data),
+        ).json()["data"]
+
+    def _pagination(self, opname, variables):
+        """Yield all items of a paginated GraphQL query
+
+        'variables' is updated in place: its "iterator" entry is set to
+        the value returned by the previous page before the next request.
+        Iteration stops once a page reports a falsy iterator.
+        """
+        while True:
+            data = self._request_graphql(opname, variables)
+
+            # unwrap nested result objects until reaching the one
+            # that holds the "items" list
+            while "items" not in data:
+                data = data.popitem()[1]
+            yield from data["items"]
+
+            if not data["iterator"]:
+                return
+            variables["iterator"] = data["iterator"]
+
+    def _sort_key(self, src):
+        """Rank a media source: widest first, non-optimized on equal width"""
+        return src["width"], not src["isOptimized"]
+
+
+class ScrolllerSubredditExtractor(ScrolllerExtractor):
+    """Extractor for media from a scrolller subreddit"""
+    subcategory = "subreddit"
+    pattern = BASE_PATTERN + r"(/r/[^/?#]+)(?:/?\?([^#]+))?"
+    example = "https://scrolller.com/r/SUBREDDIT"
+
+    def posts(self):
+        """Return a generator over all posts of the matched subreddit
+
+        A 'filter' query parameter in the URL is upper-cased and has its
+        trailing "S" characters removed ("pictures" -> "PICTURE") before
+        it is passed on as the query's 'filter' variable.
+        """
+        path, query_string = self.groups
+
+        post_filter = None
+        if query_string:
+            params = text.parse_query(query_string)
+            if "filter" in params:
+                post_filter = params["filter"].upper().rstrip("S")
+
+        return self._pagination("SubredditQuery", {
+            "url"      : path,
+            "iterator" : None,
+            "filter"   : post_filter,
+            "hostsDown": None,
+        })
+
+
+class ScrolllerFollowingExtractor(ScrolllerExtractor):
+    """Extractor for followed scrolller subreddits"""
+    subcategory = "following"
+    pattern = BASE_PATTERN + r"/following"
+    example = "https://scrolller.com/following"
+
+    def items(self):
+        """Yield a Queue message for every followed subreddit
+
+        Raises AuthorizationError when no auth token is available
+        after the login attempt.
+        """
+        self.login()
+        if not self.auth_token:
+            raise exception.AuthorizationError("Login required")
+
+        variables = {"iterator": None, "hostsDown": None}
+        followed = self._pagination("FollowingQuery", variables)
+
+        for entry in followed:
+            target = self.root + entry["url"]
+            # queued URLs are handled by the subreddit extractor
+            entry["_extractor"] = ScrolllerSubredditExtractor
+            yield Message.Queue, target, entry
+
+
+class ScrolllerPostExtractor(ScrolllerExtractor):
+    """Extractor for media from a single scrolller post"""
+    subcategory = "post"
+    pattern = BASE_PATTERN + r"/(?!r/|following$)([^/?#]+)"
+    example = "https://scrolller.com/title-slug-a1b2c3d4f5"
+
+    def posts(self):
+        """Return a 1-tuple holding the post embedded in the post's page
+
+        The page stores its data as an escaped JSON string assigned to
+        'window.scrolllerConfig'; the post is that object's "item" value.
+        """
+        slug = self.groups[0]
+        page = self.request("{}/{}".format(self.root, slug)).text
+
+        raw = text.extr(
+            page, '<script>window.scrolllerConfig="', '"</script>')
+        # undo the backslash-escaping of double quotes before parsing
+        config = util.json_loads(raw.replace('\\"', '"'))
+
+        return (config["item"],)
+
+
+# GraphQL documents, keyed by the operation name that is passed
+# to ScrolllerExtractor._request_graphql()
+QUERIES = {
+
+    # posts of one subreddit, 50 per page; "iterator" is the paging cursor
+    "SubredditQuery": """\
+query SubredditQuery(
+    $url: String!
+    $filter: SubredditPostFilter
+    $iterator: String
+) {
+    getSubreddit(
+        url: $url
+    ) {
+        children(
+            limit: 50
+            iterator: $iterator
+            filter: $filter
+            disabledHosts: null
+        ) {
+            iterator items {
+                __typename id url title subredditId subredditTitle
+                subredditUrl redditPath isNsfw albumUrl hasAudio
+                fullLengthSource gfycatSource redgifsSource ownerAvatar
+                username displayName isPaid tags isFavorite
+                mediaSources { url width height isOptimized }
+                blurredMediaSources { url width height isOptimized }
+            }
+        }
+    }
+}
+""",
+
+    # subreddits followed by the logged-in account, 10 per page
+    "FollowingQuery": """\
+query FollowingQuery(
+    $iterator: String
+) {
+    getFollowing(
+        limit: 10
+        iterator: $iterator
+    ) {
+        iterator items {
+            __typename id url title secondaryTitle description createdAt isNsfw
+            subscribers isComplete itemCount videoCount pictureCount albumCount
+            isPaid username tags isFollowing
+            banner { url width height isOptimized }
+        }
+    }
+}
+""",
+
+    # username/password login; the returned "token" is used as auth token
+    "LoginQuery": """\
+query LoginQuery(
+    $username: String!,
+    $password: String!
+) {
+    login(
+        username: $username,
+        password: $password
+    ) {
+        username token expiresAt isAdmin status isPremium
+    }
+}
+""",
+
+}
--- a/scripts/supportedsites.py
+++ b/scripts/supportedsites.py
@@ -294,6 +294,9 @@ SUBCATEGORY_MAP = {
    "sankaku": {
        "books": "Book Searches",
    },
+    "scrolller": {
+        "following": "Followed Subreddits",
+    },
    "sexcom": {
        "pins": "User Pins",
    },
@@ -417,6 +420,7 @@ AUTH_MAP = {
    "ponybooru"      : "API Key",
    "reddit"         : _OAUTH,
    "sankaku"        : "Supported",
+    "scrolller"      : "Supported",
    "seiga"          : "Supported",
    "smugmug"        : _OAUTH,
    "subscribestar"  : "Supported",
--- a/test/results/scrolller.py
+++ b/test/results/scrolller.py
@@ -0,0 +1,81 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+from gallery_dl.extractor import scrolller
+
+
+# Expected results for the scrolller extractors. Keys starting with "#"
+# appear to be directives for the test runner, all others metadata
+# expectations - NOTE(review): confirm against the test harness.
+__tests__ = (
+# subreddit listing
+{
+    "#url"    : "https://scrolller.com/r/AmateurPhotography",
+    "#class"  : scrolller.ScrolllerSubredditExtractor,
+    "#pattern": r"https://\w+\.scrolller\.com/(\w+/)?[\w-]+-\w+\.(jpg|png)",
+    "#range"  : "1-100",
+    "#count"  : 100,
+
+    "albumUrl"        : None,
+    "displayName"     : None,
+    "fullLengthSource": None,
+    "gfycatSource"    : None,
+    "hasAudio"        : None,
+    "height"          : int,
+    "id"              : int,
+    "isFavorite"      : False,
+    "isNsfw"          : False,
+    "isOptimized"     : bool,
+    "isPaid"          : None,
+    "mediaSources"    : list,
+    "ownerAvatar"     : None,
+    "redditPath"      : r"re:/r/AmateurPhotography/comments/...",
+    "redgifsSource"   : None,
+    # presumably either of the listed values is accepted - TODO confirm
+    "subredditId"     : {0, 413},
+    "subredditTitle"  : "AmateurPhotography",
+    "subredditUrl"    : "/r/AmateurPhotography",
+    "tags"            : None,
+    "title"           : str,
+    "url"             : str,
+    "username"        : None,
+    "width"           : int,
+},
+
+# single post
+{
+    "#url"  : "https://scrolller.com/cabin-in-northern-finland-7nagf1929p",
+    "#class": scrolller.ScrolllerPostExtractor,
+    "#urls" : "https://yocto.scrolller.com/cabin-in-northern-finland-93vjsuxmcz.jpg",
+
+    "albumUrl"        : None,
+    "displayName"     : None,
+    "extension"       : "jpg",
+    "filename"        : "cabin-in-northern-finland-93vjsuxmcz",
+    "fullLengthSource": None,
+    "gfycatSource"    : None,
+    "hasAudio"        : None,
+    "height"          : 1350,
+    "id"              : 10478722,
+    "isNsfw"          : False,
+    "isOptimized"     : False,
+    "isPaid"          : None,
+    "mediaSources"    : list,
+    "ownerAvatar"     : None,
+    "redditPath"      : "/r/AmateurPhotography/comments/jj048q/cabin_in_northern_finland/",
+    "redgifsSource"   : None,
+    "subredditId"     : 0,
+    "subredditTitle"  : "AmateurPhotography",
+    "subredditUrl"    : "/r/AmateurPhotography",
+    "tags"            : None,
+    "title"           : "Cabin in northern Finland",
+    "url"             : "https://yocto.scrolller.com/cabin-in-northern-finland-93vjsuxmcz.jpg",
+    "username"        : None,
+    "width"           : 1080,
+},
+
+# followed subreddits; each result should match the subreddit pattern
+{
+    "#url"    : "https://scrolller.com/following",
+    "#class"  : scrolller.ScrolllerFollowingExtractor,
+    "#pattern": scrolller.ScrolllerSubredditExtractor.pattern,
+    "#auth"   : True,
+},
+
+)