1
0
mirror of https://github.com/mikf/gallery-dl.git synced 2024-11-25 04:02:32 +01:00

[scrolller] add initial support (#295, #3418, #5051)

This commit is contained in:
Mike Fährmann 2024-10-21 09:42:42 +02:00
parent eacd4479a2
commit 66aa514c25
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88
6 changed files with 320 additions and 0 deletions

View File

@ -443,6 +443,7 @@ Description
* ``nijie`` (R)
* ``pillowfort``
* ``sankaku``
* ``scrolller``
* ``seiga``
* ``subscribestar``
* ``tapas``

View File

@ -799,6 +799,12 @@ Consider all listed sites to potentially be NSFW.
<td>Articles, Tag Searches</td>
<td></td>
</tr>
<tr>
<td>Scrolller</td>
<td>https://scrolller.com/</td>
<td>Followed Subreddits, Posts, Subreddits</td>
<td>Supported</td>
</tr>
<tr>
<td>Sen Manga</td>
<td>https://raw.senmanga.com/</td>

View File

@ -141,6 +141,7 @@ modules = [
"rule34us",
"sankaku",
"sankakucomplex",
"scrolller",
"seiga",
"senmanga",
"sexcom",

View File

@ -0,0 +1,227 @@
# -*- coding: utf-8 -*-
# Copyright 2024 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Extractors for https://scrolller.com/"""
from .common import Extractor, Message
from .. import text, util, exception
from ..cache import cache
# Regex prefix shared by all extractor patterns in this module: matches
# "scrolller.com" with an optional "http(s)://" scheme and optional "www."
BASE_PATTERN = r"(?:https?://)?(?:www\.)?scrolller\.com"
class ScrolllerExtractor(Extractor):
    """Base class for scrolller extractors"""
    category = "scrolller"
    root = "https://scrolller.com"
    directory_fmt = ("{category}", "{subredditTitle}")
    filename_fmt = "{id}{title:? //}.{extension}"
    archive_fmt = "{id}"
    request_interval = (0.5, 1.5)

    def _init(self):
        # Sent as "authorization" with every GraphQL request;
        # remains None unless login() obtains a token
        self.auth_token = None

    def items(self):
        """Yield a Directory and a Url message for each post with media

        For every post returned by posts(), the best entry of its
        'mediaSources' list (see _sort_key) is merged into the post
        metadata and its URL is emitted for download.
        Posts without any media source are skipped with a warning.
        """
        self.login()

        for post in self.posts():
            sources = post.get("mediaSources")
            if not sources:
                # max() raises ValueError on an empty sequence, which would
                # abort the whole extraction because of a single post
                self.log.warning("%s: No media files", post.get("id"))
                continue

            src = max(sources, key=self._sort_key)
            # Copies url/width/height/isOptimized of the chosen source
            # into the post, overwriting the post's own "url" value
            post.update(src)
            url = src["url"]
            text.nameext_from_url(url, post)

            yield Message.Directory, post
            yield Message.Url, url, post

    def posts(self):
        """Return an iterable of post objects; overridden by subclasses"""
        return ()

    def login(self):
        """Store an auth token in self.auth_token if a username is set"""
        username, password = self._get_auth_info()
        if username:
            self.auth_token = self._login_impl(username, password)

    @cache(maxage=28*86400, keyarg=1)
    def _login_impl(self, username, password):
        """Run the 'LoginQuery' GraphQL operation and return its token

        Raises exception.AuthenticationError when the request fails
        with HTTP status 403; other HTTP errors are re-raised unchanged.
        """
        self.log.info("Logging in as %s", username)

        variables = {
            "username": username,
            "password": password,
        }

        try:
            data = self._request_graphql("LoginQuery", variables)
        except exception.HttpError as exc:
            if exc.status == 403:
                raise exception.AuthenticationError()
            raise

        return data["login"]["token"]

    def _request_graphql(self, opname, variables):
        """POST the query registered as 'opname' and return its "data" """
        url = "https://api.scrolller.com/api/v2/graphql"
        headers = {
            "Content-Type"  : "text/plain;charset=UTF-8",
            "Origin"        : self.root,
            "Sec-Fetch-Dest": "empty",
            "Sec-Fetch-Mode": "cors",
            "Sec-Fetch-Site": "same-site",
        }
        data = {
            "query"        : QUERIES[opname],
            "variables"    : variables,
            "authorization": self.auth_token,
        }
        return self.request(
            url, method="POST", headers=headers, data=util.json_dumps(data),
        ).json()["data"]

    def _pagination(self, opname, variables):
        """Yield all items of a paginated GraphQL operation

        'variables' is modified in place: its "iterator" entry is set to
        the value returned by the previous response before each follow-up
        request. Iteration stops once a response has a falsy "iterator".
        """
        while True:
            data = self._request_graphql(opname, variables)

            # Unwrap nested result objects until reaching the one
            # that directly contains the "items" list
            while "items" not in data:
                data = data.popitem()[1]
            yield from data["items"]

            if not data["iterator"]:
                return
            variables["iterator"] = data["iterator"]

    def _sort_key(self, src):
        """Rank a media source by width, then by not being optimized"""
        return src["width"], not src["isOptimized"]
class ScrolllerSubredditExtractor(ScrolllerExtractor):
    """Extractor for media from a scrolller subreddit"""
    subcategory = "subreddit"
    pattern = BASE_PATTERN + r"(/r/[^/?#]+)(?:/?\?([^#]+))?"
    example = "https://scrolller.com/r/SUBREDDIT"

    def posts(self):
        """Return a paginated iterator over the posts of this subreddit

        An optional 'filter' query parameter from the input URL is
        upper-cased and stripped of trailing "S" characters before
        being passed on as the GraphQL 'filter' variable.
        """
        path, query = self.groups

        params = text.parse_query(query) if query else {}
        post_filter = (
            params["filter"].upper().rstrip("S")
            if "filter" in params else None
        )

        return self._pagination("SubredditQuery", {
            "url"      : path,
            "iterator" : None,
            "filter"   : post_filter,
            "hostsDown": None,
        })
class ScrolllerFollowingExtractor(ScrolllerExtractor):
    """Extractor for followed scrolller subreddits"""
    subcategory = "following"
    pattern = BASE_PATTERN + r"/following"
    example = "https://scrolller.com/following"

    def items(self):
        """Yield a Queue message for every followed subreddit

        Raises exception.AuthorizationError when no auth token
        is available after attempting to log in.
        """
        self.login()
        if not self.auth_token:
            raise exception.AuthorizationError("Login required")

        followed = self._pagination("FollowingQuery", {
            "iterator" : None,
            "hostsDown": None,
        })

        for entry in followed:
            # Hand each result over to the subreddit extractor
            entry["_extractor"] = ScrolllerSubredditExtractor
            yield Message.Queue, self.root + entry["url"], entry
class ScrolllerPostExtractor(ScrolllerExtractor):
    """Extractor for media from a single scrolller post"""
    subcategory = "post"
    pattern = BASE_PATTERN + r"/(?!r/|following$)([^/?#]+)"
    example = "https://scrolller.com/title-slug-a1b2c3d4f5"

    def posts(self):
        """Return a 1-tuple with the post object embedded in the post page"""
        slug = self.groups[0]
        page = self.request(self.root + "/" + slug).text

        # The page embeds its data as a JSON document inside a quoted
        # string, so escaped double quotes are restored before parsing
        embedded = text.extr(
            page, '<script>window.scrolllerConfig="', '"</script>')
        config = util.json_loads(embedded.replace('\\"', '"'))

        return (config["item"],)
# GraphQL query documents keyed by operation name.
# _request_graphql() looks an entry up via QUERIES[opname] and sends it
# as the "query" field of the request payload.
QUERIES = {
# Posts of one subreddit, requested 50 per page ("limit: 50")
"SubredditQuery": """\
query SubredditQuery(
$url: String!
$filter: SubredditPostFilter
$iterator: String
) {
getSubreddit(
url: $url
) {
children(
limit: 50
iterator: $iterator
filter: $filter
disabledHosts: null
) {
iterator items {
__typename id url title subredditId subredditTitle
subredditUrl redditPath isNsfw albumUrl hasAudio
fullLengthSource gfycatSource redgifsSource ownerAvatar
username displayName isPaid tags isFavorite
mediaSources { url width height isOptimized }
blurredMediaSources { url width height isOptimized }
}
}
}
}
""",
# Subreddits followed by the logged-in account, 10 per page ("limit: 10")
"FollowingQuery": """\
query FollowingQuery(
$iterator: String
) {
getFollowing(
limit: 10
iterator: $iterator
) {
iterator items {
__typename id url title secondaryTitle description createdAt isNsfw
subscribers isComplete itemCount videoCount pictureCount albumCount
isPaid username tags isFollowing
banner { url width height isOptimized }
}
}
}
""",
# Username/password login; _login_impl() reads "token" from the result
"LoginQuery": """\
query LoginQuery(
$username: String!,
$password: String!
) {
login(
username: $username,
password: $password
) {
username token expiresAt isAdmin status isPremium
}
}
""",
}

View File

@ -294,6 +294,9 @@ SUBCATEGORY_MAP = {
"sankaku": {
"books": "Book Searches",
},
"scrolller": {
"following": "Followed Subreddits",
},
"sexcom": {
"pins": "User Pins",
},
@ -417,6 +420,7 @@ AUTH_MAP = {
"ponybooru" : "API Key",
"reddit" : _OAUTH,
"sankaku" : "Supported",
"scrolller" : "Supported",
"seiga" : "Supported",
"smugmug" : _OAUTH,
"subscribestar" : "Supported",

81
test/results/scrolller.py Normal file
View File

@ -0,0 +1,81 @@
# -*- coding: utf-8 -*-
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
from gallery_dl.extractor import scrolller
# Expected results for the scrolller extractors, one dict per input URL.
# NOTE(review): presumably consumed by the project's test runner, with
# "#"-prefixed keys acting as test directives and all other keys being
# expected metadata values -- confirm against the runner.
__tests__ = (
# Subreddit listing, limited to the first 100 results
{
"#url" : "https://scrolller.com/r/AmateurPhotography",
"#class" : scrolller.ScrolllerSubredditExtractor,
"#pattern": r"https://\w+\.scrolller\.com/(\w+/)?[\w-]+-\w+\.(jpg|png)",
"#range" : "1-100",
"#count" : 100,
"albumUrl" : None,
"displayName" : None,
"fullLengthSource": None,
"gfycatSource" : None,
"hasAudio" : None,
"height" : int,
"id" : int,
"isFavorite" : False,
"isNsfw" : False,
"isOptimized" : bool,
"isPaid" : None,
"mediaSources" : list,
"ownerAvatar" : None,
"redditPath" : r"re:/r/AmateurPhotography/comments/...",
"redgifsSource" : None,
"subredditId" : {0, 413},
"subredditTitle" : "AmateurPhotography",
"subredditUrl" : "/r/AmateurPhotography",
"tags" : None,
"title" : str,
"url" : str,
"username" : None,
"width" : int,
},
# Single post page with one exact expected file URL
{
"#url" : "https://scrolller.com/cabin-in-northern-finland-7nagf1929p",
"#class": scrolller.ScrolllerPostExtractor,
"#urls" : "https://yocto.scrolller.com/cabin-in-northern-finland-93vjsuxmcz.jpg",
"albumUrl" : None,
"displayName" : None,
"extension" : "jpg",
"filename" : "cabin-in-northern-finland-93vjsuxmcz",
"fullLengthSource": None,
"gfycatSource" : None,
"hasAudio" : None,
"height" : 1350,
"id" : 10478722,
"isNsfw" : False,
"isOptimized" : False,
"isPaid" : None,
"mediaSources" : list,
"ownerAvatar" : None,
"redditPath" : "/r/AmateurPhotography/comments/jj048q/cabin_in_northern_finland/",
"redgifsSource" : None,
"subredditId" : 0,
"subredditTitle" : "AmateurPhotography",
"subredditUrl" : "/r/AmateurPhotography",
"tags" : None,
"title" : "Cabin in northern Finland",
"url" : "https://yocto.scrolller.com/cabin-in-northern-finland-93vjsuxmcz.jpg",
"username" : None,
"width" : 1080,
},
# Followed subreddits; results are matched against the subreddit pattern
{
"#url" : "https://scrolller.com/following",
"#class" : scrolller.ScrolllerFollowingExtractor,
"#pattern": scrolller.ScrolllerSubredditExtractor.pattern,
"#auth" : True,
},
)