1
0
mirror of https://github.com/mikf/gallery-dl.git synced 2024-11-22 02:32:33 +01:00

[newgrounds] support page numbers in URLs (#6320)

https://USER.newgrounds.com/art/?page=5
https://USER.newgrounds.com/art/page/5
This commit is contained in:
Mike Fährmann 2024-10-14 14:12:20 +02:00
parent d7e34e1dc3
commit 4358799833
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88
2 changed files with 66 additions and 44 deletions

View File

@ -14,6 +14,9 @@ from ..cache import cache
import itertools
import re
BASE_PATTERN = r"(?:https?://)?(?:www\.)?newgrounds\.com"
USER_PATTERN = r"(?:https?://)?([\w-]+)\.newgrounds\.com"
class NewgroundsExtractor(Extractor):
"""Base class for newgrounds extractors"""
@ -93,7 +96,7 @@ class NewgroundsExtractor(Extractor):
def posts(self):
"""Return URLs of all relevant post pages"""
return self._pagination(self._path)
return self._pagination(self._path, self.groups[1])
def metadata(self):
"""Return general metadata"""
@ -334,10 +337,10 @@ class NewgroundsExtractor(Extractor):
for fmt in formats:
yield fmt[1][0]["src"]
def _pagination(self, kind):
def _pagination(self, kind, pnum=1):
url = "{}/{}".format(self.user_root, kind)
params = {
"page": 1,
"page": text.parse_int(pnum, 1),
"isAjaxRequest": "1",
}
headers = {
@ -400,8 +403,7 @@ class NewgroundsImageExtractor(NewgroundsExtractor):
class NewgroundsMediaExtractor(NewgroundsExtractor):
"""Extractor for a media file from newgrounds.com"""
subcategory = "media"
pattern = (r"(?:https?://)?(?:www\.)?newgrounds\.com"
r"(/(?:portal/view|audio/listen)/\d+)")
pattern = BASE_PATTERN + r"(/(?:portal/view|audio/listen)/\d+)"
example = "https://www.newgrounds.com/portal/view/12345"
def __init__(self, match):
@ -416,35 +418,35 @@ class NewgroundsMediaExtractor(NewgroundsExtractor):
class NewgroundsArtExtractor(NewgroundsExtractor):
"""Extractor for all images of a newgrounds user"""
subcategory = _path = "art"
pattern = r"(?:https?://)?([\w-]+)\.newgrounds\.com/art/?$"
pattern = USER_PATTERN + r"/art(?:(?:/page/|/?\?page=)(\d+))?/?$"
example = "https://USER.newgrounds.com/art"
class NewgroundsAudioExtractor(NewgroundsExtractor):
"""Extractor for all audio submissions of a newgrounds user"""
subcategory = _path = "audio"
pattern = r"(?:https?://)?([\w-]+)\.newgrounds\.com/audio/?$"
pattern = USER_PATTERN + r"/audio(?:(?:/page/|/?\?page=)(\d+))?/?$"
example = "https://USER.newgrounds.com/audio"
class NewgroundsMoviesExtractor(NewgroundsExtractor):
"""Extractor for all movies of a newgrounds user"""
subcategory = _path = "movies"
pattern = r"(?:https?://)?([\w-]+)\.newgrounds\.com/movies/?$"
pattern = USER_PATTERN + r"/movies(?:(?:/page/|/?\?page=)(\d+))?/?$"
example = "https://USER.newgrounds.com/movies"
class NewgroundsGamesExtractor(NewgroundsExtractor):
"""Extractor for a newgrounds user's games"""
subcategory = _path = "games"
pattern = r"(?:https?://)?([\w-]+)\.newgrounds\.com/games/?$"
pattern = USER_PATTERN + r"/games(?:(?:/page/|/?\?page=)(\d+))?/?$"
example = "https://USER.newgrounds.com/games"
class NewgroundsUserExtractor(NewgroundsExtractor):
"""Extractor for a newgrounds user profile"""
subcategory = "user"
pattern = r"(?:https?://)?([\w-]+)\.newgrounds\.com/?$"
pattern = USER_PATTERN + r"/?$"
example = "https://USER.newgrounds.com"
def initialize(self):
@ -464,25 +466,22 @@ class NewgroundsFavoriteExtractor(NewgroundsExtractor):
"""Extractor for posts favorited by a newgrounds user"""
subcategory = "favorite"
directory_fmt = ("{category}", "{user}", "Favorites")
pattern = (r"(?:https?://)?([\w-]+)\.newgrounds\.com"
r"/favorites(?!/following)(?:/(art|audio|movies))?/?")
pattern = (USER_PATTERN + r"/favorites(?!/following)(?:/(art|audio|movies)"
r"(?:(?:/page/|/?\?page=)(\d+))?)?")
example = "https://USER.newgrounds.com/favorites"
def __init__(self, match):
NewgroundsExtractor.__init__(self, match)
self.kind = match.group(2)
def posts(self):
if self.kind:
return self._pagination(self.kind)
_, kind, pnum = self.groups
if kind:
return self._pagination_favorites(kind, pnum)
return itertools.chain.from_iterable(
self._pagination(k) for k in ("art", "audio", "movies")
self._pagination_favorites(k) for k in ("art", "audio", "movies")
)
def _pagination(self, kind):
def _pagination_favorites(self, kind, pnum=1):
url = "{}/favorites/{}".format(self.user_root, kind)
params = {
"page": 1,
"page": text.parse_int(pnum, 1),
"isAjaxRequest": "1",
}
headers = {
@ -514,12 +513,13 @@ class NewgroundsFavoriteExtractor(NewgroundsExtractor):
class NewgroundsFollowingExtractor(NewgroundsFavoriteExtractor):
"""Extractor for a newgrounds user's favorited users"""
subcategory = "following"
pattern = r"(?:https?://)?([\w-]+)\.newgrounds\.com/favorites/(following)"
pattern = USER_PATTERN + r"/favorites/(following)"
example = "https://USER.newgrounds.com/favorites/following"
def items(self):
_, kind, pnum = self.groups
data = {"_extractor": NewgroundsUserExtractor}
for url in self._pagination(self.kind):
for url in self._pagination_favorites(kind, pnum):
yield Message.Queue, url, data
@staticmethod
@ -534,13 +534,12 @@ class NewgroundsSearchExtractor(NewgroundsExtractor):
"""Extractor for newgrounds.com search results"""
subcategory = "search"
directory_fmt = ("{category}", "search", "{search_tags}")
pattern = (r"(?:https?://)?(?:www\.)?newgrounds\.com"
r"/search/conduct/([^/?#]+)/?\?([^#]+)")
pattern = BASE_PATTERN + r"/search/conduct/([^/?#]+)/?\?([^#]+)"
example = "https://www.newgrounds.com/search/conduct/art?terms=QUERY"
def __init__(self, match):
NewgroundsExtractor.__init__(self, match)
self._path, query = match.groups()
self._path, query = self.groups
self.query = text.parse_query(query)
def posts(self):
@ -550,19 +549,20 @@ class NewgroundsSearchExtractor(NewgroundsExtractor):
for s in suitabilities.split(",")}
self.request(self.root + "/suitabilities",
method="POST", data=data)
return self._pagination("/search/conduct/" + self._path, self.query)
return self._pagination_search(
"/search/conduct/" + self._path, self.query)
def metadata(self):
return {"search_tags": self.query.get("terms", "")}
def _pagination(self, path, params):
def _pagination_search(self, path, params):
url = self.root + path
params["inner"] = "1"
params["page"] = text.parse_int(params.get("page"), 1)
headers = {
"Accept": "application/json, text/javascript, */*; q=0.01",
"X-Requested-With": "XMLHttpRequest",
}
params["inner"] = "1"
params["page"] = 1
while True:
data = self.request(url, params=params, headers=headers).json()

View File

@ -267,48 +267,67 @@ From The ZJ "Late """,
{
"#url" : "https://tomfulp.newgrounds.com/art",
"#category": ("", "newgrounds", "art"),
"#class" : newgrounds.NewgroundsArtExtractor,
"#pattern" : newgrounds.NewgroundsImageExtractor.pattern,
"#count" : ">= 3",
},
{
"#url" : "https://tomfulp.newgrounds.com/art/page/3",
"#class" : newgrounds.NewgroundsArtExtractor,
},
{
"#url" : "https://tomfulp.newgrounds.com/art?page=3",
"#class" : newgrounds.NewgroundsArtExtractor,
},
{
"#url" : "https://tomfulp.newgrounds.com/audio",
"#category": ("", "newgrounds", "audio"),
"#class" : newgrounds.NewgroundsAudioExtractor,
"#pattern" : r"https://(audio\.ngfiles\.com/\d+/\d+_.+\.mp3|uploads\.ungrounded\.net/.+\.png)",
"#count" : ">= 10",
},
{
"#url" : "https://tomfulp.newgrounds.com/audio?page=3",
"#class" : newgrounds.NewgroundsAudioExtractor,
},
{
"#url" : "https://tomfulp.newgrounds.com/movies",
"#category": ("", "newgrounds", "movies"),
"#class" : newgrounds.NewgroundsMoviesExtractor,
"#pattern" : r"https://uploads.ungrounded.net(/alternate)?/\d+/\d+_.+",
"#range" : "1-10",
"#count" : 10,
},
{
"#url" : "https://tomfulp.newgrounds.com/movies/?page=3",
"#class" : newgrounds.NewgroundsMoviesExtractor,
},
{
"#url" : "https://tomfulp.newgrounds.com/games",
"#category": ("", "newgrounds", "games"),
"#class" : newgrounds.NewgroundsGamesExtractor,
"#pattern" : r"https://uploads.ungrounded.net(/alternate)?/(\d+/\d+_.+|tmp/.+)",
"#range" : "1-10",
"#count" : 10,
},
{
"#url" : "https://tomfulp.newgrounds.com/games?page=3",
"#class" : newgrounds.NewgroundsGamesExtractor,
},
{
"#url" : "https://tomfulp.newgrounds.com",
"#category": ("", "newgrounds", "user"),
"#class" : newgrounds.NewgroundsUserExtractor,
"#urls" : "https://tomfulp.newgrounds.com/art",
},
{
"#url" : "https://tomfulp.newgrounds.com",
"#category": ("", "newgrounds", "user"),
"#class" : newgrounds.NewgroundsUserExtractor,
"#options" : {"include": "all"},
"#urls" : (
@ -321,42 +340,47 @@ From The ZJ "Late """,
{
"#url" : "https://tomfulp.newgrounds.com/favorites/art",
"#category": ("", "newgrounds", "favorite"),
"#class" : newgrounds.NewgroundsFavoriteExtractor,
"#range" : "1-10",
"#count" : ">= 10",
},
{
"#url" : "https://tomfulp.newgrounds.com/favorites/art?page=3",
"#class" : newgrounds.NewgroundsFavoriteExtractor,
},
{
"#url" : "https://tomfulp.newgrounds.com/favorites/audio",
"#category": ("", "newgrounds", "favorite"),
"#class" : newgrounds.NewgroundsFavoriteExtractor,
},
{
"#url" : "https://tomfulp.newgrounds.com/favorites/movies",
"#category": ("", "newgrounds", "favorite"),
"#class" : newgrounds.NewgroundsFavoriteExtractor,
},
{
"#url" : "https://tomfulp.newgrounds.com/favorites/",
"#category": ("", "newgrounds", "favorite"),
"#class" : newgrounds.NewgroundsFavoriteExtractor,
},
{
"#url" : "https://tomfulp.newgrounds.com/favorites/following",
"#category": ("", "newgrounds", "following"),
"#class" : newgrounds.NewgroundsFollowingExtractor,
"#pattern" : newgrounds.NewgroundsUserExtractor.pattern,
"#range" : "76-125",
"#count" : 50,
},
{
"#url" : "https://tomfulp.newgrounds.com/favorites/following?page=3",
"#class" : newgrounds.NewgroundsFollowingExtractor,
},
{
"#url" : "https://www.newgrounds.com/search/conduct/art?terms=tree",
"#category": ("", "newgrounds", "search"),
"#class" : newgrounds.NewgroundsSearchExtractor,
"#pattern" : newgrounds.NewgroundsImageExtractor.pattern,
"#range" : "1-10",
@ -367,7 +391,6 @@ From The ZJ "Late """,
{
"#url" : "https://www.newgrounds.com/search/conduct/movies?terms=tree",
"#category": ("", "newgrounds", "search"),
"#class" : newgrounds.NewgroundsSearchExtractor,
"#pattern" : r"https://uploads.ungrounded.net(/alternate)?/\d+/\d+",
"#range" : "1-10",
@ -376,7 +399,6 @@ From The ZJ "Late """,
{
"#url" : "https://www.newgrounds.com/search/conduct/audio?advanced=1&terms=tree+green+nature&match=tdtu&genre=5&suitabilities=e%2Cm",
"#category": ("", "newgrounds", "search"),
"#class" : newgrounds.NewgroundsSearchExtractor,
},