mirror of
https://github.com/mikf/gallery-dl.git
synced 2024-11-23 03:02:50 +01:00
[newgrounds] add 'search' extractor (closes #2161)
This commit is contained in:
parent
8b910dd8ae
commit
37beb1298e
@ -490,7 +490,7 @@ Consider all sites to be NSFW unless otherwise known.
|
||||
<tr>
|
||||
<td>Newgrounds</td>
|
||||
<td>https://www.newgrounds.com/</td>
|
||||
<td>Art, Audio, Favorites, individual Images, Media Files, Movies, User Profiles</td>
|
||||
<td>Art, Audio, Favorites, individual Images, Media Files, Movies, Search Results, User Profiles</td>
|
||||
<td>Supported</td>
|
||||
</tr>
|
||||
<tr>
|
||||
|
@ -38,6 +38,7 @@ class NewgroundsExtractor(Extractor):
|
||||
|
||||
def items(self):
|
||||
self.login()
|
||||
metadata = self.metadata()
|
||||
|
||||
for post_url in self.posts():
|
||||
try:
|
||||
@ -48,6 +49,8 @@ class NewgroundsExtractor(Extractor):
|
||||
url = None
|
||||
|
||||
if url:
|
||||
if metadata:
|
||||
post.update(metadata)
|
||||
yield Message.Directory, post
|
||||
yield Message.Url, url, text.nameext_from_url(url, post)
|
||||
|
||||
@ -62,9 +65,12 @@ class NewgroundsExtractor(Extractor):
|
||||
"Unable to get download URL for '%s'", post_url)
|
||||
|
||||
def posts(self):
|
||||
"""Return urls of all relevant image pages"""
|
||||
"""Return URLs of all relevant post pages"""
|
||||
return self._pagination(self._path)
|
||||
|
||||
def metadata(self):
|
||||
"""Return general metadata"""
|
||||
|
||||
def login(self):
|
||||
username, password = self._get_auth_info()
|
||||
if username:
|
||||
@ -493,3 +499,59 @@ class NewgroundsFollowingExtractor(NewgroundsFavoriteExtractor):
|
||||
text.ensure_http_scheme(user.rpartition('"')[2])
|
||||
for user in text.extract_iter(page, 'class="item-user', '"><img')
|
||||
]
|
||||
|
||||
|
||||
class NewgroundsSearchExtractor(NewgroundsExtractor):
|
||||
"""Extractor for newgrounds.com search reesults"""
|
||||
subcategory = "search"
|
||||
directory_fmt = ("{category}", "search", "{search_tags}")
|
||||
pattern = (r"(?:https?://)?(?:www\.)?newgrounds\.com"
|
||||
r"/search/conduct/([^/?#]+)/?\?([^#]+)")
|
||||
test = (
|
||||
("https://www.newgrounds.com/search/conduct/art?terms=tree", {
|
||||
"pattern": NewgroundsImageExtractor.pattern,
|
||||
"keyword": {"search_tags": "tree"},
|
||||
"range": "1-10",
|
||||
"count": 10,
|
||||
}),
|
||||
("https://www.newgrounds.com/search/conduct/movies?terms=tree", {
|
||||
"pattern": r"https://uploads.ungrounded.net(/alternate)?/\d+/\d+",
|
||||
"range": "1-10",
|
||||
"count": 10,
|
||||
}),
|
||||
("https://www.newgrounds.com/search/conduct/audio?advanced=1"
|
||||
"&terms=tree+green+nature&match=tdtu&genre=5&suitabilities=e%2Cm"),
|
||||
)
|
||||
|
||||
def __init__(self, match):
|
||||
NewgroundsExtractor.__init__(self, match)
|
||||
self._path, query = match.groups()
|
||||
self.query = text.parse_query(query)
|
||||
|
||||
def posts(self):
|
||||
return self._pagination("/search/conduct/" + self._path, self.query)
|
||||
|
||||
def metadata(self):
|
||||
return {"search_tags": self.query.get("terms", "")}
|
||||
|
||||
def _pagination(self, path, params):
|
||||
url = self.root + path
|
||||
headers = {
|
||||
"Accept": "application/json, text/javascript, */*; q=0.01",
|
||||
"X-Requested-With": "XMLHttpRequest",
|
||||
"Referer": self.root,
|
||||
}
|
||||
params["inner"] = "1"
|
||||
params["page"] = 1
|
||||
|
||||
while True:
|
||||
data = self.request(url, params=params, headers=headers).json()
|
||||
|
||||
post_url = None
|
||||
for post_url in text.extract_iter(data["content"], 'href="', '"'):
|
||||
if not post_url.startswith("/search/"):
|
||||
yield post_url
|
||||
|
||||
if post_url is None:
|
||||
return
|
||||
params["page"] += 1
|
||||
|
Loading…
Reference in New Issue
Block a user