diff --git a/docs/configuration.rst b/docs/configuration.rst index 964779f6..487ee25c 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -415,14 +415,10 @@ Description The username and password to use when attempting to log in to another site. - Specifying username and password is required for - - * ``nijie`` - * ``horne`` - - and optional for + This is supported for * ``aibooru`` (*) + * ``ao3`` * ``aryion`` * ``atfbooru`` (*) * ``bluesky`` @@ -434,6 +430,7 @@ Description * ``e6ai`` (*) * ``e926`` (*) * ``exhentai`` + * ``horne`` (R) * ``idolcomplex`` * ``imgbb`` * ``inkbunny`` @@ -441,8 +438,11 @@ Description * ``koharu`` * ``mangadex`` * ``mangoxo`` + * ``newgrounds`` + * ``nijie`` (R) * ``pillowfort`` * ``sankaku`` + * ``seiga`` * ``subscribestar`` * ``tapas`` * ``tsumino`` @@ -457,6 +457,9 @@ Description (*) The password value for these sites should be the API key found in your user profile, not the actual account password. + (R) Login with username & password or supplying logged-in + `cookies <extractor.*.cookies_>`__ is required + Note: Leave the ``password`` value empty or undefined to be prompted for a password when performing a login (see `getpass() <https://docs.python.org/3/library/getpass.html#getpass.getpass>`__). @@ -467,7 +470,7 @@ extractor.*.input Type ``bool`` Default - ``true`` if `stdin` is attached to a terminal , + ``true`` if `stdin` is attached to a terminal, ``false`` otherwise Description Allow prompting the user for interactive input. diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 4e319879..bfe280bb 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -107,7 +107,7 @@ Consider all listed sites to potentially be NSFW. Archive of Our Own https://archiveofourown.org/ Search Results, Series, Tag Searches, User Profiles, Bookmarks, Works - + Supported ArtStation @@ -629,7 +629,7 @@ Consider all listed sites to potentially be NSFW. 
Niconico Seiga https://seiga.nicovideo.jp/ individual Images, User Profiles - Cookies + Supported Nozomi.la diff --git a/gallery_dl/extractor/ao3.py b/gallery_dl/extractor/ao3.py index 3bb48b50..a0abce05 100644 --- a/gallery_dl/extractor/ao3.py +++ b/gallery_dl/extractor/ao3.py @@ -9,7 +9,8 @@ """Extractors for https://archiveofourown.org/""" from .common import Extractor, Message -from .. import text, util +from .. import text, util, exception +from ..cache import cache BASE_PATTERN = (r"(?:https?://)?(?:www\.)?" r"a(?:rchiveofourown|o3)\.(?:org|com|net)") @@ -20,9 +21,13 @@ class Ao3Extractor(Extractor): category = "ao3" root = "https://archiveofourown.org" categorytransfer = True + cookies_domain = ".archiveofourown.org" + cookies_names = ("remember_user_token",) request_interval = (0.5, 1.5) def items(self): + self.login() + base = self.root + "/works/" data = {"_extractor": Ao3WorkExtractor} @@ -32,6 +37,48 @@ class Ao3Extractor(Extractor): def works(self): return self._pagination(self.groups[0]) + def login(self): + if self.cookies_check(self.cookies_names): + return + + username, password = self._get_auth_info() + if username: + return self.cookies_update(self._login_impl(username, password)) + + @cache(maxage=90*86400, keyarg=1) + def _login_impl(self, username, password): + self.log.info("Logging in as %s", username) + + url = self.root + "/users/login" + page = self.request(url).text + + pos = page.find('id="loginform"') + token = text.extract( + page, ' name="authenticity_token" value="', '"', pos)[0] + if not token: + self.log.error("Unable to extract 'authenticity_token'") + + data = { + "authenticity_token": text.unescape(token), + "user[login]" : username, + "user[password]" : password, + "user[remember_me]" : "1", + "commit" : "Log In", + } + + response = self.request(url, method="POST", data=data) + if not response.history: + raise exception.AuthenticationError() + + remember = response.history[0].cookies.get("remember_user_token") + if not 
remember: + raise exception.AuthenticationError() + + return { + "remember_user_token": remember, + "user_credentials" : "1", + } + def _pagination(self, path, needle='