From 3946fe5ac48b45c951381bb14bd3d8829df4ab0d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Sat, 12 Oct 2024 16:32:29 +0200 Subject: [PATCH] [cookies] return loaded cookies as list don't set_cookie() them immediately into a CookieJar also, give some more consistent names to chrome/chromium functions --- gallery_dl/cookies.py | 166 +++++++++++++++++---------------- gallery_dl/extractor/common.py | 35 +++---- gallery_dl/util.py | 10 +- test/test_util.py | 8 +- 4 files changed, 115 insertions(+), 104 deletions(-) diff --git a/gallery_dl/cookies.py b/gallery_dl/cookies.py index 78a9590d..3fa9fa39 100644 --- a/gallery_dl/cookies.py +++ b/gallery_dl/cookies.py @@ -31,59 +31,61 @@ SUPPORTED_BROWSERS = SUPPORTED_BROWSERS_CHROMIUM | {"firefox", "safari"} logger = logging.getLogger("cookies") -def load_cookies(cookiejar, browser_specification): +def load_cookies(browser_specification): browser_name, profile, keyring, container, domain = \ _parse_browser_specification(*browser_specification) if browser_name == "firefox": - load_cookies_firefox(cookiejar, profile, container, domain) + return load_cookies_firefox(profile, container, domain) elif browser_name == "safari": - load_cookies_safari(cookiejar, profile, domain) + return load_cookies_safari(profile, domain) elif browser_name in SUPPORTED_BROWSERS_CHROMIUM: - load_cookies_chrome(cookiejar, browser_name, profile, keyring, domain) + return load_cookies_chromium(browser_name, profile, keyring, domain) else: raise ValueError("unknown browser '{}'".format(browser_name)) -def load_cookies_firefox(cookiejar, profile=None, container=None, domain=None): +def load_cookies_firefox(profile=None, container=None, domain=None): path, container_id = _firefox_cookies_database(profile, container) + + sql = ("SELECT name, value, host, path, isSecure, expiry " + "FROM moz_cookies") + conditions = [] + parameters = [] + + if container_id is False: + conditions.append("NOT INSTR(originAttributes,'userContextId=')") + 
elif container_id: + uid = "%userContextId={}".format(container_id) + conditions.append("originAttributes LIKE ? OR originAttributes LIKE ?") + parameters += (uid, uid + "&%") + + if domain: + if domain[0] == ".": + conditions.append("host == ? OR host LIKE ?") + parameters += (domain[1:], "%" + domain) + else: + conditions.append("host == ? OR host == ?") + parameters += (domain, "." + domain) + + if conditions: + sql = "{} WHERE ( {} )".format(sql, " ) AND ( ".join(conditions)) + with DatabaseConnection(path) as db: - - sql = ("SELECT name, value, host, path, isSecure, expiry " - "FROM moz_cookies") - conditions = [] - parameters = [] - - if container_id is False: - conditions.append("NOT INSTR(originAttributes,'userContextId=')") - elif container_id: - conditions.append( - "originAttributes LIKE ? OR originAttributes LIKE ?") - uid = "%userContextId={}".format(container_id) - parameters += (uid, uid + "&%") - - if domain: - if domain[0] == ".": - conditions.append("host == ? OR host LIKE ?") - parameters += (domain[1:], "%" + domain) - else: - conditions.append("host == ? OR host == ?") - parameters += (domain, "." 
+ domain) - - if conditions: - sql = "{} WHERE ( {} )".format(sql, " ) AND ( ".join(conditions)) - - set_cookie = cookiejar.set_cookie - for name, value, domain, path, secure, expires in db.execute( - sql, parameters): - set_cookie(Cookie( + cookies = [ + Cookie( 0, name, value, None, False, domain, bool(domain), domain.startswith("."), path, bool(path), secure, expires, False, None, None, {}, - )) - _log_info("Extracted %s cookies from Firefox", len(cookiejar)) + ) + for name, value, domain, path, secure, expires in db.execute( + sql, parameters) + ] + + _log_info("Extracted %s cookies from Firefox", len(cookies)) + return cookies -def load_cookies_safari(cookiejar, profile=None, domain=None): +def load_cookies_safari(profile=None, domain=None): """Ref.: https://github.com/libyal/dtformats/blob /main/documentation/Safari%20Cookies.asciidoc - This data appears to be out of date @@ -95,31 +97,33 @@ def load_cookies_safari(cookiejar, profile=None, domain=None): data = fp.read() page_sizes, body_start = _safari_parse_cookies_header(data) p = DataParser(data[body_start:]) + + cookies = [] for page_size in page_sizes: - _safari_parse_cookies_page(p.read_bytes(page_size), cookiejar) + _safari_parse_cookies_page(p.read_bytes(page_size), cookies) + _log_info("Extracted %s cookies from Safari", len(cookies)) + return cookies -def load_cookies_chrome(cookiejar, browser_name, profile=None, - keyring=None, domain=None): - config = _get_chromium_based_browser_settings(browser_name) - path = _chrome_cookies_database(profile, config) +def load_cookies_chromium(browser_name, profile=None, + keyring=None, domain=None): + config = _chromium_browser_settings(browser_name) + path = _chromium_cookies_database(profile, config) _log_debug("Extracting cookies from %s", path) + if domain: + if domain[0] == ".": + condition = " WHERE host_key == ? OR host_key LIKE ?" + parameters = (domain[1:], "%" + domain) + else: + condition = " WHERE host_key == ? OR host_key == ?" 
+ parameters = (domain, "." + domain) + else: + condition = "" + parameters = () + with DatabaseConnection(path) as db: db.text_factory = bytes - decryptor = get_cookie_decryptor( - config["directory"], config["keyring"], keyring) - - if domain: - if domain[0] == ".": - condition = " WHERE host_key == ? OR host_key LIKE ?" - parameters = (domain[1:], "%" + domain) - else: - condition = " WHERE host_key == ? OR host_key == ?" - parameters = (domain, "." + domain) - else: - condition = "" - parameters = () try: rows = db.execute( @@ -130,10 +134,12 @@ def load_cookies_chrome(cookiejar, browser_name, profile=None, "SELECT host_key, name, value, encrypted_value, path, " "expires_utc, secure FROM cookies" + condition, parameters) - set_cookie = cookiejar.set_cookie failed_cookies = 0 unencrypted_cookies = 0 + decryptor = _chromium_cookie_decryptor( + config["directory"], config["keyring"], keyring) + cookies = [] for domain, name, value, enc_value, path, expires, secure in rows: if not value and enc_value: # encrypted @@ -149,7 +155,7 @@ def load_cookies_chrome(cookiejar, browser_name, profile=None, path = path.decode() name = name.decode() - set_cookie(Cookie( + cookies.append(Cookie( 0, name, value, None, False, domain, bool(domain), domain.startswith("."), path, bool(path), secure, expires or None, False, @@ -162,10 +168,11 @@ def load_cookies_chrome(cookiejar, browser_name, profile=None, failed_message = "" _log_info("Extracted %s cookies from %s%s", - len(cookiejar), browser_name.capitalize(), failed_message) + len(cookies), browser_name.capitalize(), failed_message) counts = decryptor.cookie_counts counts["unencrypted"] = unencrypted_cookies - _log_debug("Cookie version breakdown: %s", counts) + _log_debug("version breakdown: %s", counts) + return cookies # -------------------------------------------------------------------- @@ -253,7 +260,7 @@ def _safari_parse_cookies_header(data): return page_sizes, p.cursor -def _safari_parse_cookies_page(data, cookiejar, 
domain=None): +def _safari_parse_cookies_page(data, cookies, domain=None): p = DataParser(data) p.expect_bytes(b"\x00\x00\x01\x00", "page signature") number_of_cookies = p.read_uint() @@ -267,12 +274,12 @@ def _safari_parse_cookies_page(data, cookiejar, domain=None): for i, record_offset in enumerate(record_offsets): p.skip_to(record_offset, "space between records") record_length = _safari_parse_cookies_record( - data[record_offset:], cookiejar, domain) + data[record_offset:], cookies, domain) p.read_bytes(record_length) p.skip_to_end("space in between pages") -def _safari_parse_cookies_record(data, cookiejar, host=None): +def _safari_parse_cookies_record(data, cookies, host=None): p = DataParser(data) record_size = p.read_uint() p.skip(4, "unknown record field 1") @@ -313,7 +320,7 @@ def _safari_parse_cookies_record(data, cookiejar, host=None): p.skip_to(record_size, "space at the end of the record") - cookiejar.set_cookie(Cookie( + cookies.append(Cookie( 0, name, value, None, False, domain, bool(domain), domain.startswith("."), path, bool(path), is_secure, expiration_date, False, @@ -324,9 +331,9 @@ def _safari_parse_cookies_record(data, cookiejar, host=None): # -------------------------------------------------------------------- -# chrome +# chromium -def _chrome_cookies_database(profile, config): +def _chromium_cookies_database(profile, config): if profile is None: search_root = config["directory"] elif _is_path(profile): @@ -346,7 +353,7 @@ def _chrome_cookies_database(profile, config): return path -def _get_chromium_based_browser_settings(browser_name): +def _chromium_browser_settings(browser_name): # https://chromium.googlesource.com/chromium # /src/+/HEAD/docs/user_data_dir.md join = os.path.join @@ -414,7 +421,17 @@ def _get_chromium_based_browser_settings(browser_name): } -class ChromeCookieDecryptor: +def _chromium_cookie_decryptor( + browser_root, browser_keyring_name, keyring=None): + if sys.platform in ("win32", "cygwin"): + return 
WindowsChromiumCookieDecryptor(browser_root) + elif sys.platform == "darwin": + return MacChromiumCookieDecryptor(browser_keyring_name) + else: + return LinuxChromiumCookieDecryptor(browser_keyring_name, keyring) + + +class ChromiumCookieDecryptor: """ Overview: @@ -452,16 +469,7 @@ class ChromeCookieDecryptor: raise NotImplementedError("Must be implemented by sub classes") -def get_cookie_decryptor(browser_root, browser_keyring_name, keyring=None): - if sys.platform in ("win32", "cygwin"): - return WindowsChromeCookieDecryptor(browser_root) - elif sys.platform == "darwin": - return MacChromeCookieDecryptor(browser_keyring_name) - else: - return LinuxChromeCookieDecryptor(browser_keyring_name, keyring) - - -class LinuxChromeCookieDecryptor(ChromeCookieDecryptor): +class LinuxChromiumCookieDecryptor(ChromiumCookieDecryptor): def __init__(self, browser_keyring_name, keyring=None): self._v10_key = self.derive_key(b"peanuts") password = _get_linux_keyring_password(browser_keyring_name, keyring) @@ -500,7 +508,7 @@ class LinuxChromeCookieDecryptor(ChromeCookieDecryptor): return None -class MacChromeCookieDecryptor(ChromeCookieDecryptor): +class MacChromiumCookieDecryptor(ChromiumCookieDecryptor): def __init__(self, browser_keyring_name): password = _get_mac_keyring_password(browser_keyring_name) self._v10_key = None if password is None else self.derive_key(password) @@ -539,7 +547,7 @@ class MacChromeCookieDecryptor(ChromeCookieDecryptor): return encrypted_value -class WindowsChromeCookieDecryptor(ChromeCookieDecryptor): +class WindowsChromiumCookieDecryptor(ChromiumCookieDecryptor): def __init__(self, browser_root): self._v10_key = _get_windows_v10_key(browser_root) self._cookie_counts = {"v10": 0, "other": 0} diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py index 22e0bfc3..2146fa65 100644 --- a/gallery_dl/extractor/common.py +++ b/gallery_dl/extractor/common.py @@ -456,46 +456,49 @@ class Extractor(): cookies = random.choice(cookies) 
self.cookies_load(cookies) - def cookies_load(self, cookies): - if isinstance(cookies, dict): - self.cookies_update_dict(cookies, self.cookies_domain) + def cookies_load(self, cookies_source): + if isinstance(cookies_source, dict): + self.cookies_update_dict(cookies_source, self.cookies_domain) - elif isinstance(cookies, str): - path = util.expand_path(cookies) + elif isinstance(cookies_source, str): + path = util.expand_path(cookies_source) try: with open(path) as fp: - util.cookiestxt_load(fp, self.cookies) + cookies = util.cookiestxt_load(fp) except Exception as exc: self.log.warning("cookies: %s", exc) else: - self.log.debug("Loading cookies from '%s'", cookies) + self.log.debug("Loading cookies from '%s'", cookies_source) + set_cookie = self.cookies.set_cookie + for cookie in cookies: + set_cookie(cookie) self.cookies_file = path - elif isinstance(cookies, (list, tuple)): - key = tuple(cookies) - cookiejar = _browser_cookies.get(key) + elif isinstance(cookies_source, (list, tuple)): + key = tuple(cookies_source) + cookies = _browser_cookies.get(key) - if cookiejar is None: + if cookies is None: from ..cookies import load_cookies - cookiejar = self.cookies.__class__() try: - load_cookies(cookiejar, cookies) + cookies = load_cookies(cookies_source) except Exception as exc: self.log.warning("cookies: %s", exc) + cookies = () else: - _browser_cookies[key] = cookiejar + _browser_cookies[key] = cookies else: self.log.debug("Using cached cookies from %s", key) set_cookie = self.cookies.set_cookie - for cookie in cookiejar: + for cookie in cookies: set_cookie(cookie) else: self.log.warning( "Expected 'dict', 'list', or 'str' value for 'cookies' " "option, got '%s' (%s)", - cookies.__class__.__name__, cookies) + cookies_source.__class__.__name__, cookies_source) def cookies_store(self): """Store the session's cookies in a cookies.txt file""" diff --git a/gallery_dl/util.py b/gallery_dl/util.py index d5bc1717..a269a2b6 100644 --- a/gallery_dl/util.py +++ 
b/gallery_dl/util.py @@ -403,9 +403,9 @@ def set_mtime(path, mtime): pass -def cookiestxt_load(fp, cookiejar): - """Parse a Netscape cookies.txt file and add its Cookies to 'cookiejar'""" - set_cookie = cookiejar.set_cookie +def cookiestxt_load(fp): + """Parse a Netscape cookies.txt file and return its Cookies""" + cookies = [] for line in fp: @@ -427,7 +427,7 @@ def cookiestxt_load(fp, cookiejar): name = value value = None - set_cookie(Cookie( + cookies.append(Cookie( 0, name, value, None, False, domain, @@ -439,6 +439,8 @@ def cookiestxt_load(fp, cookiejar): False, None, None, {}, )) + return cookies + def cookiestxt_store(fp, cookies): """Write 'cookies' in Netscape cookies.txt format to 'fp'""" diff --git a/test/test_util.py b/test/test_util.py index e2db29bf..b630ffbc 100644 --- a/test/test_util.py +++ b/test/test_util.py @@ -205,9 +205,8 @@ class TestCookiesTxt(unittest.TestCase): def test_cookiestxt_load(self): def _assert(content, expected): - jar = http.cookiejar.CookieJar() - util.cookiestxt_load(io.StringIO(content, None), jar) - for c, e in zip(jar, expected): + cookies = util.cookiestxt_load(io.StringIO(content, None)) + for c, e in zip(cookies, expected): self.assertEqual(c.__dict__, e.__dict__) _assert("", []) @@ -253,8 +252,7 @@ class TestCookiesTxt(unittest.TestCase): ) with self.assertRaises(ValueError): - util.cookiestxt_load("example.org\tTRUE\t/\tTRUE\t0\tname", - http.cookiejar.CookieJar()) + util.cookiestxt_load("example.org\tTRUE\t/\tTRUE\t0\tname") def test_cookiestxt_store(self):