From 1ae1df0d27f0fc771d50e5e0b6d30e2c5c98658a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Thu, 18 Jun 2020 15:07:30 +0200 Subject: [PATCH] update '--write-pages' (#737) - fix infinite recursion for responses with multiple entries in 'history' - hide values of Set-Cookie headers - only write the response content by default (use '-o write-pages=all' to also include HTTP headers) --- gallery_dl/extractor/common.py | 10 ++++++---- gallery_dl/util.py | 21 ++++++++++++++------- 2 files changed, 20 insertions(+), 11 deletions(-) diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py index dd685df7..4d314c23 100644 --- a/gallery_dl/extractor/common.py +++ b/gallery_dl/extractor/common.py @@ -328,14 +328,15 @@ class Extractor(): test = (test, None) yield test - def _dump_response(self, response): + def _dump_response(self, response, history=True): """Write the response content to a .dump file in the current directory. The file name is derived from the response url, replacing special characters with "_" """ - for resp in response.history: - self._dump_response(resp) + if history: + for resp in response.history: + self._dump_response(resp, False) if hasattr(Extractor, "_dump_index"): Extractor._dump_index += 1 @@ -350,7 +351,8 @@ class Extractor(): try: with open(fname + ".dump", 'wb') as fp: - util.dump_response(response, fp) + util.dump_response( + response, fp, headers=(self._write_pages == "all")) except Exception as e: self.log.warning("Failed to dump HTTP request (%s: %s)", e.__class__.__name__, e) diff --git a/gallery_dl/util.py b/gallery_dl/util.py index afd96b88..c8d73b63 100644 --- a/gallery_dl/util.py +++ b/gallery_dl/util.py @@ -120,13 +120,14 @@ def dump_json(obj, fp=sys.stdout, ensure_ascii=True, indent=4): fp.write("\n") -def dump_response(response, fp=sys.stdout, - headers=True, content=True, hide_auth=True): +def dump_response(response, fp, *, + headers=False, content=True, hide_auth=True): """Write the contents of 'response' into a file-like object""" if headers: request = response.request req_headers = request.headers.copy() + res_headers = response.headers.copy() outfmt = """\ {request.method} {request.url} Status: {response.status_code} {response.reason} @@ -145,11 +146,17 @@ Response Headers atype, sep, _ = authorization.partition(" ") req_headers["Authorization"] = atype + " ***" if sep else "***" - cookies = req_headers.get("Cookie") - if cookies: + cookie = req_headers.get("Cookie") + if cookie: req_headers["Cookie"] = ";".join( - cookie.partition("=")[0] + "=***" - for cookie in cookies.split(";") + c.partition("=")[0] + "=***" + for c in cookie.split(";") + ) + + set_cookie = res_headers.get("Set-Cookie") + if set_cookie: + res_headers["Set-Cookie"] = re.sub( + r"(^|, )([^ =]+)=[^,;]*", r"\1\2=***", set_cookie, ) fp.write(outfmt.format( @@ -161,7 +168,7 @@ Response Headers ), response_headers="\n".join( name + ": " + value - for name, value in response.headers.items() + for name, value in res_headers.items() ), ).encode())