From aa3e950764337ef9800c936f4de89b31c00dfcf5 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 28 Aug 2013 11:57:13 +0200 Subject: [PATCH] Tolerate junk at the end of gzip-compressed content (#1268) --- youtube_dl/utils.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index e6fa634a7..be788cf5a 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -628,8 +628,23 @@ def http_response(self, req, resp): old_resp = resp # gzip if resp.headers.get('Content-encoding', '') == 'gzip': - gz = gzip.GzipFile(fileobj=io.BytesIO(resp.read()), mode='r') - resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code) + content = resp.read() + gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb') + try: + uncompressed = io.BytesIO(gz.read()) + except IOError as original_ioerror: + # There may be junk add the end of the file + # See http://stackoverflow.com/q/4928560/35070 for details + for i in range(1, 1024): + try: + gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb') + uncompressed = io.BytesIO(gz.read()) + except IOError: + continue + break + else: + raise original_ioerror + resp = self.addinfourl_wrapper(uncompressed, old_resp.headers, old_resp.url, old_resp.code) resp.msg = old_resp.msg # deflate if resp.headers.get('Content-encoding', '') == 'deflate':