From 7755ef1b8d4d7803c9ad330a1bd9599715e06c60 Mon Sep 17 00:00:00 2001 From: Clark Boylan <clark.boylan@gmail.com> Date: Fri, 11 Oct 2019 08:46:20 -0700 Subject: [PATCH] Use gzip to compress files uploaded to swift We've discovered that rackspace swift seems to always want to gzip encode files when clients request their contents. When our files are deflate encoded this results in files that are first deflate encoded then gzip encoded. Not all browsers or layer 7 firewalls can handle this (despite being perfectly valid according to the HTTP RFCs). We'll use gzip to see if that causes rackspace to not double encode the files. To do this with memory efficiency we vendor a tool from pypi called gzip-stream which allows us to read chunks of the compressed data at a time without loading the entire file into memory or writing multiple gzip headers in a single file. Change-Id: I9483cfdbd8e7d0683eeb24d28dd6d8b0c0e772fa --- .../library/zuul_swift_upload.py | 115 +++++++++++++++++- 1 file changed, 113 insertions(+), 2 deletions(-) diff --git a/roles/upload-logs-swift/library/zuul_swift_upload.py b/roles/upload-logs-swift/library/zuul_swift_upload.py index c1a81696d..c25720fc5 100755 --- a/roles/upload-logs-swift/library/zuul_swift_upload.py +++ b/roles/upload-logs-swift/library/zuul_swift_upload.py @@ -25,6 +25,8 @@ Utility to upload files to swift """ import argparse +import gzip +import io import logging import mimetypes import os @@ -131,6 +133,95 @@ ICON_IMAGES = { 'AupSdoFsAAAAAElFTkSuQmCC'} +# Begin vendored code +# This code is licensed under the Public Domain/CC0 and comes from +# https://github.com/leenr/gzip-stream/blob/master/gzip_stream.py +# Code was modified: +# removed type annotations to support python2. +# removed use of *, somearg for positional anonymous args. +# Default compression level to 9. 
+ +class GZIPCompressedStream(io.RawIOBase): + def __init__(self, stream, compression_level=9): + assert 1 <= compression_level <= 9 + + self._compression_level = compression_level + self._stream = stream + + self._compressed_stream = io.BytesIO() + self._compressor = gzip.GzipFile( + mode='wb', + fileobj=self._compressed_stream, + compresslevel=compression_level + ) + + # because of the GZIP header written by `GzipFile.__init__`: + self._compressed_stream.seek(0) + + @property + def compression_level(self): + return self._compression_level + + @property + def stream(self): + return self._stream + + def readable(self): + return True + + def _read_compressed_into(self, b): + buf = self._compressed_stream.read(len(b)) + b[:len(buf)] = buf + return len(buf) + + def readinto(self, b): + b = memoryview(b) + + offset = 0 + size = len(b) + while offset < size: + offset += self._read_compressed_into(b[offset:]) + if offset < size: + # self._compressed_buffer now empty + if self._compressor.closed: + # nothing to compress anymore + break + # compress next bytes + self._read_n_compress(size) + + return offset + + def _read_n_compress(self, size): + assert size > 0 + + data = self._stream.read(size) + + # rewind buffer to the start to free up memory + # (because anything currently in the buffer should be already + # streamed off the object) + self._compressed_stream.seek(0) + self._compressed_stream.truncate(0) + + if data: + self._compressor.write(data) + else: + # this will write final data (will flush zlib with Z_FINISH) + self._compressor.close() + + # rewind to the buffer start + self._compressed_stream.seek(0) + + def __repr__(self): + return ( + '{self.__class__.__name__}(' + '{self.stream!r}, ' + 'compression_level={self.compression_level!r}' + ')' + ).format(self=self) + +# End vendored code + + def get_mime_icon(mime, filename=''): icon = (APACHE_FILE_ICON_MAP.get(filename) or APACHE_MIME_ICON_MAP.get(mime) or @@ -463,6 +554,26 @@ class Indexer(): 
self.file_list.file_list = new_list +class GzipFilter(): + chunk_size = 16384 + + def __init__(self, infile): + self.gzipfile = GZIPCompressedStream(infile) + self.done = False + + def __iter__(self): + return self + + def __next__(self): + if self.done: + self.gzipfile.close() + raise StopIteration() + data = self.gzipfile.read(self.chunk_size) + if not data: + self.done = True + return data + + class DeflateFilter(): chunk_size = 16384 @@ -622,8 +733,8 @@ class Uploader(): if not file_detail.folder: if (file_detail.encoding is None and self._is_text_type(file_detail.mimetype)): - headers['content-encoding'] = 'deflate' - data = DeflateFilter(open(file_detail.full_path, 'rb')) + headers['content-encoding'] = 'gzip' + data = GzipFilter(open(file_detail.full_path, 'rb')) else: if file_detail.encoding: headers['content-encoding'] = file_detail.encoding