From 671b02b504a45953cf225a210c361729d480dd66 Mon Sep 17 00:00:00 2001
From: Gorka Eguileor
Date: Mon, 5 Feb 2018 18:57:46 +0100
Subject: [PATCH] Improve ChunkedBackupDriver hashlib calls

Currently we have 2 hashlib calls within the ChunkedBackupDriver: one to
calculate the MD5 of the chunk and another to calculate the SHA256 of
the blocks within each chunk.

This patch improves the interaction between Cinder and the hashlib
library by making sure the MD5 and SHA256 related calls are executed in
a native thread, improving context switching responsiveness within
eventlet.

The MD5 of a 1GB chunk can take around 4 seconds, so the overhead of
creating a native thread is acceptable, and for the SHA256, instead of
creating a thread for each call we create a single thread to do the
calculations of all the blocks, thus making it cost effective.

The current code slices the data into blocks, which means that the data
is being copied, but this has now been switched to a memoryview object
to take advantage of the buffer protocol, so copying of the data is no
longer necessary.

Change-Id: Ifb65b8008f30bc9cc4b6cd9b867a726ec4ed4707
---
 cinder/backup/chunkeddriver.py | 36 ++++++++++++++++++++++--------------
 1 file changed, 22 insertions(+), 14 deletions(-)

diff --git a/cinder/backup/chunkeddriver.py b/cinder/backup/chunkeddriver.py
index 9c528b455db..751e0024a8f 100644
--- a/cinder/backup/chunkeddriver.py
+++ b/cinder/backup/chunkeddriver.py
@@ -372,7 +372,7 @@ class ChunkedBackupDriver(driver.BackupDriver):
                 container, object_name, extra_metadata=extra_metadata
         ) as writer:
             writer.write(output_data)
-        md5 = hashlib.md5(data).hexdigest()
+        md5 = eventlet.tpool.execute(hashlib.md5, data).hexdigest()
         obj[object_name]['md5'] = md5
         LOG.debug('backup MD5 for %(object_name)s: %(md5)s',
                   {'object_name': object_name, 'md5': md5})
@@ -470,6 +470,25 @@ class ChunkedBackupDriver(driver.BackupDriver):
             disk_path)
         return win32_diskutils.get_disk_size(disk_number)
 
+    def _calculate_sha(self, data):
+        """Calculate SHA256 of a data chunk.
+
+        This method cannot log anything as it is called on a native thread.
+        """
+        # NOTE(geguileo): Using memoryview to avoid data copying when slicing
+        # for the sha256 call.
+        chunk = memoryview(data)
+        shalist = []
+        off = 0
+        datalen = len(chunk)
+        while off < datalen:
+            chunk_end = min(datalen, off + self.sha_block_size_bytes)
+            block = chunk[off:chunk_end]
+            sha = hashlib.sha256(block).hexdigest()
+            shalist.append(sha)
+            off += self.sha_block_size_bytes
+        return shalist
+
     def backup(self, backup, volume_file, backup_metadata=True):
         """Backup the given volume.
 
@@ -562,18 +581,7 @@ class ChunkedBackupDriver(driver.BackupDriver):
                 break
 
             # Calculate new shas with the datablock.
-            shalist = []
-            off = 0
-            datalen = len(data)
-            while off < datalen:
-                chunk_start = off
-                chunk_end = chunk_start + self.sha_block_size_bytes
-                if chunk_end > datalen:
-                    chunk_end = datalen
-                chunk = data[chunk_start:chunk_end]
-                sha = hashlib.sha256(chunk).hexdigest()
-                shalist.append(sha)
-                off += self.sha_block_size_bytes
+            shalist = eventlet.tpool.execute(self._calculate_sha, data)
             sha256_list.extend(shalist)
 
             # If parent_backup is not None, that means an incremental
@@ -600,7 +608,7 @@ class ChunkedBackupDriver(driver.BackupDriver):
 
                 # The last extent extends to the end of data buffer.
                 if extent_off != -1:
-                    extent_end = datalen
+                    extent_end = len(data)
                     segment = data[extent_off:extent_end]
                     self._backup_chunk(backup, container, segment,
                                        data_offset + extent_off,
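
Reviewer note (not part of the patch): below is a minimal, hypothetical
standalone sketch of the technique the commit message describes, i.e.
offloading the whole-chunk MD5 and the per-block SHA256 loop to a native
thread via eventlet's tpool, and slicing through a memoryview so blocks are
hashed without being copied. The helper name calculate_sha256_blocks and the
chunk/block sizes are illustrative and are not part of cinder.

# Hypothetical sketch; chunk and block sizes are arbitrary example values.
import hashlib
import os

from eventlet import tpool


def calculate_sha256_blocks(data, block_size):
    """Hash fixed-size blocks of ``data`` without copying them.

    Runs entirely inside one native thread when invoked through
    tpool.execute, so it must not log (mirrors _calculate_sha above).
    """
    view = memoryview(data)  # buffer protocol: slicing does not copy
    shalist = []
    for off in range(0, len(view), block_size):
        shalist.append(hashlib.sha256(view[off:off + block_size]).hexdigest())
    return shalist


if __name__ == '__main__':
    chunk = os.urandom(8 * 1024 * 1024)  # example 8 MiB chunk
    block_size = 32 * 1024               # example 32 KiB SHA blocks

    # Whole-chunk MD5 runs in a native thread; the calling greenthread
    # yields to the eventlet hub until the digest object is returned.
    md5 = tpool.execute(hashlib.md5, chunk).hexdigest()

    # All SHA256 block digests are computed in a single tpool call, paying
    # the native-thread hand-off once per chunk instead of once per block.
    shalist = tpool.execute(calculate_sha256_blocks, chunk, block_size)

    print(md5, len(shalist))

Paying the thread hand-off once per chunk keeps the overhead negligible next
to the roughly 4 seconds a 1GB MD5 takes, which is the trade-off the commit
message calls out.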