So far, ZKObject and its sharded equivalent have also created the parent path if it did not exist. Since we sometimes work with cached data, the information we have could be stale. If we create ZKObjects as sub-nodes of existing nodes, this can cause problems when e.g. the parent node was deleted in the meantime and we re-create it, empty, as a side effect of writing the sub-ZKObject. To prevent this data race, we make the behavior configurable per ZKObject by introducing a `makepath` attribute, which defaults to True.

Change-Id: Ib17e40cd5d664bfc625d83c742d76dabd2dd7a8c
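An illustrative sketch (not part of this change; the ZKObject call sites live elsewhere in Zuul): a caller storing a sub-object under a node owned by another object can pass makepath=False so that a parent deleted in the meantime surfaces as a NoNodeError instead of being silently re-created empty. The helper name and path below are hypothetical.

def save_child(client, data):
    # Hypothetical helper; "client" is a connected kazoo client and
    # BufferedShardWriter is the class defined in this file.
    # With makepath=False a missing parent raises kazoo's NoNodeError
    # instead of being re-created as an empty node, so the caller can
    # treat its cached view of the parent as stale.
    with BufferedShardWriter(client, "/zuul/parent/child",
                             create=True, makepath=False) as stream:
        stream.write(data)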
# Copyright 2020 BMW Group
# Copyright 2024 Acme Gating, LLC
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

import io
import time
import zlib
from contextlib import suppress

from kazoo.exceptions import NoNodeError

from zuul.zk.components import COMPONENT_REGISTRY

# The default size limit for a node in Zookeeper is ~1MiB. However, as this
# also includes the size of the key we can not use all of it for data.
# Because of that we will leave ~47 KiB for the key.
NODE_BYTE_SIZE_LIMIT = 1000000
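# For reference, the headroom works out to:
#   1 MiB - NODE_BYTE_SIZE_LIMIT = 1048576 - 1000000 = 48576 bytes
# i.e. ~47.4 KiB reserved for the key.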


class RawZKIO(io.RawIOBase):
    def __init__(self, client, path, create=False, makepath=True, version=-1):
        self.client = client
        self.path = path
        self.bytes_read = 0
        self.bytes_written = 0
        self.cumulative_read_time = 0.0
        self.cumulative_write_time = 0.0
        self.znodes_read = 0
        self.znodes_written = 0
        self.create = create
        self.makepath = makepath
        self.version = version
        self.zstat = None

    def readable(self):
        return True

    def writable(self):
        return True

    def truncate(self, size=None):
        # We never truncate unless we're going to write, so make this
        # a noop for the single-znode case.
        pass

    def _getData(self, path):
        start = time.perf_counter()
        data, zstat = self.client.get(path)
        self.cumulative_read_time += time.perf_counter() - start
        self.bytes_read += len(data)
        self.znodes_read += 1
        return data, zstat

    def readall(self):
        data, self.zstat = self._getData(self.path)
        return data

    def write(self, data):
        byte_count = len(data)
        if byte_count > NODE_BYTE_SIZE_LIMIT:
            raise Exception(f"ZK data size too large: {byte_count}")
        start = time.perf_counter()
        if self.create:
            _, self.zstat = self.client.create(
                self.path, data, makepath=self.makepath, include_data=True)
        else:
            self.zstat = self.client.set(self.path, data,
                                         version=self.version)
        self.cumulative_write_time += time.perf_counter() - start
        self.bytes_written += byte_count
        self.znodes_written += 1
        return byte_count


class RawShardIO(RawZKIO):
    def __init__(self, *args, old_format=False, makepath=True, **kw):
        # MODEL_API < 31
        self.old_format = old_format
        self.makepath = makepath
        super().__init__(*args, makepath=makepath, **kw)

    def truncate(self, size=None):
        if size != 0:
            raise ValueError("Can only truncate to 0")
        with suppress(NoNodeError):
            self.client.delete(self.path, recursive=True)
        self.zstat = None

    @property
    def _shards(self):
        start = time.perf_counter()
        ret = self.client.get_children(self.path)
        self.cumulative_read_time += time.perf_counter() - start
        return ret

    def readall_old(self, shard0, shard1):
        # Decompress each shard individually and then recompress them
        # as a unit.
        read_buffer = io.BytesIO()
        read_buffer.write(zlib.decompress(shard0))
        read_buffer.write(zlib.decompress(shard1))
        for shard_count, shard_name in enumerate(sorted(self._shards)):
            if shard_count < 2:
                continue
            shard_path = "/".join((self.path, shard_name))
            data = self._getData(shard_path)[0]
            read_buffer.write(zlib.decompress(data))
        self.zstat = self.client.exists(self.path)
        return zlib.compress(read_buffer.getvalue())

    def readall(self):
        read_buffer = io.BytesIO()
        for shard_count, shard_name in enumerate(sorted(self._shards)):
            shard_path = "/".join((self.path, shard_name))
            data = self._getData(shard_path)[0]
            if shard_count == 1 and data[:2] == b'\x78\x9c':
                # If this is the second shard, and it starts with a
                # zlib header, we're probably reading the old format.
                # Double check that we can decompress it, and if so,
                # switch to reading the old format.
                try:
                    zlib.decompress(data)
                    return self.readall_old(
                        read_buffer.getvalue(),
                        data,
                    )
                except zlib.error:
                    # Perhaps we were wrong about the header
                    pass
            # Use the data we already fetched above rather than
            # re-reading the shard from ZooKeeper.
            read_buffer.write(data)
        self.zstat = self.client.exists(self.path)
        return read_buffer.getvalue()

    def write(self, data):
        # Only write one znode at a time and defer writing the rest to
        # the caller
        data_bytes = bytes(data[0:NODE_BYTE_SIZE_LIMIT])
        read_len = len(data_bytes)
        # MODEL_API < 31
        if self.old_format:
            # We're going to add a header and footer that is several
            # bytes, so we definitely need to reduce the size a little
            # bit, but the old format would end up with a considerable
            # amount of headroom due to compressing after chunking, so
            # let's go ahead and reserve 1k of space.
            new_limit = NODE_BYTE_SIZE_LIMIT - 1024
            data_bytes = data_bytes[0:new_limit]
            # Update our return value to indicate how many bytes we
            # actually read from input.
            read_len = len(data_bytes)
            data_bytes = zlib.compress(data_bytes)
        if len(data_bytes) > NODE_BYTE_SIZE_LIMIT:
            raise RuntimeError("Shard too large")
        start = time.perf_counter()
        # The path we pass to a shard writer is e.g. '/foo/bar'. Now,
        # for shards the makepath argument should only apply to '/foo'
        # but not the 'bar' subnode, as it holds the individual shards
        # and will also be deleted recursively on e.g. a truncate.
        while True:
            try:
                self.client.create(
                    "{}/".format(self.path),
                    data_bytes,
                    sequence=True,
                )
                break
            except NoNodeError:
                if not self.makepath:
                    raise
                self.client.ensure_path(self.path)
        self.cumulative_write_time += time.perf_counter() - start
        self.bytes_written += len(data_bytes)
        self.znodes_written += 1
        if self.zstat is None:
            self.zstat = self.client.exists(self.path)
        return read_len


class BufferedZKWriter(io.BufferedWriter):
    def __init__(self, client, path):
        self.__raw = RawZKIO(client, path)
        super().__init__(self.__raw)

    @property
    def bytes_written(self):
        return self.__raw.bytes_written

    @property
    def cumulative_write_time(self):
        return self.__raw.cumulative_write_time

    @property
    def znodes_written(self):
        return self.__raw.znodes_written

    @property
    def zstat(self):
        return self.__raw.zstat


class BufferedZKReader(io.BufferedReader):
    def __init__(self, client, path):
        self.__raw = RawZKIO(client, path)
        super().__init__(self.__raw)

    @property
    def bytes_read(self):
        return self.__raw.bytes_read

    @property
    def cumulative_read_time(self):
        return self.__raw.cumulative_read_time

    @property
    def znodes_read(self):
        return self.__raw.znodes_read

    @property
    def zstat(self):
        return self.__raw.zstat


class BufferedShardWriter(io.BufferedWriter):
    def __init__(self, client, path, create=False, makepath=True, version=-1):
        self.__old_format = COMPONENT_REGISTRY.model_api < 31
        self.__raw = RawShardIO(client, path, create=create,
                                makepath=makepath, version=version,
                                old_format=self.__old_format)
        super().__init__(self.__raw, NODE_BYTE_SIZE_LIMIT)

    @property
    def bytes_written(self):
        return self.__raw.bytes_written

    @property
    def cumulative_write_time(self):
        return self.__raw.cumulative_write_time

    @property
    def znodes_written(self):
        return self.__raw.znodes_written

    @property
    def zstat(self):
        return self.__raw.zstat

    def write(self, data):
        # MODEL_API < 31
        if self.__old_format and data[:2] == b'\x78\x9c':
            data = zlib.decompress(data)
        return super().write(data)


class BufferedShardReader(io.BufferedReader):
    def __init__(self, client, path):
        self.__raw = RawShardIO(client, path)
        super().__init__(self.__raw, NODE_BYTE_SIZE_LIMIT)

    @property
    def bytes_read(self):
        return self.__raw.bytes_read

    @property
    def cumulative_read_time(self):
        return self.__raw.cumulative_read_time

    @property
    def znodes_read(self):
        return self.__raw.znodes_read

    @property
    def zstat(self):
        return self.__raw.zstat
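A minimal round-trip sketch, assuming a reachable ZooKeeper; it uses the raw shard class directly because BufferedShardWriter consults the Zuul component registry, which is only initialized inside a running Zuul component. The host, path, and payload below are illustrative.

from kazoo.client import KazooClient

client = KazooClient(hosts="localhost:2181")
client.start()

# Write a payload larger than one znode; RawShardIO splits it into
# sequence children below the given path, and the default
# makepath=True creates "/example/data" on the first write.
payload = b"x" * (2 * NODE_BYTE_SIZE_LIMIT)
with io.BufferedWriter(RawShardIO(client, "/example/data"),
                       NODE_BYTE_SIZE_LIMIT) as writer:
    writer.write(payload)

# BufferedShardReader reassembles the shards in order.
with BufferedShardReader(client, "/example/data") as reader:
    assert reader.read() == payload

client.stop()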