Check zkobject size before writing
If we attempt to write a non-sharded zkobject that is larger than 1MiB, zk will drop our connection, and we will retry in an infinite loop. To avoid this, check the data size before writing and raise an exception. This will still likely cause errors in Zuul, but it may limit the blast radius of the damage, and may be more recoverable. ZK has a 1MiB max data size for a znode but that includes some other data like the key (it's really a packet buffer size). To avoid false negatives, we use the same value we use for sharding: 1MB, which gives 47KiB headroom. This should be fine since we either expect objects to be much smaller than 1MiB or sharded. We're just trying to catch the cases where we should have sharded something but didn't. Change-Id: If9da8b3968a2db813895ca5aabb736da652d7f2b
This commit is contained in:
@@ -18,6 +18,8 @@ from collections import defaultdict
|
||||
import json
|
||||
import math
|
||||
import queue
|
||||
import random
|
||||
import string
|
||||
import threading
|
||||
import time
|
||||
import uuid
|
||||
@@ -1946,6 +1948,37 @@ class TestZKObject(ZooKeeperBaseTestCase):
|
||||
pipeline1.foo = 'five'
|
||||
self.assertEqual(pipeline1.foo, 'five')
|
||||
|
||||
def _test_zk_object_too_large(self, zkobject_class):
|
||||
# This produces a consistent string that compresses to > 1MiB
|
||||
rnd = random.Random()
|
||||
rnd.seed(42)
|
||||
size = 1024 * 1200
|
||||
foo = ''.join(rnd.choice(string.printable) for x in range(size))
|
||||
|
||||
stop_event = threading.Event()
|
||||
self.zk_client.client.create('/zuul/pipeline', makepath=True)
|
||||
# Create a new object
|
||||
tenant_name = 'fake_tenant'
|
||||
with tenant_write_lock(self.zk_client, tenant_name) as lock:
|
||||
context = ZKContext(self.zk_client, lock, stop_event, self.log)
|
||||
with testtools.ExpectedException(Exception,
|
||||
'ZK data size too large'):
|
||||
pipeline1 = zkobject_class.new(context,
|
||||
name=tenant_name,
|
||||
foo=foo)
|
||||
|
||||
pipeline1 = zkobject_class.new(context,
|
||||
name=tenant_name,
|
||||
foo='foo')
|
||||
|
||||
with testtools.ExpectedException(Exception,
|
||||
'ZK data size too large'):
|
||||
pipeline1.updateAttributes(context, foo=foo)
|
||||
|
||||
# Refresh an existing object
|
||||
pipeline1.refresh(context)
|
||||
self.assertEqual(pipeline1.foo, 'foo')
|
||||
|
||||
def test_zk_object(self):
|
||||
self._test_zk_object(DummyZKObject)
|
||||
|
||||
@@ -1958,6 +1991,10 @@ class TestZKObject(ZooKeeperBaseTestCase):
|
||||
def test_sharded_zk_object_exception(self):
|
||||
self._test_zk_object_exception(DummyShardedZKObject)
|
||||
|
||||
def test_zk_object_too_large(self):
|
||||
# This only makes sense for a regular zkobject
|
||||
self._test_zk_object_too_large(DummyZKObject)
|
||||
|
||||
|
||||
class TestBranchCache(ZooKeeperBaseTestCase):
|
||||
def test_branch_cache_protected_then_all(self):
|
||||
|
||||
@@ -67,6 +67,8 @@ class RawZKIO(io.RawIOBase):
|
||||
|
||||
def write(self, data):
|
||||
byte_count = len(data)
|
||||
if byte_count > NODE_BYTE_SIZE_LIMIT:
|
||||
raise Exception(f"ZK data size too large: {byte_count}")
|
||||
start = time.perf_counter()
|
||||
if self.create:
|
||||
_, self.zstat = self.client.create(
|
||||
|
||||
Reference in New Issue
Block a user