Add --keep-config-cache option to delete-state command

The circular dependency refactor will require deleting all of the
pipeline states as well as the event queues from ZK while zuul
is offline during the upgrade.  This is fairly close to the existing
"delete-state" command, except that we can keep the config cache.
Doing so will allow for a faster recovery since we won't need to
issue all of the cat jobs again in order to fetch file contents.

To facilitate this, we add a "--keep-config-cache" argument to
the "delete-state" command which will then remove everything under
/zuul except /zuul/config.

Also, speed up both operations by implementing a fast recursive
delete method which sends async delete ops depth first and only
checks their results at the end (as opposed to the standard kazoo
delete which checks each operation at once).

This is added without a release note since it's not widely useful
and the upcoming change which requires its use will have a release
note with usage instructions.

Change-Id: I4db43e00a73f5e5b796261ffe7236ed906e6b421
This commit is contained in:
James E. Blair 2024-01-30 14:18:56 -08:00
parent 184f73ae02
commit c531adacae
3 changed files with 81 additions and 6 deletions

View File

@ -358,6 +358,46 @@ class TestOfflineZKOperations(ZuulTestCase):
self.zk_client.disconnect()
def test_delete_state_keep_config_cache(self):
# Shut everything down (as much as possible) to reduce
# logspam and errors.
ZuulTestCase.shutdown(self)
# Re-start the client connection because we need one for the
# test.
self.zk_client = ZooKeeperClient.fromConfig(self.config)
self.zk_client.connect()
config_file = os.path.join(self.test_root, 'zuul.conf')
with open(config_file, 'w') as f:
self.config.write(f)
# Save a copy of the things we keep in ZK
old_keys = self.getZKTree('/keystorage')
old_config = self.getZKTree('/zuul/config')
p = subprocess.Popen(
[os.path.join(sys.prefix, 'bin/zuul-admin'),
'-c', config_file,
'delete-state', '--keep-config-cache',
],
stdin=subprocess.PIPE,
stdout=subprocess.PIPE)
out, _ = p.communicate(b'yes\n')
self.log.debug(out.decode('utf8'))
# Make sure the keys are still around
new_keys = self.getZKTree('/keystorage')
self.assertEqual(new_keys, old_keys)
new_config = self.getZKTree('/zuul/config')
self.assertEqual(new_config, old_config)
# Make sure we really deleted everything
children = self.zk_client.client.get_children('/zuul')
self.assertEqual(['config'], children)
self.zk_client.disconnect()
class TestOnlineZKOperations(ZuulTestCase):
tenant_config_file = 'config/single-tenant/main.yaml'

View File

@ -42,6 +42,8 @@ from zuul.zk.locks import tenant_read_lock, pipeline_lock
from zuul.zk.zkobject import ZKContext
from zuul.zk.components import COMPONENT_REGISTRY
from kazoo.exceptions import NoNodeError
def parse_cutoff(now, before, older_than):
if before and not older_than:
@ -496,6 +498,9 @@ class Client(zuul.cmd.ZuulApp):
ZooKeeper; it will not remove private keys or Nodepool
data.'''))
cmd_delete_state.set_defaults(command='delete-state')
cmd_delete_state.add_argument(
'--keep-config-cache', action='store_true',
help='keep config cache')
cmd_delete_state.set_defaults(func=self.delete_state)
cmd_delete_pipeline_state = subparsers.add_parser(
@ -1014,8 +1019,22 @@ class Client(zuul.cmd.ZuulApp):
zk_client.connect()
confirm = input("Are you sure you want to delete "
"all ephemeral data from ZooKeeper? (yes/no) ")
if confirm.strip().lower() == 'yes':
zk_client.client.delete('/zuul', recursive=True)
if confirm.strip().lower() != 'yes':
print("Aborting")
sys.exit(1)
if self.args.keep_config_cache:
try:
children = zk_client.client.get_children('/zuul')
except NoNodeError:
children = []
for child in children:
if child == 'config':
continue
path = f'/zuul/{child}'
self.log.debug("Deleting %s", path)
zk_client.fastRecursiveDelete(path)
else:
zk_client.fastRecursiveDelete('/zuul')
sys.exit(0)
def delete_pipeline_state(self):
@ -1028,16 +1047,13 @@ class Client(zuul.cmd.ZuulApp):
safe_tenant = urllib.parse.quote_plus(args.tenant)
safe_pipeline = urllib.parse.quote_plus(args.pipeline)
COMPONENT_REGISTRY.create(zk_client)
self.log.info('get tenant')
with tenant_read_lock(zk_client, args.tenant):
path = f'/zuul/tenant/{safe_tenant}/pipeline/{safe_pipeline}'
self.log.info('get pipe')
pipeline = Pipeline(args.tenant, args.pipeline)
with pipeline_lock(
zk_client, args.tenant, args.pipeline
) as plock:
self.log.info('got locks')
zk_client.client.delete(path, recursive=True)
zk_client.fastRecursiveDelete(path)
with ZKContext(zk_client, plock, None, self.log) as context:
pipeline.state = PipelineState.new(
context, _path=path, layout_uuid=None)

View File

@ -224,6 +224,25 @@ class ZooKeeperClient(object):
zstat = self.client.set("/zuul/ltime", b"")
return zstat.last_modified_transaction_id
def _fastRecursiveDelete(self, path, results):
try:
children = self.client.get_children(path)
except NoNodeError:
return
if children:
for child in children:
self._fastRecursiveDelete(f'{path}/{child}', results)
results.append(self.client.delete_async(path, -1))
def fastRecursiveDelete(self, path):
results = []
self._fastRecursiveDelete(path, results)
for res in results:
try:
res.get()
except NoNodeError:
pass
class ZooKeeperSimpleBase(metaclass=ABCMeta):
"""Base class for stateless Zookeeper interaction."""