Add delete-state command to delete everything from ZK

This will give operators a tool for manual recovery in case of
emergency.

Change-Id: Ia84beb08b685f59a24f76cb0b6adf518f6e64362
This commit is contained in:
James E. Blair 2021-08-11 14:09:22 -07:00
parent a0af6004de
commit e2dd49b5be
4 changed files with 99 additions and 0 deletions

View File

@ -242,3 +242,12 @@ delete-keys
Example::
zuul delete-keys gerrit old_project
delete-state
^^^^^^^^^^^^
.. program-output:: zuul delete-state --help
Example::
zuul delete-state

View File

@ -0,0 +1,7 @@
---
features:
- |
The ``zuul delete-state`` command may be used to delete all of the
ephemeral state stored by Zuul in ZooKeeper. Normally Zuul is able
to detect and correct errors on its own, but in case it is unable to,
this may prove a useful utility for manual recovery.

View File

@ -24,6 +24,8 @@ import jwt
import testtools
from kazoo.exceptions import NoNodeError
from zuul.zk import ZooKeeperClient
from tests.base import BaseTestCase, ZuulTestCase
from tests.base import FIXTURE_DIR
@ -251,3 +253,50 @@ class TestKeyOperations(ZuulTestCase):
data.get('/keystorage/gerrit/org/org%2Fproject/secrets'))
self.assertIsNone(
data.get('/keystorage/gerrit/org/org%2Fproject/ssh'))
class TestZKOperations(ZuulTestCase):
    """Exercise the ZooKeeper maintenance commands of the zuul client."""

    tenant_config_file = 'config/single-tenant/main.yaml'

    def shutdown(self):
        # Intentionally a no-op: test_delete_state invokes
        # ZuulTestCase.shutdown() itself before running the command.
        pass

    def assertFinalState(self):
        # Intentionally a no-op.
        # NOTE(review): presumably the base-class final-state checks
        # cannot hold once the state has been deleted -- confirm.
        pass

    def test_delete_state(self):
        """Run ``zuul delete-state`` and verify what it removes.

        The command is answered with ``yes`` on stdin.  Afterwards the
        ``/keystorage`` tree must be unchanged and ``/zuul`` must no
        longer exist.
        """
        # Shut everything down (as much as possible) to reduce
        # logspam and errors.
        ZuulTestCase.shutdown(self)

        # Re-start the client connection because we need one for the
        # test.
        self.zk_client = ZooKeeperClient.fromConfig(self.config)
        self.zk_client.connect()
        try:
            config_file = os.path.join(self.test_root, 'zuul.conf')
            with open(config_file, 'w') as f:
                self.config.write(f)

            # Save a copy of the keys in ZK
            old_data = self.getZKTree('/keystorage')

            p = subprocess.Popen(
                [os.path.join(sys.prefix, 'bin/zuul'),
                 '-c', config_file,
                 'delete-state',
                 ],
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE)
            # Answer the interactive confirmation prompt.
            out, _ = p.communicate(b'yes\n')
            self.log.debug(out.decode('utf8'))

            # Make sure the keys are still around
            new_data = self.getZKTree('/keystorage')
            self.assertEqual(new_data, old_data)

            # Make sure we really deleted everything
            with testtools.ExpectedException(NoNodeError):
                self.getZKTree('/zuul')
        finally:
            # Disconnect even when an assertion above fails, so the
            # connection opened by this test is never leaked.
            self.zk_client.disconnect()

View File

@ -431,6 +431,30 @@ class Client(zuul.cmd.ZuulApp):
help='project name')
cmd_delete_keys.set_defaults(func=self.delete_keys)
# ZK Maintenance
cmd_delete_state = subparsers.add_parser(
'delete-state',
help='delete ephemeral ZooKeeper state',
formatter_class=argparse.RawDescriptionHelpFormatter,
description=textwrap.dedent('''\
Delete all ephemeral state stored in ZooKeeper
Zuul stores a considerable amount of ephemeral state
information in ZooKeeper. Generally it should be able to
detect and correct any errors, but if the state becomes
corrupted and it is unable to recover, this command may be
used to delete all ephemeral data from ZooKeeper and start
anew.
Do not run this command while any Zuul component is
running (perform a complete shutdown first).
This command will only remove ephemeral Zuul data from
ZooKeeper; it will not remove private keys or Nodepool
data.'''))
cmd_delete_state.set_defaults(command='delete-state')
cmd_delete_state.set_defaults(func=self.delete_state)
return parser
def parseArguments(self, args=None):
@ -893,6 +917,16 @@ class Client(zuul.cmd.ZuulApp):
self.log.info("Delete keys from %s %s",
args.connection, args.project)
def delete_state(self):
    """Delete the ``/zuul`` tree from ZooKeeper after confirmation.

    Connects to ZooKeeper using the loaded configuration and prompts
    on stdin.  The ``/zuul`` node is deleted recursively only when the
    answer is ``yes`` (case-insensitive, surrounding whitespace
    ignored); any other answer leaves ZooKeeper untouched.  The
    ZooKeeper connection is always closed before returning.
    """
    logging.basicConfig(level=logging.INFO)

    zk_client = ZooKeeperClient.fromConfig(self.config)
    zk_client.connect()
    try:
        confirm = input("Are you sure you want to delete "
                        "all ephemeral data from ZooKeeper? (yes/no) ")
        if confirm.strip().lower() == 'yes':
            zk_client.client.delete('/zuul', recursive=True)
    finally:
        # Release the ZooKeeper session on every path, including an
        # interrupted prompt or a failed delete.
        zk_client.disconnect()
def main():
    """Instantiate ``Client`` and invoke its ``main`` method."""
    client = Client()
    client.main()