Add provider info command

This command will display all ZooKeeper data for a given provider,
and provide an option to remove all of the data from ZooKeeper.
This can be useful when an operator must permanently remove a
pre-existing provider from nodepool and cannot cleanly shut down
the services otherwise.

Example:

   nodepool info rax
   nodepool info --erase rax

Change-Id: I527aae5ff89aac864f984af050abb83e7bc3ac04
This commit is contained in:
David Shrewsbury 2018-01-18 12:41:17 -05:00
parent 63dbab87df
commit 742b0b1d6b
6 changed files with 264 additions and 1 deletions

View File

@ -203,6 +203,13 @@ delete
.. program-output:: nodepool delete --help
:nostderr:
The following subcommands deal with ZooKeeper data management:
info
^^^^
.. program-output:: nodepool info --help
:nostderr:
If Nodepool's database gets out of sync with reality, the following
commands can help identify compute instances or images that are
unknown to Nodepool:

View File

@ -122,6 +122,23 @@ class NodePoolCmd(NodepoolApp):
help='list the current node requests')
cmd_request_list.set_defaults(func=self.request_list)
cmd_info = subparsers.add_parser(
'info',
help='Show provider data from zookeeper')
cmd_info.add_argument(
'provider',
help='provider name',
metavar='PROVIDER')
cmd_info.add_argument(
'--erase',
help='erase ZooKeeper data for this provider',
action='store_true')
cmd_info.add_argument(
'--force',
help='used with --erase to bypass the warning prompt',
action='store_true')
cmd_info.set_defaults(func=self.info)
return parser
def setup_logging(self):
@ -285,6 +302,46 @@ class NodePoolCmd(NodepoolApp):
self.zk.storeImageUpload(image.image_name, image.build_id,
image.provider_name, image, image.id)
def erase(self, provider_name, provider_builds, provider_nodes):
    """
    Remove a provider's build and node records from ZooKeeper.

    Build data is removed first, then node data. A progress line is
    printed before each of the two steps.

    :param str provider_name: The provider name.
    :param provider_builds: Build data passed to removeProviderBuilds().
    :param provider_nodes: Node data passed to removeProviderNodes().
    """
    build_notice = "\nErasing build data for %s..." % provider_name
    print(build_notice)
    self.zk.removeProviderBuilds(provider_name, provider_builds)

    node_notice = "Erasing node data for %s..." % provider_name
    print(node_notice)
    self.zk.removeProviderNodes(provider_name, provider_nodes)
def info(self):
    """
    Display a provider's ZooKeeper data and optionally erase it.

    Prints one table of image builds and one table of nodes for the
    provider named by ``self.args.provider``. When ``self.args.erase`` is
    set, the data that was just displayed is removed: immediately if
    ``self.args.force`` is also set, otherwise only after the operator
    answers ``y`` at the confirmation prompt. Any other answer, or a
    closed/non-interactive stdin, aborts without erasing anything.
    """
    provider_name = self.args.provider
    provider_builds = self.zk.getProviderBuilds(provider_name)
    provider_nodes = self.zk.getProviderNodes(provider_name)

    print("ZooKeeper data for provider %s\n" % provider_name)

    print("Image builds:")
    t = PrettyTable(['Image Name', 'Build IDs'])
    t.align = 'l'
    for image, builds in provider_builds.items():
        t.add_row([image, ','.join(builds)])
    print(t)

    print("\nNodes:")
    t = PrettyTable(['ID', 'Server ID'])
    t.align = 'l'
    for node in provider_nodes:
        t.add_row([node.id, node.external_id])
    print(t)

    if not self.args.erase:
        return

    if not self.args.force:
        print("\nWARNING! This action is not reversible!")
        try:
            answer = input("Erase ZooKeeper data for provider %s? [N/y] " %
                           provider_name)
        except EOFError:
            # No one is there to confirm (stdin closed or not a terminal):
            # fall back to the prompt's default of "No" instead of
            # crashing with a traceback.
            print()
            answer = ''
        # Tolerate stray whitespace around the answer; only 'y'/'Y' erases.
        if answer.strip().lower() != 'y':
            print("Aborting. No data erased.")
            return

    self.erase(provider_name, provider_builds, provider_nodes)
def config_validate(self):
validator = ConfigValidator(self.args.config)
validator.validate()
@ -318,7 +375,7 @@ class NodePoolCmd(NodepoolApp):
'image-list', 'dib-image-delete',
'image-delete', 'alien-image-list',
'list', 'hold', 'delete',
'request-list'):
'request-list', 'info'):
self.zk = zk.ZooKeeper()
self.zk.connect(list(config.zookeeper_servers.values()))

View File

@ -0,0 +1,56 @@
elements-dir: .
images-dir: '{images_dir}'
zookeeper-servers:
- host: {zookeeper_host}
port: {zookeeper_port}
chroot: {zookeeper_chroot}
labels:
- name: fake-label
min-ready: 1
- name: fake-label2
min-ready: 1
providers:
- name: fake-provider
cloud: fake
driver: fake
region-name: fake-region
rate: 0.0001
diskimages:
- name: fake-image
pools:
- name: main
max-servers: 96
labels:
- name: fake-label
diskimage: fake-image
min-ram: 8192
- name: fake-provider2
cloud: fake
driver: fake
region-name: fake-region
rate: 0.0001
diskimages:
- name: fake-image
pools:
- name: main
max-servers: 96
labels:
- name: fake-label2
diskimage: fake-image
min-ram: 8192
diskimages:
- name: fake-image
elements:
- fedora
- vm
release: 21
env-vars:
TMPDIR: /opt/dib_tmp
DIB_IMAGE_CACHE: /opt/dib_cache
DIB_CLOUD_IMAGES: http://download.fedoraproject.org/pub/fedora/linux/releases/test/21-Beta/Cloud/Images/x86_64/
BASE_IMAGE_FILE: Fedora-Cloud-Base-20141029-21_Beta.x86_64.qcow2

View File

@ -0,0 +1,39 @@
elements-dir: .
images-dir: '{images_dir}'
zookeeper-servers:
- host: {zookeeper_host}
port: {zookeeper_port}
chroot: {zookeeper_chroot}
labels:
- name: fake-label
min-ready: 1
providers:
- name: fake-provider
cloud: fake
driver: fake
region-name: fake-region
rate: 0.0001
diskimages:
- name: fake-image
pools:
- name: main
max-servers: 96
labels:
- name: fake-label
diskimage: fake-image
min-ram: 8192
diskimages:
- name: fake-image
elements:
- fedora
- vm
release: 21
env-vars:
TMPDIR: /opt/dib_tmp
DIB_IMAGE_CACHE: /opt/dib_cache
DIB_CLOUD_IMAGES: http://download.fedoraproject.org/pub/fedora/linux/releases/test/21-Beta/Cloud/Images/x86_64/
BASE_IMAGE_FILE: Fedora-Cloud-Base-20141029-21_Beta.x86_64.qcow2

View File

@ -300,3 +300,49 @@ class TestNodepoolCMD(tests.DBTestCase):
self.patch_argv("-c", configfile)
result = nodepoolcmd.main()
self.assertEqual(1, result)
def test_info(self):
    # Bring up a launcher and builder with two providers and wait until
    # each provider has its image and a ready node.
    configfile = self.setup_config('info_cmd_two_provider.yaml')
    launcher = self.useNodepool(configfile, watermark_sleep=1)
    self.useBuilder(configfile)
    launcher.start()

    first_image = self.waitForImage('fake-provider', 'fake-image')
    first_nodes = self.waitForNodes('fake-label')
    second_nodes = self.waitForNodes('fake-label2')
    second_node_id = second_nodes[0].id

    # Drop the second provider from the config so that, once its
    # ZooKeeper data is erased, the builder and launcher do not try to
    # recreate it.
    self.replace_config(configfile, 'info_cmd_two_provider_remove.yaml')

    info_args = ('info', 'fake-provider2')

    # The second provider's image must show up in the listing.
    self.assert_listed(configfile, list(info_args), 0, 'fake-image', 1)

    # The second provider's node must show up too. This invocation also
    # erases the data (after displaying it) so the erase can be verified
    # below.
    self.assert_listed(
        configfile,
        list(info_args) + ['--erase', '--force'],
        0, second_node_id, 1)

    # After the erase neither the build nor the node may be listed for
    # the second provider.
    for erased_value in ('fake-image', second_node_id):
        self.assert_listed(
            configfile, list(info_args), 0, erased_value, 0)

    # The first provider must be untouched.
    image = self.waitForImage('fake-provider', 'fake-image')
    self.assertEqual(first_image, image)
    nodes = self.waitForNodes('fake-label')
    self.assertEqual(1, len(nodes))
    self.assertEqual(first_nodes[0], nodes[0])

View File

@ -1679,3 +1679,61 @@ class ZooKeeper(object):
if node.provider == provider_name and node.pool == pool_name:
count = count + 1
return count
def getProviderBuilds(self, provider_name):
    '''
    Get all builds for a provider for each image.

    Walks every image name and every build number of that image, and
    keeps the builds whose provider list contains provider_name.

    :param str provider_name: The provider name.

    :returns: A dict of lists of build IDs, keyed by image name. Images
        without any build for the provider are not present in the dict.
    '''
    provider_builds = {}
    for image in self.getImageNames():
        for build in self.getBuildNumbers(image):
            # Membership test instead of a manual scan: a build is
            # recorded once when the provider is among its providers.
            if provider_name in self.getBuildProviders(image, build):
                provider_builds.setdefault(image, []).append(build)
    return provider_builds
def getProviderNodes(self, provider_name):
    '''
    Collect every node that belongs to a provider.

    :param str provider_name: The provider name, compared against each
        node's ``provider`` attribute.

    :returns: A list of the matching Node objects, in iteration order.
    '''
    return [
        candidate for candidate in self.nodeIterator()
        if candidate.provider == provider_name
    ]
def removeProviderBuilds(self, provider_name, provider_builds):
    '''
    Delete a provider's build data from ZooKeeper.

    For each (image, build) pair, the provider's child path under the
    build's provider path is deleted recursively. Paths that no longer
    exist are skipped.

    :param str provider_name: The provider name.
    :param dict provider_builds: Data as returned by getProviderBuilds().
    '''
    image_build_pairs = (
        (image_name, build_id)
        for image_name, build_ids in provider_builds.items()
        for build_id in build_ids
    )
    for image_name, build_id in image_build_pairs:
        providers_root = self._imageProviderPath(image_name, build_id)
        target = "%s/%s" % (providers_root, provider_name)
        try:
            self.client.delete(target, recursive=True)
        except kze.NoNodeError:
            # Already gone; nothing left to remove for this build.
            pass
def removeProviderNodes(self, provider_name, provider_nodes):
    '''
    Remove ZooKeeper node data for a provider.

    :param str provider_name: The provider name. Not consulted here:
        every node in provider_nodes is deleted as given.
    :param list provider_nodes: Data as returned by getProviderNodes().
    '''
    for node in provider_nodes:
        self.deleteNode(node)