Add port-cleanup-interval config option
There are two edge cases where the port cleanup logic is too aggressive.
This change addresses both of them in one commit:

* Some providers might spawn instances very slowly. In the past this was
  handled by hardcoding the timeout to 10 minutes; this change lets a user
  tweak the timeout in config.
* In the esoteric combination of using Ironic without the Ironic Neutron
  agent, it is normal for ports to remain DOWN indefinitely. Setting the
  timeout to 0 works around that edge case.

Change-Id: I120d79c4b5f209bb1bd9907db172f94f29b9cb5d
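To make the two cases concrete, here is a rough sketch of how the new option
might appear in a provider definition. This is illustrative only: the provider
names and surrounding values are hypothetical, and only port-cleanup-interval
is the option introduced by this change.

providers:
  # Hypothetical provider that spawns instances slowly: allow 20 minutes
  # before a DOWN port is treated as leaked (the default is 600 seconds).
  - name: slow-cloud
    driver: openstack
    cloud: slow-cloud
    port-cleanup-interval: 1200

  # Hypothetical Ironic deployment without the Ironic Neutron agent:
  # ports legitimately stay DOWN, so disable port cleanup entirely.
  - name: ironic-cloud
    driver: openstack
    cloud: ironic-cloud
    port-cleanup-interval: 0

When the interval is 0, cleanupLeakedResources() skips cleanupLeakedPorts()
entirely, as the provider change in the diff below shows.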
@@ -577,6 +577,15 @@ Selecting the OpenStack driver adds the following options to the
       the OpenStack project and will attempt to clean unattached
       floating ips that may have leaked around restarts.
 
+   .. attr:: port-cleanup-interval
+      :type: int seconds
+      :default: 600
+
+      If greater than 0, nodepool will assume it is the only user of the
+      OpenStack project and will attempt to clean ports in `DOWN` state after
+      `cleanup-interval` has passed. This value can be reduced if the instance
+      spawn time on the provider is reliably quicker.
+
    .. attr:: diskimages
       :type: list
 
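The documentation added above notes that the value can be reduced when
instances on a provider reliably spawn quickly. A minimal, purely
illustrative sketch of that tuning (the provider name and the 180-second
value are made up):

providers:
  - name: fast-cloud
    driver: openstack
    cloud: fast-cloud
    # Instances here reliably attach their ports within a couple of
    # minutes, so leaked DOWN ports can be reclaimed sooner than the
    # 600-second default.
    port-cleanup-interval: 180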
@@ -230,6 +230,7 @@ class OpenStackProviderConfig(ProviderConfig):
         self.boot_timeout = None
         self.launch_timeout = None
         self.clean_floating_ips = None
+        self.port_cleanup_interval = None
         self.diskimages = {}
         self.cloud_images = {}
         self.hostname_format = None
@@ -246,6 +247,8 @@ class OpenStackProviderConfig(ProviderConfig):
                     other.boot_timeout == self.boot_timeout and
                     other.launch_timeout == self.launch_timeout and
                     other.clean_floating_ips == self.clean_floating_ips and
+                    other.port_cleanup_interval ==
+                    self.port_cleanup_interval and
                     other.diskimages == self.diskimages and
                     other.cloud_images == self.cloud_images)
         return False
@@ -277,6 +280,10 @@ class OpenStackProviderConfig(ProviderConfig):
         self.launch_timeout = self.provider.get('launch-timeout', 3600)
         self.launch_retries = self.provider.get('launch-retries', 3)
         self.clean_floating_ips = self.provider.get('clean-floating-ips')
+        self.port_cleanup_interval = self.provider.get(
+            'port-cleanup-interval',
+            600
+        )
         self.hostname_format = self.provider.get(
             'hostname-format',
             '{label.name}-{provider.name}-{node.id}'
@@ -414,6 +421,7 @@ class OpenStackProviderConfig(ProviderConfig):
             'hostname-format': str,
             'image-name-format': str,
             'clean-floating-ips': bool,
+            'port-cleanup-interval': int,
             'pools': [pool],
             'diskimages': [provider_diskimage],
             'cloud-images': [provider_cloud_images],
@@ -51,10 +51,6 @@ class OpenStackProvider(Provider):
         self._zk = None
         self._down_ports = set()
         self._last_port_cleanup = None
-        # Set this long enough to avoid deleting a port which simply
-        # hasn't yet been attached to an instance which is being
-        # created.
-        self._port_cleanup_interval_secs = 600
         self._statsd = stats.get_client()
 
     def start(self, zk_conn):
@@ -554,7 +550,7 @@ class OpenStackProvider(Provider):
 
         # Return if not enough time has passed between cleanup
         last_check_in_secs = int(time.monotonic() - self._last_port_cleanup)
-        if last_check_in_secs <= self._port_cleanup_interval_secs:
+        if last_check_in_secs <= self.provider.port_cleanup_interval:
             return
 
         ports = self.listPorts(status='DOWN')
@@ -588,7 +584,8 @@ class OpenStackProvider(Provider):
 
     def cleanupLeakedResources(self):
         self.cleanupLeakedInstances()
-        self.cleanupLeakedPorts()
+        if self.provider.port_cleanup_interval:
+            self.cleanupLeakedPorts()
         if self.provider.clean_floating_ips:
             self._client.delete_unattached_floating_ips()
 
@@ -0,0 +1,54 @@
+elements-dir: .
+images-dir: '{images_dir}'
+build-log-dir: '{build_log_dir}'
+build-log-retention: 1
+
+zookeeper-servers:
+  - host: {zookeeper_host}
+    port: {zookeeper_port}
+    chroot: {zookeeper_chroot}
+
+labels:
+  - name: fake-label
+    min-ready: 1
+
+providers:
+  - name: fake-provider
+    cloud: fake
+    driver: fake
+    region-name: fake-region
+    rate: 0.0001
+    port-cleanup-interval: 2
+    diskimages:
+      - name: fake-image
+        meta:
+          key: value
+          key2: value
+    pools:
+      - name: main
+        max-servers: 96
+        node-attributes:
+          key1: value1
+          key2: value2
+        availability-zones:
+          - az1
+        networks:
+          - net-name
+        labels:
+          - name: fake-label
+            diskimage: fake-image
+            min-ram: 8192
+            flavor-name: 'Fake'
+
+diskimages:
+  - name: fake-image
+    elements:
+      - fedora
+      - vm
+    release: 21
+    dib-cmd: nodepool/tests/fake-image-create
+    env-vars:
+      TMPDIR: /opt/dib_tmp
+      DIB_IMAGE_CACHE: /opt/dib_cache
+      DIB_CLOUD_IMAGES: http://download.fedoraproject.org/pub/fedora/linux/releases/test/21-Beta/Cloud/Images/x86_64/
+      BASE_IMAGE_FILE: Fedora-Cloud-Base-20141029-21_Beta.x86_64.qcow2
@@ -33,6 +33,7 @@ providers:
     boot-timeout: 120
     max-concurrency: 10
     launch-retries: 3
+    port-cleanup-interval: 600
     rate: 1
     diskimages:
       - name: trusty
@@ -75,6 +76,7 @@ providers:
     region-name: 'chocolate'
     boot-timeout: 120
     rate: 0.001
+    port-cleanup-interval: 0
     diskimages:
       - name: trusty
        pause: False
@@ -2109,10 +2109,29 @@ class TestLauncher(tests.DBTestCase):
         self.assertEqual(2, len(down_ports))
         self.log.debug("Down ports: %s", down_ports)
 
-        # Change the port cleanup interval to happen quicker
-        manager._port_cleanup_interval_secs = 2
-        while manager.listPorts(status='DOWN'):
-            time.sleep(1)
+        # Second config decreases cleanup interval to 2 seconds
+        self.replace_config(configfile, 'cleanup-port.yaml')
+        oldmanager = manager
+        manager = pool.getProviderManager('fake-provider')
+        for _ in iterate_timeout(10, Exception, 'assert config updated'):
+            try:
+                self.assertNotEqual(manager, oldmanager)
+                break
+            except AssertionError:
+                # config still hasn't updated, retry
+                manager = pool.getProviderManager('fake-provider')
+        # Reset the client as a new fake client will have been
+        # created.
+        manager.resetClient()
+
+        for _ in iterate_timeout(4, Exception, 'assert ports are cleaned'):
+            try:
+                down_ports = manager.listPorts(status='DOWN')
+                self.assertEqual(0, len(down_ports))
+                break
+            except AssertionError:
+                # ports not cleaned up yet, retry
+                pass
+
         self.assertReportedStat('nodepool.provider.fake-provider.downPorts',
                                 value='2', kind='c')