diff --git a/doc/source/operation.rst b/doc/source/operation.rst index 470e9f9bf..80d96cff7 100644 --- a/doc/source/operation.rst +++ b/doc/source/operation.rst @@ -329,12 +329,6 @@ Nodepool launcher * ready * used -.. zuul:stat:: nodepool.provider..downPorts - :type: counter - - Number of ports in the DOWN state that have been removed automatically - in the cleanup resources phase of the OpenStack driver. - .. zuul:stat:: nodepool.provider..nodes. :type: gauge diff --git a/nodepool/driver/fake/provider.py b/nodepool/driver/fake/provider.py index ed83be6c9..6e6fdcd4d 100644 --- a/nodepool/driver/fake/provider.py +++ b/nodepool/driver/fake/provider.py @@ -31,7 +31,6 @@ class Dummy(object): INSTANCE = 'Instance' FLAVOR = 'Flavor' LOCATION = 'Server.Location' - PORT = 'Port' def __init__(self, kind, **kw): self.__kind = kind @@ -105,12 +104,6 @@ class FakeOpenStackCloud(object): self._server_list = [] self.max_cores, self.max_instances, self.max_ram = FakeOpenStackCloud.\ _get_quota() - self._down_ports = [ - Dummy(Dummy.PORT, id='1a', status='DOWN', - device_owner="compute:nova"), - Dummy(Dummy.PORT, id='2b', status='DOWN', - device_owner=None), - ] def _get(self, name_or_id, instance_list): self.log.debug("Get %s in %s" % (name_or_id, repr(instance_list))) @@ -288,20 +281,6 @@ class FakeOpenStackCloud(object): total_ram_used=8192 * len(self._server_list) ) - def list_ports(self, filters=None): - if filters and filters.get('status') == 'DOWN': - return self._down_ports - return [] - - def delete_port(self, port_id): - tmp_ports = [] - for port in self._down_ports: - if port.id != port_id: - tmp_ports.append(port) - else: - self.log.debug("Deleted port ID: %s", port_id) - self._down_ports = tmp_ports - class FakeUploadFailCloud(FakeOpenStackCloud): log = logging.getLogger("nodepool.FakeUploadFailCloud") diff --git a/nodepool/driver/openstack/provider.py b/nodepool/driver/openstack/provider.py index de479fcc3..d202f9f38 100755 --- a/nodepool/driver/openstack/provider.py +++ b/nodepool/driver/openstack/provider.py @@ -26,7 +26,6 @@ from nodepool.driver import Provider from nodepool.driver.utils import QuotaInformation from nodepool.nodeutils import iterate_timeout from nodepool.task_manager import TaskManager -from nodepool import stats from nodepool import version from nodepool import zk @@ -51,10 +50,6 @@ class OpenStackProvider(Provider): self._taskmanager = None self._current_nodepool_quota = None self._zk = None - self._down_ports = set() - self._last_port_cleanup = None - self._port_cleanup_interval_secs = 180 - self._statsd = stats.get_client() def start(self, zk_conn): if self._use_taskmanager: @@ -422,21 +417,6 @@ class OpenStackProvider(Provider): **meta) return image.id - def listPorts(self, status=None): - ''' - List known ports. - - :param str status: A valid port status. E.g., 'ACTIVE' or 'DOWN'. - ''' - if status: - ports = self._client.list_ports(filters={'status': status}) - else: - ports = self._client.list_ports() - return ports - - def deletePort(self, port_id): - self._client.delete_port(port_id) - def listImages(self): return self._client.list_images() @@ -464,7 +444,7 @@ class OpenStackProvider(Provider): self.log.debug('Deleting server %s' % server_id) self.deleteServer(server_id) - def cleanupLeakedInstances(self): + def cleanupLeakedResources(self): ''' Delete any leaked server instances. @@ -512,63 +492,6 @@ class OpenStackProvider(Provider): node.state = zk.DELETING self._zk.storeNode(node) - def filterComputePorts(self, ports): - ''' - Return a list of compute ports (or no device owner). - - We are not interested in ports for routers or DHCP. - ''' - ret = [] - for p in ports: - if p.device_owner is None or p.device_owner.startswith("compute:"): - ret.append(p) - return ret - - def cleanupLeakedPorts(self): - if not self._last_port_cleanup: - self._last_port_cleanup = time.monotonic() - ports = self.listPorts(status='DOWN') - ports = self.filterComputePorts(ports) - self._down_ports = set([p.id for p in ports]) - return - - # Return if not enough time has passed between cleanup - last_check_in_secs = int(time.monotonic() - self._last_port_cleanup) - if last_check_in_secs <= self._port_cleanup_interval_secs: - return - - ports = self.listPorts(status='DOWN') - ports = self.filterComputePorts(ports) - current_set = set([p.id for p in ports]) - remove_set = current_set & self._down_ports - - removed_count = 0 - for port_id in remove_set: - try: - self.deletePort(port_id) - except Exception: - self.log.exception("Exception deleting port %s in %s:", - port_id, self.provider.name) - else: - removed_count += 1 - self.log.debug("Removed DOWN port %s in %s", - port_id, self.provider.name) - - if self._statsd and removed_count: - key = 'nodepool.provider.%s.downPorts' % (self.provider.name) - self._statsd.incr(key, removed_count) - - self._last_port_cleanup = time.monotonic() - - # Rely on OpenStack to tell us the down ports rather than doing our - # own set adjustment. - ports = self.listPorts(status='DOWN') - ports = self.filterComputePorts(ports) - self._down_ports = set([p.id for p in ports]) - - def cleanupLeakedResources(self): - self.cleanupLeakedInstances() - self.cleanupLeakedPorts() if self.provider.clean_floating_ips: self._client.delete_unattached_floating_ips() diff --git a/nodepool/tests/test_launcher.py b/nodepool/tests/test_launcher.py index 1406f489c..27c3ec34f 100644 --- a/nodepool/tests/test_launcher.py +++ b/nodepool/tests/test_launcher.py @@ -1773,24 +1773,3 @@ class TestLauncher(tests.DBTestCase): req3 = self.waitForNodeRequest(req3) self.assertEqual(req3.state, zk.FAILED) - - def test_leaked_port_cleanup(self): - configfile = self.setup_config('node.yaml') - self.useBuilder(configfile) - pool = self.useNodepool(configfile, watermark_sleep=1) - pool.cleanup_interval = 1 - pool.start() - self.waitForNodes('fake-label') - - manager = pool.getProviderManager('fake-provider') - down_ports = manager.listPorts(status='DOWN') - self.assertEqual(2, len(down_ports)) - self.log.debug("Down ports: %s", down_ports) - - # Change the port cleanup interval to happen quicker - manager._port_cleanup_interval_secs = 2 - while manager.listPorts(status='DOWN'): - time.sleep(1) - - self.assertReportedStat('nodepool.provider.fake-provider.downPorts', - value='2', kind='c') diff --git a/releasenotes/notes/port-cleanup-667d476437f03358.yaml b/releasenotes/notes/port-cleanup-667d476437f03358.yaml deleted file mode 100644 index e89e375ad..000000000 --- a/releasenotes/notes/port-cleanup-667d476437f03358.yaml +++ /dev/null @@ -1,7 +0,0 @@ ---- -features: - - | - Added a new routine to the OpenStack driver cleanup resources phase that - will remove any ports reported to be in the DOWN state. Ports will have - to be seen as DOWN for at least three minutes before they will be removed. - The number of ports removed will be reported to statsd.