Merge "Logs stats for nodepool automated cleanup"
This commit is contained in:
@@ -410,6 +410,8 @@ these metrics are supported:
|
||||
Nodepool builder
|
||||
~~~~~~~~~~~~~~~~
|
||||
|
||||
The following metrics are produced by a ``nodepool-builder`` process:
|
||||
|
||||
.. zuul:stat:: nodepool.dib_image_build.<diskimage_name>.<ext>.size
|
||||
:type: gauge
|
||||
|
||||
@@ -444,11 +446,7 @@ Nodepool builder
|
||||
Nodepool launcher
|
||||
~~~~~~~~~~~~~~~~~
|
||||
|
||||
.. zuul:stat:: nodepool.provider.<provider>.max_servers
|
||||
:type: gauge
|
||||
|
||||
Current setting of the max-server configuration parameter for the respective
|
||||
provider.
|
||||
The following metrics are produced by a ``nodepool-launcher`` process:
|
||||
|
||||
.. _nodepool_nodes:
|
||||
|
||||
@@ -466,11 +464,20 @@ Nodepool launcher
|
||||
* ready
|
||||
* used
|
||||
|
||||
.. zuul:stat:: nodepool.provider.<provider>.downPorts
|
||||
.. zuul:stat:: nodepool.label.<label>.nodes.<state>
|
||||
:type: counter
|
||||
|
||||
Number of ports in the DOWN state that have been removed automatically
|
||||
in the cleanup resources phase of the OpenStack driver.
|
||||
Number of nodes with a specific label in a specific state. See
|
||||
:ref:`nodepool.nodes <nodepool_nodes>` for a list of possible states.
|
||||
|
||||
Provider Metrics
|
||||
^^^^^^^^^^^^^^^^
|
||||
|
||||
.. zuul:stat:: nodepool.provider.<provider>.max_servers
|
||||
:type: gauge
|
||||
|
||||
Current setting of the max-servers configuration parameter for the respective
|
||||
provider.
|
||||
|
||||
.. zuul:stat:: nodepool.provider.<provider>.nodes.<state>
|
||||
:type: gauge
|
||||
@@ -478,17 +485,31 @@ Nodepool launcher
|
||||
Number of nodes per provider that are in one specific state. See
|
||||
:ref:`nodepool.nodes <nodepool_nodes>` for a list of possible states.
|
||||
|
||||
.. zuul:stat:: nodepool.label.<label>.nodes.<state>
|
||||
.. zuul:stat:: nodepool.provider.<provider>.leaked.ports
|
||||
:type: counter
|
||||
|
||||
Number of nodes with a specific label in a specific state. See
|
||||
:ref:`nodepool.nodes <nodepool_nodes>` for a list of possible states.
|
||||
Number of ports in the DOWN state that have been removed
|
||||
automatically in the cleanup resources phase of the OpenStack
|
||||
driver. Non-zero values indicate an error situation as ports
|
||||
should be cleaned up automatically.
|
||||
|
||||
.. zuul:stat:: nodepool.task.<provider>.<task>
|
||||
:type: counter, timer
|
||||
.. zuul:stat:: nodepool.provider.<provider>.leaked.instances
|
||||
:type: counter
|
||||
|
||||
Number of tasks executed per provider plus the duration of the task
|
||||
execution.
|
||||
Number of nodes not correctly recorded in ZooKeeper that nodepool
|
||||
has cleaned up automatically. Non-zero values indicate an error
|
||||
situation as instances should be cleaned up automatically.
|
||||
|
||||
.. zuul:stat:: nodepool.provider.<provider>.leaked.floatingips
|
||||
:type: counter
|
||||
|
||||
Records the number of unattached floating IPs removed automatically
|
||||
by nodepool. Elevated rates indicate an error situation as
|
||||
floating IPs should be managed automatically.
|
||||
|
||||
|
||||
Launch metrics
|
||||
^^^^^^^^^^^^^^
|
||||
|
||||
.. _nodepool_launch:
|
||||
|
||||
@@ -529,8 +550,8 @@ Nodepool launcher
|
||||
|
||||
See :ref:`nodepool.launch <nodepool_launch>` for a list of possible results.
|
||||
|
||||
OpenStack API stats
|
||||
~~~~~~~~~~~~~~~~~~~
|
||||
OpenStack API metrics
|
||||
^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Low level details on the timing of OpenStack API calls will be logged
|
||||
by ``openstacksdk``. These calls are logged under
|
||||
|
||||
@@ -538,6 +538,10 @@ class OpenStackProvider(Provider):
|
||||
node.pool = meta.get('nodepool_pool_name')
|
||||
node.state = zk.DELETING
|
||||
self._zk.storeNode(node)
|
||||
if self._statsd:
|
||||
key = ('nodepool.provider.%s.leaked.nodes'
|
||||
% self.provider.name)
|
||||
self._statsd.incr(key)
|
||||
|
||||
def filterComputePorts(self, ports):
|
||||
'''
|
||||
@@ -582,7 +586,7 @@ class OpenStackProvider(Provider):
|
||||
port_id, self.provider.name)
|
||||
|
||||
if self._statsd and removed_count:
|
||||
key = 'nodepool.provider.%s.downPorts' % (self.provider.name)
|
||||
key = 'nodepool.provider.%s.leaked.ports' % (self.provider.name)
|
||||
self._statsd.incr(key, removed_count)
|
||||
|
||||
self._last_port_cleanup = time.monotonic()
|
||||
@@ -598,7 +602,17 @@ class OpenStackProvider(Provider):
|
||||
if self.provider.port_cleanup_interval:
|
||||
self.cleanupLeakedPorts()
|
||||
if self.provider.clean_floating_ips:
|
||||
self._client.delete_unattached_floating_ips()
|
||||
did_clean = self._client.delete_unattached_floating_ips()
|
||||
if did_clean:
|
||||
# some openstacksdk versions return True if any floating IP was
|
||||
# cleaned, rather than the count. Just set it to 1 to
|
||||
# indicate something happened.
|
||||
if type(did_clean) == bool:
|
||||
did_clean = 1
|
||||
if self._statsd:
|
||||
key = ('nodepool.provider.%s.leaked.floatingips'
|
||||
% self.provider.name)
|
||||
self._statsd.incr(key, did_clean)
|
||||
|
||||
def getAZs(self):
|
||||
if self.__azs is None:
|
||||
|
||||
@@ -2192,8 +2192,9 @@ class TestLauncher(tests.DBTestCase):
|
||||
# ports not cleaned up yet, retry
|
||||
pass
|
||||
|
||||
self.assertReportedStat('nodepool.provider.fake-provider.downPorts',
|
||||
value='2', kind='c')
|
||||
self.assertReportedStat(
|
||||
'nodepool.provider.fake-provider.leaked.ports',
|
||||
value='2', kind='c')
|
||||
|
||||
def test_deleteRawNode_exception(self):
|
||||
configfile = self.setup_config('node.yaml')
|
||||
|
||||
11
releasenotes/notes/leaked-ports-metric-31c6c4841ea7dac0.yaml
Normal file
11
releasenotes/notes/leaked-ports-metric-31c6c4841ea7dac0.yaml
Normal file
@@ -0,0 +1,11 @@
|
||||
---
|
||||
features:
|
||||
- |
|
||||
There are new metrics for leaked resources:
|
||||
* :zuul:stat:`nodepool.provider.<provider>.leaked.ports`
|
||||
* :zuul:stat:`nodepool.provider.<provider>.leaked.instances`
|
||||
* :zuul:stat:`nodepool.provider.<provider>.leaked.floatingips`
|
||||
upgrade:
|
||||
- |
|
||||
The metric ``nodepool.provider.<provider>.downPorts`` has been renamed
|
||||
to ``nodepool.provider.<provider>.leaked.ports``.
|
||||
Reference in New Issue
Block a user