Merge "vm workload consolidation: use actual host metrics"

commit 9492c2190e
@@ -26,9 +26,15 @@ metric service name plugins comment
 ``memory.resident``          ceilometer_  none
 ``memory``                   ceilometer_  none
 ``disk.root.size``           ceilometer_  none
+``compute.node.cpu.percent`` ceilometer_  none    (optional) need to set the
+                                                  ``compute_monitors`` option
+                                                  to ``cpu.virt_driver`` in the
+                                                  nova.conf.
+``hardware.memory.used``     ceilometer_  SNMP_   (optional)
 ============================ ============ ======= =========================
 
 .. _ceilometer: https://docs.openstack.org/ceilometer/latest/admin/telemetry-measurements.html#openstack-compute
+.. _SNMP: https://docs.openstack.org/ceilometer/latest/admin/telemetry-measurements.html#snmp-based-meters
 
 Cluster data model
 ******************
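Note: as the comment column above says, the ``compute.node.cpu.percent`` meter is only emitted once nova's CPU compute monitor is enabled. As an illustrative sketch (standard nova configuration, not part of this change), the corresponding nova.conf fragment would be:

    [DEFAULT]
    compute_monitors = cpu.virt_driver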
@@ -18,7 +18,10 @@
 # limitations under the License.
 #
 
+import collections
+
 from oslo_log import log
+from oslo_utils import units
 
 from watcher._i18n import _
 from watcher.applier.actions import migration
@@ -67,7 +70,8 @@ class VMWorkloadConsolidation(base.ServerConsolidationBaseStrategy):
 
     AGGREGATE = 'mean'
     DATASOURCE_METRICS = ['instance_ram_allocated', 'instance_cpu_usage',
-                          'instance_ram_usage', 'instance_root_disk_size']
+                          'instance_ram_usage', 'instance_root_disk_size',
+                          'host_cpu_usage', 'host_ram_usage']
 
     MIGRATION = "migrate"
     CHANGE_NOVA_SERVICE_STATE = "change_nova_service_state"
@@ -77,6 +81,11 @@ class VMWorkloadConsolidation(base.ServerConsolidationBaseStrategy):
         self.number_of_migrations = 0
         self.number_of_released_nodes = 0
         self.datasource_instance_data_cache = dict()
+        self.datasource_node_data_cache = dict()
+        # Host metric adjustments that take into account planned
+        # migrations.
+        self.host_metric_delta = collections.defaultdict(
+            lambda: collections.defaultdict(int))
 
     @classmethod
     def get_name(cls):
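Note: the nested ``collections.defaultdict`` above is what lets the delta bookkeeping skip any per-host or per-metric initialization. A minimal standalone sketch (hypothetical hostnames and values, not taken from this change):

    import collections

    # hostname -> metric name -> accumulated delta; missing keys default to 0
    host_metric_delta = collections.defaultdict(
        lambda: collections.defaultdict(int))

    # A planned migration shifts load from source to destination.
    host_metric_delta['compute-1']['cpu'] -= 2    # vcpus leaving the source
    host_metric_delta['compute-1']['ram'] -= 512  # MB leaving the source
    host_metric_delta['compute-2']['cpu'] += 2
    host_metric_delta['compute-2']['ram'] += 512

    print(dict(host_metric_delta['compute-2']))   # {'cpu': 2, 'ram': 512}
    print(host_metric_delta['compute-3']['cpu'])  # 0, no KeyError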
@@ -227,6 +236,18 @@ class VMWorkloadConsolidation(base.ServerConsolidationBaseStrategy):
                 destination_node)
             self.number_of_migrations += 1
 
+            instance_util = self.get_instance_utilization(instance)
+            self.host_metric_delta[source_node.hostname]['cpu'] -= (
+                instance_util['cpu'])
+            # We'll deduct the vm allocated memory.
+            self.host_metric_delta[source_node.hostname]['ram'] -= (
+                instance.memory)
+
+            self.host_metric_delta[destination_node.hostname]['cpu'] += (
+                instance_util['cpu'])
+            self.host_metric_delta[destination_node.hostname]['ram'] += (
+                instance.memory)
+
     def disable_unused_nodes(self):
         """Generate actions for disabling unused nodes.
 
@@ -289,6 +310,21 @@ class VMWorkloadConsolidation(base.ServerConsolidationBaseStrategy):
                 disk=instance_disk_util)
         return self.datasource_instance_data_cache.get(instance.uuid)
 
+    def _get_node_total_utilization(self, node):
+        if node.hostname in self.datasource_node_data_cache:
+            return self.datasource_node_data_cache[node.hostname]
+
+        cpu = self.datasource_backend.get_host_cpu_usage(
+            node, self.period, self.AGGREGATE,
+            self.granularity)
+        ram = self.datasource_backend.get_host_ram_usage(
+            node, self.period, self.AGGREGATE,
+            self.granularity)
+
+        self.datasource_node_data_cache[node.hostname] = dict(
+            cpu=cpu, ram=ram)
+        return self.datasource_node_data_cache[node.hostname]
+
     def get_node_utilization(self, node):
         """Collect cpu, ram and disk utilization statistics of a node.
 
@@ -309,7 +345,33 @@ class VMWorkloadConsolidation(base.ServerConsolidationBaseStrategy):
             LOG.debug("instance utilization: %s %s",
                       instance, instance_util)
 
-        return dict(cpu=node_cpu_util, ram=node_ram_util,
+        total_node_util = self._get_node_total_utilization(node)
+        total_node_cpu_util = total_node_util['cpu'] or 0
+        if total_node_cpu_util:
+            total_node_cpu_util = total_node_cpu_util * node.vcpus / 100
+        # account for planned migrations
+        total_node_cpu_util += self.host_metric_delta[node.hostname]['cpu']
+
+        total_node_ram_util = total_node_util['ram'] or 0
+        if total_node_ram_util:
+            total_node_ram_util /= units.Ki
+        total_node_ram_util += self.host_metric_delta[node.hostname]['ram']
+
+        LOG.debug(
+            "node utilization: %s. "
+            "total instance cpu: %s, "
+            "total instance ram: %s, "
+            "total instance disk: %s, "
+            "total host cpu: %s, "
+            "total host ram: %s, "
+            "node delta usage: %s.",
+            node,
+            node_cpu_util, node_ram_util, node_disk_util,
+            total_node_cpu_util, total_node_ram_util,
+            self.host_metric_delta[node.hostname])
+
+        return dict(cpu=max(node_cpu_util, total_node_cpu_util),
+                    ram=max(node_ram_util, total_node_ram_util),
                     disk=node_disk_util)
 
     def get_node_capacity(self, node):
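Note: the datasource reports host CPU as a percentage of total capacity and host RAM in KB, while the per-instance figures are kept in vcpus and MB, hence the conversions above. A small worked sketch of the same arithmetic (hypothetical 8-vCPU host, values chosen for illustration):

    from oslo_utils import units

    vcpus = 8
    host_cpu_percent = 30.0       # datasource value: percent of total CPU
    host_ram_kb = 512 * units.Ki  # datasource value: KB

    cpu_vcpus_used = host_cpu_percent * vcpus / 100  # 2.4 vcpus
    ram_mb_used = host_ram_kb / units.Ki             # 512.0 MB

The strategy then takes the maximum of the host-reported figures and the per-instance sums, so a value underreported on either side is less likely to produce an overcommitted placement.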
@@ -80,7 +80,7 @@ class FakerModelCollector(base.BaseClusterDataModelCollector):
         return self.load_model('scenario_4_with_metrics.xml')
 
 
-class FakeCeilometerMetrics(object):
+class FakeGnocchiMetrics(object):
     def __init__(self, model):
         self.model = model
 
@@ -90,6 +90,9 @@ class FakeCeilometerMetrics(object):
         if meter_name == 'host_cpu_usage':
             return self.get_compute_node_cpu_util(
                 resource, period, aggregate, granularity)
+        elif meter_name == 'host_ram_usage':
+            return self.get_compute_node_ram_util(
+                resource, period, aggregate, granularity)
         elif meter_name == 'instance_cpu_usage':
             return self.get_instance_cpu_util(
                 resource, period, aggregate, granularity)
@@ -110,18 +113,28 @@ class FakeCeilometerMetrics(object):
         Returns relative node CPU utilization <0, 100>.
         :param r_id: resource id
         """
-        node_uuid = '%s_%s' % (resource.uuid, resource.hostname)
-        node = self.model.get_node_by_uuid(node_uuid)
+        node = self.model.get_node_by_uuid(resource.uuid)
         instances = self.model.get_node_instances(node)
         util_sum = 0.0
-        for instance_uuid in instances:
-            instance = self.model.get_instance_by_uuid(instance_uuid)
+        for instance in instances:
             total_cpu_util = instance.vcpus * self.get_instance_cpu_util(
-                instance.uuid)
+                instance, period, aggregate, granularity)
             util_sum += total_cpu_util / 100.0
         util_sum /= node.vcpus
         return util_sum * 100.0
 
+    def get_compute_node_ram_util(self, resource, period, aggregate,
+                                  granularity):
+        # Returns mock host ram usage in KB based on the allocated
+        # instances.
+        node = self.model.get_node_by_uuid(resource.uuid)
+        instances = self.model.get_node_instances(node)
+        util_sum = 0.0
+        for instance in instances:
+            util_sum += self.get_instance_ram_util(
+                instance, period, aggregate, granularity)
+        return util_sum * 1024
+
     @staticmethod
     def get_instance_cpu_util(resource, period, aggregate,
                               granularity):
@@ -171,93 +184,7 @@ class FakeCeilometerMetrics(object):
         return instance_disk_util[str(resource.uuid)]
 
 
-class FakeGnocchiMetrics(object):
-    def __init__(self, model):
-        self.model = model
-
-    def mock_get_statistics(self, resource=None, resource_type=None,
-                            meter_name=None, period=300, aggregate='mean',
-                            granularity=300):
-        if meter_name == 'host_cpu_usage':
-            return self.get_compute_node_cpu_util(
-                resource, period, aggregate, granularity)
-        elif meter_name == 'instance_cpu_usage':
-            return self.get_instance_cpu_util(
-                resource, period, aggregate, granularity)
-        elif meter_name == 'instance_ram_usage':
-            return self.get_instance_ram_util(
-                resource, period, aggregate, granularity)
-        elif meter_name == 'instance_root_disk_size':
-            return self.get_instance_disk_root_size(
-                resource, period, aggregate, granularity)
-
-    def get_compute_node_cpu_util(self, resource, period, aggregate,
-                                  granularity):
-        """Calculates node utilization dynamicaly.
-
-        node CPU utilization should consider
-        and corelate with actual instance-node mappings
-        provided within a cluster model.
-        Returns relative node CPU utilization <0, 100>.
-
-        :param r_id: resource id
-        """
-        node_uuid = "%s_%s" % (resource.uuid, resource.hostname)
-        node = self.model.get_node_by_uuid(node_uuid)
-        instances = self.model.get_node_instances(node)
-        util_sum = 0.0
-        for instance_uuid in instances:
-            instance = self.model.get_instance_by_uuid(instance_uuid)
-            total_cpu_util = instance.vcpus * self.get_instance_cpu_util(
-                instance.uuid)
-            util_sum += total_cpu_util / 100.0
-        util_sum /= node.vcpus
-        return util_sum * 100.0
-
-    @staticmethod
-    def get_instance_cpu_util(resource, period, aggregate,
-                              granularity):
-        instance_cpu_util = dict()
-        instance_cpu_util['INSTANCE_0'] = 10
-        instance_cpu_util['INSTANCE_1'] = 30
-        instance_cpu_util['INSTANCE_2'] = 60
-        instance_cpu_util['INSTANCE_3'] = 20
-        instance_cpu_util['INSTANCE_4'] = 40
-        instance_cpu_util['INSTANCE_5'] = 50
-        instance_cpu_util['INSTANCE_6'] = 100
-        instance_cpu_util['INSTANCE_7'] = 100
-        instance_cpu_util['INSTANCE_8'] = 100
-        instance_cpu_util['INSTANCE_9'] = 100
-        return instance_cpu_util[str(resource.uuid)]
-
-    @staticmethod
-    def get_instance_ram_util(resource, period, aggregate,
-                              granularity):
-        instance_ram_util = dict()
-        instance_ram_util['INSTANCE_0'] = 1
-        instance_ram_util['INSTANCE_1'] = 2
-        instance_ram_util['INSTANCE_2'] = 4
-        instance_ram_util['INSTANCE_3'] = 8
-        instance_ram_util['INSTANCE_4'] = 3
-        instance_ram_util['INSTANCE_5'] = 2
-        instance_ram_util['INSTANCE_6'] = 1
-        instance_ram_util['INSTANCE_7'] = 2
-        instance_ram_util['INSTANCE_8'] = 4
-        instance_ram_util['INSTANCE_9'] = 8
-        return instance_ram_util[str(resource.uuid)]
-
-    @staticmethod
-    def get_instance_disk_root_size(resource, period, aggregate,
-                                    granularity):
-        instance_disk_util = dict()
-        instance_disk_util['INSTANCE_0'] = 10
-        instance_disk_util['INSTANCE_1'] = 15
-        instance_disk_util['INSTANCE_2'] = 30
-        instance_disk_util['INSTANCE_3'] = 35
-        instance_disk_util['INSTANCE_4'] = 20
-        instance_disk_util['INSTANCE_5'] = 25
-        instance_disk_util['INSTANCE_6'] = 25
-        instance_disk_util['INSTANCE_7'] = 25
-        instance_disk_util['INSTANCE_8'] = 25
-        instance_disk_util['INSTANCE_9'] = 25
-        return instance_disk_util[str(resource.uuid)]
+# TODO(lpetrut): consider dropping Ceilometer support, it was deprecated
+# in Ocata.
+class FakeCeilometerMetrics(FakeGnocchiMetrics):
+    pass
@@ -64,6 +64,10 @@ class TestVMWorkloadConsolidation(TestBaseStrategy):
                 self.fake_metrics.get_instance_ram_util),
             get_instance_root_disk_size=(
                 self.fake_metrics.get_instance_disk_root_size),
+            get_host_cpu_usage=(
+                self.fake_metrics.get_compute_node_cpu_util),
+            get_host_ram_usage=(
+                self.fake_metrics.get_compute_node_ram_util)
         )
         self.strategy = strategies.VMWorkloadConsolidation(
             config=mock.Mock(datasources=self.datasource))
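Note: the fake metric getters are attached to the mock datasource as plain callables, so the strategy exercises the real (resource, period, aggregate, granularity) call signature. A minimal sketch of the same wiring pattern (hypothetical return values):

    from unittest import mock

    datasource = mock.Mock(
        get_host_cpu_usage=lambda res, period, aggr, gran: 30.0,        # percent
        get_host_ram_usage=lambda res, period, aggr, gran: 512 * 1024,  # KB
    )

    print(datasource.get_host_cpu_usage(None, 300, 'mean', 300))  # 30.0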
@@ -88,6 +92,71 @@ class TestVMWorkloadConsolidation(TestBaseStrategy):
             node_util,
             self.strategy.get_node_utilization(node_0))
 
+    def test_get_node_utilization_using_host_metrics(self):
+        model = self.fake_c_cluster.generate_scenario_1()
+        self.m_c_model.return_value = model
+        self.fake_metrics.model = model
+        node_0 = model.get_node_by_uuid("Node_0")
+
+        # "get_node_utilization" is expected to return the maximum
+        # between the host metrics and the sum of the instance metrics.
+        data_src = self.m_datasource.return_value
+        cpu_usage = 30
+        data_src.get_host_cpu_usage = mock.Mock(return_value=cpu_usage)
+        data_src.get_host_ram_usage = mock.Mock(return_value=512 * 1024)
+
+        exp_cpu_usage = cpu_usage * node_0.vcpus / 100
+        exp_node_util = dict(cpu=exp_cpu_usage, ram=512, disk=10)
+        self.assertEqual(
+            exp_node_util,
+            self.strategy.get_node_utilization(node_0))
+
+    def test_get_node_utilization_after_migrations(self):
+        model = self.fake_c_cluster.generate_scenario_1()
+        self.m_c_model.return_value = model
+        self.fake_metrics.model = model
+        node_0 = model.get_node_by_uuid("Node_0")
+        node_1 = model.get_node_by_uuid("Node_1")
+
+        data_src = self.m_datasource.return_value
+        cpu_usage = 30
+        host_ram_usage_mb = 512
+        data_src.get_host_cpu_usage = mock.Mock(return_value=cpu_usage)
+        data_src.get_host_ram_usage = mock.Mock(
+            return_value=host_ram_usage_mb * 1024)
+
+        instance_uuid = 'INSTANCE_0'
+        instance = model.get_instance_by_uuid(instance_uuid)
+        self.strategy.add_migration(instance, node_0, node_1)
+
+        instance_util = self.strategy.get_instance_utilization(instance)
+
+        # Ensure that we take into account planned migrations when
+        # determining node utilization
+        exp_node_0_cpu_usage = (
+            cpu_usage * node_0.vcpus) / 100 - instance_util['cpu']
+        exp_node_1_cpu_usage = (
+            cpu_usage * node_1.vcpus) / 100 + instance_util['cpu']
+
+        exp_node_0_ram_usage = host_ram_usage_mb - instance.memory
+        exp_node_1_ram_usage = host_ram_usage_mb + instance.memory
+
+        exp_node_0_util = dict(
+            cpu=exp_node_0_cpu_usage,
+            ram=exp_node_0_ram_usage,
+            disk=0)
+        exp_node_1_util = dict(
+            cpu=exp_node_1_cpu_usage,
+            ram=exp_node_1_ram_usage,
+            disk=25)
+
+        self.assertEqual(
+            exp_node_0_util,
+            self.strategy.get_node_utilization(node_0))
+        self.assertEqual(
+            exp_node_1_util,
+            self.strategy.get_node_utilization(node_1))
+
     def test_get_node_capacity(self):
         model = self.fake_c_cluster.generate_scenario_1()
         self.m_c_model.return_value = model