Merge "vm workload consolidation: use actual host metrics"
commit 9492c2190e
@@ -26,9 +26,15 @@ metric service name plugins comment
 ``memory.resident``          ceilometer_  none
 ``memory``                   ceilometer_  none
 ``disk.root.size``           ceilometer_  none
+``compute.node.cpu.percent`` ceilometer_  none    (optional) need to set the
+                                                  ``compute_monitors`` option
+                                                  to ``cpu.virt_driver`` in the
+                                                  nova.conf.
+``hardware.memory.used``     ceilometer_  SNMP_   (optional)
 ============================ ============ ======= =========================
 
 .. _ceilometer: https://docs.openstack.org/ceilometer/latest/admin/telemetry-measurements.html#openstack-compute
+.. _SNMP: https://docs.openstack.org/ceilometer/latest/admin/telemetry-measurements.html#snmp-based-meters
 
 Cluster data model
 ******************
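The optional ``compute.node.cpu.percent`` row above depends on the nova CPU monitor being enabled on the compute nodes; a minimal nova.conf sketch (the exact deployment layout is illustrative)::

    [DEFAULT]
    # Publish per-node CPU usage so ceilometer can expose
    # compute.node.cpu.percent.
    compute_monitors = cpu.virt_driver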
@@ -18,7 +18,10 @@
 # limitations under the License.
 #
 
+import collections
+
 from oslo_log import log
+import oslo_utils
 
 from watcher._i18n import _
 from watcher.applier.actions import migration
@@ -67,7 +70,8 @@ class VMWorkloadConsolidation(base.ServerConsolidationBaseStrategy):
 
     AGGREGATE = 'mean'
     DATASOURCE_METRICS = ['instance_ram_allocated', 'instance_cpu_usage',
-                          'instance_ram_usage', 'instance_root_disk_size']
+                          'instance_ram_usage', 'instance_root_disk_size',
+                          'host_cpu_usage', 'host_ram_usage']
 
     MIGRATION = "migrate"
     CHANGE_NOVA_SERVICE_STATE = "change_nova_service_state"
@@ -77,6 +81,11 @@ class VMWorkloadConsolidation(base.ServerConsolidationBaseStrategy):
         self.number_of_migrations = 0
         self.number_of_released_nodes = 0
         self.datasource_instance_data_cache = dict()
+        self.datasource_node_data_cache = dict()
+        # Host metric adjustments that take into account planned
+        # migrations.
+        self.host_metric_delta = collections.defaultdict(
+            lambda: collections.defaultdict(int))
 
     @classmethod
     def get_name(cls):
@@ -227,6 +236,18 @@ class VMWorkloadConsolidation(base.ServerConsolidationBaseStrategy):
             destination_node)
         self.number_of_migrations += 1
 
+        instance_util = self.get_instance_utilization(instance)
+        self.host_metric_delta[source_node.hostname]['cpu'] -= (
+            instance_util['cpu'])
+        # We'll deduct the vm allocated memory.
+        self.host_metric_delta[source_node.hostname]['ram'] -= (
+            instance.memory)
+
+        self.host_metric_delta[destination_node.hostname]['cpu'] += (
+            instance_util['cpu'])
+        self.host_metric_delta[destination_node.hostname]['ram'] += (
+            instance.memory)
+
     def disable_unused_nodes(self):
         """Generate actions for disabling unused nodes.
 
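The ``host_metric_delta`` updates above keep a running per-host adjustment for the migrations that have already been planned. A standalone sketch (host names and numbers are made up) of how the nested ``defaultdict`` from ``__init__`` accumulates those deltas::

    import collections

    # No per-host initialization is needed: missing hosts/metrics start at 0.
    host_metric_delta = collections.defaultdict(
        lambda: collections.defaultdict(int))

    # Planning to move an instance using ~0.8 vCPU and 2048 MB of RAM
    # from "src-host" to "dst-host":
    host_metric_delta['src-host']['cpu'] -= 0.8
    host_metric_delta['src-host']['ram'] -= 2048
    host_metric_delta['dst-host']['cpu'] += 0.8
    host_metric_delta['dst-host']['ram'] += 2048

    print(dict(host_metric_delta['src-host']))  # {'cpu': -0.8, 'ram': -2048}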
@@ -289,6 +310,21 @@ class VMWorkloadConsolidation(base.ServerConsolidationBaseStrategy):
                 disk=instance_disk_util)
         return self.datasource_instance_data_cache.get(instance.uuid)
 
+    def _get_node_total_utilization(self, node):
+        if node.hostname in self.datasource_node_data_cache:
+            return self.datasource_node_data_cache[node.hostname]
+
+        cpu = self.datasource_backend.get_host_cpu_usage(
+            node, self.period, self.AGGREGATE,
+            self.granularity)
+        ram = self.datasource_backend.get_host_ram_usage(
+            node, self.period, self.AGGREGATE,
+            self.granularity)
+
+        self.datasource_node_data_cache[node.hostname] = dict(
+            cpu=cpu, ram=ram)
+        return self.datasource_node_data_cache[node.hostname]
+
     def get_node_utilization(self, node):
         """Collect cpu, ram and disk utilization statistics of a node.
 
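``_get_node_total_utilization`` caches one datasource lookup per host. Judging by the conversions applied in ``get_node_utilization`` below, the cached values are assumed to be the host CPU usage as a percentage and the host RAM usage in KiB; a hypothetical cache entry for a host running at 30% CPU with 8 GiB of RAM in use::

    {'cpu': 30.0, 'ram': 8388608.0}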
@@ -309,7 +345,33 @@ class VMWorkloadConsolidation(base.ServerConsolidationBaseStrategy):
             LOG.debug("instance utilization: %s %s",
                       instance, instance_util)
 
-        return dict(cpu=node_cpu_util, ram=node_ram_util,
+        total_node_util = self._get_node_total_utilization(node)
+        total_node_cpu_util = total_node_util['cpu'] or 0
+        if total_node_cpu_util:
+            total_node_cpu_util = total_node_cpu_util * node.vcpus / 100
+        # account for planned migrations
+        total_node_cpu_util += self.host_metric_delta[node.hostname]['cpu']
+
+        total_node_ram_util = total_node_util['ram'] or 0
+        if total_node_ram_util:
+            total_node_ram_util /= oslo_utils.units.Ki
+        total_node_ram_util += self.host_metric_delta[node.hostname]['ram']
+
+        LOG.debug(
+            "node utilization: %s. "
+            "total instance cpu: %s, "
+            "total instance ram: %s, "
+            "total instance disk: %s, "
+            "total host cpu: %s, "
+            "total host ram: %s, "
+            "node delta usage: %s.",
+            node,
+            node_cpu_util, node_ram_util, node_disk_util,
+            total_node_cpu_util, total_node_ram_util,
+            self.host_metric_delta[node.hostname])
+
+        return dict(cpu=max(node_cpu_util, total_node_cpu_util),
+                    ram=max(node_ram_util, total_node_ram_util),
                     disk=node_disk_util)
 
     def get_node_capacity(self, node):
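The reworked ``get_node_utilization`` converts the host-reported figures into the same units as the per-instance sums (vCPUs and MB), applies the planned-migration deltas, and keeps whichever estimate is larger. A small arithmetic sketch, with made-up numbers::

    # A 16-vCPU host reporting 30% CPU usage and 8388608 KiB (8 GiB) of used
    # RAM, with one planned outgoing migration freeing ~0.8 vCPU and 2048 MB.
    vcpus = 16
    host_cpu_percent = 30.0      # datasource host_cpu_usage
    host_ram_kib = 8388608.0     # datasource host_ram_usage
    delta_cpu, delta_ram = -0.8, -2048

    total_node_cpu = host_cpu_percent * vcpus / 100 + delta_cpu   # 4.0 vCPUs
    total_node_ram = host_ram_kib / 1024 + delta_ram              # 6144 MB

    # The sum of the per-instance metrics is kept as a lower bound.
    instance_cpu_sum, instance_ram_sum = 3.5, 5120
    node_cpu = max(instance_cpu_sum, total_node_cpu)   # 4.0
    node_ram = max(instance_ram_sum, total_node_ram)   # 6144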
@@ -80,7 +80,7 @@ class FakerModelCollector(base.BaseClusterDataModelCollector):
         return self.load_model('scenario_4_with_metrics.xml')
 
 
-class FakeCeilometerMetrics(object):
+class FakeGnocchiMetrics(object):
     def __init__(self, model):
         self.model = model
 
@@ -90,6 +90,9 @@ class FakeCeilometerMetrics(object):
         if meter_name == 'host_cpu_usage':
             return self.get_compute_node_cpu_util(
                 resource, period, aggregate, granularity)
+        elif meter_name == 'host_ram_usage':
+            return self.get_compute_node_ram_util(
+                resource, period, aggregate, granularity)
         elif meter_name == 'instance_cpu_usage':
             return self.get_instance_cpu_util(
                 resource, period, aggregate, granularity)
@@ -110,109 +113,27 @@ class FakeCeilometerMetrics(object):
         Returns relative node CPU utilization <0, 100>.
         :param r_id: resource id
         """
-        node_uuid = '%s_%s' % (resource.uuid, resource.hostname)
-        node = self.model.get_node_by_uuid(node_uuid)
+        node = self.model.get_node_by_uuid(resource.uuid)
         instances = self.model.get_node_instances(node)
         util_sum = 0.0
-        for instance_uuid in instances:
-            instance = self.model.get_instance_by_uuid(instance_uuid)
+        for instance in instances:
             total_cpu_util = instance.vcpus * self.get_instance_cpu_util(
-                instance.uuid)
+                instance, period, aggregate, granularity)
             util_sum += total_cpu_util / 100.0
         util_sum /= node.vcpus
         return util_sum * 100.0
 
-    @staticmethod
-    def get_instance_cpu_util(resource, period, aggregate,
-                              granularity):
-        instance_cpu_util = dict()
-        instance_cpu_util['INSTANCE_0'] = 10
-        instance_cpu_util['INSTANCE_1'] = 30
-        instance_cpu_util['INSTANCE_2'] = 60
-        instance_cpu_util['INSTANCE_3'] = 20
-        instance_cpu_util['INSTANCE_4'] = 40
-        instance_cpu_util['INSTANCE_5'] = 50
-        instance_cpu_util['INSTANCE_6'] = 100
-        instance_cpu_util['INSTANCE_7'] = 100
-        instance_cpu_util['INSTANCE_8'] = 100
-        instance_cpu_util['INSTANCE_9'] = 100
-        return instance_cpu_util[str(resource.uuid)]
-
-    @staticmethod
-    def get_instance_ram_util(resource, period, aggregate,
-                              granularity):
-        instance_ram_util = dict()
-        instance_ram_util['INSTANCE_0'] = 1
-        instance_ram_util['INSTANCE_1'] = 2
-        instance_ram_util['INSTANCE_2'] = 4
-        instance_ram_util['INSTANCE_3'] = 8
-        instance_ram_util['INSTANCE_4'] = 3
-        instance_ram_util['INSTANCE_5'] = 2
-        instance_ram_util['INSTANCE_6'] = 1
-        instance_ram_util['INSTANCE_7'] = 2
-        instance_ram_util['INSTANCE_8'] = 4
-        instance_ram_util['INSTANCE_9'] = 8
-        return instance_ram_util[str(resource.uuid)]
-
-    @staticmethod
-    def get_instance_disk_root_size(resource, period, aggregate,
-                                    granularity):
-        instance_disk_util = dict()
-        instance_disk_util['INSTANCE_0'] = 10
-        instance_disk_util['INSTANCE_1'] = 15
-        instance_disk_util['INSTANCE_2'] = 30
-        instance_disk_util['INSTANCE_3'] = 35
-        instance_disk_util['INSTANCE_4'] = 20
-        instance_disk_util['INSTANCE_5'] = 25
-        instance_disk_util['INSTANCE_6'] = 25
-        instance_disk_util['INSTANCE_7'] = 25
-        instance_disk_util['INSTANCE_8'] = 25
-        instance_disk_util['INSTANCE_9'] = 25
-        return instance_disk_util[str(resource.uuid)]
-
-
-class FakeGnocchiMetrics(object):
-    def __init__(self, model):
-        self.model = model
-
-    def mock_get_statistics(self, resource=None, resource_type=None,
-                            meter_name=None, period=300, aggregate='mean',
-                            granularity=300):
-        if meter_name == 'host_cpu_usage':
-            return self.get_compute_node_cpu_util(
-                resource, period, aggregate, granularity)
-        elif meter_name == 'instance_cpu_usage':
-            return self.get_instance_cpu_util(
-                resource, period, aggregate, granularity)
-        elif meter_name == 'instance_ram_usage':
-            return self.get_instance_ram_util(
-                resource, period, aggregate, granularity)
-        elif meter_name == 'instance_root_disk_size':
-            return self.get_instance_disk_root_size(
-                resource, period, aggregate, granularity)
-
-    def get_compute_node_cpu_util(self, resource, period, aggregate,
+    def get_compute_node_ram_util(self, resource, period, aggregate,
                                   granularity):
-        """Calculates node utilization dynamicaly.
-
-        node CPU utilization should consider
-        and corelate with actual instance-node mappings
-        provided within a cluster model.
-        Returns relative node CPU utilization <0, 100>.
-
-        :param r_id: resource id
-        """
-        node_uuid = "%s_%s" % (resource.uuid, resource.hostname)
-        node = self.model.get_node_by_uuid(node_uuid)
+        # Returns mock host ram usage in KB based on the allocated
+        # instances.
+        node = self.model.get_node_by_uuid(resource.uuid)
         instances = self.model.get_node_instances(node)
         util_sum = 0.0
-        for instance_uuid in instances:
-            instance = self.model.get_instance_by_uuid(instance_uuid)
-            total_cpu_util = instance.vcpus * self.get_instance_cpu_util(
-                instance.uuid)
-            util_sum += total_cpu_util / 100.0
-        util_sum /= node.vcpus
-        return util_sum * 100.0
+        for instance in instances:
+            util_sum += self.get_instance_ram_util(
+                instance, period, aggregate, granularity)
+        return util_sum / 1024
 
     @staticmethod
     def get_instance_cpu_util(resource, period, aggregate,
@@ -261,3 +182,9 @@ class FakeGnocchiMetrics(object):
         instance_disk_util['INSTANCE_8'] = 25
         instance_disk_util['INSTANCE_9'] = 25
         return instance_disk_util[str(resource.uuid)]
+
+
+# TODO(lpetrut): consider dropping Ceilometer support, it was deprecated
+# in Ocata.
+class FakeCeilometerMetrics(FakeGnocchiMetrics):
+    pass
@@ -64,6 +64,10 @@ class TestVMWorkloadConsolidation(TestBaseStrategy):
                 self.fake_metrics.get_instance_ram_util),
             get_instance_root_disk_size=(
                 self.fake_metrics.get_instance_disk_root_size),
+            get_host_cpu_usage=(
+                self.fake_metrics.get_compute_node_cpu_util),
+            get_host_ram_usage=(
+                self.fake_metrics.get_compute_node_ram_util)
         )
         self.strategy = strategies.VMWorkloadConsolidation(
             config=mock.Mock(datasources=self.datasource))
@@ -88,6 +92,71 @@ class TestVMWorkloadConsolidation(TestBaseStrategy):
             node_util,
             self.strategy.get_node_utilization(node_0))
 
+    def test_get_node_utilization_using_host_metrics(self):
+        model = self.fake_c_cluster.generate_scenario_1()
+        self.m_c_model.return_value = model
+        self.fake_metrics.model = model
+        node_0 = model.get_node_by_uuid("Node_0")
+
+        # "get_node_utilization" is expected to return the maximum
+        # between the host metrics and the sum of the instance metrics.
+        data_src = self.m_datasource.return_value
+        cpu_usage = 30
+        data_src.get_host_cpu_usage = mock.Mock(return_value=cpu_usage)
+        data_src.get_host_ram_usage = mock.Mock(return_value=512 * 1024)
+
+        exp_cpu_usage = cpu_usage * node_0.vcpus / 100
+        exp_node_util = dict(cpu=exp_cpu_usage, ram=512, disk=10)
+        self.assertEqual(
+            exp_node_util,
+            self.strategy.get_node_utilization(node_0))
+
+    def test_get_node_utilization_after_migrations(self):
+        model = self.fake_c_cluster.generate_scenario_1()
+        self.m_c_model.return_value = model
+        self.fake_metrics.model = model
+        node_0 = model.get_node_by_uuid("Node_0")
+        node_1 = model.get_node_by_uuid("Node_1")
+
+        data_src = self.m_datasource.return_value
+        cpu_usage = 30
+        host_ram_usage_mb = 512
+        data_src.get_host_cpu_usage = mock.Mock(return_value=cpu_usage)
+        data_src.get_host_ram_usage = mock.Mock(
+            return_value=host_ram_usage_mb * 1024)
+
+        instance_uuid = 'INSTANCE_0'
+        instance = model.get_instance_by_uuid(instance_uuid)
+        self.strategy.add_migration(instance, node_0, node_1)
+
+        instance_util = self.strategy.get_instance_utilization(instance)
+
+        # Ensure that we take into account planned migrations when
+        # determining node utilization
+        exp_node_0_cpu_usage = (
+            cpu_usage * node_0.vcpus) / 100 - instance_util['cpu']
+        exp_node_1_cpu_usage = (
+            cpu_usage * node_1.vcpus) / 100 + instance_util['cpu']
+
+        exp_node_0_ram_usage = host_ram_usage_mb - instance.memory
+        exp_node_1_ram_usage = host_ram_usage_mb + instance.memory
+
+        exp_node_0_util = dict(
+            cpu=exp_node_0_cpu_usage,
+            ram=exp_node_0_ram_usage,
+            disk=0)
+        exp_node_1_util = dict(
+            cpu=exp_node_1_cpu_usage,
+            ram=exp_node_1_ram_usage,
+            disk=25)
+
+        self.assertEqual(
+            exp_node_0_util,
+            self.strategy.get_node_utilization(node_0))
+        self.assertEqual(
+            exp_node_1_util,
+            self.strategy.get_node_utilization(node_1))
+
     def test_get_node_capacity(self):
         model = self.fake_c_cluster.generate_scenario_1()
         self.m_c_model.return_value = model
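For a concrete sense of the expectations above (illustrative numbers only, not the actual ``scenario_1`` values): with ``cpu_usage = 30`` and a 4-vCPU source node, the host-reported usage is 30 * 4 / 100 = 1.2 vCPUs; if the migrated instance accounts for 0.4 vCPU and 128 MB, the source node expectation becomes 1.2 - 0.4 = 0.8 vCPU and 512 - 128 = 384 MB, while the destination node gains the same amounts on top of its own host-reported figures.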