Merge "workload balance base on cpu or ram util"
This commit is contained in:
commit
e5c3df0c2f
@ -25,6 +25,7 @@ The *workload_balance* strategy requires the following metrics:
|
||||
metric service name plugins comment
|
||||
======================= ============ ======= =======
|
||||
``cpu_util`` ceilometer_ none
|
||||
``memory.resident`` ceilometer_ none
|
||||
======================= ============ ======= =======
|
||||
|
||||
.. _ceilometer: http://docs.openstack.org/admin-guide/telemetry-measurements.html#openstack-compute
|
||||
@ -66,6 +67,9 @@ Strategy parameters are:
|
||||
============== ====== ============= ====================================
|
||||
parameter type default Value description
|
||||
============== ====== ============= ====================================
|
||||
``metrics`` String 'cpu_util' Workload balance base on cpu or ram
|
||||
utilization. choice: ['cpu_util',
|
||||
'memory.resident']
|
||||
``threshold`` Number 25.0 Workload threshold for migration
|
||||
``period`` Number 300 Aggregate time period of ceilometer
|
||||
============== ====== ============= ====================================
|
||||
@ -90,7 +94,7 @@ How to use it ?
|
||||
at1 workload_balancing --strategy workload_balance
|
||||
|
||||
$ openstack optimize audit create -a at1 -p threshold=26.0 \
|
||||
-p period=310
|
||||
-p period=310 -p metrics=cpu_util
|
||||
|
||||
External Links
|
||||
--------------
|
||||
|
@ -0,0 +1,7 @@
|
||||
---
|
||||
features:
|
||||
- Existing workload_balance strategy based on
|
||||
the VM workloads of CPU. This feature improves
|
||||
the strategy. By the input parameter "metrics",
|
||||
it makes decision to migrate a VM base on CPU
|
||||
or memory utilization.
|
@ -22,7 +22,7 @@
|
||||
*Description*
|
||||
|
||||
This strategy migrates a VM based on the VM workload of the hosts.
|
||||
It makes decision to migrate a workload whenever a host's CPU
|
||||
It makes decision to migrate a workload whenever a host's CPU or RAM
|
||||
utilization % is higher than the specified threshold. The VM to
|
||||
be moved should make the host close to average workload of all
|
||||
hosts nodes.
|
||||
@ -32,7 +32,7 @@ hosts nodes.
|
||||
* Hardware: compute node should use the same physical CPUs
|
||||
* Software: Ceilometer component ceilometer-agent-compute
|
||||
running in each compute node, and Ceilometer API can
|
||||
report such telemetry "cpu_util" successfully.
|
||||
report such telemetry "cpu_util" and "memory.resident" successfully.
|
||||
* You must have at least 2 physical compute nodes to run
|
||||
this strategy.
|
||||
|
||||
@ -69,16 +69,16 @@ class WorkloadBalance(base.WorkloadStabilizationBaseStrategy):
|
||||
|
||||
It is a migration strategy based on the VM workload of physical
|
||||
servers. It generates solutions to move a workload whenever a server's
|
||||
CPU utilization % is higher than the specified threshold.
|
||||
CPU or RAM utilization % is higher than the specified threshold.
|
||||
The VM to be moved should make the host close to average workload
|
||||
of all compute nodes.
|
||||
|
||||
*Requirements*
|
||||
|
||||
* Hardware: compute node should use the same physical CPUs
|
||||
* Hardware: compute node should use the same physical CPUs/RAMs
|
||||
* Software: Ceilometer component ceilometer-agent-compute running
|
||||
in each compute node, and Ceilometer API can report such telemetry
|
||||
"cpu_util" successfully.
|
||||
"cpu_util" and "memory.resident" successfully.
|
||||
* You must have at least 2 physical compute nodes to run this strategy
|
||||
|
||||
*Limitations*
|
||||
@ -91,8 +91,12 @@ class WorkloadBalance(base.WorkloadStabilizationBaseStrategy):
|
||||
"""
|
||||
|
||||
# The meter to report CPU utilization % of VM in ceilometer
|
||||
METER_NAME = "cpu_util"
|
||||
# Unit: %, value range is [0 , 100]
|
||||
CPU_METER_NAME = "cpu_util"
|
||||
|
||||
# The meter to report memory resident of VM in ceilometer
|
||||
# Unit: MB
|
||||
MEM_METER_NAME = "memory.resident"
|
||||
|
||||
MIGRATION = "migrate"
|
||||
|
||||
@ -104,9 +108,9 @@ class WorkloadBalance(base.WorkloadStabilizationBaseStrategy):
|
||||
:param osc: :py:class:`~.OpenStackClients` instance
|
||||
"""
|
||||
super(WorkloadBalance, self).__init__(config, osc)
|
||||
# the migration plan will be triggered when the CPU utilization %
|
||||
# reaches threshold
|
||||
self._meter = self.METER_NAME
|
||||
# the migration plan will be triggered when the CPU or RAM
|
||||
# utilization % reaches threshold
|
||||
self._meter = None
|
||||
self._ceilometer = None
|
||||
self._gnocchi = None
|
||||
|
||||
@ -151,6 +155,13 @@ class WorkloadBalance(base.WorkloadStabilizationBaseStrategy):
|
||||
# Mandatory default setting for each element
|
||||
return {
|
||||
"properties": {
|
||||
"metrics": {
|
||||
"description": "Workload balance based on metrics: "
|
||||
"cpu or ram utilization",
|
||||
"type": "string",
|
||||
"choice": ["cpu_util", "memory.resident"],
|
||||
"default": "cpu_util"
|
||||
},
|
||||
"threshold": {
|
||||
"description": "workload threshold for migration",
|
||||
"type": "number",
|
||||
@ -251,18 +262,21 @@ class WorkloadBalance(base.WorkloadStabilizationBaseStrategy):
|
||||
cores_available = host.vcpus - cores_used
|
||||
disk_available = host.disk - disk_used
|
||||
mem_available = host.memory - mem_used
|
||||
if (
|
||||
cores_available >= required_cores and
|
||||
disk_available >= required_disk and
|
||||
if (cores_available >= required_cores and
|
||||
mem_available >= required_mem and
|
||||
disk_available >= required_disk):
|
||||
if (self._meter == self.CPU_METER_NAME and
|
||||
((src_instance_workload + workload) <
|
||||
self.threshold / 100 * host.vcpus)
|
||||
):
|
||||
self.threshold / 100 * host.vcpus)):
|
||||
destination_hosts.append(instance_data)
|
||||
if (self._meter == self.MEM_METER_NAME and
|
||||
((src_instance_workload + workload) <
|
||||
self.threshold / 100 * host.memory)):
|
||||
destination_hosts.append(instance_data)
|
||||
|
||||
return destination_hosts
|
||||
|
||||
def group_hosts_by_cpu_util(self):
|
||||
def group_hosts_by_cpu_or_ram_util(self):
|
||||
"""Calculate the workloads of each node
|
||||
|
||||
try to find out the nodes which have reached threshold
|
||||
@ -286,10 +300,10 @@ class WorkloadBalance(base.WorkloadStabilizationBaseStrategy):
|
||||
instances = self.compute_model.get_node_instances(node)
|
||||
node_workload = 0.0
|
||||
for instance in instances:
|
||||
cpu_util = None
|
||||
instance_util = None
|
||||
try:
|
||||
if self.config.datasource == "ceilometer":
|
||||
cpu_util = self.ceilometer.statistic_aggregation(
|
||||
instance_util = self.ceilometer.statistic_aggregation(
|
||||
resource_id=instance.uuid,
|
||||
meter_name=self._meter,
|
||||
period=self._period,
|
||||
@ -298,7 +312,7 @@ class WorkloadBalance(base.WorkloadStabilizationBaseStrategy):
|
||||
stop_time = datetime.datetime.utcnow()
|
||||
start_time = stop_time - datetime.timedelta(
|
||||
seconds=int(self._period))
|
||||
cpu_util = self.gnocchi.statistic_aggregation(
|
||||
instance_util = self.gnocchi.statistic_aggregation(
|
||||
resource_id=instance.uuid,
|
||||
metric=self._meter,
|
||||
granularity=self.granularity,
|
||||
@ -308,23 +322,32 @@ class WorkloadBalance(base.WorkloadStabilizationBaseStrategy):
|
||||
)
|
||||
except Exception as exc:
|
||||
LOG.exception(exc)
|
||||
LOG.error("Can not get cpu_util from %s",
|
||||
LOG.error("Can not get %s from %s", self._meter,
|
||||
self.config.datasource)
|
||||
continue
|
||||
if cpu_util is None:
|
||||
LOG.debug("Instance (%s): cpu_util is None", instance.uuid)
|
||||
if instance_util is None:
|
||||
LOG.debug("Instance (%s): %s is None",
|
||||
instance.uuid, self._meter)
|
||||
continue
|
||||
workload_cache[instance.uuid] = cpu_util * instance.vcpus / 100
|
||||
if self._meter == self.CPU_METER_NAME:
|
||||
workload_cache[instance.uuid] = (instance_util *
|
||||
instance.vcpus / 100)
|
||||
else:
|
||||
workload_cache[instance.uuid] = instance_util
|
||||
node_workload += workload_cache[instance.uuid]
|
||||
LOG.debug("VM (%s): cpu_util %f", instance.uuid, cpu_util)
|
||||
node_cpu_util = node_workload / node.vcpus * 100
|
||||
LOG.debug("VM (%s): %s %f", instance.uuid, self._meter,
|
||||
instance_util)
|
||||
|
||||
cluster_workload += node_workload
|
||||
if self._meter == self.CPU_METER_NAME:
|
||||
node_util = node_workload / node.vcpus * 100
|
||||
else:
|
||||
node_util = node_workload / node.memory * 100
|
||||
|
||||
instance_data = {
|
||||
'node': node, "cpu_util": node_cpu_util,
|
||||
'node': node, self._meter: node_util,
|
||||
'workload': node_workload}
|
||||
if node_cpu_util >= self.threshold:
|
||||
if node_util >= self.threshold:
|
||||
# mark the node to release resources
|
||||
overload_hosts.append(instance_data)
|
||||
else:
|
||||
@ -356,8 +379,9 @@ class WorkloadBalance(base.WorkloadStabilizationBaseStrategy):
|
||||
"""
|
||||
self.threshold = self.input_parameters.threshold
|
||||
self._period = self.input_parameters.period
|
||||
self._meter = self.input_parameters.metrics
|
||||
source_nodes, target_nodes, avg_workload, workload_cache = (
|
||||
self.group_hosts_by_cpu_util())
|
||||
self.group_hosts_by_cpu_or_ram_util())
|
||||
|
||||
if not source_nodes:
|
||||
LOG.debug("No hosts require optimization")
|
||||
@ -373,7 +397,7 @@ class WorkloadBalance(base.WorkloadStabilizationBaseStrategy):
|
||||
# choose the server with largest cpu_util
|
||||
source_nodes = sorted(source_nodes,
|
||||
reverse=True,
|
||||
key=lambda x: (x[self.METER_NAME]))
|
||||
key=lambda x: (x[self._meter]))
|
||||
|
||||
instance_to_migrate = self.choose_instance_to_migrate(
|
||||
source_nodes, avg_workload, workload_cache)
|
||||
@ -391,7 +415,7 @@ class WorkloadBalance(base.WorkloadStabilizationBaseStrategy):
|
||||
"be because of there's no enough CPU/Memory/DISK")
|
||||
return self.solution
|
||||
destination_hosts = sorted(destination_hosts,
|
||||
key=lambda x: (x["cpu_util"]))
|
||||
key=lambda x: (x[self._meter]))
|
||||
# always use the host with lowerest CPU utilization
|
||||
mig_destination_node = destination_hosts[0]['node']
|
||||
# generate solution to migrate the instance to the dest server,
|
||||
|
@ -54,6 +54,8 @@ class FakeCeilometerMetrics(object):
|
||||
result = 0.0
|
||||
if meter_name == "cpu_util":
|
||||
result = self.get_average_usage_instance_cpu_wb(resource_id)
|
||||
elif meter_name == "memory.resident":
|
||||
result = self.get_average_usage_instance_memory_wb(resource_id)
|
||||
return result
|
||||
|
||||
def mock_get_statistics_nn(self, resource_id, meter_name, period,
|
||||
@ -211,6 +213,20 @@ class FakeCeilometerMetrics(object):
|
||||
mock['INSTANCE_4'] = 10
|
||||
return float(mock[str(uuid)])
|
||||
|
||||
@staticmethod
|
||||
def get_average_usage_instance_memory_wb(uuid):
|
||||
mock = {}
|
||||
# node 0
|
||||
mock['INSTANCE_1'] = 30
|
||||
# node 1
|
||||
mock['INSTANCE_3'] = 12
|
||||
mock['INSTANCE_4'] = 12
|
||||
if uuid not in mock.keys():
|
||||
# mock[uuid] = random.randint(1, 4)
|
||||
mock[uuid] = 12
|
||||
|
||||
return mock[str(uuid)]
|
||||
|
||||
@staticmethod
|
||||
def get_average_usage_instance_cpu(uuid):
|
||||
"""The last VM CPU usage values to average
|
||||
|
@ -1,10 +1,10 @@
|
||||
<ModelRoot>
|
||||
<ComputeNode human_id="" uuid="Node_0" status="enabled" state="up" id="0" hostname="hostname_0" vcpus="40" disk="250" disk_capacity="250" memory="132">
|
||||
<Instance state="active" human_id="" uuid="73b09e16-35b7-4922-804e-e8f5d9b740fc" vcpus="10" disk="20" disk_capacity="20" memory="2" metadata='{"optimize": true,"top": "floor", "nested": {"x": "y"}}'/>
|
||||
<Instance state="active" human_id="" uuid="INSTANCE_1" vcpus="10" disk="20" disk_capacity="20" memory="2" metadata='{"optimize": true,"top": "floor", "nested": {"x": "y"}}'/>
|
||||
<Instance state="active" human_id="" uuid="73b09e16-35b7-4922-804e-e8f5d9b740fc" vcpus="10" disk="20" disk_capacity="20" memory="32" metadata='{"optimize": true,"top": "floor", "nested": {"x": "y"}}'/>
|
||||
<Instance state="active" human_id="" uuid="INSTANCE_1" vcpus="10" disk="20" disk_capacity="20" memory="32" metadata='{"optimize": true,"top": "floor", "nested": {"x": "y"}}'/>
|
||||
</ComputeNode>
|
||||
<ComputeNode human_id="" uuid="Node_1" status="enabled" state="up" id="1" hostname="hostname_1" vcpus="40" disk="250" disk_capacity="250" memory="132">
|
||||
<Instance state="active" human_id="" uuid="INSTANCE_3" vcpus="10" disk="20" disk_capacity="20" memory="2" metadata='{"optimize": true,"top": "floor", "nested": {"x": "y"}}'/>
|
||||
<Instance state="active" human_id="" uuid="INSTANCE_4" vcpus="10" disk="20" disk_capacity="20" memory="2" metadata='{"optimize": true,"top": "floor", "nested": {"x": "y"}}'/>
|
||||
<Instance state="active" human_id="" uuid="INSTANCE_3" vcpus="10" disk="20" disk_capacity="20" memory="32" metadata='{"optimize": true,"top": "floor", "nested": {"x": "y"}}'/>
|
||||
<Instance state="active" human_id="" uuid="INSTANCE_4" vcpus="10" disk="20" disk_capacity="20" memory="32" metadata='{"optimize": true,"top": "floor", "nested": {"x": "y"}}'/>
|
||||
</ComputeNode>
|
||||
</ModelRoot>
|
||||
|
@ -50,6 +50,8 @@ class FakeGnocchiMetrics(object):
|
||||
result = 0.0
|
||||
if metric == "cpu_util":
|
||||
result = self.get_average_usage_instance_cpu_wb(resource_id)
|
||||
elif metric == "memory.resident":
|
||||
result = self.get_average_usage_instance_memory_wb(resource_id)
|
||||
return result
|
||||
|
||||
@staticmethod
|
||||
@ -242,3 +244,17 @@ class FakeGnocchiMetrics(object):
|
||||
mock['INSTANCE_3'] = 20
|
||||
mock['INSTANCE_4'] = 10
|
||||
return float(mock[str(uuid)])
|
||||
|
||||
@staticmethod
|
||||
def get_average_usage_instance_memory_wb(uuid):
|
||||
mock = {}
|
||||
# node 0
|
||||
mock['INSTANCE_1'] = 30
|
||||
# node 1
|
||||
mock['INSTANCE_3'] = 12
|
||||
mock['INSTANCE_4'] = 12
|
||||
if uuid not in mock.keys():
|
||||
# mock[uuid] = random.randint(1, 4)
|
||||
mock[uuid] = 12
|
||||
|
||||
return mock[str(uuid)]
|
||||
|
@ -74,10 +74,12 @@ class TestWorkloadBalance(base.TestCase):
|
||||
self.strategy = strategies.WorkloadBalance(
|
||||
config=mock.Mock(datasource=self.datasource))
|
||||
self.strategy.input_parameters = utils.Struct()
|
||||
self.strategy.input_parameters.update({'threshold': 25.0,
|
||||
self.strategy.input_parameters.update({'metrics': 'cpu_util',
|
||||
'threshold': 25.0,
|
||||
'period': 300})
|
||||
self.strategy.threshold = 25.0
|
||||
self.strategy._period = 300
|
||||
self.strategy._meter = "cpu_util"
|
||||
|
||||
def test_calc_used_resource(self):
|
||||
model = self.fake_cluster.generate_scenario_6_with_2_nodes()
|
||||
@ -86,21 +88,31 @@ class TestWorkloadBalance(base.TestCase):
|
||||
cores_used, mem_used, disk_used = (
|
||||
self.strategy.calculate_used_resource(node))
|
||||
|
||||
self.assertEqual((cores_used, mem_used, disk_used), (20, 4, 40))
|
||||
self.assertEqual((cores_used, mem_used, disk_used), (20, 64, 40))
|
||||
|
||||
def test_group_hosts_by_cpu_util(self):
|
||||
model = self.fake_cluster.generate_scenario_6_with_2_nodes()
|
||||
self.m_model.return_value = model
|
||||
self.strategy.threshold = 30
|
||||
n1, n2, avg, w_map = self.strategy.group_hosts_by_cpu_util()
|
||||
n1, n2, avg, w_map = self.strategy.group_hosts_by_cpu_or_ram_util()
|
||||
self.assertEqual(n1[0]['node'].uuid, 'Node_0')
|
||||
self.assertEqual(n2[0]['node'].uuid, 'Node_1')
|
||||
self.assertEqual(avg, 8.0)
|
||||
|
||||
def test_group_hosts_by_ram_util(self):
|
||||
model = self.fake_cluster.generate_scenario_6_with_2_nodes()
|
||||
self.m_model.return_value = model
|
||||
self.strategy._meter = "memory.resident"
|
||||
self.strategy.threshold = 30
|
||||
n1, n2, avg, w_map = self.strategy.group_hosts_by_cpu_or_ram_util()
|
||||
self.assertEqual(n1[0]['node'].uuid, 'Node_0')
|
||||
self.assertEqual(n2[0]['node'].uuid, 'Node_1')
|
||||
self.assertEqual(avg, 33.0)
|
||||
|
||||
def test_choose_instance_to_migrate(self):
|
||||
model = self.fake_cluster.generate_scenario_6_with_2_nodes()
|
||||
self.m_model.return_value = model
|
||||
n1, n2, avg, w_map = self.strategy.group_hosts_by_cpu_util()
|
||||
n1, n2, avg, w_map = self.strategy.group_hosts_by_cpu_or_ram_util()
|
||||
instance_to_mig = self.strategy.choose_instance_to_migrate(
|
||||
n1, avg, w_map)
|
||||
self.assertEqual(instance_to_mig[0].uuid, 'Node_0')
|
||||
@ -110,7 +122,7 @@ class TestWorkloadBalance(base.TestCase):
|
||||
def test_choose_instance_notfound(self):
|
||||
model = self.fake_cluster.generate_scenario_6_with_2_nodes()
|
||||
self.m_model.return_value = model
|
||||
n1, n2, avg, w_map = self.strategy.group_hosts_by_cpu_util()
|
||||
n1, n2, avg, w_map = self.strategy.group_hosts_by_cpu_or_ram_util()
|
||||
instances = model.get_all_instances()
|
||||
[model.remove_instance(inst) for inst in instances.values()]
|
||||
instance_to_mig = self.strategy.choose_instance_to_migrate(
|
||||
@ -122,7 +134,7 @@ class TestWorkloadBalance(base.TestCase):
|
||||
self.m_model.return_value = model
|
||||
self.strategy.datasource = mock.MagicMock(
|
||||
statistic_aggregation=self.fake_metrics.mock_get_statistics_wb)
|
||||
n1, n2, avg, w_map = self.strategy.group_hosts_by_cpu_util()
|
||||
n1, n2, avg, w_map = self.strategy.group_hosts_by_cpu_or_ram_util()
|
||||
instance_to_mig = self.strategy.choose_instance_to_migrate(
|
||||
n1, avg, w_map)
|
||||
dest_hosts = self.strategy.filter_destination_hosts(
|
||||
@ -202,7 +214,7 @@ class TestWorkloadBalance(base.TestCase):
|
||||
m_gnocchi.statistic_aggregation = mock.Mock(
|
||||
side_effect=self.fake_metrics.mock_get_statistics_wb)
|
||||
instance0 = model.get_instance_by_uuid("INSTANCE_0")
|
||||
self.strategy.group_hosts_by_cpu_util()
|
||||
self.strategy.group_hosts_by_cpu_or_ram_util()
|
||||
if self.strategy.config.datasource == "ceilometer":
|
||||
m_ceilometer.statistic_aggregation.assert_any_call(
|
||||
aggregate='avg', meter_name='cpu_util',
|
||||
|
Loading…
Reference in New Issue
Block a user