Burn-in: Add CPU step

Add a clean step for CPU burn-in via stress-ng. Get basic
run parameters from the node's driver_info.

Story: #2007523
Task: #42382

Change-Id: I14fd4164991fb94263757244f716b6bfe8edf875
This commit is contained in:
Arne Wiebalck
2021-04-26 12:00:44 +02:00
parent 9edb13d891
commit 6702fcaa43
6 changed files with 138 additions and 1 deletions

View File

@@ -74,6 +74,9 @@ Known limitations:
Clean steps Clean steps
----------- -----------
``deploy.burnin_cpu``
Stress-test the CPUs of a node via stress-ng for a configurable
amount of time. Disabled by default.
``deploy.erase_devices`` ``deploy.erase_devices``
Securely erases all information from all recognized disk devices. Securely erases all information from all recognized disk devices.
Relatively fast when secure ATA erase is available, otherwise can take Relatively fast when secure ATA erase is available, otherwise can take

View File

@@ -0,0 +1,48 @@
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from ironic_lib import utils
from oslo_concurrency import processutils
from oslo_log import log
from ironic_python_agent import errors
LOG = log.getLogger(__name__)
def stress_ng_cpu(node):
    """Burn-in the CPU with stress-ng.

    Run stress-ng on a configurable number of CPUs for a configurable
    amount of time. Both settings are read from the node's
    ``driver_info``:

    * ``agent_burnin_cpu_cpu``: value passed to ``--cpu``
      (default 0, i.e. use all CPUs).
    * ``agent_burnin_cpu_timeout``: value passed to ``--timeout``
      (default 86400, i.e. 24 hours).

    :param node: Ironic node object
    :raises: CommandExecutionError if the execution of stress-ng fails.
    """
    info = node.get('driver_info', {})
    cpu = info.get('agent_burnin_cpu_cpu', 0)
    timeout = info.get('agent_burnin_cpu_timeout', 86400)

    args = ('stress-ng', '--cpu', cpu, '--timeout', timeout,
            '--metrics-brief')
    LOG.debug('Burn-in stress_ng_cpu command: %s', args)

    try:
        _, err = utils.execute(*args)
    except (processutils.ProcessExecutionError, OSError) as e:
        # Interpolate the message here: passing a (format, params) tuple
        # would log and raise the raw tuple rather than readable text.
        error_msg = "stress-ng (cpu) failed with error %(err)s" % {'err': e}
        LOG.error(error_msg)
        raise errors.CommandExecutionError(error_msg)
    else:
        # stress-ng reports on stderr only
        LOG.info(err)

View File

@@ -38,6 +38,7 @@ import pyudev
import stevedore import stevedore
import yaml import yaml
from ironic_python_agent import burnin
from ironic_python_agent import encoding from ironic_python_agent import encoding
from ironic_python_agent import errors from ironic_python_agent import errors
from ironic_python_agent.extensions import base as ext_base from ironic_python_agent.extensions import base as ext_base
@@ -1393,6 +1394,14 @@ class GenericHardwareManager(HardwareManager):
except OSError: except OSError:
os.remove(filepath) os.remove(filepath)
def burnin_cpu(self, node, ports):
    """Burn-in the CPU

    Delegates to ``burnin.stress_ng_cpu``, passing only ``node``.

    :param node: Ironic node object
    :param ports: list of Ironic port objects; not used by this step
    """
    burnin.stress_ng_cpu(node)
def _shred_block_device(self, node, block_device): def _shred_block_device(self, node, block_device):
"""Erase a block device using shred. """Erase a block device using shred.
@@ -1865,7 +1874,14 @@ class GenericHardwareManager(HardwareManager):
'interface': 'raid', 'interface': 'raid',
'reboot_requested': False, 'reboot_requested': False,
'abortable': True 'abortable': True
} },
{
'step': 'burnin_cpu',
'priority': 0,
'interface': 'deploy',
'reboot_requested': False,
'abortable': True
},
] ]
def get_deploy_steps(self, node, ports): def get_deploy_steps(self, node, ports):

View File

@@ -0,0 +1,56 @@
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
from unittest import mock
from ironic_lib import utils
from oslo_concurrency import processutils
from ironic_python_agent import burnin
from ironic_python_agent import errors
from ironic_python_agent.tests.unit import base
@mock.patch.object(utils, 'execute', autospec=True)
class TestBurnin(base.IronicAgentTest):
    """Unit tests for burnin.stress_ng_cpu with utils.execute mocked."""

    def test_stress_ng_cpu_default(self, mock_execute):
        """An empty driver_info results in the default arguments."""
        node = {'driver_info': {}}
        mock_execute.return_value = ('out', 'err')

        burnin.stress_ng_cpu(node)

        mock_execute.assert_called_once_with(
            'stress-ng', '--cpu', 0, '--timeout', 86400, '--metrics-brief')

    def test_stress_ng_cpu_non_default(self, mock_execute):
        """Values set in driver_info are passed through to stress-ng."""
        node = {'driver_info': {'agent_burnin_cpu_cpu': 3,
                                'agent_burnin_cpu_timeout': 2911}}
        mock_execute.return_value = ('out', 'err')

        burnin.stress_ng_cpu(node)

        mock_execute.assert_called_once_with(
            'stress-ng', '--cpu', 3, '--timeout', 2911, '--metrics-brief')

    def test_stress_ng_cpu_no_stress_ng(self, mock_execute):
        """A failing stress-ng run raises CommandExecutionError."""
        node = {'driver_info': {}}
        # Fail on the very first call so only the error path is exercised.
        mock_execute.side_effect = processutils.ProcessExecutionError()

        self.assertRaises(errors.CommandExecutionError,
                          burnin.stress_ng_cpu, node)
        mock_execute.assert_called_once_with(
            'stress-ng', '--cpu', 0, '--timeout', 86400, '--metrics-brief')

    def test_stress_ng_cpu_os_error(self, mock_execute):
        """An OSError from execute raises CommandExecutionError."""
        node = {'driver_info': {}}
        mock_execute.side_effect = OSError('stress-ng: command not found')

        self.assertRaises(errors.CommandExecutionError,
                          burnin.stress_ng_cpu, node)
        mock_execute.assert_called_once_with(
            'stress-ng', '--cpu', 0, '--timeout', 86400, '--metrics-brief')

View File

@@ -149,6 +149,13 @@ class TestGenericHardwareManager(base.IronicAgentTest):
'interface': 'raid', 'interface': 'raid',
'reboot_requested': False, 'reboot_requested': False,
'abortable': True 'abortable': True
},
{
'step': 'burnin_cpu',
'priority': 0,
'interface': 'deploy',
'reboot_requested': False,
'abortable': True
} }
] ]
clean_steps = self.hardware.get_clean_steps(self.node, []) clean_steps = self.hardware.get_clean_steps(self.node, [])

View File

@@ -0,0 +1,7 @@
---
features:
- |
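Adds a burn-in cleaning step 'burnin_cpu' to stress-test CPUs for a
configurable amount of time with stress-ng. The number of CPUs and the
run time are read from the node's driver_info ('agent_burnin_cpu_cpu'
and 'agent_burnin_cpu_timeout'). To use this step, stress-ng needs to
be installed on the RAM disk.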