Burn-in: Add CPU step
Add a clean step for CPU burn-in via stress-ng. Get basic run parameters from the node's driver_info. Story: #2007523 Task: #42382 Change-Id: I14fd4164991fb94263757244f716b6bfe8edf875
This commit is contained in:
parent
9edb13d891
commit
6702fcaa43
@ -74,6 +74,9 @@ Known limitations:
|
||||
Clean steps
|
||||
-----------
|
||||
|
||||
``deploy.burnin_cpu``
|
||||
Stress-test the CPUs of a node via stress-ng for a configurable
|
||||
amount of time. Disabled by default.
|
||||
``deploy.erase_devices``
|
||||
Securely erases all information from all recognized disk devices.
|
||||
Relatively fast when secure ATA erase is available, otherwise can take
|
||||
|
48
ironic_python_agent/burnin.py
Normal file
48
ironic_python_agent/burnin.py
Normal file
@ -0,0 +1,48 @@
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from ironic_lib import utils
|
||||
from oslo_concurrency import processutils
|
||||
from oslo_log import log
|
||||
|
||||
from ironic_python_agent import errors
|
||||
|
||||
LOG = log.getLogger(__name__)
|
||||
|
||||
|
||||
def stress_ng_cpu(node):
    """Burn-in the CPU with stress-ng.

    Run stress-ng on a configurable number of CPUs for a configurable
    amount of time. Both settings are read from the node's
    ``driver_info``:

    * ``agent_burnin_cpu_cpu``: value passed to ``--cpu``. Defaults to
      0, which makes stress-ng use all CPUs.
    * ``agent_burnin_cpu_timeout``: value passed to ``--timeout``.
      Defaults to 86400 (24 hours).

    :param node: Ironic node object
    :raises: CommandExecutionError if the execution of stress-ng fails.
    """
    info = node.get('driver_info', {})
    cpu = info.get('agent_burnin_cpu_cpu', 0)
    timeout = info.get('agent_burnin_cpu_timeout', 86400)

    args = ('stress-ng', '--cpu', cpu, '--timeout', timeout,
            '--metrics-brief')
    LOG.debug('Burn-in stress_ng_cpu command: %s', args)

    try:
        _, err = utils.execute(*args)
    except (processutils.ProcessExecutionError, OSError) as e:
        # Interpolate the error into a real string. Previously a
        # (format, mapping) tuple was built here, so both the log entry
        # and the raised exception carried the tuple's repr instead of a
        # readable message.
        error_msg = "stress-ng (cpu) failed with error %s" % e
        LOG.error(error_msg)
        raise errors.CommandExecutionError(error_msg) from e

    # stress-ng reports on stderr only
    LOG.info(err)
|
@ -38,6 +38,7 @@ import pyudev
|
||||
import stevedore
|
||||
import yaml
|
||||
|
||||
from ironic_python_agent import burnin
|
||||
from ironic_python_agent import encoding
|
||||
from ironic_python_agent import errors
|
||||
from ironic_python_agent.extensions import base as ext_base
|
||||
@ -1393,6 +1394,14 @@ class GenericHardwareManager(HardwareManager):
|
||||
except OSError:
|
||||
os.remove(filepath)
|
||||
|
||||
def burnin_cpu(self, node, ports):
    """Clean step that burns in the node's CPU.

    Hands the node over to ``burnin.stress_ng_cpu``; ``ports`` is
    accepted as part of the step signature but is not used here.

    :param node: Ironic node object
    :param ports: list of Ironic port objects
    """
    burnin.stress_ng_cpu(node)
|
||||
|
||||
def _shred_block_device(self, node, block_device):
|
||||
"""Erase a block device using shred.
|
||||
|
||||
@ -1865,7 +1874,14 @@ class GenericHardwareManager(HardwareManager):
|
||||
'interface': 'raid',
|
||||
'reboot_requested': False,
|
||||
'abortable': True
|
||||
}
|
||||
},
|
||||
{
|
||||
'step': 'burnin_cpu',
|
||||
'priority': 0,
|
||||
'interface': 'deploy',
|
||||
'reboot_requested': False,
|
||||
'abortable': True
|
||||
},
|
||||
]
|
||||
|
||||
def get_deploy_steps(self, node, ports):
|
||||
|
56
ironic_python_agent/tests/unit/test_burnin.py
Normal file
56
ironic_python_agent/tests/unit/test_burnin.py
Normal file
@ -0,0 +1,56 @@
|
||||
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
||||
# not use this file except in compliance with the License. You may obtain
|
||||
# a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
# License for the specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
from unittest import mock
|
||||
|
||||
from ironic_lib import utils
|
||||
from oslo_concurrency import processutils
|
||||
|
||||
from ironic_python_agent import burnin
|
||||
from ironic_python_agent import errors
|
||||
from ironic_python_agent.tests.unit import base
|
||||
|
||||
|
||||
@mock.patch.object(utils, 'execute', autospec=True)
class TestBurnin(base.IronicAgentTest):
    """Unit tests for the stress-ng based CPU burn-in helper."""

    def test_stress_ng_cpu_default(self, mock_execute):
        """Empty driver_info falls back to the built-in defaults."""
        fake_node = {'driver_info': {}}
        mock_execute.return_value = ['out', 'err']

        burnin.stress_ng_cpu(fake_node)

        expected_cmd = ('stress-ng', '--cpu', 0, '--timeout', 86400,
                        '--metrics-brief')
        mock_execute.assert_called_once_with(*expected_cmd)

    def test_stress_ng_cpu_non_default(self, mock_execute):
        """Values from driver_info are passed through to stress-ng."""
        driver_info = {
            'agent_burnin_cpu_cpu': 3,
            'agent_burnin_cpu_timeout': 2911,
        }
        fake_node = {'driver_info': driver_info}
        mock_execute.return_value = ['out', 'err']

        burnin.stress_ng_cpu(fake_node)

        expected_cmd = ('stress-ng', '--cpu', 3, '--timeout', 2911,
                        '--metrics-brief')
        mock_execute.assert_called_once_with(*expected_cmd)

    def test_stress_ng_cpu_no_stress_ng(self, mock_execute):
        """A failing execute surfaces as CommandExecutionError."""
        fake_node = {'driver_info': {}}
        # First invocation succeeds, the second one raises.
        failure = processutils.ProcessExecutionError()
        mock_execute.side_effect = (['out', 'err'], failure)

        burnin.stress_ng_cpu(fake_node)

        self.assertRaises(errors.CommandExecutionError,
                          burnin.stress_ng_cpu, fake_node)
|
@ -149,6 +149,13 @@ class TestGenericHardwareManager(base.IronicAgentTest):
|
||||
'interface': 'raid',
|
||||
'reboot_requested': False,
|
||||
'abortable': True
|
||||
},
|
||||
{
|
||||
'step': 'burnin_cpu',
|
||||
'priority': 0,
|
||||
'interface': 'deploy',
|
||||
'reboot_requested': False,
|
||||
'abortable': True
|
||||
}
|
||||
]
|
||||
clean_steps = self.hardware.get_clean_steps(self.node, [])
|
||||
|
7
releasenotes/notes/add_burnin_cpu-9acbb36048246a6b.yaml
Normal file
7
releasenotes/notes/add_burnin_cpu-9acbb36048246a6b.yaml
Normal file
@ -0,0 +1,7 @@
|
||||
---
|
||||
features:
|
||||
- |
|
||||
Adds a burn-in cleaning step 'burnin_cpu' to stress test CPUs for a
|
||||
configurable amount of time with stress-ng. To use this step,
|
||||
stress-ng needs to be installed on the RAM disk.
|
||||
|
Loading…
x
Reference in New Issue
Block a user