Add config option for tuning osd memory target

Closes-Bug: #1934143

Depends-On: https://review.opendev.org/c/openstack/charm-ceph-mon/+/897724

Change-Id: I22dfc25c4ac2737f5d872ca2bdab3c533533dbff
(cherry picked from commit ba6186e5de)
Author: Samuel Walladge
Date:   2023-01-10 12:01:32 +10:30
Parent: 8ba9c98995
Commit: e852a99c33
3 changed files with 207 additions and 1 deletion


@@ -222,6 +222,30 @@ options:
      .
      Setting this option on a running Ceph OSD node will not affect running
      OSD devices, but will add the setting to ceph.conf for the next restart.
  tune-osd-memory-target:
    type: string
    default:
    description: |
      Set to tune the value of osd_memory_target.
      .
      If unset or set to an empty string, the charm will not update the
      value for ceph. This means that a new deployment with this value
      unset will use ceph's default (4GB), and if a value was set but later
      unset, ceph will remain configured with the last value set. This
      allows osd_memory_target to be managed manually in ceph without
      interference from the charm.
      .
      If set to "{n}%" (where n is an integer), the value is calculated as
      total ram * (n/100) / number of osds on the host.
      If set to "{n}GB" (where n is an integer), osd_memory_target is set
      to n GB per OSD directly.
      .
      Take care to choose a value that both provides enough memory for ceph
      and leaves enough memory for the system and other workloads to
      function. For common cases it is recommended to stay within the
      bounds of 4GB < value < 90% of system memory. If these bounds are
      exceeded, the charm will emit a warning but will still set the value.
  ignore-device-errors:
    type: boolean
    default: False
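To make the two accepted forms concrete, here is a small illustrative Python sketch of the conversion described above; the host figures (16GB of RAM, two OSD devices) are examples borrowed from the unit tests further down, not defaults:

GIB = 1024 * 1024 * 1024
total_ram = 16 * GIB   # total host memory (example value)
num_osds = 2           # OSD devices on the host (example value)

# "50%" -> total ram * (50/100) / number of osds on the host
per_osd = int(total_ram * (50 / 100) / num_osds)
assert per_osd == 4 * GIB   # 4GB per OSD

# "5GB" -> osd_memory_target is set per OSD directly
per_osd = 5 * GIB
assert per_osd == 5 * GIB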


@@ -19,6 +19,7 @@ import glob
import json
import netifaces
import os
import re
import shutil
import socket
import subprocess
@@ -54,6 +55,7 @@ from charmhelpers.core.host import (
    add_to_updatedb_prunepath,
    cmp_pkgrevno,
    is_container,
    get_total_ram,
    lsb_release,
    mkdir,
    service_reload,
@@ -360,6 +362,63 @@ def use_short_objects():
    return False


def warn_if_memory_outside_bounds(value):
    """
    Log a warning if value < 4GB or (value * osds) > 90% total memory.

    :param value: int - proposed value for osd_memory_target in bytes
    """
    ninety_percent = int(0.9 * get_total_ram())
    four_GB = 4 * 1024 * 1024 * 1024
    num_osds = len(kv().get("osd-devices", []))

    # 4GB is the default value; we don't want to go lower than that,
    # otherwise performance will be impacted.
    if value < four_GB:
        log("tune-osd-memory-target results in value < 4GB. "
            "This is not recommended.", level=WARNING)

    # 90% is a somewhat arbitrary upper limit,
    # that should allow enough memory for the OS to function,
    # while not limiting ceph too much.
    elif (value * num_osds) > ninety_percent:
        log("tune-osd-memory-target results in value > 90% of system ram. "
            "This is not recommended.", level=WARNING)


def get_osd_memory_target():
    """
    Processes the config value of tune-osd-memory-target.

    Returns a safe value for osd_memory_target.

    :returns: integer value for osd_memory_target, converted to a string.
    :rtype: string
    """
    tune_osd_memory_target = config('tune-osd-memory-target')

    if not tune_osd_memory_target:
        return ""

    match = re.match(r"(\d+)GB$", tune_osd_memory_target)
    if match:
        osd_memory_target = int(match.group(1)) * 1024 * 1024 * 1024
        warn_if_memory_outside_bounds(osd_memory_target)
        return str(osd_memory_target)

    match = re.match(r"(\d+)%$", tune_osd_memory_target)
    if match:
        percentage = int(match.group(1)) / 100
        num_osds = len(kv().get("osd-devices", []))
        osd_memory_target = int(get_total_ram() * percentage / num_osds)
        warn_if_memory_outside_bounds(osd_memory_target)
        return str(osd_memory_target)

    log("tune-osd-memory-target value invalid,"
        " leaving the OSD memory target unchanged", level=ERROR)
    return ""


def get_ceph_context(upgrading=False):
    """Returns the current context dictionary for generating ceph.conf
@@ -475,6 +534,15 @@ def config_changed():
    if sysctl_dict:
        create_sysctl(sysctl_dict, '/etc/sysctl.d/50-ceph-osd-charm.conf')

    for r_id in hookenv.relation_ids('mon'):
        hookenv.relation_set(
            relation_id=r_id,
            relation_settings={
                'osd-host': socket.gethostname(),
                'osd-memory-target': get_osd_memory_target(),
            }
        )

    e_mountpoint = config('ephemeral-unmount')
    if e_mountpoint and ceph.filesystem_mounted(e_mountpoint):
        umount(e_mountpoint)
@@ -563,7 +631,9 @@ def prepare_disks_and_activate():
            'bootstrapped-osds': len(db.get('osd-devices', [])),
            'ceph_release': ceph.resolve_ceph_version(
                hookenv.config('source') or 'distro'
            )
            ),
            'osd-host': socket.gethostname(),
            'osd-memory-target': get_osd_memory_target(),
        }
    )
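The 'osd-host' and 'osd-memory-target' values published on the 'mon' relation above are consumed by the charm-ceph-mon change listed in the Depends-On. As a rough, hypothetical sketch only (the function name and exact handling are assumptions, not taken from that change), the mon side could apply the per-host value with a host-scoped Ceph config mask:

import subprocess

def apply_osd_memory_target(osd_host, osd_memory_target):
    # Hypothetical mon-side handler: set osd_memory_target for all OSDs
    # on the given host via the "osd/host:<hostname>" config mask.
    # An empty value means "leave the cluster configuration untouched",
    # matching the charm's behaviour when tune-osd-memory-target is unset.
    if not osd_memory_target:
        return
    subprocess.check_call([
        'ceph', 'config', 'set',
        'osd/host:{}'.format(osd_host),
        'osd_memory_target', str(osd_memory_target),
    ])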


@@ -34,6 +34,7 @@ CHARM_CONFIG = {'config-flags': '',
                'osd-journal-size': 1024,
                'osd-max-backfills': 1,
                'osd-recovery-max-active': 2,
                'tune-osd-memory-target': '',
                'use-direct-io': True,
                'osd-format': 'ext4',
                'prefer-ipv6': False,
@@ -54,6 +55,8 @@ BLUESTORE_DB_TEST_SIZE = 2 * 2 ** 30


class CephHooksTestCase(unittest.TestCase):
    maxDiff = None

    def setUp(self):
        super(CephHooksTestCase, self).setUp()
@@ -707,6 +710,115 @@ class CephHooksTestCase(unittest.TestCase):
            config['bdev-enable-discard'] = value
            self.assertEqual(ceph_hooks.get_bdev_enable_discard(), expected)

    @patch.object(ceph_hooks, "get_total_ram")
    @patch.object(ceph_hooks, "kv")
    @patch.object(ceph_hooks, "log")
    def test_warn_memory_bounds(
        self, mock_log, mock_kv, mock_total_ram
    ):
        mock_total_ram.return_value = 16 * 1024 * 1024 * 1024  # 16GB
        mock_kv.return_value = {"osd-devices": ["osd1", "osd2"]}

        ceph_hooks.warn_if_memory_outside_bounds(5 * 1024 * 1024 * 1024)  # 5GB
        mock_log.assert_not_called()

        mock_kv.return_value = {"osd-devices": ["osd1", "osd2", "osd3"]}
        ceph_hooks.warn_if_memory_outside_bounds(5 * 1024 * 1024 * 1024)  # 5GB
        mock_log.assert_called_with(
            "tune-osd-memory-target results in value > 90% of system ram. "
            "This is not recommended.",
            level=ceph_hooks.WARNING
        )

        mock_kv.return_value = {"osd-devices": ["osd1", "osd2"]}
        ceph_hooks.warn_if_memory_outside_bounds(2 * 1024 * 1024 * 1024)  # 2GB
        mock_log.assert_called_with(
            "tune-osd-memory-target results in value < 4GB. "
            "This is not recommended.",
            level=ceph_hooks.WARNING
        )

    @patch.object(ceph_hooks, "config")
    @patch.object(ceph_hooks, "get_total_ram")
    @patch.object(ceph_hooks, "kv")
    @patch.object(ceph_hooks, "log")
    def test_get_osd_memory_target_gb(
        self, mock_log, mock_kv, mock_total_ram,
        mock_config,
    ):
        mock_total_ram.return_value = 16 * 1024 * 1024 * 1024  # 16GB
        mock_kv.return_value = {"osd-devices": ["osd1", "osd2"]}

        def config_func(k):
            if k == "tune-osd-memory-target":
                return "5GB"
            raise ValueError

        mock_config.side_effect = config_func

        target = ceph_hooks.get_osd_memory_target()
        self.assertEqual(target, str(5 * 1024 * 1024 * 1024))  # 5GB

    @patch.object(ceph_hooks, "config")
    @patch.object(ceph_hooks, "get_total_ram")
    @patch.object(ceph_hooks, "kv")
    @patch.object(ceph_hooks, "log")
    def test_get_osd_memory_target_percentage(
        self, mock_log, mock_kv, mock_total_ram,
        mock_config,
    ):
        mock_total_ram.return_value = 16 * 1024 * 1024 * 1024  # 16GB
        mock_kv.return_value = {"osd-devices": ["osd1", "osd2"]}

        def config_func(k):
            if k == "tune-osd-memory-target":
                return "50%"
            raise ValueError

        mock_config.side_effect = config_func

        target = ceph_hooks.get_osd_memory_target()
        # should be 50% of 16GB / 2 osd devices = 4GB
        self.assertEqual(target, str(4 * 1024 * 1024 * 1024))  # 4GB

    @patch.object(ceph_hooks, "config")
    @patch.object(ceph_hooks, "get_total_ram")
    @patch.object(ceph_hooks, "kv")
    @patch.object(ceph_hooks, "log")
    def test_get_osd_memory_target_empty(
        self, mock_log, mock_kv, mock_total_ram,
        mock_config,
    ):
        mock_total_ram.return_value = 16 * 1024 * 1024 * 1024  # 16GB
        mock_kv.return_value = {"osd-devices": ["osd1", "osd2"]}
        mock_config.side_effect = lambda _: None

        target = ceph_hooks.get_osd_memory_target()
        self.assertEqual(target, "")

    @patch.object(ceph_hooks, "config")
    @patch.object(ceph_hooks, "get_total_ram")
    @patch.object(ceph_hooks, "kv")
    @patch.object(ceph_hooks, "log")
    def test_get_osd_memory_target_invalid(
        self, mock_log, mock_kv, mock_total_ram,
        mock_config,
    ):
        mock_total_ram.return_value = 16 * 1024 * 1024 * 1024  # 16GB
        mock_kv.return_value = {"osd-devices": ["osd1", "osd2"]}

        def config_func(k):
            if k == "tune-osd-memory-target":
                return "foo"
            raise ValueError

        mock_config.side_effect = config_func

        target = ceph_hooks.get_osd_memory_target()
        self.assertEqual(target, "")
        mock_log.assert_called_with(
            "tune-osd-memory-target value invalid,"
            " leaving the OSD memory target unchanged",
            level=ceph_hooks.ERROR,
        )

    @patch.object(ceph_hooks, 'local_unit')
    @patch.object(ceph_hooks, 'relation_get')