Add caching for knownhost private-address lookups

This change adds caching for the host look ups associated with a
private-address of a unit.  This cache is maintained across hook
invocations, and is designed to reduce the time spent in
cloud-compute-relation-changed hooks (which occur as nova-compute units
join and update on the cloud-compute relation).

The feature has been added under an EXPERIMENTAL config flag (with the
default being "don't use the cached values") in case there are any
corner cases around DNS resolution in the deploying cloud during
deployment.

An action is included to allow clearing of the cache at unit,
application and whole relation level.  This clears the cache and
re-triggers the host resolution, and relation updates.  This is in case
of either 1) DNS changed during the deployment, 2) DNS has been altered
during the running of the cloud.

Change-Id: I5a68bf4c30bf1591184d660d50559c969822ddcf
This commit is contained in:
Alex Kavanagh 2019-07-16 14:27:30 +01:00
parent 4d9b4a2600
commit fe65e12b31
8 changed files with 296 additions and 38 deletions

View File

@ -1,15 +1,51 @@
openstack-upgrade:
description: Perform openstack upgrades. Config option action-managed-upgrade must be set to True.
description: |
Perform openstack upgrades. Config option action-managed-upgrade must be
set to True.
pause:
description: Pause the nova-cloud-controller unit. This action will stop related services.
description: |
Pause the nova-cloud-controller unit. This action will stop related
services.
resume:
descrpition: Resume the nova-cloud-controller unit. This action will start related services.
description: |
Resume the nova-cloud-controller unit. This action will start related
services.
archive-data:
descrpition: Run job to archive deleted rows in database
description: Run job to archive deleted rows in database
params:
batch-size:
type: integer
default: 10000
description: Archive old data to shadow tables
description: Archive old data to shadow tables
security-checklist:
description: Validate the running configuration against the OpenStack security guides checklist
description: |
Validate the running configuration against the OpenStack security guides
checklist
clear-unit-knownhost-cache:
params:
unit:
target: string
default: ""
description: |
Clear the knownhost cache for (default) all the units, a service, or a
single unit.
.
The default is all units. If the 'target' param has an '/' in it, then it
is assumed ot be a single unit. If no '/' is present, then all the units
in a service will be refreshed.
.
e.g. target="nova-compute/4" will just clear the nova-compute/4 unit (in
the 'nova-compute' application), whereas target='nova-compute' will refresh
all of the units in the 'nova-compute' application.
.
The action triggers a refresh resolution of the known hosts for the unit,
which then populates the cache, updates the knownhosts file for the
associated service (e.g. 'nova-compute'), and, importantly, sets the
relation data for that associated service with the new knownhosts file.
This may cause a 'cloud-compute' relation changed hook on the associated
nova-compute units if the hosts have changed.
.
This action still functions even if the 'cache-known-hosts' config value is
not set; caching of hosts occurs regardless of that setting, and so this
action can be used to force an update if DNS has changed in the system, or
for a particular host (although this scenario is unlikely).

View File

@ -29,6 +29,7 @@ _add_path(_root)
import charmhelpers.core.hookenv as hookenv
import hooks.nova_cc_utils as utils
import hooks.nova_cc_hooks as ncc_hooks
def pause(args):
@ -52,12 +53,81 @@ def archive_data(args):
max_rows=hookenv.action_get('batch-size'))})
def clear_unit_knownhost_cache(args):
"""Clear the knownhost cache for a unit (or all units), and then refresh
the knownhosts, and potentially set the relation data for the associated
service.
If the target param doesn't match any unit or service, then the action does
nothing.
"""
target = hookenv.action_get('target')
hookenv.action_set({
"Units updated": clear_knownhost_cache(target)
})
def clear_knownhost_cache(target):
"""Clear the known host cache for a target, rescan the affected units,
and then update the knownhosts file for the affected service(s) and set the
appropriate relation data.
Examples of target are:
- "" = all services, all units (clear all the caches)
- "aservice" = clear all the units' caches on 'aservice'
- "aservice/4" = just clear this specific unit and update the relation on
that service.
Note that if target doesn't match anything, then the function takes no
action and no Exception is raised.
:param target: The target to clear.
:type target: str
:returns: a list of units that were affected.
:rtype: List[Dict[str, str]]
"""
affected_units = []
parts = target.split('/', 1)
target_service = parts[0]
is_unit = len(parts) > 1
for r_id in hookenv.relation_ids('cloud-compute'):
units = hookenv.related_units(r_id)
if not units:
continue
service = utils.remote_service_from_unit(unit=units[0])
if target_service and service != target_service:
continue
updated = False
for unit in units:
if is_unit and unit != target:
continue
private_address = hookenv.relation_get(
attribute='private-address', unit=unit, rid=r_id)
if private_address:
utils.clear_hostset_cache_for(private_address)
ncc_hooks.update_ssh_key(rid=r_id, unit=unit)
updated = True
affected_units.append({unit: private_address})
# Note that this uses the last unit in the relation; that's ok as it's
# only used to identify the service
if updated:
ncc_hooks.notify_ssh_keys_to_compute_units(rid=r_id, unit=unit)
return affected_units
# A dictionary of all the defined actions to callables (which take
# parsed arguments).
ACTIONS = {
"pause": pause,
"resume": resume,
"archive-data": archive_data}
"archive-data": archive_data,
"clear-unit-knownhost-cache": clear_unit_knownhost_cache,
}
def main(args):

View File

@ -0,0 +1 @@
actions.py

View File

@ -143,6 +143,31 @@ options:
* shared-db
* amqp
* identity-service
cache-known-hosts:
type: boolean
default: false
description: |
EXPERIMENTAL - If true then the charm will cache host and ip lookups for
a unit when populating the knownhosts file for nova-compute service.
This is a known performance issue around maintaining the knownhosts files
for each nova-compute service, and caching is a strategy to reduce the
hook execution time when the 'cloud-compute' relation changes. If false,
then no caching is performed. Changing from true to false will NOT cause
new lookups to be performed.
.
To clear the caches and force new lookups to be performed, the action
'clear-unit-knownhost-cache' should be used.
.
This config flag is experimental as it's very hard to determine if there
will be any DNS issues during the deployment onto different platforms.
Thus it may be preferred to keep the flag false during deployment and
then switch to true after deployment.
.
Note that the charm keeps a record of the lookups for each unit
regardless of the setting of this flag. The cache is only used if the
flag is true.
.
At a future release the default will be true.
console-access-protocol:
type: string
default:

View File

@ -774,6 +774,9 @@ def update_ssh_key(rid=None, unit=None):
private_address = rel_settings.get('private-address', None)
hostname = rel_settings.get('hostname', '')
# only resolve the hosts once, so this is the memo for it
resolved_hosts = None
if migration_auth_type == 'ssh':
# TODO(ajkavanagh) -- the hookenv was previous behaviour, but there
# isn't a good place to put this yet; it will be moved or removed at
@ -786,8 +789,9 @@ def update_ssh_key(rid=None, unit=None):
.format(rid or hookenv.relation_id(),
unit or hookenv.remote_unit()))
return
ncc_utils.ssh_resolve_compute_hosts(
remote_service, private_address, hostname, user=None)
resolved_hosts = ncc_utils.resolve_hosts_for(private_address, hostname)
ncc_utils.ssh_compute_add_known_hosts(
remote_service, resolved_hosts, user=None)
ncc_utils.add_authorized_key_if_doesnt_exist(
key, remote_service, private_address, user=None)
@ -795,8 +799,13 @@ def update_ssh_key(rid=None, unit=None):
# Always try to fetch the user 'nova' key on the remote compute unit
if nova_ssh_public_key:
ncc_utils.ssh_resolve_compute_hosts(
remote_service, private_address, hostname, user='nova')
# in the unlikely event the migration type wasn't ssh, we still have to
# resolve the hosts
if resolved_hosts is None:
resolved_hosts = ncc_utils.resolve_hosts_for(private_address,
hostname)
ncc_utils.ssh_compute_add_known_hosts(
remote_service, resolved_hosts, user='nova')
ncc_utils.add_authorized_key_if_doesnt_exist(
nova_ssh_public_key, remote_service, private_address, user='nova')
@ -899,8 +908,10 @@ def _batch_write_ssh_on_relation(rid, prefix, max_index, _iter):
@hooks.hook('cloud-compute-relation-departed')
def compute_departed():
relation_data = hookenv.relation_get()
ncc_utils.ssh_compute_remove(
public_key=hookenv.relation_get('ssh_public_key'))
public_key=relation_data.get('ssh_public_key'))
ncc_utils.clear_hostset_cache_for(relation_data.get('private-address'))
@hooks.hook('neutron-network-service-relation-joined',

View File

@ -31,6 +31,7 @@ import charmhelpers.contrib.peerstorage as ch_peerstorage
import charmhelpers.core.decorators as ch_decorators
import charmhelpers.core.hookenv as hookenv
import charmhelpers.core.host as ch_host
import charmhelpers.core.unitdata as unitdata
import charmhelpers.fetch as ch_fetch
import hooks.nova_cc_common as common
@ -1216,10 +1217,9 @@ def add_authorized_key_if_doesnt_exist(public_key,
keys.write(public_key + '\n')
def ssh_resolve_compute_hosts(remote_service,
private_address,
hostname,
user=None):
def ssh_compute_add_known_hosts(remote_service,
resolved_hosts,
user=None):
"""Resolve all the host names for the private address, and store it against
the remote service (effectively the relation) and an optional user.
@ -1230,21 +1230,18 @@ def ssh_resolve_compute_hosts(remote_service,
:param remote_service: The remote service against which to store the hosts
file.
:type remote_service: str
:param private_address: The private address to resolve hostnames against.
:type private_address: Union[str, None]
:param hostname: An optional hostname (extracted from the relation data of
the unit), to also use to resolve hostnames for the compute unit.
:type hostname: str
:param resolved_hosts: The hosts to add
:type resolved_hosts: List[str]
:param user: an optional user against which to store the resolved
hostnames.
:type user: Union[str, None]
"""
for host in _resolve_hosts(private_address, hostname):
for host in resolved_hosts:
# TODO(ajkavanagh) expensive
add_known_host(host, remote_service, user)
def _resolve_hosts(private_address, hostname):
def resolve_hosts_for(private_address, hostname):
"""Return all of the resolved hosts for a unit
Using private-address and (if availble) hostname attributes on the
@ -1265,35 +1262,65 @@ def _resolve_hosts(private_address, hostname):
if private_address is None:
return []
db = unitdata.kv()
db_key = "hostset-{}".format(private_address)
cached_hostset = db.get(db_key, default=None)
if hostname:
hostname = hostname.lower()
# only use the cached hostset if the config flag is true
if hookenv.config('cache-known-hosts') and cached_hostset is not None:
# in the unlikely event that we've already cached the host but the
# hostname is now present, add that in.
if (not ch_ip.is_ipv6(private_address) and
hostname and
hostname not in cached_hostset):
return cached_hostset + hostname
return cached_hostset
# Use a set to enforce uniqueness; order doesn't matter
hosts = set()
if not ch_ip.is_ipv6(private_address):
if hostname:
hosts.add(hostname.lower())
hosts.add(hostname)
if not ch_utils.is_ip(private_address):
hosts.append(private_address.lower())
# TODO(ajkavanagh) expensive
hosts.add(ch_utils.get_host_ip(private_address))
short = private_address.split('.')[0]
# TODO(ajkavanagh) expensive
if ch_ip.ns_query(short):
hosts.add(short.lower())
else:
hosts.add(private_address)
# TODO(ajkavanagh) expensive
hn = ch_utils.get_hostname(private_address)
if hn:
hosts.add(hn.lower())
short = hn.split('.')[0]
# TODO(ajkananagh) expensive
if ch_ip.ns_query(short):
hosts.add(short.lower())
else:
hosts.add(private_address)
return list(hosts)
# Note, the cache is maintained regardless of whether the config
# 'cache-known-hosts' flag is set; the flag only affects usage and lookup.
hosts = list(hosts)
db.set(db_key, hosts)
db.flush()
return hosts
def clear_hostset_cache_for(private_address):
"""Clear the hostset cache for a private address that refers to a unit.
:param private_address: the private address corresponding to the unit
:type private_address: str
"""
db = unitdata.kv()
db_key = "hostset-{}".format(private_address)
db.unset(db_key)
db.flush()
def ssh_known_hosts_lines(remote_service, user=None):

View File

@ -63,6 +63,94 @@ class ResumeTestCase(CharmTestCase):
self.resume_unit_helper.assert_called_once_with('test-config')
class ClearUnitKnownhostCacheTestCase(CharmTestCase):
@staticmethod
def _relation_get(attribute=None, unit=None, rid=None):
return {
'aservice/1': '10.0.0.1',
'aservice/2': '10.0.0.2',
'aservice/3': '10.0.0.3',
'aservice/4': '10.0.0.4',
'bservice/1': '10.0.1.1',
'bservice/2': '10.0.1.2',
'bservice/3': '10.0.1.3',
}.get(unit)
def setUp(self):
super(ClearUnitKnownhostCacheTestCase, self).setUp(
actions, [
"charmhelpers.core.hookenv.action_get",
"charmhelpers.core.hookenv.action_set",
"charmhelpers.core.hookenv.relation_ids",
"charmhelpers.core.hookenv.related_units",
"charmhelpers.core.hookenv.relation_get",
"hooks.nova_cc_utils.clear_hostset_cache_for",
"hooks.nova_cc_hooks.update_ssh_key",
"hooks.nova_cc_hooks.notify_ssh_keys_to_compute_units",
])
self.relation_ids.return_value = ["r:1", "r:2"]
self.related_units.side_effect = [
['aservice/1', 'aservice/2', 'aservice/3', 'aservice/4'],
['bservice/1', 'bservice/2', 'bservice/3'],
]
self.relation_get.side_effect = \
ClearUnitKnownhostCacheTestCase._relation_get
def test_target_unit(self):
self.action_get.return_value = 'aservice/2'
actions.clear_unit_knownhost_cache([])
self.action_set.assert_called_once_with({
"Units updated": [{'aservice/2': '10.0.0.2'}]
})
self.clear_hostset_cache_for.assert_called_once_with('10.0.0.2')
self.update_ssh_key.assert_called_once_with(rid="r:1",
unit="aservice/2")
self.notify_ssh_keys_to_compute_units.assert_called_once_with(
rid="r:1", unit="aservice/4")
def test_target_service(self):
self.action_get.return_value = 'bservice'
actions.clear_unit_knownhost_cache([])
self.action_set.assert_called_once_with({
"Units updated": [
{'bservice/1': '10.0.1.1'},
{'bservice/2': '10.0.1.2'},
{'bservice/3': '10.0.1.3'},
]
})
self.clear_hostset_cache_for.assert_has_calls(
[mock.call('10.0.1.1'),
mock.call('10.0.1.2'),
mock.call('10.0.1.3')])
self.update_ssh_key.assert_has_calls(
[mock.call(rid="r:2", unit="bservice/1"),
mock.call(rid="r:2", unit="bservice/2"),
mock.call(rid="r:2", unit="bservice/3")])
self.notify_ssh_keys_to_compute_units.assert_has_calls(
[mock.call(rid="r:2", unit="bservice/3")])
def test_target_all(self):
self.action_get.return_value = ''
actions.clear_unit_knownhost_cache([])
self.action_set.assert_called_once_with({
"Units updated": [
{'aservice/1': '10.0.0.1'},
{'aservice/2': '10.0.0.2'},
{'aservice/3': '10.0.0.3'},
{'aservice/4': '10.0.0.4'},
{'bservice/1': '10.0.1.1'},
{'bservice/2': '10.0.1.2'},
{'bservice/3': '10.0.1.3'},
]
})
# check both services were updated; that'll imply the other calls were
# made.
self.notify_ssh_keys_to_compute_units.assert_has_calls(
[mock.call(rid="r:1", unit="aservice/4"),
mock.call(rid="r:2", unit="bservice/3")])
class MainTestCase(CharmTestCase):
def setUp(self):

View File

@ -68,7 +68,7 @@ TO_PATCH = [
'hooks.nova_cc_utils.serial_console_settings',
'hooks.nova_cc_utils.services',
'hooks.nova_cc_utils.ssh_authorized_keys_lines',
'hooks.nova_cc_utils.ssh_resolve_compute_hosts',
'hooks.nova_cc_utils.ssh_compute_add_known_hosts',
'hooks.nova_cc_utils.ssh_known_hosts_lines',
'uuid',
]
@ -425,14 +425,14 @@ class NovaCCHooksTests(CharmTestCase):
hooks._goal_state_achieved_for_relid('aservice', None))
@patch('hooks.nova_cc_utils.add_authorized_key_if_doesnt_exist')
@patch('hooks.nova_cc_utils.ssh_resolve_compute_hosts')
@patch('hooks.nova_cc_utils.ssh_compute_add_known_hosts')
@patch('hooks.nova_cc_hooks._goal_state_achieved_for_relid')
@patch('hooks.nova_cc_utils.remote_service_from_unit')
def test_update_ssh_keys_and_notify_compute_units_ssh_migration(
self,
mock_remote_service_from_unit,
mock__goal_state_achieved_for_relid,
mock_ssh_resolve_compute_hosts,
mock_ssh_compute_add_known_hosts,
mock_add_authorized_key_if_doesnt_exist):
mock_remote_service_from_unit.return_value = 'aservice'
mock__goal_state_achieved_for_relid.return_value = True
@ -444,8 +444,8 @@ class NovaCCHooksTests(CharmTestCase):
self.ssh_authorized_keys_lines.return_value = [
'auth_0', 'auth_1', 'auth_2']
hooks.update_ssh_keys_and_notify_compute_units()
mock_ssh_resolve_compute_hosts.assert_called_once_with(
'aservice', '10.0.0.1', '', user=None)
mock_ssh_compute_add_known_hosts.assert_called_once_with(
'aservice', ['10.0.0.1'], user=None)
mock_add_authorized_key_if_doesnt_exist.assert_called_once_with(
'fookey', 'aservice', '10.0.0.1', user=None)
expected_relations = [
@ -470,14 +470,14 @@ class NovaCCHooksTests(CharmTestCase):
'cloud-compute', None)
@patch('hooks.nova_cc_utils.add_authorized_key_if_doesnt_exist')
@patch('hooks.nova_cc_utils.ssh_resolve_compute_hosts')
@patch('hooks.nova_cc_utils.ssh_compute_add_known_hosts')
@patch('hooks.nova_cc_hooks._goal_state_achieved_for_relid')
@patch('hooks.nova_cc_utils.remote_service_from_unit')
def test_update_ssh_keys_and_notify_compute_units_nova_public_key(
self,
mock_remote_service_from_unit,
mock__goal_state_achieved_for_relid,
mock_ssh_resolve_compute_hosts,
mock_ssh_compute_add_known_hosts,
mock_add_authorized_key_if_doesnt_exist):
mock_remote_service_from_unit.return_value = 'aservice'
mock__goal_state_achieved_for_relid.return_value = True
@ -489,8 +489,8 @@ class NovaCCHooksTests(CharmTestCase):
self.ssh_authorized_keys_lines.return_value = [
'auth_0', 'auth_1', 'auth_2']
hooks.update_ssh_keys_and_notify_compute_units()
mock_ssh_resolve_compute_hosts.assert_called_once_with(
'aservice', '10.0.0.1', '', user='nova')
mock_ssh_compute_add_known_hosts.assert_called_once_with(
'aservice', ['10.0.0.1'], user='nova')
mock_add_authorized_key_if_doesnt_exist.assert_called_once_with(
'fookey', 'aservice', '10.0.0.1', user='nova')
expected_relations = [