From 635d933a1b729cbb9bb93789bb2bf044d79a3381 Mon Sep 17 00:00:00 2001 From: Kam Nasim Date: Tue, 20 Mar 2018 11:04:34 -0400 Subject: [PATCH] Fix upgrade with Host TPM This commit allows TPM certificates, per controller host, to be persisted across: - Host Reinstalls - Controller Restores - Doing a Backup on controller-1 and doing a Restore on controller-0 - DX / SX Upgrades By populating them in Sysinv's per host tpmdevice table's tpm_data field, we manage to push these certs in as hiera data and recreate the TPM certs. The TPM binary memory maps are base64 encoded to allow transmission over RPC and storage in DB, as by default when the Agent RPC message is parsed, it is serialized using json.dumps which expects utf-8 encoding. The Binary maps are base64 decoded prior to being written to the hiera records so that the right content ends up in the files when the config manifest applies. Change-Id: Ie8b282808afacbba92949eefb85e96d6be441822 Story: 2002886 Task: 22847 Signed-off-by: Jack Ding --- .../100-sysinv-tpm-data-migration.py | 86 +++++++++++++++++++ puppet-manifests/centos/build_srpm.data | 2 +- .../src/modules/platform/manifests/config.pp | 19 ++++ sysinv/sysinv/sysinv/sysinv/agent/manager.py | 62 ++++++------- .../sysinv/sysinv/api/controllers/v1/host.py | 36 ++++---- sysinv/sysinv/sysinv/sysinv/common/utils.py | 33 ++++++- .../sysinv/sysinv/sysinv/conductor/manager.py | 74 ++++------------ .../sysinv/sysinv/sysinv/conductor/rpcapi.py | 23 +---- .../sysinv/sysinv/sysinv/objects/tpmdevice.py | 1 + .../sysinv/sysinv/sysinv/puppet/platform.py | 22 +++++ 10 files changed, 232 insertions(+), 126 deletions(-) create mode 100644 controllerconfig/controllerconfig/upgrade-scripts/100-sysinv-tpm-data-migration.py diff --git a/controllerconfig/controllerconfig/upgrade-scripts/100-sysinv-tpm-data-migration.py b/controllerconfig/controllerconfig/upgrade-scripts/100-sysinv-tpm-data-migration.py new file mode 100644 index 0000000000..4cf17fc1ae --- /dev/null +++ 
b/controllerconfig/controllerconfig/upgrade-scripts/100-sysinv-tpm-data-migration.py @@ -0,0 +1,86 @@ +#!/usr/bin/env python +# Copyright (c) 2018 Wind River Systems, Inc. +# +# The right to copy, distribute, modify, or otherwise make use +# of this software may be licensed only pursuant to the terms +# of an applicable Wind River license agreement. +# +# This migration script copies the inode capabilities reserved +# field, which contain tpm_data to the tpm_data field in the +# tpmdevices DB table + +import sys + +import psycopg2 +from controllerconfig.common import log +from psycopg2.extras import RealDictCursor + +LOG = log.get_logger(__name__) + + +def main(): + action = None + from_release = None + to_release = None # noqa + arg = 1 + while arg < len(sys.argv): + if arg == 1: + from_release = sys.argv[arg] + elif arg == 2: + to_release = sys.argv[arg] # noqa + elif arg == 3: + action = sys.argv[arg] + else: + print ("Invalid option %s." % sys.argv[arg]) + return 1 + arg += 1 + + log.configure() + + if from_release == "17.06" and action == "migrate": + try: + LOG.info("performing sysinv TPM Device migration from release " + "%s to %s with action: %s" % + (from_release, to_release, action)) + copy_sysinv_tpm_data() + except Exception as ex: + LOG.exception(ex) + print ex + return 1 + + +# We will update for all controller hosts. +# We stow the TPM data in R4, in the inode.capabilities +# field since that is the only JSONEncodedDict field thats +# organized by hostid and vacant in R4. 
+def copy_sysinv_tpm_data(): + conn = psycopg2.connect("dbname='sysinv' user='postgres'") + with conn: + with conn.cursor(cursor_factory=RealDictCursor) as cur: + cur.execute("SELECT id FROM i_host WHERE " + "personality='controller';") + ctrhosts = cur.fetchall() + if ctrhosts is None or len(ctrhosts) == 0: + LOG.exception( + "Failed to fetch controller host information") + raise + for ctrhost in ctrhosts: + # we may have multiple nodes per host, and + # we only populate one of them (per host) with + # the tpm_data. + cur.execute("SELECT capabilities FROM i_node WHERE " + "forihostid='%s' AND capabilities!='{}';" % + ctrhost['id']) + tpm_data = cur.fetchone() + if tpm_data and 'capabilities' in tpm_data: + tpm_data = tpm_data['capabilities'] + LOG.info("Updating tpm_data for host '%s'" % ctrhost['id']) + cur.execute("UPDATE tpmdevice SET tpm_data='%s' WHERE " + "host_id='%s' AND tpm_data is null ;" % + (tpm_data, ctrhost['id'])) + # clear the capabilities field for all hosts + cur.execute("UPDATE i_node SET capabilities='{}';") + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/puppet-manifests/centos/build_srpm.data b/puppet-manifests/centos/build_srpm.data index d27f609987..6171a4746a 100644 --- a/puppet-manifests/centos/build_srpm.data +++ b/puppet-manifests/centos/build_srpm.data @@ -1,2 +1,2 @@ SRC_DIR="src" -TIS_PATCH_VER=57 +TIS_PATCH_VER=58 diff --git a/puppet-manifests/src/modules/platform/manifests/config.pp b/puppet-manifests/src/modules/platform/manifests/config.pp index fa79ecfee0..77eb0fd35d 100644 --- a/puppet-manifests/src/modules/platform/manifests/config.pp +++ b/puppet-manifests/src/modules/platform/manifests/config.pp @@ -208,6 +208,24 @@ class platform::config::timezone } +class platform::config::tpm { + $tpm_certs = hiera_hash('platform::tpm::tpm_data', undef) + if $tpm_certs != undef { + # iterate through each tpm_cert creating it if it doesn't exist + $tpm_certs.each |String $key, String $value| { + file { 
"create-TPM-cert-${key}": + path => $key, + ensure => present, + owner => root, + group => root, + mode => '0644', + content => $value, + } + } + } +} + + class platform::config::pre { group { 'nobody': ensure => 'present', @@ -218,6 +236,7 @@ class platform::config::pre { include ::platform::config::hostname include ::platform::config::hosts include ::platform::config::file + include ::platform::config::tpm } diff --git a/sysinv/sysinv/sysinv/sysinv/agent/manager.py b/sysinv/sysinv/sysinv/sysinv/agent/manager.py index b4b0381060..cf173b309c 100644 --- a/sysinv/sysinv/sysinv/sysinv/agent/manager.py +++ b/sysinv/sysinv/sysinv/sysinv/agent/manager.py @@ -1522,7 +1522,7 @@ class AgentManager(service.PeriodicService): response_dict) def apply_tpm_config(self, context, tpm_context): - """Configure TPM device on this node + """Configure or Update TPM device on this node :param context: request context :param tpm_context: the tpm object context @@ -1536,44 +1536,44 @@ class AgentManager(service.PeriodicService): # agent applies the tpmconfig self._tpmconfig_host_first_apply = True - # create a tpmdevice configuration for this host self._tpmconfig_rpc_failure = False response_dict = {} + attribute_dict = {} rpcapi = conductor_rpcapi.ConductorAPI( topic=conductor_rpcapi.MANAGER_TOPIC) - tpmdevice = None - update_dict = {} - if tpm_context.get('modify', False): - # we are editing an existing configuration - # reset the state to APPLYING and pass in - # update parameters. Since this request - # came from the Sysinv-api layer, assume - # update parameters have already been validated - update_dict['state'] = constants.TPMCONFIG_APPLYING - tpmdevice = rpcapi.tpm_device_update_by_host(context, - self._ihost_uuid, - update_dict) - else: - # pass in a dictionary of attributes if need be - tpmdevice = rpcapi.tpm_device_create_by_host(context, - self._ihost_uuid, - {}) - if not tpmdevice: + # invoke tpmdevice-setup on this node. 
+ # + # We also need to fetch and persist the content + # of the TPM certificates in DB. + try: + utils.execute('tpmdevice-setup', + tpm_context['cert_path'], + tpm_context['tpm_path'], + tpm_context['public_path'], + run_as_root=True) + + attribute_dict['tpm_data'] = \ + utils.read_filtered_directory_content( + os.path.dirname(tpm_context['tpm_path']), + "*.bin", "*.tpm") + except exception.ProcessExecutionError as e: + LOG.exception(e) response_dict['is_configured'] = False else: - # invoke tpmdevice-setup on this node - try: - utils.execute('tpmdevice-setup', - tpm_context['cert_path'], - tpm_context['tpm_path'], - tpm_context['public_path'], - run_as_root=True) - except exception.ProcessExecutionError as e: - LOG.exception(e) + response_dict['is_configured'] = True + attribute_dict['state'] = constants.TPMCONFIG_APPLYING + + # Only create a TPM device entry if the TPM certificates + # were successfully created + if response_dict['is_configured']: + # Create a new TPM device for this host, or update it + # with new TPM certs if such a device already exists. + tpmdevice = rpcapi.tpm_device_update_by_host(context, + self._ihost_uuid, + attribute_dict) + if not tpmdevice: response_dict['is_configured'] = False - else: - response_dict['is_configured'] = True # we will not tie this to agent audit, send back # response to conductor now. diff --git a/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/host.py b/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/host.py index 0b15214e32..4879fbe9a1 100644 --- a/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/host.py +++ b/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/host.py @@ -4906,31 +4906,31 @@ class HostController(rest.RestController): """Pre swact/unlock semantic checks for TPM configuration""" tpmconfig = utils.get_tpm_config() if tpmconfig: - # retrieve the tpmdevice configuration for this host + # retrieve the tpmdevice configuration for this host. 
+ # If this host got Reinstalled or Restored, and it had + # TPM configured on it prior, then we should still find + # a valid tpmdevice entry for this host. Otherwise this + # is a new host or a previous host that was deleted and re-added tpmdevice = \ pecan.request.dbapi.tpmdevice_get_by_host(ihost['uuid']) - if not tpmdevice: + if not tpmdevice or len(tpmdevice) > 1: raise wsme.exc.ClientSideError( - _("Global TPM configuration found; " - "but no TPM Device configuration on host %s." % - ihost['hostname'])) - # only one entry per host - if len(tpmdevice) > 1: - raise wsme.exc.ClientSideError( - _("Global TPM configuration found; " - "but no TPM Device configuration on host %s." % + _("Global TPM configuration found; but " + "no valid TPM Device configuration on host %s." % ihost['hostname'])) tpmdevice = tpmdevice[0] - if tpmdevice.state: - if tpmdevice.state == constants.TPMCONFIG_APPLYING: - raise wsme.exc.ClientSideError( - _("TPM configuration in progress on host %s; " - "Please wait for operation to complete " - "before re-attempting." % ihost['hostname'])) - elif tpmdevice.state != constants.TPMCONFIG_APPLIED: + if tpmdevice.state == constants.TPMCONFIG_APPLYING: + raise wsme.exc.ClientSideError( + _("TPM configuration in progress on host %s; " + "Please wait for operation to complete " + "before re-attempting." % ihost['hostname'])) + elif tpmdevice.state != constants.TPMCONFIG_APPLIED: + # if the TPM certificate for this host is not + # preserved as tpm_data, then disallow unlock/swact + if not tpmdevice.tpm_data: raise wsme.exc.ClientSideError( _("TPM configuration not fully applied on host %s; " - "Please run system certificate-install -m tpm_mode" + "Please run system certificate-install -m tpm_mode " "before re-attempting." 
% ihost['hostname'])) @staticmethod diff --git a/sysinv/sysinv/sysinv/sysinv/common/utils.py b/sysinv/sysinv/sysinv/sysinv/common/utils.py index 2a880cf084..5311370071 100644 --- a/sysinv/sysinv/sysinv/sysinv/common/utils.py +++ b/sysinv/sysinv/sysinv/sysinv/common/utils.py @@ -29,9 +29,10 @@ import contextlib import datetime import errno import functools - import fcntl +import glob import hashlib +import itertools as it import json import math import os @@ -1658,3 +1659,33 @@ def get_cgts_vg_free_space(): raise Exception("Command vgdisplay failed") return cgts_vg_free + + +def read_filtered_directory_content(dirpath, *filters): + """ Reads the content of a directory, filtered on + glob like expressions. + + Returns a dictionary, with the "key" being the filename + and the "value" being the content of that file + """ + def filter_directory_files(dirpath, *filters): + return it.chain.from_iterable(glob.iglob(dirpath + '/' + filter) + for filter in filters) + + content_dict = {} + for filename in filter_directory_files(dirpath, *filters): + content = "" + with open(os.path.join(filename), 'rb') as obj: + content = obj.read() + try: + # If the filter specified binary files then + # these will need to be base64 encoded so that + # they can be transferred over RPC and stored in DB + content.decode('utf-8') + except UnicodeError: + content = content.encode('base64') + content_dict['base64_encoded_files'] = \ + content_dict.get("base64_encoded_files", []) + [filename] + + content_dict[filename] = content + return content_dict diff --git a/sysinv/sysinv/sysinv/sysinv/conductor/manager.py b/sysinv/sysinv/sysinv/sysinv/conductor/manager.py index e8d39d206c..eaeecd552e 100644 --- a/sysinv/sysinv/sysinv/sysinv/conductor/manager.py +++ b/sysinv/sysinv/sysinv/sysinv/conductor/manager.py @@ -9026,10 +9026,10 @@ class ConductorManager(service.PeriodicService): raise exception.SysinvException(_( "Invalid host_uuid: %s") % host_uuid) - def tpm_device_create_by_host(self, context, + 
def tpm_device_update_by_host(self, context, host_uuid, tpmdevice_dict): - """Synchronously, have the conductor create a tpmdevice per host. - returns the created device + """Synchronously, have the conductor create or update + a tpmdevice per host. :param context: request context. :param host_uuid: uuid or id of the host @@ -9046,68 +9046,28 @@ class ConductorManager(service.PeriodicService): tpm_devices = self.dbapi.tpmdevice_get_by_host(tpm_host.id) if tpm_devices: tpmdevice = self.dbapi.tpmdevice_update(tpm_devices[0].uuid, - {'state': constants.TPMCONFIG_APPLYING}) - + tpmdevice_dict) # update table tpmconfig updated_at as its visible from tpmconfig-show try: tpm_obj = self.dbapi.tpmconfig_get_one() + updated_at = timeutils.utcnow() self.dbapi.tpmconfig_update(tpm_obj.uuid, - {'updated_at': timeutils.utcnow()}) - LOG.info("tpm_device_create_by_host tpmconfig updated_at") + {'updated_at': updated_at}) + LOG.info("TPM config updated at: %s" % updated_at) except exception.NotFound: - LOG.error("tpm_device_create_by_host tpmconfig NotFound") + LOG.error("tpm_device_update_by_host tpmconfig NotFound") else: try: # create new tpmdevice - devicedict = { - 'host_uuid': tpm_host['uuid'], - 'state': constants.TPMCONFIG_APPLYING - } + tpmdevice_dict.update({'host_uuid': tpm_host['uuid']}) tpmdevice = self.dbapi.tpmdevice_create(tpm_host['id'], - devicedict) + tpmdevice_dict) except: LOG.exception("Cannot create TPM device for host %s" % host_uuid) return return tpmdevice - def tpm_device_update_by_host(self, context, - host_uuid, update_dict): - """Synchronously, have the conductor update a tpmdevice per host. - returns the updated device - - :param context: request context. 
- :param host_uuid: uuid or id of the host - :param update_dict: a dictionary of attributes to be updated - - :returns tpmdevice object - """ - try: - tpm_host = self.dbapi.ihost_get(host_uuid) - except exception.ServerNotFound: - LOG.error("Cannot find host by id %s" % host_uuid) - return - - try: - # update the tpmdevice - # since this will be an internal call from the - # agent, we will not validate the update parameters - existing_tpmdevice = \ - self.dbapi.tpmdevice_get_by_host(tpm_host.uuid) - - if (not existing_tpmdevice or len(existing_tpmdevice) > 1): - LOG.error("TPM device not found, or multiple found " - "for host %s" % tpm_host.uuid) - return - - updated_tpmdevice = self.dbapi.tpmdevice_update( - existing_tpmdevice[0].uuid, update_dict) - except: - LOG.exception("TPM device not found, or cannot be updated " - "for host %s" % tpm_host.uuid) - return - return updated_tpmdevice - def cinder_prepare_db_for_volume_restore(self, context): """ Send a request to cinder to remove all volume snapshots and set all @@ -9471,13 +9431,15 @@ class ConductorManager(service.PeriodicService): self._perform_config_certificate_tpm_mode( context, tpm, private_bytes, public_bytes) + file_content = public_bytes + # copy the certificate to shared directory + with os.fdopen(os.open(constants.SSL_PEM_FILE_SHARED, + os.O_CREAT | os.O_WRONLY, + constants.CONFIG_FILE_PERMISSION_ROOT_READ_ONLY), + 'wb') as f: + f.write(file_content) + self._remove_certificate_file(mode, certificate_file) - try: - LOG.info("config_certificate mode=%s remove %s" % - (mode, constants.SSL_PEM_FILE_SHARED)) - os.remove(constants.SSL_PEM_FILE_SHARED) - except OSError: - pass elif mode == constants.CERT_MODE_SSL: config_uuid = self._config_update_hosts(context, personalities) diff --git a/sysinv/sysinv/sysinv/sysinv/conductor/rpcapi.py b/sysinv/sysinv/sysinv/sysinv/conductor/rpcapi.py index 47a9722e66..cabb2ae6de 100644 --- a/sysinv/sysinv/sysinv/sysinv/conductor/rpcapi.py +++ 
b/sysinv/sysinv/sysinv/sysinv/conductor/rpcapi.py @@ -1390,37 +1390,22 @@ class ConductorAPI(sysinv.openstack.common.rpc.proxy.RpcProxy): host_uuid=host_uuid, response_dict=response_dict)) - def tpm_device_create_by_host(self, context, + def tpm_device_update_by_host(self, context, host_uuid, tpmdevice_dict): - """Synchronously , have the conductor create a tpmdevice per host. + """Synchronously , have the conductor create or update + a tpmdevice per host. :param context: request context. :param host_uuid: uuid or id of the host :param tpmdevice_dict: a dictionary of tpm device attributes - :returns: tpmdevice object - """ - return self.call( - context, - self.make_msg('tpm_device_create_by_host', - host_uuid=host_uuid, - tpmdevice_dict=tpmdevice_dict)) - - def tpm_device_update_by_host(self, context, - host_uuid, update_dict): - """Synchronously , have the conductor update a tpmdevice per host. - - :param context: request context. - :param host_uuid: uuid or id of the host - :param update_dict: a dictionary of attributes to be updated - :returns: tpmdevice object """ return self.call( context, self.make_msg('tpm_device_update_by_host', host_uuid=host_uuid, - update_dict=update_dict)) + tpmdevice_dict=tpmdevice_dict)) def cinder_prepare_db_for_volume_restore(self, context): """ diff --git a/sysinv/sysinv/sysinv/sysinv/objects/tpmdevice.py b/sysinv/sysinv/sysinv/sysinv/objects/tpmdevice.py index f30fc1d8bc..86e39d3ee4 100644 --- a/sysinv/sysinv/sysinv/sysinv/objects/tpmdevice.py +++ b/sysinv/sysinv/sysinv/sysinv/objects/tpmdevice.py @@ -21,6 +21,7 @@ class TPMDevice(base.SysinvObject): 'id': int, 'uuid': utils.str_or_none, 'state': utils.str_or_none, + 'tpm_data': utils.dict_or_none, 'host_id': int, 'host_uuid': utils.str_or_none, diff --git a/sysinv/sysinv/sysinv/sysinv/puppet/platform.py b/sysinv/sysinv/sysinv/sysinv/puppet/platform.py index f3e7356524..08f8033dcd 100644 --- a/sysinv/sysinv/sysinv/sysinv/puppet/platform.py +++ 
b/sysinv/sysinv/sysinv/sysinv/puppet/platform.py @@ -65,6 +65,7 @@ class PlatformPuppet(base.BasePuppet): config.update(self._get_host_sysctl_config(host)) config.update(self._get_host_drbd_config(host)) config.update(self._get_host_upgrade_config(host)) + config.update(self._get_host_tpm_config(host)) config.update(self._get_host_cpu_config(host)) config.update(self._get_host_hugepage_config(host)) return config @@ -489,6 +490,27 @@ class PlatformPuppet(base.BasePuppet): }) return config + def _get_host_tpm_config(self, host): + config = {} + if host.personality == constants.CONTROLLER: + try: + tpmdevice = self.dbapi.tpmdevice_get_by_host(host.id) + if tpmdevice and len(tpmdevice) == 1: + tpm_data = tpmdevice[0].tpm_data + # some of the TPM certs may be base64 encoded + # for transmission over RPC and storage in DB, + # convert these back to their native encoding + encoded_files = tpm_data.pop("base64_encoded_files", []) + for binary in encoded_files: + tpm_data[binary] = tpm_data[binary].decode('base64') + config.update({ + 'platform::tpm::tpm_data': tpm_data + }) + except exception.NotFound: + # No TPM device found + pass + return config + def _get_host_cpu_config(self, host): config = {} if constants.COMPUTE in utils.get_personalities(host):