[k8s] Support CA certs rotate

The owner of a k8s cluster can now rotate the CA certificate to re-generate
the CA of the cluster. The service account keys and the certs of all nodes
are regenerated as well. Cluster users need to get a new kubeconfig
to access the Kubernetes API. This function is only supported by
the Fedora CoreOS driver.

To test this patch with python-magnumclient, you also need the patch
https://review.opendev.org/#/c/724243/. Without it, you will see
an error about "not enough values to unpack", even though the CA cert
rotate request has been processed correctly on the Magnum server side.

Task: 39580
Story: 2005201

Change-Id: I4ae12f928e4f49b99732fba097371692cb35d9ee
This commit is contained in:
Feilong Wang 2020-04-29 13:04:46 +12:00
parent 79c4b72f6e
commit 8020391e4a
16 changed files with 255 additions and 17 deletions

View File

@ -119,3 +119,29 @@ Response Example
.. literalinclude:: samples/certificates-ca-sign-resp.json
:language: javascript
Rotate the CA certificate for a bay/cluster
===========================================
.. rest_method:: PATCH /v1/certificates/{bay_uuid/cluster_uuid}
Rotate the CA certificate for a bay/cluster and invalidate all user
certificates.
Response Codes
--------------
.. rest_status_code:: success status.yaml
- 202
.. rest_status_code:: error status.yaml
- 400
Request
-------
.. rest_parameters:: parameters.yaml
- cluster: cluster_id

View File

@ -2467,6 +2467,9 @@ Rotate Certificate
openstack coe ca rotate secure-k8s-cluster
Please note that the CA rotate function is currently supported only
by the Fedora CoreOS driver.
User Examples
-------------

View File

@ -27,6 +27,21 @@ from magnum.common import policy
from magnum import objects
class ClusterID(wtypes.Base):
    """API representation of a cluster ID

    This class enforces type checking and value constraints, and converts
    between the internal object model and the API representation of a cluster
    ID.

    It is the declared response type of ``CertificateController.patch``,
    which returns ``ClusterID(cluster.uuid)`` after requesting a CA
    rotation, so the response body carries only the cluster's UUID.
    """

    # The only attribute of this type, declared with the ``types.uuid``
    # API type.
    uuid = types.uuid
    """Unique UUID for this cluster"""

    def __init__(self, uuid):
        # Store the caller-supplied UUID on the instance.
        self.uuid = uuid
class Certificate(base.APIBase):
"""API representation of a certificate.
@ -167,7 +182,7 @@ class CertificateController(base.Controller):
cert_obj)
return Certificate.convert_with_links(new_cert)
@expose.expose(None, types.uuid_or_name, status_code=202)
@expose.expose(ClusterID, types.uuid_or_name, status_code=202)
def patch(self, cluster_ident):
context = pecan.request.context
cluster = api_utils.get_resource('Cluster', cluster_ident)
@ -176,4 +191,7 @@ class CertificateController(base.Controller):
if cluster.cluster_template.tls_disabled:
raise exception.NotSupported("Rotating the CA certificate on a "
"non-TLS cluster is not supported")
pecan.request.rpcapi.rotate_ca_certificate(cluster)
return ClusterID(cluster.uuid)

View File

@ -12,13 +12,19 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from heatclient import exc
from oslo_log import log as logging
from pycadf import cadftaxonomy as taxonomy
from magnum.common import exception
from magnum.common import profiler
from magnum.conductor.handlers.common import cert_manager
from magnum.conductor import utils as conductor_utils
from magnum.drivers.common import driver
from magnum.i18n import _
from magnum import objects
from magnum.objects import fields
import six
LOG = logging.getLogger(__name__)
@ -57,6 +63,46 @@ class Handler(object):
return certificate
def rotate_ca_certificate(self, context, cluster):
    """Rotate the CA certificate of ``cluster``.

    Regenerates the cluster certificates through ``cert_manager`` and then
    asks the cluster's driver to roll the new CA out. On success the
    cluster is saved in ``UPDATE_IN_PROGRESS``; on failure it is saved in
    ``UPDATE_FAILED`` with the error text as status reason and a failure
    notification is emitted.

    :param context: request context handed on to the cert manager, the
                    driver lookup and the notifier.
    :param cluster: cluster object whose CA is rotated.
    :returns: the same ``cluster`` object after it has been saved.
    :raises exception.NotSupported: if the cluster status is not one of
        the accepted ``*_COMPLETE`` states.
    :raises exception.InvalidParameterValue: if the rotation failed with
        a heatclient ``HTTPBadRequest``.
    :raises Exception: any other failure is re-raised unchanged.
    """
    LOG.info('start rotate_ca_certificate for cluster: %s', cluster.uuid)

    def _notify_failure():
        # Both failure paths emit the same "update failed" notification.
        conductor_utils.notify_about_cluster_operation(
            context, taxonomy.ACTION_UPDATE, taxonomy.OUTCOME_FAILURE,
            cluster)

    # Statuses from which a rotation may be started.
    status = fields.ClusterStatus
    settled_states = (
        status.CREATE_COMPLETE,
        status.UPDATE_COMPLETE,
        status.RESUME_COMPLETE,
        status.RESTORE_COMPLETE,
        status.ROLLBACK_COMPLETE,
        status.SNAPSHOT_COMPLETE,
        status.CHECK_COMPLETE,
        status.ADOPT_COMPLETE,
    )
    if cluster.status not in settled_states:
        _notify_failure()
        reason = _('Updating a cluster when status is "%s"') % cluster.status
        raise exception.NotSupported(operation=reason)

    try:
        # re-generate the ca certs
        cert_manager.generate_certificates_to_cluster(cluster,
                                                      context=context)
        rotating_driver = driver.Driver.get_driver_for_cluster(context,
                                                               cluster)
        rotating_driver.rotate_ca_certificate(context, cluster)
        cluster.status = status.UPDATE_IN_PROGRESS
        cluster.status_reason = None
    except Exception as err:
        # Persist the failure before propagating it.
        cluster.status = status.UPDATE_FAILED
        cluster.status_reason = six.text_type(err)
        cluster.save()
        _notify_failure()
        if not isinstance(err, exc.HTTPBadRequest):
            raise
        # A Heat "bad request" is surfaced as an invalid-parameter error.
        raise exception.InvalidParameterValue(message=six.text_type(err))

    cluster.save()
    return cluster

View File

@ -77,7 +77,7 @@ EOF
curl $VERIFY_CA -X GET \
-H "X-Auth-Token: $USER_TOKEN" \
-H "OpenStack-API-Version: container-infra latest" \
$MAGNUM_URL/certificates/$CLUSTER_UUID | python -c 'import sys, json; print(json.load(sys.stdin)["pem"])' > $CA_CERT
$MAGNUM_URL/certificates/$CLUSTER_UUID | python -c 'import sys, json; print(json.load(sys.stdin)["pem"])' >> $CA_CERT
# Generate client's private key and csr
$ssh_cmd openssl genrsa -out "${_KEY}" 4096

View File

@ -112,7 +112,7 @@ EOF
curl $VERIFY_CA -X GET \
-H "X-Auth-Token: $USER_TOKEN" \
-H "OpenStack-API-Version: container-infra latest" \
$MAGNUM_URL/certificates/$CLUSTER_UUID | python -c 'import sys, json; print(json.load(sys.stdin)["pem"])' > ${CA_CERT}
$MAGNUM_URL/certificates/$CLUSTER_UUID | python -c 'import sys, json; print(json.load(sys.stdin)["pem"])' >> ${CA_CERT}
# Generate server's private key and csr
$ssh_cmd openssl genrsa -out "${_KEY}" 4096
@ -192,11 +192,13 @@ echo -e "${KUBE_SERVICE_ACCOUNT_PRIVATE_KEY}" > ${cert_dir}/service_account_priv
# Common certs and key are created for both etcd and kubernetes services.
# Both etcd and kube user should have permission to access the certs and key.
$ssh_cmd groupadd kube_etcd
$ssh_cmd usermod -a -G kube_etcd etcd
$ssh_cmd usermod -a -G kube_etcd kube
$ssh_cmd chmod 550 "${cert_dir}"
$ssh_cmd chown -R kube:kube_etcd "${cert_dir}"
$ssh_cmd chmod 440 "$cert_dir/server.key"
$ssh_cmd mkdir -p /etc/etcd/certs
$ssh_cmd cp ${cert_dir}/* /etc/etcd/certs
# Create the shared kube_etcd group, fix ownership/permissions of the cert
# directory and copy the certs to /etc/etcd/certs, but only when no line of
# /etc/group matches "kube_etcd".
# NOTE(review): /etc/group is read directly here, while groupadd/usermod run
# through $ssh_cmd -- confirm both look at the same filesystem.
# NOTE(review): the chmod/chown and the copy into /etc/etcd/certs sit inside
# this guard too, so they are skipped whenever the group already exists --
# confirm that is intended when this script is re-run to regenerate certs.
if [ -z "`cat /etc/group | grep kube_etcd`" ]; then
    $ssh_cmd groupadd kube_etcd
    $ssh_cmd usermod -a -G kube_etcd etcd
    $ssh_cmd usermod -a -G kube_etcd kube
    $ssh_cmd chmod 550 "${cert_dir}"
    $ssh_cmd chown -R kube:kube_etcd "${cert_dir}"
    $ssh_cmd chmod 440 "$cert_dir/server.key"
    $ssh_cmd mkdir -p /etc/etcd/certs
    $ssh_cmd cp ${cert_dir}/* /etc/etcd/certs
fi

View File

@ -0,0 +1,45 @@
# Rotate the CA certs on a master node.
#
# Runs only when both service account key inputs are non-empty: annotates
# every Deployment and DaemonSet so their pods are replaced, restarts the
# control-plane and node services, then re-annotates calico-node if present.
echo "START: rotate CA certs on master"

set +x
. /etc/sysconfig/heat-params
set -x

set -eu -o pipefail

ssh_cmd="ssh -F /srv/magnum/.ssh/config root@localhost"

# Keep xtrace off while the service account keys are expanded; with it on,
# the private key would be written verbatim into the deployment log.
set +x
service_account_key=$kube_service_account_key_input
service_account_private_key=$kube_service_account_private_key_input
rotate_ca_on_master=false
if [ -n "$service_account_key" ] && [ -n "$service_account_private_key" ]; then
    rotate_ca_on_master=true
fi
set -x

# Annotate the pod template of every Deployment and DaemonSet in every
# namespace with ca-rotation=1 to trigger pod replacement in a safer
# rolling fashion.
_ca_rotation_annotate_workloads() {
    local patch namespace kind name
    patch='{"spec":{"template":{"metadata":{"annotations":{"ca-rotation": "1"}}}}}'
    for namespace in $(kubectl get namespace -o jsonpath='{.items[*].metadata.name}'); do
        for kind in deployment daemonset; do
            for name in $(kubectl get "${kind}" -n "${namespace}" -o jsonpath='{.items[*].metadata.name}'); do
                kubectl patch "${kind}" -n "${namespace}" "${name}" -p "${patch}"
            done
        done
    done
}

if [ "$rotate_ca_on_master" = "true" ]; then

    # Follow the instructions on https://kubernetes.io/docs/tasks/tls/manual-rotation-of-ca-certificates/
    # One pass is enough: patching the same annotation value a second time
    # changes nothing and therefore triggers no further rollout.
    _ca_rotation_annotate_workloads

    for service in etcd kube-apiserver kube-controller-manager kube-scheduler kubelet kube-proxy; do
        echo "restart service $service"
        $ssh_cmd systemctl restart $service
    done

    # NOTE(flwang): Re-patch the calico-node daemonset again to make sure all pods are being recreated
    # Only when the daemonset exists: an unconditional patch fails with
    # NotFound on clusters without calico-node and aborts the script under
    # "set -e". Any other kubectl error still aborts via the assignment.
    calico_daemonset=$(kubectl get daemonset -n kube-system calico-node --ignore-not-found -o name)
    if [ -n "$calico_daemonset" ]; then
        kubectl patch daemonset -n kube-system calico-node -p '{"spec":{"template":{"metadata":{"annotations":{"ca-rotation": "2"}}}}}'
    fi
fi

echo "END: rotate CA certs on master"

View File

@ -0,0 +1,22 @@
# Rotate the CA certs on a worker node.
#
# Runs only when both service account key inputs are non-empty: restarts
# kubelet and kube-proxy.
echo "START: rotate CA certs on worker"

set +x
. /etc/sysconfig/heat-params
set -x

set -eu -o pipefail

ssh_cmd="ssh -F /srv/magnum/.ssh/config root@localhost"

# Keep xtrace off while the service account keys are expanded; with it on,
# the private key would be written verbatim into the deployment log.
set +x
service_account_key=$kube_service_account_key_input
service_account_private_key=$kube_service_account_private_key_input
rotate_ca_on_worker=false
if [ -n "$service_account_key" ] && [ -n "$service_account_private_key" ]; then
    rotate_ca_on_worker=true
fi
set -x

if [ "$rotate_ca_on_worker" = "true" ]; then
    for service in kubelet kube-proxy; do
        echo "restart service $service"
        $ssh_cmd systemctl restart $service
    done
fi

echo "END: rotate CA certs on worker"

View File

@ -9,7 +9,7 @@ else
kubecontrol="/var/lib/containers/atomic/heat-container-agent.0/rootfs/usr/bin/kubectl --kubeconfig $KUBECONFIG"
fi
new_kube_tag="$kube_tag_input"
new_kube_image_digest="$kube_image_digest"
new_kube_image_digest="$kube_image_digest_input"
new_ostree_remote="$ostree_remote_input"
new_ostree_commit="$ostree_commit_input"

View File

@ -32,6 +32,7 @@ from magnum.common import exception
from magnum.common import keystone
from magnum.common import octavia
from magnum.common import short_id
from magnum.common.x509 import operations as x509
from magnum.conductor.handlers.common import cert_manager
from magnum.conductor.handlers.common import trust_manager
from magnum.conductor import utils as conductor_utils
@ -445,6 +446,32 @@ class FedoraKubernetesDriver(KubernetesDriver):
}
return extra_params
def rotate_ca_certificate(self, context, cluster):
    """Start a CA rotation by updating the cluster's Heat stack.

    Generates a new "Kubernetes Service Account" key pair and passes it to
    the existing stack as the ``kube_service_account_key`` and
    ``kube_service_account_private_key`` parameters.

    :param context: request context used to look up the cluster template
                    and to build the OpenStack clients.
    :param cluster: cluster whose stack (``cluster.stack_id``) is updated.
    :raises exception.NotSupported: if the cluster template's
        ``cluster_distro`` is not ``fedora-coreos``.
    """
    template = conductor_utils.retrieve_cluster_template(context, cluster)
    supported_distros = ["fedora-coreos"]
    if template.cluster_distro not in supported_distros:
        raise exception.NotSupported("Rotating the CA certificate is "
                                     "not supported for cluster with "
                                     "cluster_distro: %s." %
                                     template.cluster_distro)

    osc = clients.OpenStackClients(context)

    # Newlines are passed as a literal backslash-n sequence in the
    # stack parameters.
    key_pair = x509.generate_csr_and_key(u"Kubernetes Service Account")
    public_pem = key_pair["public_key"].replace("\n", "\\n")
    private_pem = key_pair["private_key"].replace("\n", "\\n")

    stack_args = {
        # Update the existing stack with rollback left enabled.
        'existing': True,
        'parameters': {
            'kube_service_account_key': public_pem,
            'kube_service_account_private_key': private_pem,
        },
        'disable_rollback': False,
    }
    osc.heat().stacks.update(cluster.stack_id, **stack_args)
class HeatPoller(object):

View File

@ -1472,7 +1472,8 @@ resources:
containerd_version: {get_param: containerd_version}
containerd_tarball_url: {get_param: containerd_tarball_url}
containerd_tarball_sha256: {get_param: containerd_tarball_sha256}
kube_service_account_key: {get_param: kube_service_account_key}
kube_service_account_private_key: {get_param: kube_service_account_private_key}
outputs:
api_address:

View File

@ -1006,12 +1006,16 @@ resources:
- name: kube_image_digest_input
- name: ostree_remote_input
- name: ostree_commit_input
- name: kube_service_account_key_input
- name: kube_service_account_private_key_input
config:
list_join:
- "\n"
-
- "#!/bin/bash"
- get_file: ../../common/templates/kubernetes/fragments/upgrade-kubernetes.sh
- get_file: ../../common/templates/kubernetes/fragments/make-cert.sh
- get_file: ../../common/templates/kubernetes/fragments/rotate-kubernetes-ca-certs-master.sh
upgrade_kubernetes_deployment:
type: OS::Heat::SoftwareDeployment
@ -1025,6 +1029,8 @@ resources:
kube_image_digest_input: {get_param: kube_image_digest}
ostree_remote_input: {get_param: ostree_remote}
ostree_commit_input: {get_param: ostree_commit}
kube_service_account_key_input: {get_param: kube_service_account_key}
kube_service_account_private_key_input: {get_param: kube_service_account_private_key}
outputs:

View File

@ -350,6 +350,21 @@ parameters:
type: string
description: sha256 of the target containerd tarball.
kube_service_account_key:
type: string
hidden: true
description: >
The signed cert will be used to verify the k8s service account tokens
during authentication.
NOTE: This is used for worker nodes to trigger certs rotate.
kube_service_account_private_key:
type: string
hidden: true
description: >
The private key will be used to sign generated k8s service account
tokens.
conditions:
image_based: {equals: [{get_param: boot_volume_size}, 0]}
@ -582,12 +597,16 @@ resources:
- name: kube_tag_input
- name: ostree_remote_input
- name: ostree_commit_input
- name: kube_service_account_key_input
- name: kube_service_account_private_key_input
config:
list_join:
- "\n"
-
- "#!/bin/bash"
- get_file: ../../common/templates/kubernetes/fragments/upgrade-kubernetes.sh
- get_file: ../../common/templates/kubernetes/fragments/make-cert-client.sh
- get_file: ../../common/templates/kubernetes/fragments/rotate-kubernetes-ca-certs-worker.sh
upgrade_kubernetes_deployment:
type: OS::Heat::SoftwareDeployment
@ -600,6 +619,8 @@ resources:
kube_tag_input: {get_param: kube_tag}
ostree_remote_input: {get_param: ostree_remote}
ostree_commit_input: {get_param: ostree_commit}
kube_service_account_key_input: {get_param: kube_service_account_key}
kube_service_account_private_key_input: {get_param: kube_service_account_private_key}
outputs:

View File

@ -195,6 +195,8 @@ class TestRotateCaCertificate(api_base.FunctionalTest):
def setUp(self):
super(TestRotateCaCertificate, self).setUp()
self.cluster_template = obj_utils.create_test_cluster_template(
self.context, cluster_distro='fedora-coreos')
self.cluster = obj_utils.create_test_cluster(self.context)
conductor_api_patcher = mock.patch('magnum.conductor.api.API')

View File

@ -140,3 +140,14 @@ class K8sFedoraAtomicV1DriverTest(base.DbTestCase):
self.driver.upgrade_cluster, self.context,
self.cluster_obj, self.cluster_template, 1,
self.nodegroup_obj)
@patch('magnum.common.keystone.KeystoneClientV3')
@patch('magnum.common.clients.OpenStackClients')
def test_ca_rotate_not_supported(self, mock_osc, mock_keystone):
    """The driver rejects a CA rotation for a fedora-atomic template."""
    mock_keystone.is_octavia_enabled.return_value = False

    # Persist a distro other than fedora-coreos on the template.
    self.cluster_template.cluster_distro = 'fedora-atomic'
    self.cluster_template.save()

    with self.assertRaises(exception.NotSupported):
        self.driver.rotate_ca_certificate(self.context, self.cluster_obj)

View File

@ -0,0 +1,8 @@
---
features:
- |
The owner of a Kubernetes cluster can now rotate the CA certificate to
re-generate the CA of the cluster. The service account keys and the certs
of all nodes are regenerated as well. Cluster users need to get a new
kubeconfig to access the Kubernetes API. This function is only supported
by the Fedora CoreOS driver.