From e924577d64639e70d0a9fd5ebd986c9378d9db67 Mon Sep 17 00:00:00 2001 From: Lingxian Kong Date: Tue, 24 Jul 2018 17:40:09 +1200 Subject: [PATCH] Support to specify workload type for runtime Support to specify ``trusted`` for runtime creation. In Kubernetes orchestrator implementation, it's using ``io.kubernetes.cri-o.TrustedSandbox`` annotation in the pod specification to choose the underlying container runtime. This feature is useful to leverage the security container technology such as Kata containers or gVisor. It also gets rid of the security concerns for running image type function. Story: 2003088 Task: 23172 Change-Id: Ic4fa3e97dcc239c7177448e3cef5d0f02340d302 --- qinling/api/controllers/v1/resources.py | 27 +------- qinling/api/controllers/v1/runtime.py | 19 +++--- .../versions/005_add_trusted_for_runtime.py | 36 ++++++++++ qinling/db/sqlalchemy/models.py | 1 + qinling/engine/default_engine.py | 7 +- qinling/orchestrator/base.py | 4 +- qinling/orchestrator/kubernetes/manager.py | 66 ++++++++++--------- .../kubernetes/templates/deployment.j2 | 2 + .../orchestrator/kubernetes/templates/pod.j2 | 2 + .../unit/api/controllers/v1/test_runtime.py | 3 + qinling/tests/unit/base.py | 3 +- .../tests/unit/engine/test_default_engine.py | 5 +- .../orchestrator/kubernetes/test_manager.py | 41 +++--------- ...orkload-type-support-d613cdb7bb90b2a2.yaml | 9 +++ 14 files changed, 119 insertions(+), 106 deletions(-) create mode 100644 qinling/db/sqlalchemy/migration/alembic_migrations/versions/005_add_trusted_for_runtime.py create mode 100644 releasenotes/notes/workload-type-support-d613cdb7bb90b2a2.yaml diff --git a/qinling/api/controllers/v1/resources.py b/qinling/api/controllers/v1/resources.py index 6c731e18..ed945f56 100644 --- a/qinling/api/controllers/v1/resources.py +++ b/qinling/api/controllers/v1/resources.py @@ -241,25 +241,12 @@ class Runtime(Resource): image = wtypes.text description = wtypes.text is_public = wsme.wsattr(bool, default=True) + trusted = bool status 
= wsme.wsattr(wtypes.text, readonly=True) project_id = wsme.wsattr(wtypes.text, readonly=True) created_at = wsme.wsattr(wtypes.text, readonly=True) updated_at = wsme.wsattr(wtypes.text, readonly=True) - @classmethod - def sample(cls): - return cls( - id='123e4567-e89b-12d3-a456-426655440000', - name='python2.7', - image='lingxiankong/python', - status='available', - is_public=True, - project_id='default', - description='Python 2.7 environment.', - created_at='1970-01-01T00:00:00.000000', - updated_at='1970-01-01T00:00:00.000000' - ) - class Runtimes(ResourceList): runtimes = [Runtime] @@ -269,18 +256,6 @@ class Runtimes(ResourceList): super(Runtimes, self).__init__(**kwargs) - @classmethod - def sample(cls): - sample = cls() - sample.runtimes = [Runtime.sample()] - sample.next = ( - "http://localhost:7070/v1/environments?" - "sort_keys=id,name&sort_dirs=asc,desc&limit=10&" - "marker=123e4567-e89b-12d3-a456-426655440000" - ) - - return sample - class RuntimePoolCapacity(Resource): total = wsme.wsattr(int, readonly=True) diff --git a/qinling/api/controllers/v1/runtime.py b/qinling/api/controllers/v1/runtime.py index abf92f8c..d305bf92 100644 --- a/qinling/api/controllers/v1/runtime.py +++ b/qinling/api/controllers/v1/runtime.py @@ -71,6 +71,8 @@ class RuntimesController(rest.RestController): acl.enforce('runtime:create', context.get_ctx()) params = runtime.to_dict() + if 'trusted' not in params: + params['trusted'] = True if not POST_REQUIRED.issubset(set(params.keys())): raise exc.InputException( @@ -117,8 +119,9 @@ class RuntimesController(rest.RestController): def put(self, id, runtime): """Update runtime. - Currently, we only support update name, description, image. When - updating image, send message to engine for asynchronous handling. + Currently, we support update name, description, image. When + updating image, send message to engine for asynchronous + handling. 
""" acl.enforce('runtime:update', context.get_ctx()) @@ -130,8 +133,10 @@ class RuntimesController(rest.RestController): LOG.info('Update resource, params: %s', values, resource={'type': self.type, 'id': id}) + image = values.get('image') + with db_api.transaction(): - if 'image' in values: + if image is not None: pre_runtime = db_api.get_runtime(id) if pre_runtime.status != status.AVAILABLE: raise exc.RuntimeNotAvailableException( @@ -139,7 +144,7 @@ class RuntimesController(rest.RestController): ) pre_image = pre_runtime.image - if pre_image != values['image']: + if pre_image != image: # Ensure there is no function running in the runtime. db_funcs = db_api.get_functions( insecure=True, fields=['id'], runtime_id=id @@ -155,11 +160,9 @@ class RuntimesController(rest.RestController): values['status'] = status.UPGRADING self.engine_client.update_runtime( id, - image=values['image'], - pre_image=pre_image + image=image, + pre_image=pre_image, ) - else: - values.pop('image') runtime_db = db_api.update_runtime(id, values) diff --git a/qinling/db/sqlalchemy/migration/alembic_migrations/versions/005_add_trusted_for_runtime.py b/qinling/db/sqlalchemy/migration/alembic_migrations/versions/005_add_trusted_for_runtime.py new file mode 100644 index 00000000..95b3ab63 --- /dev/null +++ b/qinling/db/sqlalchemy/migration/alembic_migrations/versions/005_add_trusted_for_runtime.py @@ -0,0 +1,36 @@ +# Copyright 2018 OpenStack Foundation. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""add trusted field for runtimes table + +Revision ID: 005 +Revises: 004 +Create Date: 2018-07-24 12:00:00.888969 + +""" + +# revision identifiers, used by Alembic. +revision = '005' +down_revision = '004' + +from alembic import op +import sqlalchemy as sa + + +def upgrade(): + op.add_column( + 'runtimes', + sa.Column('trusted', sa.BOOLEAN, nullable=False, default=True, + server_default="1") + ) diff --git a/qinling/db/sqlalchemy/models.py b/qinling/db/sqlalchemy/models.py index d7764532..17700a33 100644 --- a/qinling/db/sqlalchemy/models.py +++ b/qinling/db/sqlalchemy/models.py @@ -28,6 +28,7 @@ class Runtime(model_base.QinlingSecureModelBase): image = sa.Column(sa.String(255), nullable=False) status = sa.Column(sa.String(32), nullable=False) is_public = sa.Column(sa.BOOLEAN, default=True) + trusted = sa.Column(sa.BOOLEAN, default=True) class Function(model_base.QinlingSecureModelBase): diff --git a/qinling/engine/default_engine.py b/qinling/engine/default_engine.py index f9da3365..303c1318 100644 --- a/qinling/engine/default_engine.py +++ b/qinling/engine/default_engine.py @@ -43,7 +43,8 @@ class DefaultEngine(object): try: self.orchestrator.create_pool( runtime_id, - runtime.image + runtime.image, + trusted=runtime.trusted ) runtime.status = status.AVAILABLE LOG.info('Runtime %s created.', runtime_id) @@ -69,9 +70,7 @@ class DefaultEngine(object): runtime_id, image, pre_image ) - ret = self.orchestrator.update_pool( - runtime_id, image=image - ) + ret = self.orchestrator.update_pool(runtime_id, image=image) if ret: values = {'status': status.AVAILABLE} diff --git a/qinling/orchestrator/base.py b/qinling/orchestrator/base.py index ff06402a..6b28e124 100644 --- a/qinling/orchestrator/base.py +++ b/qinling/orchestrator/base.py @@ -27,7 +27,7 @@ class OrchestratorBase(object): """OrchestratorBase interface.""" @abc.abstractmethod - def create_pool(self, name, image, **kwargs): + def create_pool(self, name, image, trusted=True, **kwargs): raise 
NotImplementedError @abc.abstractmethod @@ -35,7 +35,7 @@ class OrchestratorBase(object): raise NotImplementedError @abc.abstractmethod - def update_pool(self, name, **kwargs): + def update_pool(self, name, image=None, **kwargs): raise NotImplementedError @abc.abstractmethod diff --git a/qinling/orchestrator/kubernetes/manager.py b/qinling/orchestrator/kubernetes/manager.py index 9cc71f34..e8ccff1e 100644 --- a/qinling/orchestrator/kubernetes/manager.py +++ b/qinling/orchestrator/kubernetes/manager.py @@ -125,10 +125,8 @@ class KubernetesManager(base.OrchestratorBase): self.conf.kubernetes.namespace ) - if ( - not ret.status.replicas or - ret.status.replicas != ret.status.available_replicas - ): + if (not ret.status.replicas or + ret.status.replicas != ret.status.available_replicas): raise exc.OrchestratorException('Deployment %s not ready.' % name) def get_pool(self, name): @@ -158,7 +156,7 @@ class KubernetesManager(base.OrchestratorBase): return {"total": total, "available": available} - def create_pool(self, name, image): + def create_pool(self, name, image, trusted=True): deployment_body = self.deployment_template.render( { "name": name, @@ -166,7 +164,8 @@ class KubernetesManager(base.OrchestratorBase): "replicas": self.conf.kubernetes.replicas, "container_name": 'worker', "image": image, - "sidecar_image": self.conf.engine.sidecar_image + "sidecar_image": self.conf.engine.sidecar_image, + "trusted": str(trusted).lower() } ) @@ -222,6 +221,21 @@ class KubernetesManager(base.OrchestratorBase): LOG.info("Pods in deployment %s deleted.", name) LOG.info("Deployment %s deleted.", name) + @tenacity.retry( + wait=tenacity.wait_fixed(5), + stop=tenacity.stop_after_delay(600), + reraise=True, + retry=tenacity.retry_if_exception_type(exc.OrchestratorException) + ) + def _wait_for_upgrade(self, deploy_name): + ret = self.v1extension.read_namespaced_deployment( + deploy_name, + self.conf.kubernetes.namespace + ) + if ret.status.unavailable_replicas is not None: + raise 
exc.OrchestratorException("Deployment %s upgrade not " + "ready." % deploy_name) + def update_pool(self, name, image=None): """Deployment rolling-update. @@ -235,7 +249,6 @@ class KubernetesManager(base.OrchestratorBase): 'spec': { 'containers': [ { - # TODO(kong): Make the name configurable. 'name': 'worker', 'image': image } @@ -248,30 +261,23 @@ class KubernetesManager(base.OrchestratorBase): name, self.conf.kubernetes.namespace, body ) - unavailable_replicas = 1 - # TODO(kong): Make this configurable - retry = 5 - while unavailable_replicas != 0 and retry > 0: - time.sleep(5) - retry = retry - 1 + try: + time.sleep(10) + self._wait_for_upgrade(name) + except exc.OrchestratorException: + LOG.warn("Timeout when waiting for the deployment %s upgrade, " + "Start to roll back.", name) - deploy = self.v1extension.read_namespaced_deployment_status( - name, - self.conf.kubernetes.namespace - ) - unavailable_replicas = deploy.status.unavailable_replicas - - # Handle failure of rolling-update. - if unavailable_replicas > 0: - body = { - "name": name, - "rollbackTo": { - "revision": 0 - } - } - self.v1extension.create_namespaced_deployment_rollback( - name, self.conf.kubernetes.namespace, body - ) + body = {"rollbackTo": {"revision": 0}} + try: + self.v1extension.create_namespaced_deployment_rollback( + name, self.conf.kubernetes.namespace, body + ) + except Exception: + # TODO(lxkong): remove the exception catch until kubernetes + # python lib has a new release. 
Refer to + # https://github.com/kubernetes-client/python/issues/491 + pass return False diff --git a/qinling/orchestrator/kubernetes/templates/deployment.j2 b/qinling/orchestrator/kubernetes/templates/deployment.j2 index bcb2a7b8..dcf882b0 100644 --- a/qinling/orchestrator/kubernetes/templates/deployment.j2 +++ b/qinling/orchestrator/kubernetes/templates/deployment.j2 @@ -19,6 +19,8 @@ spec: {% for key, value in labels.items() %} {{ key }}: {{ value }} {% endfor %} + annotations: + io.kubernetes.cri-o.TrustedSandbox: "{{ trusted }}" spec: terminationGracePeriodSeconds: 5 automountServiceAccountToken: false diff --git a/qinling/orchestrator/kubernetes/templates/pod.j2 b/qinling/orchestrator/kubernetes/templates/pod.j2 index 32a947f3..f55326b0 100644 --- a/qinling/orchestrator/kubernetes/templates/pod.j2 +++ b/qinling/orchestrator/kubernetes/templates/pod.j2 @@ -6,6 +6,8 @@ metadata: {% for key, value in labels.items() %} {{ key }}: {{ value }} {% endfor %} + annotations: + io.kubernetes.cri-o.TrustedSandbox: "false" spec: terminationGracePeriodSeconds: 5 automountServiceAccountToken: false diff --git a/qinling/tests/unit/api/controllers/v1/test_runtime.py b/qinling/tests/unit/api/controllers/v1/test_runtime.py index 2eb19c60..04fa74c0 100644 --- a/qinling/tests/unit/api/controllers/v1/test_runtime.py +++ b/qinling/tests/unit/api/controllers/v1/test_runtime.py @@ -71,7 +71,10 @@ class TestRuntimeController(base.APITest): resp = self.app.post_json('/v1/runtimes', body) self.assertEqual(201, resp.status_int) + + body.update({"trusted": True}) self._assertDictContainsSubset(resp.json, body) + mock_create_time.assert_called_once_with(resp.json['id']) @mock.patch('qinling.rpc.EngineClient.create_runtime') diff --git a/qinling/tests/unit/base.py b/qinling/tests/unit/base.py index bc4d704f..4e92c1cd 100644 --- a/qinling/tests/unit/base.py +++ b/qinling/tests/unit/base.py @@ -175,7 +175,8 @@ class DbTestCase(BaseTest): # 'auth_enable' is disabled by default, we create 
runtime for # default tenant. 'project_id': DEFAULT_PROJECT_ID, - 'status': status.AVAILABLE + 'status': status.AVAILABLE, + 'trusted': True } ) diff --git a/qinling/tests/unit/engine/test_default_engine.py b/qinling/tests/unit/engine/test_default_engine.py index 3e6feb27..e3e484a8 100644 --- a/qinling/tests/unit/engine/test_default_engine.py +++ b/qinling/tests/unit/engine/test_default_engine.py @@ -50,7 +50,8 @@ class TestDefaultEngine(base.DbTestCase): self.default_engine.create_runtime(mock.Mock(), runtime_id) self.orchestrator.create_pool.assert_called_once_with( - runtime_id, runtime.image) + runtime_id, runtime.image, trusted=True) + runtime = db_api.get_runtime(runtime_id) self.assertEqual(status.AVAILABLE, runtime.status) @@ -64,7 +65,7 @@ class TestDefaultEngine(base.DbTestCase): self.default_engine.create_runtime(mock.Mock(), runtime_id) self.orchestrator.create_pool.assert_called_once_with( - runtime_id, runtime.image) + runtime_id, runtime.image, trusted=True) runtime = db_api.get_runtime(runtime_id) self.assertEqual(status.ERROR, runtime.status) diff --git a/qinling/tests/unit/orchestrator/kubernetes/test_manager.py b/qinling/tests/unit/orchestrator/kubernetes/test_manager.py index 4fd881af..0b2186dc 100644 --- a/qinling/tests/unit/orchestrator/kubernetes/test_manager.py +++ b/qinling/tests/unit/orchestrator/kubernetes/test_manager.py @@ -196,7 +196,8 @@ class TestKubernetesManager(base.DbTestCase): 'replicas': fake_replicas, 'container_name': 'worker', 'image': fake_image, - 'sidecar_image': CONF.engine.sidecar_image + 'sidecar_image': CONF.engine.sidecar_image, + 'trusted': 'true' } ) self.k8s_v1_ext.create_namespaced_deployment.assert_called_once_with( @@ -297,8 +298,8 @@ class TestKubernetesManager(base.DbTestCase): } } ret = mock.Mock() - ret.status.unavailable_replicas = 0 - self.k8s_v1_ext.read_namespaced_deployment_status.return_value = ret + ret.status.unavailable_replicas = None + self.k8s_v1_ext.read_namespaced_deployment.return_value = ret 
update_result = self.manager.update_pool(fake_deployment_name, image=image) @@ -306,7 +307,7 @@ class TestKubernetesManager(base.DbTestCase): self.assertTrue(update_result) self.k8s_v1_ext.patch_namespaced_deployment.assert_called_once_with( fake_deployment_name, self.fake_namespace, body) - read_status = self.k8s_v1_ext.read_namespaced_deployment_status + read_status = self.k8s_v1_ext.read_namespaced_deployment read_status.assert_called_once_with(fake_deployment_name, self.fake_namespace) @@ -316,9 +317,8 @@ class TestKubernetesManager(base.DbTestCase): ret1 = mock.Mock() ret1.status.unavailable_replicas = 1 ret2 = mock.Mock() - ret2.status.unavailable_replicas = 0 - self.k8s_v1_ext.read_namespaced_deployment_status.side_effect = [ - ret1, ret2] + ret2.status.unavailable_replicas = None + self.k8s_v1_ext.read_namespaced_deployment.side_effect = [ret1, ret2] update_result = self.manager.update_pool(fake_deployment_name, image=image) @@ -326,34 +326,9 @@ class TestKubernetesManager(base.DbTestCase): self.assertTrue(update_result) self.k8s_v1_ext.patch_namespaced_deployment.assert_called_once_with( fake_deployment_name, self.fake_namespace, mock.ANY) - read_status = self.k8s_v1_ext.read_namespaced_deployment_status + read_status = self.k8s_v1_ext.read_namespaced_deployment self.assertEqual(2, read_status.call_count) - def test_update_pool_rollback(self): - fake_deployment_name = self.rand_name('deployment', prefix=self.prefix) - image = self.rand_name('image', prefix=self.prefix) - ret = mock.Mock() - ret.status.unavailable_replicas = 1 - self.k8s_v1_ext.read_namespaced_deployment_status.return_value = ret - rollback_body = { - "name": fake_deployment_name, - "rollbackTo": { - "revision": 0 - } - } - - update_result = self.manager.update_pool(fake_deployment_name, - image=image) - - self.assertFalse(update_result) - self.k8s_v1_ext.patch_namespaced_deployment.assert_called_once_with( - fake_deployment_name, self.fake_namespace, mock.ANY) - read_status = 
self.k8s_v1_ext.read_namespaced_deployment_status - self.assertEqual(5, read_status.call_count) - rollback = self.k8s_v1_ext.create_namespaced_deployment_rollback - rollback.assert_called_once_with( - fake_deployment_name, self.fake_namespace, rollback_body) - def test_get_pool(self): fake_deployment_name = self.rand_name('deployment', prefix=self.prefix) diff --git a/releasenotes/notes/workload-type-support-d613cdb7bb90b2a2.yaml b/releasenotes/notes/workload-type-support-d613cdb7bb90b2a2.yaml new file mode 100644 index 00000000..3ebb8815 --- /dev/null +++ b/releasenotes/notes/workload-type-support-d613cdb7bb90b2a2.yaml @@ -0,0 +1,9 @@ +--- +features: + - Support specifying ``trusted`` for runtime creation. In the Kubernetes + orchestrator implementation, it uses the + ``io.kubernetes.cri-o.TrustedSandbox`` annotation in the pod specification + to choose the underlying container runtime. This feature is useful to + leverage secure container technologies such as Kata containers or + gVisor. It also gets rid of the security concerns of running image type + functions.