Sanitize secrets contained in validation error message

This sanitizes any potential secrets contained in 'message' section of validation output if the document has substitions (implying that a secret may have been substituted into it) or if the document itself was encrypted, implying that the document already contains a secret. Change-Id: I394eb8c4e6002f896ecdaa14d2be1e5f948e5048
2018-02-13 05:43:49 +00:00 · 2018-02-13 05:43:49 +00:00 · bb3c6390d7
parent 86ae1ba9ee
commit bb3c6390d7
4 changed files with 114 additions and 26 deletions
--- a/deckhand/engine/document_validation.py
+++ b/deckhand/engine/document_validation.py
@ -153,14 +153,6 @@ class GenericValidator(BaseValidator):
 class DataSchemaValidator(GenericValidator):
    """Validator for validating ``DataSchema`` documents."""
    def __init__(self, data_schemas):
        super(DataSchemaValidator, self).__init__()
        global _DEFAULT_SCHEMAS
        self._default_schema_map = _DEFAULT_SCHEMAS
        self._external_data_schemas = [d.data for d in data_schemas]
        self._schema_map = self._build_schema_map(data_schemas)
    def _build_schema_map(self, data_schemas):
        schema_map = copy.deepcopy(self._default_schema_map)
@ -180,6 +172,14 @@ class DataSchemaValidator(GenericValidator):
        return schema_map
    def __init__(self, data_schemas):
        super(DataSchemaValidator, self).__init__()
        global _DEFAULT_SCHEMAS
        self._default_schema_map = _DEFAULT_SCHEMAS
        self._external_data_schemas = [d.data for d in data_schemas]
        self._schema_map = self._build_schema_map(data_schemas)
    def matches(self, document):
        if document.is_abstract:
            LOG.info('Skipping schema validation for abstract document [%s]: '
@ -225,7 +225,8 @@ class DataSchemaValidator(GenericValidator):
            # secrets. While this may make debugging a few validation failures
            # more difficult, it is a necessary evil.
            sanitized_document = (
-                SecretsSubstitution.sanitize_potential_secrets(document))
+                SecretsSubstitution.sanitize_potential_secrets(
                    error, document))
            parent_error_section = utils.jsonpath_parse(
                sanitized_document, parent_path_to_error_in_document)
        except Exception:
@ -239,8 +240,6 @@ class DataSchemaValidator(GenericValidator):
            'schema': document.schema,
            'path': path_to_error_in_document,
            'error_section': parent_error_section,
            # TODO(fmontei): Also sanitize any secrets contained in the message
            # as well.
            'message': error.message
        }
--- a/deckhand/engine/document_wrapper.py
+++ b/deckhand/engine/document_wrapper.py
@ -88,5 +88,13 @@ class DocumentDict(dict):
        return utils.jsonpath_parse(
            self, 'metadata.layeringDefinition.actions') or []
    @property
    def storage_policy(self):
        return utils.jsonpath_parse(self, 'metadata.storagePolicy') or ''
    @property
    def is_encrypted(self):
        return self.storage_policy == 'encrypted'
    def __hash__(self):
        return hash(json.dumps(self, sort_keys=True))
--- a/deckhand/engine/secrets_manager.py
+++ b/deckhand/engine/secrets_manager.py
@ -13,6 +13,7 @@
 # limitations under the License.
 import copy
 import re
 from oslo_log import log as logging
 import six
@ -114,17 +115,32 @@ class SecretsSubstitution(object):
    """Class for document substitution logic for YAML files."""
    @staticmethod
-    def sanitize_potential_secrets(document):
+    def sanitize_potential_secrets(error, document):
        """Sanitize all secret data that may have been substituted into the
-        document. Uses references in ``document.substitutions`` to determine
+        document or contained in the document itself (if the document has
-        which values to sanitize. Only meaningful to call this on post-rendered
+        ``metadata.storagePolicy`` == 'encrypted'). Uses references in
-        documents.
+        ``document.substitutions`` to determine which values to sanitize. Only
        meaningful to call this on post-rendered documents.
-        :param DocumentDict document: Document to sanitize.
+        :param error: Error message produced by ``jsonschema``.
        :param document: Document to sanitize.
        :type document: DocumentDict
        """
        if not document.substitutions and not document.is_encrypted:
            return document
        insecure_reg_exps = [
            re.compile(r'^.* is not of type .+$')
        ]
        to_sanitize = copy.deepcopy(document)
        safe_message = 'Sanitized to avoid exposing secret.'
        # Sanitize any secrets contained in `error.message` referentially.
        if error.message and any(
                r.match(error.message) for r in insecure_reg_exps):
            error.message = safe_message
        # Sanitize any secrets extracted from the document itself.
        for sub in document.substitutions:
            replaced_data = utils.jsonpath_replace(
                to_sanitize['data'], safe_message, sub['dest']['path'])
--- a/deckhand/tests/unit/engine/test_document_validation.py
+++ b/deckhand/tests/unit/engine/test_document_validation.py
@ -64,21 +64,27 @@ class TestDocumentValidation(engine_test_base.TestDocumentValidationBase):
                      mock_log.info.mock_calls[0][1][0])
    @mock.patch.object(document_validation, 'jsonschema', autospec=True)
-    def test_validation_failure_does_not_expose_secrets(self, mock_jsonschema):
+    def test_validation_failure_sanitizes_error_section_secrets(
            self, mock_jsonschema):
        m_args = mock.Mock()
        mock_jsonschema.Draft4Validator(m_args).iter_errors.side_effect = [
            # Return empty list of errors for base schema validator and pretend
            # that 1 error is returned for next validator.
-            [], [mock.Mock(path=[], schema_path=[])]
+            [],
            [mock.Mock(path=[], schema_path=[], message='scary-secret-here')]
        ]
-        test_document = self._read_data('sample_document')
+
-        for sub in test_document['metadata']['substitutions']:
+        document_factory = factories.DocumentFactory(1, [1])
-            substituted_data = utils.jsonpath_replace(
+        test_document = document_factory.gen_test(
-                test_document['data'], 'scary-secret', sub['dest']['path'])
+            {
-            test_document['data'].update(substituted_data)
+                '_GLOBAL_DATA_1_': {'data': {'secret-a': 5}},
-        self.assertEqual(
+                '_GLOBAL_SUBSTITUTIONS_1_': [
-            'scary-secret', utils.jsonpath_parse(test_document['data'],
+                    {'src': {
-                                                 sub['dest']['path']))
+                        'path': '.', 'schema': 'foo/bar/v1', 'name': 'foo'},
                     'dest': {'path': '.secret-a'}}
                ]
            },
            global_abstract=False)[-1]
        data_schema_factory = factories.DataSchemaFactory()
        data_schema = data_schema_factory.gen_test(test_document['schema'], {})
@ -91,3 +97,62 @@ class TestDocumentValidation(engine_test_base.TestDocumentValidationBase):
        self.assertIn('Sanitized to avoid exposing secret.',
                      str(validations[0]['errors'][-1]))
        self.assertNotIn('scary-secret.', str(validations[0]['errors'][-1]))
    def test_validation_failure_sanitizes_message_secrets(self):
        data_schema_factory = factories.DataSchemaFactory()
        metadata_name = 'example/Doc/v1'
        schema_to_use = {
            '$schema': 'http://json-schema.org/schema#',
            'type': 'object',
            'properties': {
                'secret-a': {'type': 'string'}
            },
            'required': ['secret-a'],
            'additionalProperties': False
        }
        data_schema = data_schema_factory.gen_test(
            metadata_name, data=schema_to_use)
        # Case 1: Check that sensitive data is sanitized if the document has
        # substitutions and `metadata.storagePolicy` == 'cleartext'.
        document_factory = factories.DocumentFactory(1, [1])
        test_document = document_factory.gen_test({
            "_GLOBAL_DATA_1_": {'data': {'secret-a': 5}},
            "_GLOBAL_SCHEMA_1_": metadata_name,
            "_GLOBAL_SUBSTITUTIONS_1_": [{
                "dest": {
                    "path": ".secret-a"
                },
                "src": {
                    "schema": "deckhand/CertificateKey/v1",
                    "name": "site-cert",
                    "path": "."
                }
            }],
        }, global_abstract=False)[-1]
        test_document['metadata']['storagePolicy'] = 'cleartext'
        validations = document_validation.DocumentValidation(
            test_document, existing_data_schemas=[data_schema],
            pre_validate=False).validate_all()
        self.assertEqual(1, len(validations[0]['errors']))
        self.assertEqual('Sanitized to avoid exposing secret.',
                         validations[0]['errors'][0]['message'])
        # Case 2: Check that sensitive data is sanitized if the document has
        # no substitutions and `metadata.storagePolicy` == 'encrypted'.
        test_document = document_factory.gen_test({
            "_GLOBAL_DATA_1_": {'data': {'secret-a': 5}},
            "_GLOBAL_SCHEMA_1_": metadata_name,
            "_GLOBAL_SUBSTITUTIONS_1_": [],
        }, global_abstract=False)[-1]
        test_document['metadata']['storagePolicy'] = 'encrypted'
        validations = document_validation.DocumentValidation(
            test_document, existing_data_schemas=[data_schema],
            pre_validate=False).validate_all()
        self.assertEqual(1, len(validations[0]['errors']))
        self.assertEqual('Sanitized to avoid exposing secret.',
                         validations[0]['errors'][0]['message'])