Merge "fix: Add validation logic to check for duplicate documents in engine"

This commit is contained in:
Zuul 2018-10-19 21:35:47 +00:00 committed by Gerrit Code Review
commit a991513eff
2 changed files with 68 additions and 4 deletions

View File

@ -360,6 +360,31 @@ class DataSchemaValidator(GenericValidator):
schema, document, error, root_path)
class DuplicateDocumentValidator(BaseValidator):
    """Flag any document whose ``meta`` value has already been seen.

    An instance remembers the ``meta`` of every document handed to
    :meth:`validate`, so a duplicate is detected on the second and any
    later call made with an equal ``meta`` on that same instance.
    """

    def __init__(self):
        super(DuplicateDocumentValidator, self).__init__()
        # ``meta`` values of the documents validated so far.
        self._document_history = set()
        # Remediation hint attached to every duplicate-document error.
        self._diagnostic = (
            'Ensure that each raw document has a unique combination of '
            '(name, schema, metadata.layeringDefinition.layer).')

    def validate(self, document, **kwargs):
        """Reject ``document`` if its ``meta`` was already recorded.

        :param document: Document to check; its ``meta``, ``schema``,
            ``name`` and ``layer`` attributes are read.
        :param kwargs: Accepted but not used by this validator.
        :returns: An empty list the first time a given ``meta`` is seen
            (it is then recorded); otherwise a one-item list holding the
            formatted duplicate-document error.
        """
        seen = self._document_history
        identity = document.meta

        # First sighting: remember it and report no errors.
        if identity not in seen:
            seen.add(identity)
            return []

        duplicate_error = vm.ValidationMessage(
            message="Duplicate document exists",
            doc_schema=document.schema,
            doc_name=document.name,
            doc_layer=document.layer,
            diagnostic=self._diagnostic)
        return [duplicate_error.format_message()]
class DocumentValidation(object):
def __init__(self, documents, existing_data_schemas=None,
@ -425,12 +450,16 @@ class DocumentValidation(object):
self._documents.append(document)
self._validators = [
DataSchemaValidator(self._external_data_schemas)
]
self._pre_validate = pre_validate
self._validators = [
DataSchemaValidator(self._external_data_schemas),
]
if self._pre_validate:
# Only perform this additional validation "offline". The controller
# need not call this as the db module will handle this validation.
self._validators.append(DuplicateDocumentValidator())
def _get_supported_schema_list(self):
schema_list = []
validator = self._validators[-1]

View File

@ -98,6 +98,41 @@ class TestDocumentValidation(engine_test_base.TestDocumentValidationBase):
str(validations[0]['errors'][-1]))
self.assertNotIn('scary-secret.', str(validations[0]['errors'][-1]))
def test_validation_document_duplication(self):
    """A repeated document is reported only when ``pre_validate`` is on."""
    sample = self._read_data('sample_document')

    # The duplicate check is expected to fire only for pre_validate=True;
    # for the controller the `db` module already handles this case.
    results = document_validation.DocumentValidation(
        [sample, sample],  # The very same document, supplied twice.
        pre_validate=True).validate_all()

    metadata = sample['metadata']
    offending_document = {
        'layer': metadata['layeringDefinition']['layer'],
        'name': metadata['name'],
        'schema': sample['schema'],
    }
    expected_error = {
        'diagnostic': mock.ANY,
        'documents': [offending_document],
        'error': True,
        'kind': 'ValidationMessage',
        'level': 'Error',
        'message': 'Duplicate document exists',
        'name': 'Deckhand validation error',
    }

    # The error is attached to the second occurrence of the document.
    second_doc_errors = results[1]['errors']
    self.assertEqual(1, len(second_doc_errors))
    self.assertEqual(expected_error, second_doc_errors[0])

    # With pre_validate=False the duplicate check must not run.
    results = document_validation.DocumentValidation(
        [sample, sample],  # The very same document, supplied twice.
        pre_validate=False).validate_all()
    self.assertEmpty(results[1]['errors'])
def test_validation_failure_sanitizes_message_secrets(self):
data_schema_factory = factories.DataSchemaFactory()
metadata_name = 'example/Doc/v1'