A configuration management service with support for secrets.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

560 lines
22KB

  1. # Copyright 2017 AT&T Intellectual Property. All other rights reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. import abc
  15. import copy
  16. import os
  17. import pkg_resources
  18. import re
  19. import yaml
  20. import jsonschema
  21. from oslo_log import log as logging
  22. import six
  23. from deckhand.common import document as document_wrapper
  24. from deckhand.common import utils
  25. from deckhand.common import validation_message as vm
  26. from deckhand.engine.secrets_manager import SecretsSubstitution
  27. from deckhand import errors
  28. from deckhand import types
# Module-level logger, named after this module.
LOG = logging.getLogger(__name__)
# Built-in schemas, filled in by ``_build_schema_map()``. Maps a schema version
# (e.g. 'v1') to a dict of ``<schema prefix> -> <schema body>``.
_DEFAULT_SCHEMAS = {}
# Versions that are always present as keys of ``_DEFAULT_SCHEMAS``, even when
# no built-in schema file declares them.
_SUPPORTED_SCHEMA_VERSIONS = ('v1', 'v2')
  32. def _get_schema_parts(document, schema_key='schema'):
  33. schema_parts = utils.jsonpath_parse(document, schema_key).split('/')
  34. schema_prefix = '/'.join(schema_parts[:2])
  35. schema_version = schema_parts[2]
  36. return schema_prefix, schema_version
  37. def _get_schema_dir():
  38. return pkg_resources.resource_filename('deckhand.engine', 'schemas')
  39. def _build_schema_map():
  40. """Populates ``_DEFAULT_SCHEMAS`` with built-in Deckhand schemas."""
  41. global _DEFAULT_SCHEMAS
  42. _DEFAULT_SCHEMAS = {k: {} for k in _SUPPORTED_SCHEMA_VERSIONS}
  43. schema_dir = _get_schema_dir()
  44. for schema_file in os.listdir(schema_dir):
  45. if not schema_file.endswith('.yaml'):
  46. continue
  47. with open(os.path.join(schema_dir, schema_file)) as f:
  48. for schema in yaml.safe_load_all(f):
  49. schema_name = schema['metadata']['name']
  50. version = schema_name.split('/')[-1]
  51. _DEFAULT_SCHEMAS.setdefault(version, {})
  52. if schema_file in _DEFAULT_SCHEMAS[version]:
  53. raise RuntimeError("Duplicate DataSchema document [%s] %s "
  54. "detected." % (schema['schema'],
  55. schema_name))
  56. _DEFAULT_SCHEMAS[version].setdefault(
  57. '/'.join(schema_name.split('/')[:2]), schema['data'])
  58. _build_schema_map()
  59. @six.add_metaclass(abc.ABCMeta)
  60. class BaseValidator(object):
  61. """Abstract base validator.
  62. Sub-classes should override this to implement schema-specific document
  63. validation.
  64. """
  65. __slots__ = ('_schema_map')
  66. _supported_versions = _SUPPORTED_SCHEMA_VERSIONS
  67. _schema_re = re.compile(r'^[a-zA-Z]+\/[a-zA-Z]+\/v\d+$')
  68. def __init__(self):
  69. global _DEFAULT_SCHEMAS
  70. self._schema_map = _DEFAULT_SCHEMAS
  71. @abc.abstractmethod
  72. def validate(self, document):
  73. """Validate whether ``document`` passes schema validation."""
  74. class GenericValidator(BaseValidator):
  75. """Validator used for validating all documents, regardless whether concrete
  76. or abstract, or what version its schema is.
  77. """
  78. __slots__ = ('base_schema')
  79. _diagnostic = (
  80. 'Ensure that each document has a metadata, schema and data section. '
  81. 'Each document must pass the schema defined under: '
  82. 'https://airship-deckhand.readthedocs.io/en/latest/'
  83. 'validation.html#base-schema')
  84. def __init__(self):
  85. super(GenericValidator, self).__init__()
  86. self.base_schema = self._schema_map['v1']['deckhand/Base']
  87. def validate_metadata(self, metadata):
  88. """Validate ``metadata`` against the given schema.
  89. The ``metadata`` section of a Deckhand document describes a schema
  90. defining just the ``metadata`` section. Use that declaration to
  91. choose a schema for validating ``metadata``.
  92. :param dict metadata: Document metadata section to validate
  93. :returns: list of validation errors or empty list for success
  94. """
  95. errors = list()
  96. schema_name, schema_ver = _get_schema_parts(metadata)
  97. schema = self._schema_map.get(schema_ver, {}).get(schema_name, {})
  98. if not schema:
  99. return ['Invalid metadata schema %s version %s specified.'
  100. % (schema_name, schema_ver)]
  101. LOG.debug("Validating document metadata with schema %s/%s.",
  102. schema_name, schema_ver)
  103. jsonschema.Draft4Validator.check_schema(schema)
  104. schema_validator = jsonschema.Draft4Validator(schema)
  105. errors.extend([e.message
  106. for e in schema_validator.iter_errors(metadata)])
  107. return errors
  108. def validate(self, document, **kwargs):
  109. """Validate ``document`` against basic schema validation.
  110. Sanity-checks each document for mandatory keys like "metadata" and
  111. "schema".
  112. Applies even to abstract documents, as they must be consumed by
  113. concrete documents, so basic formatting is mandatory.
  114. Failure to pass this check results in an error.
  115. :param dict document: Document to validate.
  116. :raises RuntimeError: If the Deckhand schema itself is invalid.
  117. :raises errors.InvalidDocumentFormat: If the document failed schema
  118. validation.
  119. :returns: None
  120. """
  121. try:
  122. jsonschema.Draft4Validator.check_schema(self.base_schema)
  123. schema_validator = jsonschema.Draft4Validator(self.base_schema)
  124. error_messages = [
  125. e.message for e in schema_validator.iter_errors(document)]
  126. if not error_messages:
  127. error_messages.extend(
  128. self.validate_metadata(document.metadata))
  129. except Exception as e:
  130. raise RuntimeError(
  131. 'Unknown error occurred while attempting to use Deckhand '
  132. 'schema. Details: %s' % six.text_type(e))
  133. else:
  134. if error_messages:
  135. LOG.error(
  136. 'Failed sanity-check validation for document [%s, %s] %s. '
  137. 'Details: %s', document.schema, document.layer,
  138. document.name, error_messages)
  139. raise errors.InvalidDocumentFormat(
  140. error_list=[
  141. vm.ValidationMessage(
  142. message=message,
  143. name=vm.DOCUMENT_SANITY_CHECK_FAILURE,
  144. doc_schema=document.schema,
  145. doc_name=document.name,
  146. doc_layer=document.layer,
  147. diagnostic=self._diagnostic)
  148. for message in error_messages
  149. ],
  150. reason='Validation'
  151. )
  152. class DataSchemaValidator(GenericValidator):
  153. """Validator for validating ``DataSchema`` documents."""
  154. __slots__ = ('_default_schema_map', '_current_data_schemas')
  155. def _build_schema_map(self, data_schemas):
  156. schema_map = copy.deepcopy(self._default_schema_map)
  157. for data_schema in data_schemas:
  158. # Ensure that each `DataSchema` document has required properties
  159. # before they themselves can be used to validate other documents.
  160. if not data_schema.name:
  161. continue
  162. if self._schema_re.match(data_schema.name) is None:
  163. continue
  164. if not data_schema.data:
  165. continue
  166. schema_prefix, schema_version = _get_schema_parts(
  167. data_schema, 'metadata.name')
  168. schema_map[schema_version].setdefault(schema_prefix,
  169. data_schema.data)
  170. return schema_map
  171. def __init__(self, data_schemas):
  172. super(DataSchemaValidator, self).__init__()
  173. global _DEFAULT_SCHEMAS
  174. self._default_schema_map = _DEFAULT_SCHEMAS
  175. self._current_data_schemas = [d.data for d in data_schemas]
  176. self._schema_map = self._build_schema_map(data_schemas)
  177. def _generate_validation_error_output(self, schema, document, error,
  178. root_path):
  179. """Returns a formatted output with necessary details for debugging why
  180. a validation failed.
  181. The response is a dictionary with the following keys:
  182. * validation_schema: The schema body that was used to validate the
  183. document.
  184. * schema_path: The JSON path in the schema where the failure
  185. originated.
  186. * name: The document name.
  187. * schema: The document schema.
  188. * path: The JSON path in the document where the failure originated.
  189. * error_section: The "section" in the document above which the error
  190. originated (i.e. the dict in which ``path`` is found).
  191. * message: The error message returned by the ``jsonschema`` validator.
  192. :returns: Dictionary in the above format.
  193. """
  194. error_path = '.'.join([str(x) for x in error.path])
  195. if error_path:
  196. path_to_error_in_document = '.'.join([root_path, error_path])
  197. else:
  198. path_to_error_in_document = root_path
  199. path_to_error_in_schema = '.' + '.'.join(
  200. [str(x) for x in error.schema_path])
  201. parent_path_to_error_in_document = '.'.join(
  202. path_to_error_in_document.split('.')[:-1]) or '.'
  203. try:
  204. # NOTE(felipemonteiro): Because validation is performed on fully
  205. # rendered documents, it is necessary to omit the parts of the data
  206. # section where substitution may have occurred to avoid exposing
  207. # any secrets. While this may make debugging a few validation
  208. # failures more difficult, it is a necessary evil.
  209. sanitized_document = (
  210. SecretsSubstitution.sanitize_potential_secrets(
  211. error, document))
  212. # This incurs some degree of overhead as caching here won't make
  213. # a big difference as we are not parsing commonly referenced
  214. # JSON paths -- but this branch is only hit during error handling
  215. # so this should be OK.
  216. parent_error_section = utils.jsonpath_parse(
  217. sanitized_document, parent_path_to_error_in_document)
  218. except Exception:
  219. parent_error_section = (
  220. 'Failed to find parent section above where error occurred.')
  221. error_output = {
  222. 'validation_schema': schema,
  223. 'schema_path': path_to_error_in_schema,
  224. 'name': document.name,
  225. 'schema': document.schema,
  226. 'layer': document.layer,
  227. 'path': path_to_error_in_document,
  228. 'error_section': parent_error_section,
  229. 'message': error.message
  230. }
  231. return error_output
  232. def _get_schemas(self, document):
  233. """Retrieve the relevant schemas based on the document's ``schema``.
  234. :param dict doc: The document used for finding the correct schema
  235. to validate it based on its ``schema``.
  236. :returns: A schema to be used by ``jsonschema`` for document
  237. validation.
  238. :rtype: dict
  239. """
  240. schema_prefix, schema_version = _get_schema_parts(document)
  241. matching_schemas = []
  242. relevant_schemas = self._schema_map.get(schema_version, {})
  243. for candidate_schema_prefix, schema in relevant_schemas.items():
  244. if candidate_schema_prefix == schema_prefix:
  245. if schema not in matching_schemas:
  246. matching_schemas.append(schema)
  247. return matching_schemas
  248. def validate(self, document, pre_validate=True):
  249. """Validate ``document`` against built-in ``schema``-specific schemas.
  250. Does not apply to abstract documents.
  251. :param document: Document to validate.
  252. :type document: DocumentDict
  253. :param pre_validate: Whether to pre-validate documents using built-in
  254. schema validation. Skips over externally registered ``DataSchema``
  255. documents to avoid false positives. Default is True.
  256. :type pre_validate: bool
  257. :raises RuntimeError: If the Deckhand schema itself is invalid.
  258. :returns: Tuple of (error message, parent path for failing property)
  259. following schema validation failure.
  260. :rtype: Generator[Tuple[str, str]]
  261. """
  262. super(DataSchemaValidator, self).validate(document)
  263. # if this is a pre_validate, the only validation needed is structural
  264. # for non-control documents
  265. if not document.is_control and pre_validate:
  266. return
  267. if document.is_abstract:
  268. LOG.info('Skipping schema validation for abstract document [%s, '
  269. '%s] %s.', *document.meta)
  270. return
  271. schemas_to_use = self._get_schemas(document)
  272. if not schemas_to_use:
  273. LOG.debug('Document schema %s not recognized by %s. No further '
  274. 'validation required.', document.schema,
  275. self.__class__.__name__)
  276. for schema in schemas_to_use:
  277. root_path = '.data'
  278. try:
  279. jsonschema.Draft4Validator.check_schema(schema)
  280. schema_validator = jsonschema.Draft4Validator(schema)
  281. errors = schema_validator.iter_errors(document.get('data', {}))
  282. except Exception as e:
  283. LOG.exception(six.text_type(e))
  284. raise RuntimeError(
  285. 'Unknown error occurred while attempting to use schema '
  286. 'for validation. Details: %s.' % six.text_type(e))
  287. else:
  288. for error in errors:
  289. LOG.error(
  290. 'Failed schema validation for document [%s] %s. '
  291. 'Details: %s.', document.schema, document.name,
  292. error.message)
  293. yield self._generate_validation_error_output(
  294. schema, document, error, root_path)
  295. class DuplicateDocumentValidator(BaseValidator):
  296. """Validator used for guarding against duplicate documents."""
  297. def __init__(self):
  298. super(DuplicateDocumentValidator, self).__init__()
  299. self._document_history = set()
  300. self._diagnostic = ('Ensure that each raw document has a unique '
  301. 'combination of (name, schema, '
  302. 'metadata.layeringDefinition.layer).')
  303. def validate(self, document, **kwargs):
  304. """Validates that duplicate document doesn't exist."""
  305. if document.meta in self._document_history:
  306. validation_message = vm.ValidationMessage(
  307. message="Duplicate document exists",
  308. doc_schema=document.schema,
  309. doc_name=document.name,
  310. doc_layer=document.layer,
  311. diagnostic=self._diagnostic)
  312. return [validation_message.format_message()]
  313. else:
  314. self._document_history.add(document.meta)
  315. return []
  316. class DocumentValidation(object):
  317. def __init__(self, documents, existing_data_schemas=None,
  318. pre_validate=True):
  319. """Class for document validation logic for documents.
  320. This class is responsible for validating documents according to their
  321. schema.
  322. If ``pre_validate`` is true, then:
  323. * the base_schema validates ALL documents
  324. * ALL built-in schemas validate the appropriate
  325. document given a schema match
  326. * NO externally registered DataSchema documents
  327. are used for validation
  328. Else:
  329. * the base_schema validates ALL documents
  330. * ALL built-in schemas validate the appropriate
  331. document given a schema match
  332. * ALL externally registered DataSchema documents
  333. are used for validation given a schema match
  334. :param documents: Documents to be validated.
  335. :type documents: List[dict]
  336. :param existing_data_schemas: ``DataSchema`` documents created in prior
  337. revisions to be used to validate the "data" section of each
  338. document in ``documents``. Additional ``DataSchema`` documents in
  339. ``documents`` are combined with these.
  340. :type existing_data_schemas: dict or List[dict]
  341. :param pre_validate: Whether to pre-validate documents using built-in
  342. schema validation. Skips over externally registered ``DataSchema``
  343. documents to avoid false positives. Default is True.
  344. :type pre_validate: bool
  345. """
  346. self._documents = []
  347. self._current_data_schemas = [document_wrapper.DocumentDict(d)
  348. for d in existing_data_schemas or []]
  349. data_schema_map = {d.meta: d for d in self._current_data_schemas}
  350. raw_properties = ('data', 'metadata', 'schema')
  351. if not isinstance(documents, list):
  352. documents = [documents]
  353. for document in documents:
  354. # For post-validation documents are retrieved from the DB so those
  355. # DB properties need to be stripped to avoid validation errors.
  356. raw_document = {}
  357. for prop in raw_properties:
  358. raw_document[prop] = document.get(prop)
  359. document = document_wrapper.DocumentDict(raw_document)
  360. if document.schema.startswith(types.DATA_SCHEMA_SCHEMA):
  361. self._current_data_schemas.append(document)
  362. # If a newer version of the same DataSchema was passed in,
  363. # only use the new one and discard the old one.
  364. if document.meta in data_schema_map:
  365. self._current_data_schemas.remove(
  366. data_schema_map.pop(document.meta))
  367. self._documents.append(document)
  368. self._pre_validate = pre_validate
  369. self._validators = [
  370. DataSchemaValidator(self._current_data_schemas),
  371. ]
  372. if self._pre_validate:
  373. # Only perform this additional validation "offline". The controller
  374. # need not call this as the db module will handle this validation.
  375. self._validators.append(DuplicateDocumentValidator())
  376. def _get_supported_schema_list(self):
  377. schema_list = []
  378. validator = self._validators[-1]
  379. for schema_version, schema_map in validator._schema_map.items():
  380. for schema_name in schema_map:
  381. schema_list.append(schema_name + '/' + schema_version)
  382. return schema_list
  383. def _format_validation_results(self, results):
  384. """Format the validation result to be compatible with database
  385. formatting.
  386. :results: The validation results generated during document validation.
  387. :type results: List[dict]
  388. :returns: List of formatted validation results.
  389. :rtype: List[dict]
  390. """
  391. internal_validator = {
  392. 'name': 'deckhand',
  393. 'version': '1.0'
  394. }
  395. formatted_results = []
  396. for result in results:
  397. formatted_result = {
  398. 'name': types.DECKHAND_SCHEMA_VALIDATION,
  399. 'status': result['status'],
  400. 'validator': internal_validator,
  401. 'errors': result['errors']
  402. }
  403. formatted_results.append(formatted_result)
  404. return formatted_results
  405. def _validate_one(self, document):
  406. result = {'errors': []}
  407. supported_schema_list = self._get_supported_schema_list()
  408. document_schema = None if not document.get('schema') else '/'.join(
  409. _get_schema_parts(document))
  410. if document_schema not in supported_schema_list:
  411. message = ("The provided document schema %s is not registered. "
  412. "Registered schemas include: %s" % (
  413. document.get('schema', 'N/A'),
  414. supported_schema_list))
  415. LOG.info(message)
  416. for validator in self._validators:
  417. error_outputs = validator.validate(
  418. document, pre_validate=self._pre_validate)
  419. if error_outputs:
  420. result['errors'].extend(error_outputs)
  421. if result['errors']:
  422. result.setdefault('status', 'failure')
  423. else:
  424. result.setdefault('status', 'success')
  425. return result
  426. def validate_all(self):
  427. """Validate that all documents are correctly formatted.
  428. All concrete documents in the revision must successfully pass their
  429. JSON schema validations. The result of the validation is stored under
  430. the "deckhand-document-schema-validation" validation namespace for
  431. a document revision.
  432. All abstract documents must themselves be sanity-checked.
  433. Validation is broken up into 2 "main" stages:
  434. 1) Validate that each document contains the basic bulding blocks
  435. needed: i.e. ``schema`` and ``metadata`` using a "base" schema.
  436. Failing this validation is deemed a critical failure, resulting
  437. in an exception.
  438. 2) Execute ``DataSchema`` validations if applicable. Includes all
  439. built-in ``DataSchema`` documents by default.
  440. :returns: A list of validations (one for each document validated).
  441. :rtype: List[dict]
  442. :raises errors.InvalidDocumentFormat: If the document failed schema
  443. validation and the failure is deemed critical.
  444. :raises RuntimeError: If a Deckhand schema itself is invalid.
  445. """
  446. validation_results = []
  447. for document in self._documents:
  448. result = self._validate_one(document)
  449. validation_results.append(result)
  450. return self._format_validation_results(validation_results)