Add support for scheduling periodic checks

If the `schedule` config option is set to a valid cron expression,
the charm will schedule tempest test runs (readonly-quick list)
according to the set schedule.

Change-Id: I8a10eff30dcbe385ebb4f8fd8d1d249f703786dc
This commit is contained in:
Samuel Allan 2024-01-24 12:18:08 +10:30
parent 8dc3cdff4c
commit c4322d1fa7
9 changed files with 387 additions and 24 deletions

View File

@ -74,11 +74,23 @@ config:
options:
schedule:
type: string
default: "off"
default: ""
description: |
The cron-like schedule to define when to run tempest. When the value is
"off" (case-insensitive), then period checks will be disabled. The
default is to turn off period checks.
The cron schedule expression to define when to run tempest periodic checks.
When the value is empty (default), period checks will be disabled.
The cron implementation used is Vixie Cron, installed from Ubuntu main.
For help with expressions, see `man 5 crontab` for Vixie Cron,
or visit https://crontab.guru/ .
The schedule should not result in tempest running more than once every 15 minutes.
Example expressions:
"*/30 * * * *" every 30 minutes
"5 2 * * *" at 2:05am every day
"5 2 * * mon" at 2:05am every Monday
actions:
validate:

View File

@ -7,3 +7,6 @@ cosl
# From ops_sunbeam
tenacity
# for validating cron expressions
croniter

View File

@ -31,6 +31,7 @@ import ops.pebble
import ops_sunbeam.charm as sunbeam_charm
import ops_sunbeam.container_handlers as sunbeam_chandlers
import ops_sunbeam.core as sunbeam_core
import ops_sunbeam.guard as sunbeam_guard
import ops_sunbeam.relation_handlers as sunbeam_rhandlers
from handlers import (
GrafanaDashboardRelationHandler,
@ -43,9 +44,11 @@ from ops.main import (
)
from ops.model import (
ActiveStatus,
BlockedStatus,
MaintenanceStatus,
)
from ops_sunbeam.config_contexts import (
ConfigContext,
)
from utils.constants import (
CONTAINER,
TEMPEST_CONCURRENCY,
@ -57,10 +60,23 @@ from utils.constants import (
TEMPEST_WORKSPACE,
TEMPEST_WORKSPACE_PATH,
)
from utils.validators import (
validated_schedule,
)
logger = logging.getLogger(__name__)
class TempestConfigurationContext(ConfigContext):
"""Configuration context for tempest."""
def context(self) -> dict:
"""Tempest context."""
return {
"schedule": self.charm.get_schedule(),
}
class TempestOperatorCharm(sunbeam_charm.OSBaseOperatorCharmK8S):
"""Charm the service."""
@ -102,6 +118,33 @@ class TempestOperatorCharm(sunbeam_charm.OSBaseOperatorCharmK8S):
),
]
def get_schedule(self) -> str:
"""Return the schedule option if valid and should be enabled.
If the schedule option is invalid,
or periodic checks shouldn't currently be enabled
(eg. observability relations not ready),
then return an empty schedule string.
An empty string disables the schedule.
"""
schedule = validated_schedule(self.config["schedule"])
if not schedule.valid:
return ""
# TODO: once observability integration is implemented,
# check if observability relations are ready here.
# TODO: when we have a way to check if tempest env is ready
# (tempest init complete, etc.),
# then disable schedule until it is ready.
return schedule.value
@property
def config_contexts(self) -> List[ConfigContext]:
"""Generate list of configuration adapters for the charm."""
return [TempestConfigurationContext(self, "tempest")]
def get_pebble_handlers(self) -> List[sunbeam_chandlers.PebbleHandler]:
"""Pebble handlers for operator."""
return [
@ -175,6 +218,13 @@ class TempestOperatorCharm(sunbeam_charm.OSBaseOperatorCharmK8S):
NOTE: this will be improved in future to avoid running unnecessarily.
"""
logger.debug("Running post config setup")
schedule = validated_schedule(self.config["schedule"])
if not schedule.valid:
raise sunbeam_guard.BlockedExceptionError(
f"invalid schedule config: {schedule.err}"
)
self.status.set(MaintenanceStatus("tempest init in progress"))
pebble = self.pebble_handler()
@ -183,10 +233,9 @@ class TempestOperatorCharm(sunbeam_charm.OSBaseOperatorCharmK8S):
try:
pebble.init_tempest(env)
except RuntimeError:
self.status.set(
BlockedStatus("tempest init failed, see logs for more info")
raise sunbeam_guard.BlockedExceptionError(
"tempest init failed, see logs for more info"
)
return
self.status.set(ActiveStatus(""))
logger.debug("Finish post config setup")

View File

@ -67,6 +67,8 @@ def assert_ready(f):
class TempestPebbleHandler(sunbeam_chandlers.ServicePebbleHandler):
"""Pebble handler for the container."""
PERIODIC_TEST_RUNNER = "periodic-test"
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.container = self.charm.unit.get_container(self.container_name)
@ -83,16 +85,65 @@ class TempestPebbleHandler(sunbeam_chandlers.ServicePebbleHandler):
# (eg. observability connected, configuration set to run).
self.service_name: {
"override": "replace",
"summary": "Running tempest periodically",
"summary": "crontab to wake up pebble periodically for running periodic checks",
# Must run cron in foreground to be managed by pebble
"command": "cron -f",
"user": "root",
"group": "root",
"startup": "enabled",
},
self.PERIODIC_TEST_RUNNER: {
"override": "replace",
"summary": "Running tempest periodically",
"working-dir": TEMPEST_HOME,
"command": f"/usr/local/sbin/tempest-run-wrapper --load-list {TEMPEST_LIST_DIR}/readonly-quick",
"user": "tempest",
"group": "tempest",
"startup": "disabled",
"on-success": "ignore",
"on-failure": "ignore",
},
},
}
@property
def service_ready(self) -> bool:
"""Determine whether the service the container provides is running.
Override because we only want the cron service to be auto managed.
"""
if not self.pebble_ready:
return False
services = self.container.get_services(self.service_name)
return all([s.is_running() for s in services.values()])
def start_all(self, restart: bool = True) -> None:
"""Start services in container.
Override because we only want the cron service to be auto managed.
:param restart: Whether to stop services before starting them.
"""
if not self.container.can_connect():
logger.debug(
f"Container {self.container_name} not ready, deferring restart"
)
return
services = self.container.get_services(self.service_name)
for service_name, service in services.items():
if not service.is_running():
logger.debug(
f"Starting {service_name} in {self.container_name}"
)
self.container.start(service_name)
continue
if restart:
logger.debug(
f"Restarting {service_name} in {self.container_name}"
)
self.container.restart(service_name)
@assert_ready
def get_test_lists(self) -> List[str]:
"""Get the filenames of available test lists."""
@ -109,8 +160,13 @@ class TempestPebbleHandler(sunbeam_chandlers.ServicePebbleHandler):
# when periodic checks are enabled.
# This ensures that tempest gets the env, inherited from cron.
layer = self.get_layer()
layer["services"][self.service_name]["environment"] = env
self.container.add_layer(self.service_name, layer, combine=True)
layer["services"][self.PERIODIC_TEST_RUNNER]["environment"] = env
self.container.add_layer(
self.PERIODIC_TEST_RUNNER, layer, combine=True
)
# ensure the cron service is running
self.container.start(self.service_name)
try:
self.execute(

View File

@ -1,17 +1,12 @@
# Do not change this file, this file is managed by juju. This is a dedicated
# system-wide crontab for running tempest periodically.
# Do not change this file, this file is managed by juju.
# This is a dedicated system-wide crontab for running tempest periodically.
SHELL=/bin/sh
PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
PEBBLE_SOCKET=/charm/container/pebble.socket
# Example of job definition:
# .---------------- minute (0 - 59)
# | .------------- hour (0 - 23)
# | | .---------- day of month (1 - 31)
# | | | .------- month (1 - 12) OR jan,feb,mar,apr ...
# | | | | .---- day of week (0 - 6) (Sunday=0 or 7) OR sun,mon,tue,wed,thu,fri,sat
# | | | | |
# * * * * * user-name command to be executed
{% if options.schedule.casefold() != "off" %}
# Note that the process lock is shared between ad hoc check and the periodic check.
{{ options.schedule }} tempest tempest-run-wrapper --load-list /tempest_test_lists/readonly-quick
{% if tempest.schedule %}
# Note that the process lock is shared between ad hoc check and this periodic check.
# Run this through pebble, so that the charm can configure pebble to run the service with the cloud credentials.
{{ tempest.schedule }} root pebble start periodic-test
{% endif %}

View File

@ -0,0 +1,83 @@
# Copyright 2023 Canonical Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Utilities for validating."""
from dataclasses import (
dataclass,
)
from datetime import (
datetime,
)
from croniter import (
croniter,
)
@dataclass
class Schedule:
"""A cron schedule that has validation information."""
value: str
valid: bool
err: str
def validated_schedule(schedule: str) -> Schedule:
"""Process and validate a schedule str.
Return the schedule with validation info.
"""
# Empty schedule is fine; it means it's disabled in this context.
if not schedule:
return Schedule(value=schedule, valid=True, err="")
# croniter supports second repeats, but vixie cron does not.
if len(schedule.split()) == 6:
return Schedule(
value=schedule,
valid=False,
err="This cron does not support seconds in schedule (6 fields). "
"Exactly 5 columns must be specified for iterator expression.",
)
# constant base time for consistency
base = datetime(2004, 3, 5)
try:
cron = croniter(schedule, base, max_years_between_matches=1)
except ValueError as e:
msg = str(e)
# croniter supports second repeats, but vixie cron does not,
# so update the error message here to suit.
if "Exactly 5 or 6 columns" in msg:
msg = (
"Exactly 5 columns must be specified for iterator expression."
)
return Schedule(value=schedule, valid=False, err=msg)
# This is a rather naive method for enforcing this,
# and it may be possible to craft an expression
# that results in some consecutive runs within 15 minutes,
# however this is fine, as there is process locking for tempest,
# and this is more of a sanity check than a security requirement.
t1 = cron.get_next()
t2 = cron.get_next()
if t2 - t1 < 15 * 60: # 15 minutes in seconds
return Schedule(
value=schedule,
valid=False,
err="Cannot schedule periodic check to run faster than every 15 minutes.",
)
return Schedule(value=schedule, valid=True, err="")

View File

@ -189,6 +189,36 @@ class TestTempestOperatorCharm(test_utils.CharmTestCase):
self.harness.remove_relation(identity_ops_rel_id)
self.harness.remove_relation(grafana_dashboard_rel_id)
def test_config_context_schedule(self):
"""Test config context contains the schedule as expected."""
test_utils.set_all_pebbles_ready(self.harness)
logging_rel_id = self.add_logging_relation(self.harness)
identity_ops_rel_id = self.add_identity_ops_relation(self.harness)
grafana_dashboard_rel_id = self.add_grafana_dashboard_relation(
self.harness
)
# ok schedule
schedule = "0 0 */7 * *"
self.harness.update_config({"schedule": schedule})
self.assertEqual(
self.harness.charm.contexts().tempest.schedule, schedule
)
# too frequent
schedule = "* * * * *"
self.harness.update_config({"schedule": schedule})
self.assertEqual(self.harness.charm.contexts().tempest.schedule, "")
# disabled
schedule = ""
self.harness.update_config({"schedule": schedule})
self.assertEqual(self.harness.charm.contexts().tempest.schedule, "")
self.harness.remove_relation(logging_rel_id)
self.harness.remove_relation(identity_ops_rel_id)
self.harness.remove_relation(grafana_dashboard_rel_id)
def test_validate_action_invalid_regex(self):
"""Test validate action with invalid regex provided."""
test_utils.set_all_pebbles_ready(self.harness)
@ -387,3 +417,50 @@ class TestTempestOperatorCharm(test_utils.CharmTestCase):
self.harness.remove_relation(logging_rel_id)
self.harness.remove_relation(identity_ops_rel_id)
self.harness.remove_relation(grafana_dashboard_rel_id)
def test_blocked_status_invalid_schedule(self):
"""Test to verify blocked status with invalid schedule config."""
test_utils.set_all_pebbles_ready(self.harness)
logging_rel_id = self.add_logging_relation(self.harness)
identity_ops_rel_id = self.add_identity_ops_relation(self.harness)
grafana_dashboard_rel_id = self.add_grafana_dashboard_relation(
self.harness
)
# invalid schedule should make charm in blocked status
self.harness.update_config({"schedule": "* *"})
self.assertIn("invalid schedule", self.harness.charm.status.message())
self.assertEqual(self.harness.charm.status.status.name, "blocked")
# updating the schedule to something valid should unblock it
self.harness.update_config({"schedule": "*/20 * * * *"})
self.assertEqual(self.harness.charm.status.message(), "")
self.assertEqual(self.harness.charm.status.status.name, "active")
self.harness.remove_relation(logging_rel_id)
self.harness.remove_relation(identity_ops_rel_id)
self.harness.remove_relation(grafana_dashboard_rel_id)
def test_error_initing_tempest(self):
"""Test to verify blocked status if tempest init fails."""
test_utils.set_all_pebbles_ready(self.harness)
logging_rel_id = self.add_logging_relation(self.harness)
identity_ops_rel_id = self.add_identity_ops_relation(self.harness)
grafana_dashboard_rel_id = self.add_grafana_dashboard_relation(
self.harness
)
mock_pebble = mock.Mock()
mock_pebble.init_tempest = mock.Mock(side_effect=RuntimeError)
self.harness.charm.pebble_handler = mock.Mock(return_value=mock_pebble)
self.harness.update_config({"schedule": "*/21 * * * *"})
self.assertIn(
"tempest init failed", self.harness.charm.status.message()
)
self.assertEqual(self.harness.charm.status.status.name, "blocked")
self.harness.remove_relation(logging_rel_id)
self.harness.remove_relation(identity_ops_rel_id)
self.harness.remove_relation(grafana_dashboard_rel_id)

View File

@ -0,0 +1,87 @@
#!/usr/bin/env python3
# Copyright 2023 Canonical Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Unit tests for Tempest validator utility functions."""
import unittest
from utils.validators import (
validated_schedule,
)
class TempestCharmValidatorTests(unittest.TestCase):
"""Test validator functions."""
def test_valid_cron_expressions(self):
"""Verify valid cron expressions are marked as valid."""
expressions = [
"5 4 * * *", # daily at 4:05
"*/30 * * * *", # every 30 minutes
"5 2 * * *", # at 2:05am every day
"5 2 * * mon", # at 2:05am every Monday
"", # empty = disabled, and is ok
]
for exp in expressions:
schedule = validated_schedule(exp)
self.assertTrue(schedule.valid)
self.assertEqual(schedule.err, "")
self.assertEqual(schedule.value, exp)
def test_expression_too_fast(self):
"""Verify an expression with an interval too fast is caught."""
exp = "*/5 * * * *"
schedule = validated_schedule(exp)
self.assertFalse(schedule.valid)
self.assertIn("faster than every 15 minutes", schedule.err)
self.assertEqual(schedule.value, exp)
def test_expression_too_fast_edge_cases(self):
"""Verify an expression with intervals near edge cases are caught."""
exp = "*/14 * * * *"
schedule = validated_schedule(exp)
self.assertFalse(schedule.valid)
self.assertIn("faster than every 15 minutes", schedule.err)
self.assertEqual(schedule.value, exp)
exp = "*/15 * * * *"
schedule = validated_schedule(exp)
self.assertTrue(schedule.valid)
self.assertEqual(schedule.err, "")
self.assertEqual(schedule.value, exp)
def test_expression_six_fields(self):
"""Verify an expression with six fields is caught."""
exp = "*/30 * * * * 6"
schedule = validated_schedule(exp)
self.assertFalse(schedule.valid)
self.assertIn("not support seconds", schedule.err)
self.assertEqual(schedule.value, exp)
def test_expression_missing_column(self):
"""Verify an expression with a missing field is caught."""
exp = "*/30 * *"
schedule = validated_schedule(exp)
self.assertFalse(schedule.valid)
self.assertIn("Exactly 5 columns", schedule.err)
self.assertEqual(schedule.value, exp)
def test_expression_invalid_day(self):
"""Verify an expression with an invalid day field is caught."""
exp = "*/25 * * * xyz"
schedule = validated_schedule(exp)
self.assertFalse(schedule.valid)
self.assertIn("not acceptable", schedule.err)
self.assertEqual(schedule.value, exp)

View File

@ -14,6 +14,7 @@ pydantic<2 # traefik-k8s ingress lib
requests # cinder-ceph-k8s
netifaces # cinder-ceph-k8s
cosl # openstack-exporter
croniter # tempest-k8s
git+https://github.com/juju/charm-helpers.git#egg=charmhelpers # cinder-ceph-k8s,glance-k8s,gnocchi-k8s
git+https://opendev.org/openstack/charm-ops-interface-ceph-client#egg=interface_ceph_client # cinder-ceph-k8s
requests-unixsocket # sunbeam-clusterd