diff --git a/doc/source/admin/components.rst b/doc/source/admin/components.rst index 7cba2516b7..dfe5d3c00a 100644 --- a/doc/source/admin/components.rst +++ b/doc/source/admin/components.rst @@ -329,6 +329,22 @@ The following sections of ``zuul.conf`` are used by the scheduler: pipeline precedence, followed by relative priority, and finally the order in which they were submitted. + .. attr:: default_hold_expiration + :default: max_hold_expiration + + The default value for held node expiration if not supplied. This + will default to the value of ``max_hold_expiration`` if not changed, + or if it is set to a higher value than the max. + + .. attr:: max_hold_expiration + :default: 0 + + Maximum number of seconds any nodes held for an autohold request + will remain available. A value of 0 disables this, and the nodes + will remain held until the autohold request is manually deleted. + If a value higher than ``max_hold_expiration`` is supplied during + hold request creation, it will be lowered to this value. + Operation ~~~~~~~~~ diff --git a/releasenotes/notes/max_hold_age-1c6cef0bce537bce.yaml b/releasenotes/notes/max_hold_age-1c6cef0bce537bce.yaml new file mode 100644 index 0000000000..940945c7d6 --- /dev/null +++ b/releasenotes/notes/max_hold_age-1c6cef0bce537bce.yaml @@ -0,0 +1,8 @@ +--- +features: + - | + New scheduler options, max_hold_expiration and default_hold_expiration, are + added to control how long nodes held for an autohold request remain + available. The max_hold_expiration option defaults to 0, which means the + held nodes will not automatically expire, and default_hold_expiration + defaults to the value of max_hold_expiration. 
diff --git a/tests/fixtures/zuul-hold-expiration.conf b/tests/fixtures/zuul-hold-expiration.conf new file mode 100644 index 0000000000..08a3bdc765 --- /dev/null +++ b/tests/fixtures/zuul-hold-expiration.conf @@ -0,0 +1,38 @@ +[gearman] +server=127.0.0.1 + +[statsd] +# note, use 127.0.0.1 rather than localhost to avoid getting ipv6 +# see: https://github.com/jsocol/pystatsd/issues/61 +server=127.0.0.1 + +[scheduler] +tenant_config=main.yaml +relative_priority=true +default_hold_expiration=1800 +max_hold_expiration=3600 + +[merger] +git_dir=/tmp/zuul-test/merger-git +git_user_email=zuul@example.com +git_user_name=zuul + +[executor] +git_dir=/tmp/zuul-test/executor-git + +[connection gerrit] +driver=gerrit +server=review.example.com +user=jenkins +sshkey=fake_id_rsa_path + +[connection smtp] +driver=smtp +server=localhost +port=25 +default_from=zuul@example.com +default_to=you@example.com + +[web] +static_cache_expiry=1200 +root=https://zuul.example.com/ diff --git a/tests/unit/test_scheduler.py b/tests/unit/test_scheduler.py index 15bef56213..0016628090 100644 --- a/tests/unit/test_scheduler.py +++ b/tests/unit/test_scheduler.py @@ -138,6 +138,106 @@ class TestAuthorizeViaRPC(ZuulTestCase): self.assertTrue(json.loads(authorized)) +class TestSchedulerAutoholdHoldExpiration(ZuulTestCase): + ''' + This class of tests validates the autohold node expiration values + are set correctly via zuul config or from a custom value. + ''' + config_file = 'zuul-hold-expiration.conf' + tenant_config_file = 'config/single-tenant/main.yaml' + + @simple_layout('layouts/autohold.yaml') + def test_autohold_max_hold_default(self): + ''' + Test that the hold request node expiration will default to the + value specified in the configuration file. + ''' + client = zuul.rpcclient.RPCClient('127.0.0.1', + self.gearman_server.port) + self.addCleanup(client.shutdown) + + # Add an autohold with no hold expiration. 
+ r = client.autohold('tenant-one', 'org/project', 'project-test2', + "", "", "reason text", 1) + self.assertTrue(r) + + # There should be a record in ZooKeeper + request_list = self.zk.getHoldRequests() + self.assertEqual(1, len(request_list)) + request = self.zk.getHoldRequest(request_list[0]) + self.assertIsNotNone(request) + self.assertEqual('tenant-one', request.tenant) + self.assertEqual('review.example.com/org/project', request.project) + self.assertEqual('project-test2', request.job) + self.assertEqual('reason text', request.reason) + self.assertEqual(1, request.max_count) + self.assertEqual(0, request.current_count) + self.assertEqual([], request.nodes) + # This should be the default value from the zuul config file. + self.assertEqual(1800, request.node_expiration) + + @simple_layout('layouts/autohold.yaml') + def test_autohold_max_hold_custom(self): + ''' + Test that the hold request node expiration will be set to the custom + value specified in the request. + ''' + client = zuul.rpcclient.RPCClient('127.0.0.1', + self.gearman_server.port) + self.addCleanup(client.shutdown) + + # Add an autohold with a custom hold expiration. + r = client.autohold('tenant-one', 'org/project', 'project-test2', + "", "", "reason text", 1, 500) + self.assertTrue(r) + + # There should be a record in ZooKeeper + request_list = self.zk.getHoldRequests() + self.assertEqual(1, len(request_list)) + request = self.zk.getHoldRequest(request_list[0]) + self.assertIsNotNone(request) + self.assertEqual('tenant-one', request.tenant) + self.assertEqual('review.example.com/org/project', request.project) + self.assertEqual('project-test2', request.job) + self.assertEqual('reason text', request.reason) + self.assertEqual(1, request.max_count) + self.assertEqual(0, request.current_count) + self.assertEqual([], request.nodes) + # This should be the value from the user request. 
+ self.assertEqual(500, request.node_expiration) + + @simple_layout('layouts/autohold.yaml') + def test_autohold_max_hold_custom_invalid(self): + ''' + Test that if the custom hold request node expiration is higher than our + configured max, it will be lowered to the max. + ''' + client = zuul.rpcclient.RPCClient('127.0.0.1', + self.gearman_server.port) + self.addCleanup(client.shutdown) + + # Add an autohold with a custom hold expiration that is higher than our + # configured max. + r = client.autohold('tenant-one', 'org/project', 'project-test2', + "", "", "reason text", 1, 10000) + self.assertTrue(r) + + # There should be a record in ZooKeeper + request_list = self.zk.getHoldRequests() + self.assertEqual(1, len(request_list)) + request = self.zk.getHoldRequest(request_list[0]) + self.assertIsNotNone(request) + self.assertEqual('tenant-one', request.tenant) + self.assertEqual('review.example.com/org/project', request.project) + self.assertEqual('project-test2', request.job) + self.assertEqual('reason text', request.reason) + self.assertEqual(1, request.max_count) + self.assertEqual(0, request.current_count) + self.assertEqual([], request.nodes) + # This should be the max value from the zuul config file. 
+ self.assertEqual(3600, request.node_expiration) + + class TestScheduler(ZuulTestCase): tenant_config_file = 'config/single-tenant/main.yaml' diff --git a/zuul/cmd/client.py b/zuul/cmd/client.py index ef95e78991..2873d93ddf 100755 --- a/zuul/cmd/client.py +++ b/zuul/cmd/client.py @@ -194,12 +194,11 @@ class Client(zuul.cmd.ZuulApp): cmd_autohold.add_argument('--count', help='number of job runs (default: 1)', required=False, type=int, default=1) - cmd_autohold.add_argument('--node-hold-expiration', - help=('how long in seconds should the ' - 'node set be in HOLD status ' - '(default: nodepool\'s max-hold-age ' - 'if set, or indefinitely)'), - required=False, type=int, default=0) + cmd_autohold.add_argument( + '--node-hold-expiration', + help=('how long in seconds should the node set be in HOLD status ' + '(default: scheduler\'s default_hold_expiration value)'), + required=False, type=int) cmd_autohold.set_defaults(func=self.autohold) cmd_autohold_delete = subparsers.add_parser( diff --git a/zuul/scheduler.py b/zuul/scheduler.py index b525584afc..ffdb87bbaf 100644 --- a/zuul/scheduler.py +++ b/zuul/scheduler.py @@ -563,6 +563,27 @@ class Scheduler(threading.Thread): request.ref_filter = ref_filter request.reason = reason request.max_count = count + + max_hold = get_default( + self.config, 'scheduler', 'max_hold_expiration', 0) + default_hold = get_default( + self.config, 'scheduler', 'default_hold_expiration', max_hold) + + # If the max hold is not infinite, we need to make sure that + # our default value does not exceed it. 
+ if max_hold and default_hold != max_hold and (default_hold == 0 or + default_hold > max_hold): + default_hold = max_hold + + # Set node_hold_expiration to default if no value is supplied + if node_hold_expiration is None: + node_hold_expiration = default_hold + + # Reset node_hold_expiration to max if it exceeds the max + elif max_hold and (node_hold_expiration == 0 or + node_hold_expiration > max_hold): + node_hold_expiration = max_hold + request.node_expiration = node_hold_expiration # No need to lock it since we are creating a new one.