diff --git a/tests/base.py b/tests/base.py index db0221f293..548c011402 100644 --- a/tests/base.py +++ b/tests/base.py @@ -3877,7 +3877,8 @@ class ZuulTestCase(BaseTestCase): path = os.path.join(self.test_root, "changes.data") self.test_config.changes.load(path) - def requestNodes(self, labels, tenant="tenant-one", pipeline="check"): + def requestNodes(self, labels, tenant="tenant-one", pipeline="check", + timeout=10): result_queue = PipelineResultEventQueue( self.zk_client, tenant, pipeline) @@ -3899,7 +3900,7 @@ class ZuulTestCase(BaseTestCase): span_info=None, ) for _ in iterate_timeout( - 10, "nodeset request to be fulfilled"): + timeout, "nodeset request to be fulfilled"): result_events = list(result_queue) if result_events: for event in result_events: diff --git a/tests/fake_nodescan.py b/tests/fake_nodescan.py new file mode 100644 index 0000000000..f97796565e --- /dev/null +++ b/tests/fake_nodescan.py @@ -0,0 +1,87 @@ +# Copyright (C) 2023 Acme Gating, LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +class FakeSocket: + def __init__(self): + self.blocking = True + self.fd = 1 + + def setblocking(self, b): + self.blocking = b + + def getsockopt(self, level, optname): + return None + + def connect(self, addr): + if not self.blocking: + raise BlockingIOError() + raise Exception("blocking connect attempted") + + def fileno(self): + return self.fd + + +class FakePoll: + def __init__(self, _fail=False): + self.fds = [] + self._fail = _fail + + def register(self, fd, bitmap): + self.fds.append(fd) + + def unregister(self, fd): + if fd in self.fds: + self.fds.remove(fd) + + def poll(self, timeout=None): + if self._fail: + return [] + fds = self.fds[:] + self.fds = [f for f in fds if not isinstance(f, FakeSocket)] + fds = [f.fileno() if hasattr(f, 'fileno') else f for f in fds] + return [(f, 0) for f in fds] + + +class Dummy: + pass + + +class FakeKey: + def get_name(self): + return 'fake key' + + def get_base64(self): + return 'fake base64' + + +class FakeTransport: + def __init__(self, _fail=False, active=True): + self.active = active + self._fail = _fail + + def start_client(self, event=None, timeout=None): + if not self._fail: + event.set() + + def get_security_options(self): + ret = Dummy() + ret.key_types = ['rsa'] + return ret + + def get_remote_server_key(self): + return FakeKey() + + def get_exception(self): + return Exception("Fake ssh error") diff --git a/tests/fake_openstack.py b/tests/fake_openstack.py index f8f0ab3d97..fc169dca65 100644 --- a/tests/fake_openstack.py +++ b/tests/fake_openstack.py @@ -176,16 +176,16 @@ class FakeOpenstackConnection: if self.cloud._fake_needs_floating_ip: addresses = dict( public=[], - private=[dict(version=4, addr='fake')] + private=[dict(version=4, addr='198.51.100.1')] ) - interface_ip = 'fake' + interface_ip = '198.51.100.1' else: addresses = dict( - public=[dict(version=4, addr='fake'), - dict(version=6, addr='fake_v6')], - private=[dict(version=4, addr='fake')] + public=[dict(version=4, addr='198.51.100.1'), + dict(version=6, addr='2001:db8::1')], + private=[dict(version=4, addr='198.51.100.1')] ) - interface_ip = 'fake' + interface_ip = '198.51.100.1' args = dict( id=uuid.uuid4().hex, diff --git a/tests/fixtures/config/launcher-min-ready/git/common-config/zuul.yaml b/tests/fixtures/config/launcher-min-ready/git/common-config/zuul.yaml index 2784ad413d..a3399e75bf 100644 --- a/tests/fixtures/config/launcher-min-ready/git/common-config/zuul.yaml +++ b/tests/fixtures/config/launcher-min-ready/git/common-config/zuul.yaml @@ -101,6 +101,7 @@ name: aws-base abstract: true connection: aws + host-key-checking: false boot-timeout: 120 launch-timeout: 600 launch-attempts: 2 diff --git a/tests/fixtures/layouts/nodepool-empty-nodeset.yaml b/tests/fixtures/layouts/nodepool-empty-nodeset.yaml index 2f067741c3..65b328aa6d 100644 --- a/tests/fixtures/layouts/nodepool-empty-nodeset.yaml +++ b/tests/fixtures/layouts/nodepool-empty-nodeset.yaml @@ -66,6 +66,7 @@ name: aws-base abstract: true connection: aws + host-key-checking: false boot-timeout: 120 launch-timeout: 600 launch-attempts: 2 diff --git a/tests/fixtures/layouts/nodepool-image-no-validate.yaml b/tests/fixtures/layouts/nodepool-image-no-validate.yaml index 9911f4f30f..07dbd2677b 100644 --- a/tests/fixtures/layouts/nodepool-image-no-validate.yaml +++ b/tests/fixtures/layouts/nodepool-image-no-validate.yaml @@ -47,6 +47,7 @@ name: aws-base abstract: true connection: aws + host-key-checking: false boot-timeout: 120 launch-timeout: 600 object-storage: diff --git a/tests/fixtures/layouts/nodepool-image-validate.yaml b/tests/fixtures/layouts/nodepool-image-validate.yaml index e07b223a53..faa564f089 100644 --- a/tests/fixtures/layouts/nodepool-image-validate.yaml +++ b/tests/fixtures/layouts/nodepool-image-validate.yaml @@ -64,6 +64,7 @@ name: aws-base abstract: true connection: aws + host-key-checking: false boot-timeout: 120 launch-timeout: 600 object-storage: diff --git a/tests/fixtures/layouts/nodepool-image.yaml b/tests/fixtures/layouts/nodepool-image.yaml index 9e63a2eb77..9ccbe044e9 100644 --- a/tests/fixtures/layouts/nodepool-image.yaml +++ b/tests/fixtures/layouts/nodepool-image.yaml @@ -151,6 +151,7 @@ name: aws-base abstract: true connection: aws + host-key-checking: false boot-timeout: 120 launch-timeout: 600 object-storage: diff --git a/tests/fixtures/layouts/nodepool-missing-connection.yaml b/tests/fixtures/layouts/nodepool-missing-connection.yaml index 9ab7a1f9cc..8f6d25b972 100644 --- a/tests/fixtures/layouts/nodepool-missing-connection.yaml +++ b/tests/fixtures/layouts/nodepool-missing-connection.yaml @@ -124,6 +124,7 @@ name: aws-base abstract: true connection: aws-missing + host-key-checking: false boot-timeout: 120 launch-timeout: 600 launch-attempts: 2 diff --git a/tests/fixtures/layouts/nodepool-nodescan.yaml b/tests/fixtures/layouts/nodepool-nodescan.yaml new file mode 100644 index 0000000000..91c81fd1f9 --- /dev/null +++ b/tests/fixtures/layouts/nodepool-nodescan.yaml @@ -0,0 +1,174 @@ +- pipeline: + name: check + manager: independent + trigger: + gerrit: + - event: patchset-created + success: + gerrit: + Verified: 1 + failure: + gerrit: + Verified: -1 + +- pipeline: + name: gate + manager: dependent + success-message: Build succeeded (gate). + trigger: + gerrit: + - event: comment-added + approval: + - Approved: 1 + success: + gerrit: + Verified: 2 + submit: true + failure: + gerrit: + Verified: -2 + start: + gerrit: + Verified: 0 + precedence: high + +- pipeline: + name: post + manager: independent + trigger: + gerrit: + - event: ref-updated + ref: ^(?!refs/).*$ + +- pipeline: + name: tag + manager: independent + trigger: + gerrit: + - event: ref-updated + ref: ^refs/tags/.*$ + +- job: + name: base + parent: null + run: playbooks/base.yaml + nodeset: + nodes: + - label: debian-normal + name: controller + +- job: + name: check-job + run: playbooks/check.yaml + +- job: + name: post-job + run: playbooks/post.yaml + +- job: + name: tag-job + run: playbooks/tag.yaml + +- project: + name: org/project + check: + jobs: + - check-job + gate: + jobs: + - check-job + post: + jobs: + - post-job + tag: + jobs: + - tag-job + +- image: + name: debian + type: cloud + +- flavor: + name: normal + +- flavor: + name: large + +- flavor: + name: dedicated + +- flavor: + name: invalid + +- label: + name: debian-normal + image: debian + flavor: normal + +- label: + name: debian-large + image: debian + flavor: large + +- label: + name: debian-dedicated + image: debian + flavor: dedicated + +- label: + name: debian-invalid + image: debian + flavor: invalid + +- section: + name: aws-base + abstract: true + connection: aws + host-key-checking: true + boot-timeout: 1 + launch-timeout: 600 + launch-attempts: 2 + object-storage: + bucket-name: zuul + # TODO + # key-name: zuul + flavors: + - name: normal + instance-type: t3.medium + volume-type: gp3 + volume-size: 40 + iops: 500 + throughput: 200 + - name: large + instance-type: t3.large + - name: dedicated + instance-type: t3.large + dedicated-host: True + - name: invalid + instance-type: invalid + images: + - name: debian + image-id: ami-1e749f67 + +- section: + name: aws-us-east-1 + parent: aws-base + region: us-east-1 + +- provider: + name: aws-us-east-1-main + section: aws-us-east-1 + labels: + - name: debian-normal + key-name: zuul + volume-type: gp3 + volume-size: 40 + # Change iops and omit throughput to test label overriding the + # flavor + iops: 1000 + - name: debian-large + key-name: zuul + - name: debian-dedicated + key-name: zuul + - name: debian-invalid + key-name: zuul diff --git a/tests/fixtures/layouts/nodepool.yaml b/tests/fixtures/layouts/nodepool.yaml index 25359e6c94..d0a0ee8c13 100644 --- a/tests/fixtures/layouts/nodepool.yaml +++ b/tests/fixtures/layouts/nodepool.yaml @@ -124,7 +124,8 @@ name: aws-base abstract: true connection: aws - boot-timeout: 120 + host-key-checking: false + boot-timeout: 300 launch-timeout: 600 launch-attempts: 2 object-storage: diff --git a/tests/fixtures/layouts/openstack/nodepool-image.yaml b/tests/fixtures/layouts/openstack/nodepool-image.yaml index fd8aa119a3..11797af25f 100644 --- a/tests/fixtures/layouts/openstack/nodepool-image.yaml +++ b/tests/fixtures/layouts/openstack/nodepool-image.yaml @@ -57,6 +57,7 @@ name: openstack-base abstract: true connection: openstack + host-key-checking: false boot-timeout: 120 launch-timeout: 600 launch-attempts: 2 diff --git a/tests/fixtures/layouts/openstack/nodepool.yaml b/tests/fixtures/layouts/openstack/nodepool.yaml index c0d17b35e4..066c78ba54 100644 --- a/tests/fixtures/layouts/openstack/nodepool.yaml +++ b/tests/fixtures/layouts/openstack/nodepool.yaml @@ -100,6 +100,7 @@ name: openstack-base abstract: true connection: openstack + host-key-checking: false boot-timeout: 120 launch-timeout: 600 launch-attempts: 2 diff --git a/tests/unit/test_launcher.py b/tests/unit/test_launcher.py index ad3c05fff3..1af4c25bda 100644 --- a/tests/unit/test_launcher.py +++ b/tests/unit/test_launcher.py @@ -21,7 +21,6 @@ from unittest import mock from zuul import model from zuul.launcher.client import LauncherClient -import fixtures import responses import testtools from kazoo.exceptions import NoNodeError @@ -36,6 +35,11 @@ from tests.base import ( return_data, ResponsesFixture, ) +from tests.fake_nodescan import ( + FakeSocket, + FakePoll, + FakeTransport, +) class ImageMocksFixture(ResponsesFixture): @@ -137,8 +141,6 @@ class LauncherBaseTestCase(ZuulTestCase): self.mock_aws.start() # Must start responses after mock_aws self.useFixture(ImageMocksFixture()) - self.useFixture(fixtures.MonkeyPatch( - 'zuul.launcher.server.NodescanRequest.FAKE', True)) self.s3 = boto3.resource('s3', region_name='us-west-2') self.s3.create_bucket( Bucket='zuul', @@ -444,7 +446,7 @@ class TestLauncher(LauncherBaseTestCase): self.waitUntilSettled() nodes = self.launcher.api.nodes_cache.getItems() - self.assertNotEqual(nodes[0].host_keys, []) + self.assertEqual(nodes[0].host_keys, []) self.executor_server.hold_jobs_in_build = False self.executor_server.release() @@ -763,6 +765,64 @@ class TestLauncher(LauncherBaseTestCase): except NoNodeError: break + @simple_layout('layouts/nodepool-nodescan.yaml', enable_nodepool=True) + @okay_tracebacks('_checkNodescanRequest') + @mock.patch('paramiko.transport.Transport') + @mock.patch('socket.socket') + @mock.patch('select.epoll') + def test_nodescan_failure(self, mock_epoll, mock_socket, mock_transport): + # Test a nodescan failure + fake_socket = FakeSocket() + mock_socket.return_value = fake_socket + mock_epoll.return_value = FakePoll() + mock_transport.return_value = FakeTransport(_fail=True) + + ctx = self.createZKContext(None) + request = self.requestNodes(["debian-normal"], timeout=30) + self.assertEqual(request.state, model.NodesetRequest.State.FAILED) + provider_nodes = request.provider_nodes[0] + self.assertEqual(len(provider_nodes), 2) + self.assertEqual(len(request.nodes), 1) + self.assertEqual(provider_nodes[-1], request.nodes[-1]) + + provider_nodes = [] + for node_id in request.nodes: + provider_nodes.append(model.ProviderNode.fromZK( + ctx, path=model.ProviderNode._getPath(node_id))) + + request.delete(ctx) + self.waitUntilSettled() + + for pnode in provider_nodes: + for _ in iterate_timeout(60, "node to be deleted"): + try: + pnode.refresh(ctx) + except NoNodeError: + break + + @simple_layout('layouts/nodepool-nodescan.yaml', enable_nodepool=True) + @okay_tracebacks('_checkNodescanRequest') + @mock.patch('paramiko.transport.Transport') + @mock.patch('socket.socket') + @mock.patch('select.epoll') + def test_nodescan_success(self, mock_epoll, mock_socket, mock_transport): + # Test a normal launch with a nodescan + fake_socket = FakeSocket() + mock_socket.return_value = fake_socket + mock_epoll.return_value = FakePoll() + mock_transport.return_value = FakeTransport() + + ctx = self.createZKContext(None) + request = self.requestNodes(["debian-normal"]) + self.assertEqual(request.state, model.NodesetRequest.State.FULFILLED) + provider_nodes = request.provider_nodes[0] + self.assertEqual(len(provider_nodes), 1) + self.assertEqual(len(request.nodes), 1) + + node = model.ProviderNode.fromZK( + ctx, path=model.ProviderNode._getPath(request.nodes[0])) + self.assertEqual(['fake key fake base64'], node.host_keys) + @simple_layout('layouts/nodepool-image.yaml', enable_nodepool=True) @return_data( 'build-debian-local-image', @@ -876,7 +936,7 @@ class TestMinReadyLauncher(LauncherBaseTestCase): if len(in_use_nodes) == 2: break - self.assertNotEqual(nodes[0].host_keys, []) + self.assertEqual(nodes[0].host_keys, []) self.executor_server.hold_jobs_in_build = False self.executor_server.release() diff --git a/tests/unit/test_nodescan.py b/tests/unit/test_nodescan.py index 3d1e948805..93971b380f 100644 --- a/tests/unit/test_nodescan.py +++ b/tests/unit/test_nodescan.py @@ -27,80 +27,11 @@ from tests.base import ( iterate_timeout, okay_tracebacks, ) - - -class FakeSocket: - def __init__(self): - self.blocking = True - self.fd = 1 - - def setblocking(self, b): - self.blocking = b - - def getsockopt(self, level, optname): - return None - - def connect(self, addr): - if not self.blocking: - raise BlockingIOError() - raise Exception("blocking connect attempted") - - def fileno(self): - return self.fd - - -class FakePoll: - def __init__(self, _fail=False): - self.fds = [] - self._fail = _fail - - def register(self, fd, bitmap): - self.fds.append(fd) - - def unregister(self, fd): - if fd in self.fds: - self.fds.remove(fd) - - def poll(self, timeout=None): - if self._fail: - return [] - fds = self.fds[:] - self.fds = [f for f in fds if not isinstance(f, FakeSocket)] - fds = [f.fileno() if hasattr(f, 'fileno') else f for f in fds] - return [(f, 0) for f in fds] - - -class Dummy: - pass - - -class FakeKey: - def get_name(self): - return 'fake key' - - def get_base64(self): - return 'fake base64' - - -class FakeTransport: - def __init__(self, _fail=False, active=True): - self.active = active - self._fail = _fail - - def start_client(self, event=None, timeout=None): - if not self._fail: - event.set() - - def get_security_options(self): - ret = Dummy() - ret.key_types = ['rsa'] - return ret - - def get_remote_server_key(self): - return FakeKey() - - def get_exception(self): - return Exception("Fake ssh error") +from tests.fake_nodescan import ( + FakeSocket, + FakePoll, + FakeTransport, +) class DummyProviderNode(ProviderNode, subclass_id="dummy-nodescan"): diff --git a/zuul/launcher/server.py b/zuul/launcher/server.py index f5069fc025..c1137fbf94 100644 --- a/zuul/launcher/server.py +++ b/zuul/launcher/server.py @@ -264,8 +264,6 @@ class NodescanRequest: CONNECTING_KEY = 'connecting key' NEGOTIATING_KEY = 'negotiating key' COMPLETE = 'complete' - # For unit testing - FAKE = False def __init__(self, node, log): self.state = self.START @@ -282,10 +280,9 @@ class NodescanRequest: self.gather_hostkeys = False self.ip = node.interface_ip self.port = node.connection_port - if 'fake' not in self.ip and not self.FAKE: - addrinfo = socket.getaddrinfo(self.ip, self.port)[0] - self.family = addrinfo[0] - self.sockaddr = addrinfo[4] + addrinfo = socket.getaddrinfo(self.ip, self.port)[0] + self.family = addrinfo[0] + self.sockaddr = addrinfo[4] self.sock = None self.transport = None self.event = None @@ -412,14 +409,9 @@ class NodescanRequest: if not self.host_key_checking: self.state = self.COMPLETE else: - if 'fake' in self.ip or self.FAKE: - if self.gather_hostkeys: - self.keys = ['ssh-rsa FAKEKEY'] - self.state = self.COMPLETE - else: - self.init_connection_attempts += 1 - self._connect() - self.state = self.CONNECTING_INIT + self.init_connection_attempts += 1 + self._connect() + self.state = self.CONNECTING_INIT if self.state == self.CONNECTING_INIT: if not socket_ready: @@ -1024,7 +1016,6 @@ class Launcher: with node.activeContext(ctx): node.setState(state) self.wake_event.set() - # Deallocate ready node w/o a request for re-use if (node.request_id and not request and node.state == node.State.READY): diff --git a/zuul/provider/__init__.py b/zuul/provider/__init__.py index 1fd8c91937..281476e55e 100644 --- a/zuul/provider/__init__.py +++ b/zuul/provider/__init__.py @@ -74,8 +74,11 @@ class BaseProviderFlavor(CNameMixin, metaclass=abc.ABCMeta): class BaseProviderLabel(CNameMixin, metaclass=abc.ABCMeta): - inheritable_schema = assemble() + inheritable_schema = assemble( + provider_schema.common_label, + ) schema = assemble( + provider_schema.common_label, provider_schema.base_label, ) image_flavor_inheritable_schema = assemble() @@ -185,7 +188,6 @@ class BaseProviderSchema(metaclass=abc.ABCMeta): Optional('abstract', default=False): Nullable(bool), Optional('parent'): Nullable(str), Required('connection'): str, - Optional('boot-timeout'): Nullable(int), Optional('launch-timeout'): Nullable(int), Optional('launch-attempts', default=3): int, }) diff --git a/zuul/provider/schema.py b/zuul/provider/schema.py index 00f09c0b3c..741da413ea 100644 --- a/zuul/provider/schema.py +++ b/zuul/provider/schema.py @@ -22,6 +22,13 @@ from zuul.lib.voluputil import Required, Optional, Nullable # Labels +# The label attributes which can appear either in the main body of the +# section stanza, or in a section/provider label, or in a standalone +# label. +common_label = vs.Schema({ + Optional('boot-timeout', default=300): int, +}) + # The label attributes that can appear in a section/provider label or # a standalone label (but not in the section body). base_label = vs.Schema({ @@ -34,7 +41,6 @@ base_label = vs.Schema({ Optional('tags', default=dict): {str: str}, Optional('min-ready', default=0): int, Optional('max-ready-age', default=0): int, - Optional('boot-timeout', default=300): int, }) # Label attributes that are common to any kind of ssh-based driver.