From d8420315d4f8fde0416c65f39404055da16fc952 Mon Sep 17 00:00:00 2001 From: "James E. Blair" Date: Mon, 18 Apr 2022 12:51:56 -0700 Subject: [PATCH] Make nodepool hostvars available on unreachable hosts Zuul ignores nodes with network_cli connections when running the setup playbook in order to support the use-case where a network appliance begins a job in an unreachable state. Starting with Zuul v4.6.0, for security reasons, host variables require a functioning Ansible connection in order to be set. This includes the Nodepool variables such as public and private IP addresses. Causing a network_cli host to become online after the start of a job requires this information, and so this functionality has not worked since 4.6.0. To correct this, we now always add in the nodepool host vars as originally set regardless of whether the variable freeze playbook runs for a particular host. This means that even if we have no other variables set for that host, we at least know the IP address of the host and can interact with it directly in order to bring it online. Additionally, the freeze playbook did not correctly check the return value, due to a typo. This accidentally allowed Zuul to continue to function in this situation (but without the nodepool variables accessible). With this change we continue to ignore the return value, but intentionally so. A test for this use-case is added, along with a releaso note. Change-Id: Icd8a2c035e6c04a7c198281adbd07fef422a6c63 Story: 2009226 --- .../notes/network-vars-60fd922781da7b92.yaml | 6 ++++++ .../git/common-config/playbooks/network.yaml | 9 +++++++++ .../config/inventory/git/common-config/zuul.yaml | 15 +++++++++++++++ .../config/inventory/git/org_project2/.zuul.yaml | 3 +-- tests/unit/test_inventory.py | 16 +++++++++++++++- zuul/executor/server.py | 12 +++++++++++- 6 files changed, 57 insertions(+), 4 deletions(-) create mode 100644 releasenotes/notes/network-vars-60fd922781da7b92.yaml create mode 100644 tests/fixtures/config/inventory/git/common-config/playbooks/network.yaml diff --git a/releasenotes/notes/network-vars-60fd922781da7b92.yaml b/releasenotes/notes/network-vars-60fd922781da7b92.yaml new file mode 100644 index 0000000000..79724eff64 --- /dev/null +++ b/releasenotes/notes/network-vars-60fd922781da7b92.yaml @@ -0,0 +1,6 @@ +--- +fixes: + - | + Nodepool host variables are now available even for unreachable + hosts. This is useful for network appliances which start the job + in an unreachable state. diff --git a/tests/fixtures/config/inventory/git/common-config/playbooks/network.yaml b/tests/fixtures/config/inventory/git/common-config/playbooks/network.yaml new file mode 100644 index 0000000000..913234d942 --- /dev/null +++ b/tests/fixtures/config/inventory/git/common-config/playbooks/network.yaml @@ -0,0 +1,9 @@ +- hosts: controller + tasks: + # This verifies that even though we don't have other frozen vars + # (since we can't connect to it), we do still have the nodepool + # hostvars for the appliance. + - name: Use the nodepool hostvars for the appliance + debug: + msg: "ssh admin@{{ hostvars[item].nodepool.public_ipv4 }}" + with_inventory_hostnames: appliance diff --git a/tests/fixtures/config/inventory/git/common-config/zuul.yaml b/tests/fixtures/config/inventory/git/common-config/zuul.yaml index 95a95a1959..69023c6bd5 100644 --- a/tests/fixtures/config/inventory/git/common-config/zuul.yaml +++ b/tests/fixtures/config/inventory/git/common-config/zuul.yaml @@ -93,12 +93,27 @@ - job: name: jinja2-message + files: jinja.txt nodeset: nodes: - name: ubuntu-xenial label: ubuntu-xenial run: playbooks/jinja2-message.yaml +- job: + name: network + files: network.txt + nodeset: + nodes: + - name: controller + label: ubuntu-xenial + - name: appliance + label: network + host-vars: + appliance: + ansible_network_os: foo + run: playbooks/network.yaml + - job: name: ansible-version28-inventory nodeset: diff --git a/tests/fixtures/config/inventory/git/org_project2/.zuul.yaml b/tests/fixtures/config/inventory/git/org_project2/.zuul.yaml index 759e0abbbc..9fb22fcc9f 100644 --- a/tests/fixtures/config/inventory/git/org_project2/.zuul.yaml +++ b/tests/fixtures/config/inventory/git/org_project2/.zuul.yaml @@ -2,5 +2,4 @@ check: jobs: - jinja2-message - - + - network diff --git a/tests/unit/test_inventory.py b/tests/unit/test_inventory.py index 1f96bb5edc..ec36c1c945 100644 --- a/tests/unit/test_inventory.py +++ b/tests/unit/test_inventory.py @@ -397,7 +397,8 @@ class TestAnsibleInventory(AnsibleZuulTestCase): # Output extra ansible info so we might see errors. self.executor_server.verbose = True A = self.fake_gerrit.addFakeChange( - 'org/project2', 'master', expected_message) + 'org/project2', 'master', expected_message, + files={'jinja.txt': 'foo'}) self.fake_gerrit.addEvent(A.getPatchsetCreatedEvent(1)) self.waitUntilSettled() self.assertHistory([ @@ -429,6 +430,19 @@ class TestAnsibleInventory(AnsibleZuulTestCase): def test_jinja2_message_raw(self): self._jinja2_message("This message has {% raw %} in {% endraw %} it ") + def test_network_inventory(self): + # Network appliances can't run the freeze or setup playbooks, + # so they won't have any job variables available. But they + # should still have nodepool hostvars. Run a playbook that + # verifies that. + A = self.fake_gerrit.addFakeChange( + 'org/project2', 'master', 'A', + files={'network.txt': 'foo'}) + self.fake_gerrit.addEvent(A.getPatchsetCreatedEvent(1)) + self.waitUntilSettled() + self.assertHistory([ + dict(name='network', result='SUCCESS', changes='1,1')]) + class TestWindowsInventory(TestInventoryBase): config_file = 'zuul-winrm.conf' diff --git a/zuul/executor/server.py b/zuul/executor/server.py index 4a7e144bdb..b774d977aa 100644 --- a/zuul/executor/server.py +++ b/zuul/executor/server.py @@ -1719,7 +1719,12 @@ class AnsibleJob(object): self.original_hostvars) freeze_status, freeze_code = self.runAnsibleFreeze( self.jobdir.freeze_playbook, self.ansible_version) - if freeze_status != self.RESULT_NORMAL or setup_code != 0: + # We ignore the return code because we run this playbook on + # all hosts, even ones which we didn't run the setup playbook + # on. If a host is unreachable, we should still proceed (a + # later playbook may cause it to become reachable). We just + # won't have all of the variables set. + if freeze_status != self.RESULT_NORMAL: return result self.loadFrozenHostvars() @@ -2567,6 +2572,11 @@ class AnsibleJob(object): with open(path) as f: facts = json.loads(f.read()) self.frozen_hostvars[host['name']] = facts.pop('_zuul_frozen', {}) + # Always include the nodepool vars, even if we didn't run + # the playbook for this host. + if 'host_vars' in host: + self.frozen_hostvars[host['name']]['nodepool'] =\ + host['host_vars']['nodepool'] with open(path, 'w') as f: f.write(json.dumps(facts))