validate-host: retry network tests and include unbound logs
Split the network testing component of the validate-host role into a separate task, so that it can be retried a couple of times in case external networking is a bit slow to come up. On failure, also collect the unbound logs if they are found in some common locations (as they will be on infra nodes). Change-Id: Id12f1ba064fa2e5f75b9a5cfba76d238d23d3f57
This commit is contained in:
parent
76fdb33658
commit
7e00ba32da
@ -18,15 +18,10 @@
|
|||||||
import os
|
import os
|
||||||
import shlex
|
import shlex
|
||||||
import subprocess
|
import subprocess
|
||||||
import traceback
|
|
||||||
|
|
||||||
|
|
||||||
command_map = {
|
command_map = {
|
||||||
'uname': 'uname -a',
|
'uname': 'uname -a',
|
||||||
'network_interfaces': 'ip address show',
|
|
||||||
'network_routing_v4': 'ip route show',
|
|
||||||
'network_routing_v6': 'ip -6 route show',
|
|
||||||
'network_neighbors': 'ip neighbor show',
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -44,16 +39,14 @@ def main():
|
|||||||
argument_spec=dict(
|
argument_spec=dict(
|
||||||
image_manifest=dict(required=False, type='str'),
|
image_manifest=dict(required=False, type='str'),
|
||||||
image_manifest_files=dict(required=False, type='list'),
|
image_manifest_files=dict(required=False, type='list'),
|
||||||
traceroute_host=dict(required=False, type='str'),
|
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
image_manifest = module.params['image_manifest']
|
image_manifest = module.params['image_manifest']
|
||||||
traceroute_host = module.params['traceroute_host']
|
|
||||||
image_manifest_files = module.params['image_manifest_files']
|
image_manifest_files = module.params['image_manifest_files']
|
||||||
if not image_manifest_files and image_manifest:
|
if not image_manifest_files and image_manifest:
|
||||||
image_manifest_files = [image_manifest]
|
image_manifest_files = [image_manifest]
|
||||||
ret = {'image_manifest_files': [], 'traceroute': None}
|
ret = {'image_manifest_files': []}
|
||||||
|
|
||||||
for image_manifest in image_manifest_files:
|
for image_manifest in image_manifest_files:
|
||||||
if image_manifest and os.path.exists(image_manifest):
|
if image_manifest and os.path.exists(image_manifest):
|
||||||
@ -63,31 +56,6 @@ def main():
|
|||||||
'underline': len(image_manifest) * '-',
|
'underline': len(image_manifest) * '-',
|
||||||
'content': open(image_manifest, 'r').read(),
|
'content': open(image_manifest, 'r').read(),
|
||||||
})
|
})
|
||||||
if traceroute_host:
|
|
||||||
passed = False
|
|
||||||
try:
|
|
||||||
ret['traceroute_v6'] = run_command(
|
|
||||||
'traceroute6 -n {host}'.format(host=traceroute_host))
|
|
||||||
passed = True
|
|
||||||
except (subprocess.CalledProcessError, OSError) as e:
|
|
||||||
ret['traceroute_v6_exception'] = traceback.format_exc(e)
|
|
||||||
ret['traceroute_v6_output'] = e.output
|
|
||||||
ret['traceroute_v6_return'] = e.returncode
|
|
||||||
pass
|
|
||||||
try:
|
|
||||||
ret['traceroute_v4'] = run_command(
|
|
||||||
'traceroute -n {host}'.format(host=traceroute_host))
|
|
||||||
passed = True
|
|
||||||
except (subprocess.CalledProcessError, OSError) as e:
|
|
||||||
ret['traceroute_v4_exception'] = traceback.format_exc(e)
|
|
||||||
ret['traceroute_v4_output'] = e.output
|
|
||||||
ret['traceroute_v4_return'] = e.returncode
|
|
||||||
pass
|
|
||||||
if not passed:
|
|
||||||
module.fail_json(
|
|
||||||
msg="No viable v4 or v6 route found to {traceroute_host}."
|
|
||||||
" The build node is assumed to be invalid.".format(
|
|
||||||
traceroute_host=traceroute_host), **ret)
|
|
||||||
|
|
||||||
for key, command in command_map.items():
|
for key, command in command_map.items():
|
||||||
try:
|
try:
|
||||||
|
118
roles/validate-host/library/zuul_network_validate.py
Normal file
118
roles/validate-host/library/zuul_network_validate.py
Normal file
@ -0,0 +1,118 @@
|
|||||||
|
#!/usr/bin/python
|
||||||
|
|
||||||
|
# Copyright (c) 2018 Red Hat
|
||||||
|
#
|
||||||
|
# This module is free software: you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU General Public License as published by
|
||||||
|
# the Free Software Foundation, either version 3 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This software is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License
|
||||||
|
# along with this software. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
import os
|
||||||
|
import shlex
|
||||||
|
import subprocess
|
||||||
|
import traceback
|
||||||
|
|
||||||
|
|
||||||
|
# Informational commands run on the node.  Each key is used as the key
# in the module result under which that command's output is stored.
command_map = {
    # Interface addresses.
    'network_interfaces': 'ip address show',
    # IPv4 and IPv6 routing tables.
    'network_routing_v4': 'ip route show',
    'network_routing_v6': 'ip -6 route show',
    # Neighbor table.
    'network_neighbors': 'ip neighbor show',
}
|
||||||
|
|
||||||
|
|
||||||
|
def run_command(command):
    '''Run a command and return its combined stdout/stderr output.

    The command string is split with ``shlex`` and executed without a
    shell.  ``/sbin`` and ``/usr/sbin`` are appended to ``PATH`` for the
    child process so that binaries living there can be found.

    :param command: command line to execute, as a single string.
    :returns: the output captured by ``subprocess.check_output`` (stderr
        is merged into stdout).
    :raises subprocess.CalledProcessError: if the command exits non-zero.
    :raises OSError: if the executable cannot be started.
    '''
    env = os.environ.copy()
    # PATH may be unset or empty in a minimal environment.  Build the
    # value from its non-empty parts so that case neither raises KeyError
    # nor leaves an empty entry (which would mean "current directory").
    search_path = [p for p in (env.get('PATH'), '/sbin', '/usr/sbin') if p]
    env['PATH'] = ':'.join(search_path)
    return subprocess.check_output(
        shlex.split(command),
        stderr=subprocess.STDOUT,
        env=env)
|
||||||
|
|
||||||
|
|
||||||
|
def collect_unbound_logs(candidates=('/var/lib/unbound/unbound.log',
                                     '/var/log/unbound.log')):
    '''Look for unbound logs

    This looks for unbound logs in common places and returns the
    contents.  Intended for the failure path to add more info if the
    traceroutes have failed.

    :param candidates: log file paths to try, in priority order.  The
        first one that exists is used.
    :returns: an empty dict if none of the candidates exists.  Otherwise
        a dict with ``unbound_log_file`` (the path chosen) and either
        ``unbound_log_file_content`` (the whole file) or, if the file
        could not be read, ``unbound_log_file_error``.
    '''
    ret = {}

    # NOTE(ianw): keep /var/lib/unbound/unbound.log first, the other
    # exists but isn't populated on infra rpm images for ... reasons
    for unbound_log_file in candidates:
        if os.path.exists(unbound_log_file):
            break
    else:
        return ret

    ret['unbound_log_file'] = unbound_log_file
    try:
        with open(unbound_log_file) as f:
            # NOTE(ianw): At high verbosity this can be big ... but this
            # is also intended to be used early which should limit its
            # size.  We could tail it ...
            ret['unbound_log_file_content'] = f.read()
    except (IOError, OSError, UnicodeDecodeError) as e:
        # This runs while reporting another failure; an unreadable log
        # must not replace that report with a traceback of its own.
        ret['unbound_log_file_error'] = str(e)

    return ret
|
||||||
|
|
||||||
|
|
||||||
|
def _record_traceroute(ret, key, command):
    '''Run one traceroute command and store the outcome in ret.

    On success the output is stored under ``key``.  On failure the
    formatted traceback, the captured output and the return code are
    stored under ``key`` + ``_exception`` / ``_output`` / ``_return``.

    :returns: True if the command succeeded, False otherwise.
    '''
    try:
        ret[key] = run_command(command)
    except (subprocess.CalledProcessError, OSError) as e:
        # format_exc() formats the exception currently being handled; its
        # first argument is a frame limit, not an exception.
        ret[key + '_exception'] = traceback.format_exc()
        # Only CalledProcessError carries output/returncode; an OSError
        # (e.g. the binary is not installed) has neither.
        ret[key + '_output'] = getattr(e, 'output', None)
        ret[key + '_return'] = getattr(e, 'returncode', None)
        return False
    return True


def main():
    '''Collect network information and check for a route to a known host.

    Runs every command in ``command_map`` on a best-effort basis and
    stores the outputs in the result.  If ``traceroute_host`` is given,
    a v6 and a v4 traceroute to it are attempted; if both fail, unbound
    logs are added to the result and the module fails.
    '''
    module = AnsibleModule(
        argument_spec=dict(
            # Optional: the role passes this with default(omit), so the
            # module must work when no traceroute host is configured.
            traceroute_host=dict(required=False, type='str'),
        )
    )

    traceroute_host = module.params['traceroute_host']

    ret = {}

    for key, command in command_map.items():
        try:
            ret[key] = run_command(command)
        except (subprocess.CalledProcessError, OSError):
            # Purely informational; a failing or missing tool just means
            # this key is absent from the result.
            pass

    if traceroute_host:
        # Always attempt both address families so the result carries
        # diagnostics for each; either one succeeding is enough.
        v6_ok = _record_traceroute(
            ret, 'traceroute_v6',
            'traceroute6 -n {host}'.format(host=traceroute_host))
        v4_ok = _record_traceroute(
            ret, 'traceroute_v4',
            'traceroute -n {host}'.format(host=traceroute_host))
        if not (v6_ok or v4_ok):
            ret.update(collect_unbound_logs())
            module.fail_json(
                msg="No viable v4 or v6 route found to {traceroute_host}."
                    " The build node is assumed to be invalid.".format(
                        traceroute_host=traceroute_host), **ret)

    module.exit_json(changed=False, _zuul_nolog_return=True, **ret)
|
||||||
|
|
||||||
|
from ansible.module_utils.basic import * # noqa
|
||||||
|
from ansible.module_utils.basic import AnsibleModule
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
main()
|
@ -23,9 +23,16 @@
|
|||||||
zuul_debug_info:
|
zuul_debug_info:
|
||||||
image_manifest: "{{ zuul_site_image_manifest|default(omit) }}"
|
image_manifest: "{{ zuul_site_image_manifest|default(omit) }}"
|
||||||
image_manifest_files: "{{ zuul_site_image_manifest_files|default(omit) }}"
|
image_manifest_files: "{{ zuul_site_image_manifest_files|default(omit) }}"
|
||||||
traceroute_host: "{{ zuul_site_traceroute_host|default(omit) }}"
|
|
||||||
register: zdi
|
register: zdi
|
||||||
|
|
||||||
|
- name: Collect network information from zuul worker
|
||||||
|
zuul_network_validate:
|
||||||
|
traceroute_host: "{{ zuul_site_traceroute_host|default(omit) }}"
|
||||||
|
register: znetinfo
|
||||||
|
retries: 3
|
||||||
|
delay: 5
|
||||||
|
until: znetinfo.failed == False
|
||||||
|
|
||||||
- name: Write out all zuul information for each host
|
- name: Write out all zuul information for each host
|
||||||
delegate_to: localhost
|
delegate_to: localhost
|
||||||
template:
|
template:
|
||||||
|
@ -15,40 +15,40 @@ Host & kernel
|
|||||||
{{ zdi.uname }}
|
{{ zdi.uname }}
|
||||||
|
|
||||||
{% endif %}
|
{% endif %}
|
||||||
{% if 'network_interfaces' in zdi %}
|
{% if 'network_interfaces' in znetinfo %}
|
||||||
Network interface addresses
|
Network interface addresses
|
||||||
===========================
|
===========================
|
||||||
{{ zdi.network_interfaces }}
|
{{ znetinfo.network_interfaces }}
|
||||||
|
|
||||||
{% endif %}
|
{% endif %}
|
||||||
{% if 'network_routing_v4' in zdi %}
|
{% if 'network_routing_v4' in znetinfo %}
|
||||||
Network routing tables v4
|
Network routing tables v4
|
||||||
=========================
|
=========================
|
||||||
{{ zdi.network_routing_v4 }}
|
{{ znetinfo.network_routing_v4 }}
|
||||||
|
|
||||||
{% endif %}
|
{% endif %}
|
||||||
{% if 'network_routing_v6' in zdi %}
|
{% if 'network_routing_v6' in znetinfo %}
|
||||||
Network routing tables v6
|
Network routing tables v6
|
||||||
=========================
|
=========================
|
||||||
{{ zdi.network_routing_v6 }}
|
{{ znetinfo.network_routing_v6 }}
|
||||||
|
|
||||||
{% endif %}
|
{% endif %}
|
||||||
{% if 'network_neighbors' in zdi %}
|
{% if 'network_neighbors' in znetinfo %}
|
||||||
Network neighbors
|
Network neighbors
|
||||||
=================
|
=================
|
||||||
{{ zdi.network_neighbors }}
|
{{ znetinfo.network_neighbors }}
|
||||||
|
|
||||||
{% endif %}
|
{% endif %}
|
||||||
{% if 'traceroute_v4' in zdi %}
|
{% if 'traceroute_v4' in znetinfo %}
|
||||||
Route to Known Host v4
|
Route to Known Host v4
|
||||||
======================
|
======================
|
||||||
Known Host: {{ zuul_site_traceroute_host }}
|
Known Host: {{ zuul_site_traceroute_host }}
|
||||||
{{ zdi.traceroute_v4 }}
|
{{ znetinfo.traceroute_v4 }}
|
||||||
|
|
||||||
{% endif %}
|
{% endif %}
|
||||||
{% if 'traceroute_v6' in zdi %}
|
{% if 'traceroute_v6' in znetinfo %}
|
||||||
Route to Known Host v6
|
Route to Known Host v6
|
||||||
======================
|
======================
|
||||||
Known Host: {{ zuul_site_traceroute_host }}
|
Known Host: {{ zuul_site_traceroute_host }}
|
||||||
{{ zdi.traceroute_v6 }}
|
{{ znetinfo.traceroute_v6 }}
|
||||||
{% endif %}
|
{% endif %}
|
||||||
|
Loading…
Reference in New Issue
Block a user