Add the MTU size to ping to catch network issues

Users frequently hit a problem where the NIC templates set jumbo
frames correctly but the switches are not configured to match. This
mismatch causes seemingly random failures that are hard to
troubleshoot, ranging from a Ceph cluster not forming to Swift not
syncing properly. Adding an early MTU-size test to the Ansible
playbooks makes the deployment fail early and points us at the real
problem right away, instead of spending a week or more trying to
figure out why Ceph batch LVM is not working.

Change-Id: If4450e8cf1be8231f06033b603dba41dfa65ffbb
(cherry picked from commit 6acde891f8)
This commit is contained in:
David Hill 2021-11-09 11:00:42 -05:00 committed by David Hill
parent c8b144aa3e
commit 3545064fb0
2 changed files with 46 additions and 4 deletions

View File

@ -33,13 +33,10 @@
- ansible_facts.default_ipv4.gateway is defined
# Checks reachability of every address in tripleo_nodes_validation_ping_test_ips;
# each address is exposed to the task as `controller`. The task only runs when
# ICMP validation is enabled and the address list is not empty.
# NOTE(review): this hunk looks like a diff rendered without its +/- markers, so
# the direct `command` ping (with retries/delay/changed_when) and the
# `include_tasks: ping.yml` both appear in the same task - confirm which side is
# current before reusing this task.
- name: Check Controllers availability
command: "{{ (':' in controller) | ternary('ping6', 'ping') }} -w 10 -c 1 {{ controller }}"
retries: 10
delay: 60
include_tasks: ping.yml
loop_control:
loop_var: controller
loop: "{{ tripleo_nodes_validation_ping_test_ips }}"
changed_when: false
when:
- tripleo_nodes_validation_validate_controllers_icmp|bool
- tripleo_nodes_validation_ping_test_ips | length > 0

View File

@ -0,0 +1,45 @@
---
# Copyright 2021 Red Hat, Inc.
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
# Per-controller settings shared by the tasks that follow.
- name: Set IP options
  set_fact:
    # Bare sed script that keeps only the device name following "dev" in
    # `ip route get` output. It must not carry its own quotes or a closing
    # parenthesis: the lookup task already wraps it as sed '<script>') inside
    # the command substitution, and doubling them breaks the shell command.
    _sed_opts: 's/.*dev \([^ ]*\).*/\1/'
    # An address containing ":" is treated as IPv6 and pinged with ping6.
    _ping_cmd: "{{ (':' in controller) | ternary('ping6', 'ping') }}"
# Resolves the outgoing interface towards the controller and reads its MTU.
# Prints "<interface> <mtu>" on stdout; the MTU falls back to 0 when the
# interface's mtu file cannot be read.
- name: Lookup interface information
  shell: |
    INT=$(ip ro get {{ controller }} | head -n 1 | sed '{{ _sed_opts }}')
    MTU=$(cat /sys/class/net/${INT}/mtu 2>/dev/null || echo "0")
    echo "$INT $MTU"
  register: _nic_mtu
  # Read-only lookup: never report the host as changed.
  changed_when: false
# Splits the "<interface> <mtu>" lookup output into the facts used by the
# ping tasks.
- name: Set interface vars
  set_fact:
    _nic: "{{ _nic_mtu.get('stdout', '').split(' ')[0] | default('lo') }}"
    # Largest ping payload that still fits in one MTU-sized packet: subtract
    # the IP + ICMP headers, i.e. 20 + 8 = 28 bytes for IPv4 and 40 + 8 = 48
    # bytes for IPv6 (addresses containing ":"). An unknown MTU (0) yields a
    # negative value, which the MTU ping task skips via its `_mtu > 0` check.
    _mtu: >-
      {{ (_nic_mtu.get('stdout', '').split(' ')[1] | default(0) | int)
         - ((':' in controller) | ternary(48, 28)) }}
# Reachability checks for one controller address. Skipped entirely when the
# route to the address goes through the loopback interface.
- name: Network availability validation block
  when: _nic != 'lo'
  block:
    # Plain single-packet ping. `retries`/`delay` only take effect together
    # with `until` on older Ansible releases, so the result is registered and
    # the task is retried until the ping succeeds.
    - name: Check IP responsiveness
      command: "{{ _ping_cmd }} -w 10 -c 1 {{ controller }}"
      register: _ping_result
      until: _ping_result is succeeded
      retries: 10
      delay: 60
      changed_when: false
    # Same ping with an MTU-sized payload, to catch paths that drop large
    # frames. Skipped when no usable MTU was found.
    - name: Validate packet with {{ _mtu }} MTU size can reach controller from {{ _nic }}
      command: "{{ _ping_cmd }} -w 10 -s {{ _mtu }} -c 1 {{ controller }}"
      register: _mtu_ping_result
      until: _mtu_ping_result is succeeded
      retries: 10
      delay: 60
      changed_when: false
      when: _mtu | int > 0