Merge "Fix invalid DOWN status when updating a UDP pool" into stable/ussuri

This commit is contained in:
Zuul 2021-01-15 07:32:19 +00:00 committed by Gerrit Code Review
commit ff6f3d837a
5 changed files with 77 additions and 4 deletions

View File

@ -11,6 +11,7 @@
# under the License.
import ipaddress
import os
import re
import subprocess
@ -236,6 +237,20 @@ def get_udp_listener_pool_status(listener_id):
'members': {}
}}
config_path = util.keepalived_lvs_cfg_path(listener_id)
pids_pathes = util.keepalived_lvs_pids_path(listener_id)
config_stat = os.stat(config_path)
check_pid_stat = os.stat(pids_pathes[2])
# Indicates that keepalived configuration has been updated but the service
# has yet to be restarted.
# NOTE: It only works if we are doing a RESTART on configuration change,
# Iaa34db6cb1dfed98e96a585c5d105e263c7efa65 forces a RESTART instead of a
# RELOAD, we need to be careful if we want to switch back to RELOAD after
# updating to a recent keepalived release.
restarting = config_stat.st_mtime > check_pid_stat.st_mtime
with open(util.keepalived_lvs_cfg_path(listener_id), 'r') as f:
cfg = f.read()
hm_enabled = len(CHECKER_REGEX.findall(cfg)) > 0
@ -259,7 +274,8 @@ def get_udp_listener_pool_status(listener_id):
if member_ip_port is None:
status = constants.MAINT
elif member_ip_port in down_member_ip_port_set:
status = constants.DOWN
status = (
constants.RESTARTING if restarting else constants.DOWN)
elif int(realserver_result[member_ip_port]['Weight']) == 0:
status = constants.DRAIN
else:
@ -275,7 +291,8 @@ def get_udp_listener_pool_status(listener_id):
if member['ipport'] is None:
member_results[member['id']] = constants.MAINT
elif hm_enabled:
member_results[member['id']] = constants.DOWN
member_results[member['id']] = (
constants.RESTARTING if restarting else constants.DOWN)
else:
member_results[member['id']] = constants.NO_CHECK

View File

@ -645,6 +645,8 @@ HAPROXY_DEFAULT_MAXCONN = 50000
# the sysctl fs.file-max fs.nr_open settings in the image
HAPROXY_MAX_MAXCONN = 1000000
RESTARTING = 'RESTARTING'
# Quota Constants
QUOTA_UNLIMITED = -1
MIN_QUOTA = QUOTA_UNLIMITED

View File

@ -431,6 +431,14 @@ class UpdateHealthDb(update_base.HealthUpdateBase):
member_status = constants.OFFLINE
elif status == constants.NO_CHECK:
member_status = constants.NO_MONITOR
elif status == constants.RESTARTING:
# RESTARTING means that keepalived is restarting and a down
# member has been detected, the real status of the member
# is not clear, it might mean that the checker hasn't run
# yet.
# In this case, keep previous member_status, and wait for a
# non-transitional status.
pass
else:
LOG.warning('Member %(mem)s reported '
'status of %(status)s',

View File

@ -298,8 +298,15 @@ class LvsQueryTestCase(base.TestCase):
self.disabled_listener_id)
self.assertEqual((None, constants.AMPHORA_NAMESPACE), res)
@mock.patch('os.stat')
@mock.patch('subprocess.check_output')
def test_get_udp_listener_pool_status(self, mock_check_output):
def test_get_udp_listener_pool_status(self, mock_check_output,
mock_os_stat):
mock_os_stat.side_effect = (
mock.Mock(st_mtime=1234),
mock.Mock(st_mtime=1234),
)
# test with ipv4 and ipv6
mock_check_output.return_value = KERNAL_FILE_SAMPLE_V4
res = lvs_query.get_udp_listener_pool_status(self.listener_id_v4)
@ -313,6 +320,11 @@ class LvsQueryTestCase(base.TestCase):
self.member_id4_v4: constants.MAINT}}}
self.assertEqual(expected, res)
mock_os_stat.side_effect = (
mock.Mock(st_mtime=1234),
mock.Mock(st_mtime=1234),
)
mock_check_output.return_value = KERNAL_FILE_SAMPLE_V6
res = lvs_query.get_udp_listener_pool_status(self.listener_id_v6)
expected = {
@ -325,6 +337,28 @@ class LvsQueryTestCase(base.TestCase):
self.member_id4_v6: constants.MAINT}}}
self.assertEqual(expected, res)
@mock.patch('os.stat')
@mock.patch('subprocess.check_output')
def test_get_udp_listener_pool_status_restarting(self, mock_check_output,
mock_os_stat):
mock_os_stat.side_effect = (
mock.Mock(st_mtime=1234), # config file
mock.Mock(st_mtime=1220), # pid file
)
# test with ipv4 and ipv6
mock_check_output.return_value = KERNAL_FILE_SAMPLE_V4
res = lvs_query.get_udp_listener_pool_status(self.listener_id_v4)
expected = {
'lvs':
{'uuid': self.pool_id_v4,
'status': constants.UP,
'members': {self.member_id1_v4: constants.UP,
self.member_id2_v4: constants.UP,
self.member_id3_v4: constants.RESTARTING,
self.member_id4_v4: constants.MAINT}}}
self.assertEqual(expected, res)
@mock.patch('octavia.amphorae.backends.utils.keepalivedlvs_query.'
'get_udp_listener_resource_ipports_nsname')
def test_get_udp_listener_pool_status_when_no_pool(
@ -366,14 +400,19 @@ class LvsQueryTestCase(base.TestCase):
}}
self.assertEqual(expected, res)
@mock.patch('os.stat')
@mock.patch('octavia.amphorae.backends.utils.keepalivedlvs_query.'
'get_listener_realserver_mapping')
def test_get_udp_listener_pool_status_when_not_get_realserver_result(
self, mock_get_mapping):
self, mock_get_mapping, mock_os_stat):
# This will hit if the kernel lvs file (/proc/net/ip_vs)
# lose its content. So at this moment, eventhough we configure the
# pool and member into udp keepalived config file, we have to set
# ths status of pool and its members to DOWN.
mock_os_stat.side_effect = (
mock.Mock(st_mtime=1234),
mock.Mock(st_mtime=1234),
)
mock_get_mapping.return_value = (False, {})
res = lvs_query.get_udp_listener_pool_status(self.listener_id_v4)
expected = {

View File

@ -0,0 +1,7 @@
---
fixes:
- |
Fix a potential invalid DOWN operating status for members of a UDP pool.
A race condition could have occured when building the first heartbeat
message after adding a new member in a pool, this recently added member
could have been seen as DOWN.