Redfish Virtual Media Controller enhancements

Adding support for additional server that responds to a number of
redfish requests with the 204 (OK but with no response content).

This update fixes http 204 status handling to expect no response
content.

This update also adds support for servers that download image at
insertion time and have short connection timeout periods that cannot
be changed.

1. Image Insertion: add image download monitoring phase to image
   insertion.
   - to accommodate for servers that load the image during insertion

2. Image Eject: Modify to retry eject and poll for ejection complete.
   - to handle case where the eject is called while a server is aready
     downloading an image.

3. Power Control: Change host power control operation from a 10 second
   inline wait to a 10 x 1sec power state change verification loop
   - avoid server issue that leads to redfish client connection timeout
     due to inactivity. Also extended the power control timeout to
     handled servers that aer slow.

This update also adds the following work around to existing redfish
python library issue.

1. BMC Ping: Verify bmc is pingable before redfish client connect.
   - to avoid redfish client python library hang issue when bmc ip
     is unreachable.

This update also implements the following debug and robustness
improvements:

1. Switch from single to multi line progress to provide better progress
   detail.
2. Add redfish client request execution duration time to the http
   status logs.
3. Add ipv6 bool to control object to so only a single lookup is
   required.
4. Pair action stage logs with action verified log.
5. Fixed 2 logs that didn't have proper parameter format layout.
6. Various logging changes to assist in future issue debug.
7. Verify container continues to work with existing servers.

Change-Id: I9c90ac03ece220b12708594adea582b9f640cdf3
Closes-Bug: 1889086
Signed-off-by: Eric MacDonald <eric.macdonald@windriver.com>
This commit is contained in:
Eric MacDonald 2020-07-27 10:26:34 -04:00
parent a40175ec84
commit d46c9c55a9

View File

@ -1,7 +1,7 @@
#!/usr/bin/python3 #!/usr/bin/python3
############################################################################### ###############################################################################
# #
# Copyright (c) 2019 Wind River Systems, Inc. # Copyright (c) 2019-2020 Wind River Systems, Inc.
# #
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
# #
@ -19,9 +19,10 @@
# Step 2: Root Query ... Learn Redfish Services offered by the BMC # Step 2: Root Query ... Learn Redfish Services offered by the BMC
# Step 3: Find CD/DVD ... Locate the virtual media CD/DVD device # Step 3: Find CD/DVD ... Locate the virtual media CD/DVD device
# Step 4: Power Off Host ... Host power needs to be off # Step 4: Power Off Host ... Host power needs to be off
# Step 5: Inject Iso ... Inject the URL based ISO image into CD/DVD # Step 5: Eject Iso ... Eject iso if needed
# Step 6: Force DVD Boot ... Set Net boot device to be CD/DVD # Step 6: Inject Iso ... Inject the URL based ISO image into CD/DVD
# Step 7: Power On Host ... Host will boot and install from DVD # Step 7: Force DVD Boot ... Set Net boot device to be CD/DVD
# Step 8: Power On Host ... Host will boot and install from DVD
# #
# Note: All server starting state conditions such as the server running or # Note: All server starting state conditions such as the server running or
# being stuck in POST, say at the grub prompt due to previous host boot # being stuck in POST, say at the grub prompt due to previous host boot
@ -129,7 +130,7 @@ import redfish
FEATURE_NAME = 'Redfish Virtual Media Controller' FEATURE_NAME = 'Redfish Virtual Media Controller'
VERSION_MAJOR = 2 VERSION_MAJOR = 2
VERSION_MINOR = 0 VERSION_MINOR = 1
POWER_ON = 'On' POWER_ON = 'On'
POWER_OFF = "Off" POWER_OFF = "Off"
@ -168,15 +169,12 @@ def t():
return datetime.datetime.now().replace(microsecond=0) return datetime.datetime.now().replace(microsecond=0)
def ilog(string, cr=True): def ilog(string):
""" """
Info Log Utility Info Log Utility
""" """
if cr is True or debug:
sys.stdout.write("\n%s Info : %s" % (t(), string)) sys.stdout.write("\n%s Info : %s" % (t(), string))
else:
sys.stdout.write("%s, " % string)
def elog(string): def elog(string):
@ -231,7 +229,7 @@ def dlog4(string):
def slog(stage): def slog(stage):
"""Execution Stage Log""" """Execution Stage Log"""
dlog1("Stage : %s" % stage) sys.stdout.write("\n%s Stage : %s" % (t(), stage))
def rvmc_exit(code): def rvmc_exit(code):
@ -269,6 +267,13 @@ POST = 'POST'
GET = 'GET' GET = 'GET'
PATCH = 'PATCH' PATCH = 'PATCH'
# max number of polling retries while waiting for some long task to complete
MAX_POLL_COUNT = 200
# some servers timeout on inter comm gaps longer than 10 secs
RETRY_DELAY_SECS = 10
# 2 second delay constant
DELAY_2_SECS = 2
def is_ipv6_address(address): def is_ipv6_address(address):
""" """
@ -351,7 +356,10 @@ def parse_target(target_name, target_dict):
# #
###################################################################### ######################################################################
if is_ipv6_address(address) is True: if is_ipv6_address(address) is True:
bmc_ipv6 = True
address = '[' + address + ']' address = '[' + address + ']'
else:
bmc_ipv6 = False
# Create object and add it to the target object list # Create object and add it to the target object list
try: try:
@ -362,6 +370,7 @@ def parse_target(target_name, target_dict):
str(pw_dec), str(pw_dec),
target_dict.get('image')) target_dict.get('image'))
if vmc_obj: if vmc_obj:
vmc_obj.ipv6 = bmc_ipv6
target_object_list.append(vmc_obj) target_object_list.append(vmc_obj)
else: else:
elog("Unable to create control object for target:%s ; " elog("Unable to create control object for target:%s ; "
@ -397,7 +406,7 @@ class VmcObject(object):
self.pw_encoded = password.rstrip() self.pw_encoded = password.rstrip()
self.pw = password_decoded self.pw = password_decoded
self.img = image.rstrip() self.img = image.rstrip()
self.ipv6 = False
self.redfish_obj = None # redfish client connection object self.redfish_obj = None # redfish client connection object
self.session = False # True when session for this BMC is created self.session = False # True when session for this BMC is created
@ -414,6 +423,7 @@ class VmcObject(object):
# Virtual Media Info # Virtual Media Info
self.vm_url = None self.vm_url = None
self.vm_eject_url = None
self.vm_group_url = None self.vm_group_url = None
self.vm_group = None self.vm_group = None
self.vm_label = None self.vm_label = None
@ -457,7 +467,7 @@ class VmcObject(object):
:type path: str :type path: str
:param payload: POST or PATCH payload data :param payload: POST or PATCH payload data
:type payload: dictionary :type payload: dictionary
:returns True if request succeeded (200) otherwise False :returns True if request succeeded (200,202(accepted),204(no content)
""" """
self.response = None self.response = None
@ -466,6 +476,7 @@ class VmcObject(object):
else: else:
url = self.url url = self.url
before_request_time = datetime.datetime.now().replace(microsecond=0)
try: try:
dlog3("Request : %s %s" % (operation, url)) dlog3("Request : %s %s" % (operation, url))
if operation == GET: if operation == GET:
@ -483,7 +494,7 @@ class VmcObject(object):
dlog3("Payload : %s" % payload) dlog3("Payload : %s" % payload)
self.response = self.redfish_obj.patch(url, self.response = self.redfish_obj.patch(url,
body=payload, body=payload,
headers=POST_HEADERS) headers=PATCH_HEADERS)
else: else:
elog("Unsupported operation: %s" % operation) elog("Unsupported operation: %s" % operation)
return False return False
@ -492,12 +503,16 @@ class VmcObject(object):
elog("Failed operation on '%s' (%s)" % (url, ex)) elog("Failed operation on '%s' (%s)" % (url, ex))
if self.response is not None: if self.response is not None:
after_request_time = datetime.datetime.now().replace(microsecond=0)
delta = after_request_time - before_request_time
# if we got a response, check its status # if we got a response, check its status
if self.check_ok_status(url, operation) is False: if self.check_ok_status(url, operation, delta.seconds) is False:
self._exit(1) self._exit(1)
# format response # handle 204 success with no content ; clear last response
if self.response.status == 204:
self.response = ""
return True
try: try:
if self.resp_dict() is True: if self.resp_dict() is True:
if self.format() is True: if self.format() is True:
@ -569,7 +584,7 @@ class VmcObject(object):
return value1 return value1
return value1.get(key2) return value1.get(key2)
def check_ok_status(self, function, operation): def check_ok_status(self, function, operation, seconds):
""" """
Status Status
@ -580,16 +595,26 @@ class VmcObject(object):
:returns True if response status is OK. Otherwise False. :returns True if response status is OK. Otherwise False.
""" """
# Accept applicable 400 series error from an Eject Request POST.
# This error is dealt with by the eject handler.
if self.response.status in [400, 403, 404] and \
function == self.vm_eject_url and \
operation == POST:
return True
if self.response.status not in [200, 202, 204]: if self.response.status not in [200, 202, 204]:
try:
elog("HTTP Status : %d ; %s %s failed\n%s\n" % elog("HTTP Status : %d ; %s %s failed after %i seconds\n%s\n" %
(self.response.status, (self.response.status,
operation, function, operation, function, seconds,
json.dumps(self.response.dict, indent=4, sort_keys=True))) json.dumps(self.response.dict,
indent=4, sort_keys=True)))
return False return False
except Exception as ex:
elog("check status exception ; %s" % ex)
dlog2("HTTP Status : %s %s Ok (%d)" % dlog2("HTTP Status : %s %s Ok (%d) (took %i seconds)" %
(operation, function, self.response.status)) (operation, function, self.response.status, seconds))
return True return True
def _exit(self, code): def _exit(self, code):
@ -619,8 +644,11 @@ class VmcObject(object):
# the redfish query data that was learned up to that point # the redfish query data that was learned up to that point
elog("Code : %s" % code) elog("Code : %s" % code)
# Other info
ilog("IPv6 : %s" % self.ipv6)
# Root Query Info # Root Query Info
ilog("Root Query:\n%s\n" % self.root_query_info) ilog("Root Query: %s" % self.root_query_info)
# Managers Info # Managers Info
ilog("Manager URL: %s" % self.managers_group_url) ilog("Manager URL: %s" % self.managers_group_url)
@ -644,10 +672,10 @@ class VmcObject(object):
ilog("VM Label: %s" % self.vm_label) ilog("VM Label: %s" % self.vm_label)
ilog("VM Version: %s" % self.vm_version) ilog("VM Version: %s" % self.vm_version)
ilog("VM Actions: %s" % self.vm_actions) ilog("VM Actions: %s" % self.vm_actions)
ilog("VM Media Types: %s\n" % self.vm_media_types) ilog("VM Media Types: %s" % self.vm_media_types)
ilog("Last Response raw: %s\n" % self.response) ilog("Last Response raw: %s" % self.response)
ilog("Last Response json: %s\n" % self.response_json) ilog("Last Response json: %s" % self.response_json)
rvmc_exit(code) rvmc_exit(code)
@ -668,6 +696,35 @@ class VmcObject(object):
stage = 'Redfish Client Connection' stage = 'Redfish Client Connection'
slog(stage) slog(stage)
# Verify ping response
ping_ok = False
ping_count = 0
MAX_PING_COUNT = 10
while ping_count < MAX_PING_COUNT and ping_ok is False:
response = 0
if self.ipv6 is True:
response = os.system("ping -6 -c 1 " +
self.ip[1:-1] + " > /dev/null 2>&1")
else:
response = os.system("ping -c 1 " +
self.ip + " > /dev/null 2>&1")
if response == 0:
ping_ok = True
else:
ping_count = ping_count + 1
ilog("BMC Ping : retry (%i of %i)" %
(ping_count, MAX_PING_COUNT))
time.sleep(2)
if ping_ok is False:
elog("Unable to ping '%s' (%i)" % (self.ip, ping_count))
alog("Check BMC ip address is pingable")
self._exit(1)
else:
ilog("BMC Ping Ok : %s (%i)" % (self.ip, ping_count))
# try to connect
connect_error = False connect_error = False
try: try:
# One time Redfish Client Object Create # One time Redfish Client Object Create
@ -700,8 +757,6 @@ class VmcObject(object):
stage = 'Root Query' stage = 'Root Query'
slog(stage) slog(stage)
ilog("Progress : %s, " % stage)
if self.make_request(operation=GET, path=None) is False: if self.make_request(operation=GET, path=None) is False:
elog("Failed %s GET request") elog("Failed %s GET request")
self._exit(1) self._exit(1)
@ -884,7 +939,6 @@ class VmcObject(object):
# in the same state will error out. # in the same state will error out.
# So don't do it. # So don't do it.
return return
ilog("Power %s" % state, False)
break break
info = 'Systems Reset Action Dictionary' info = 'Systems Reset Action Dictionary'
@ -953,15 +1007,12 @@ class VmcObject(object):
dlog3("ResetActions: %s" % reset_command_list) dlog3("ResetActions: %s" % reset_command_list)
# load the appropriate acceptable command list # load the appropriate acceptable command list
acceptable_reset_commands = ['ForceRestart', 'GracefulRestart']
acceptable_poweron_commands = ['ForceOn', 'On']
acceptable_poweroff_commands = ['ForceOff', 'GracefulShutdown']
if state == POWER_OFF: if state == POWER_OFF:
acceptable_commands = acceptable_poweroff_commands acceptable_commands = ['ForceOff', 'GracefulShutdown']
elif state == POWER_ON: elif state == POWER_ON:
acceptable_commands = acceptable_poweron_commands acceptable_commands = ['ForceOn', 'On']
else: else:
acceptable_commands = acceptable_reset_commands acceptable_commands = ['ForceRestart', 'GracefulRestart']
# Look for the best command for the power state requested. # Look for the best command for the power state requested.
command = None command = None
@ -993,20 +1044,30 @@ class VmcObject(object):
# this was not a power command # this was not a power command
return return
dlog2("Verify : %s" % stage) # poll for requested power state.
poll_count = 0
# delay and refresh self.power_state MAX_STATE_POLL_COUNT = 60 # some servers take longer than 10 seconds
time.sleep(10) while poll_count < MAX_STATE_POLL_COUNT and self.power_state != state:
time.sleep(1)
poll_count = poll_count + 1
# get systems info # get systems info
if self.make_request(operation=GET, if self.make_request(operation=GET,
path=self.systems_member_url) is False: path=self.systems_member_url) is False:
elog("Failed to Get System State URL:%s" % elog("Failed to Get System State (%i of %i)" %
self.systems_member_url) (poll_count, MAX_STATE_POLL_COUNT))
self._exit(1) else:
# get powerState # get powerState
self.power_state = self.get_key_value('PowerState') self.power_state = self.get_key_value('PowerState')
if self.power_state != state:
dlog1("waiting for power %s (%s) (%d)" %
(state, self.power_state, poll_count))
if self.power_state != state:
elog("Failed to Set System Power State to %s (%s)" %
(self.power_state, self.systems_member_url))
self._exit(1)
else:
ilog("%s verified (%d)" % (stage, poll_count))
###################################################################### ######################################################################
# Get CD/DVD Virtual Media URL # Get CD/DVD Virtual Media URL
@ -1019,8 +1080,6 @@ class VmcObject(object):
stage = 'Get CD/DVD Virtual Media' stage = 'Get CD/DVD Virtual Media'
slog(stage) slog(stage)
ilog("Get VM Info", False)
if self.manager_members_list is None: if self.manager_members_list is None:
elog("Unable to index Managers Members from %s" % elog("Unable to index Managers Members from %s" %
self.managers_group_url) self.managers_group_url)
@ -1209,18 +1268,68 @@ class VmcObject(object):
stage = 'Eject Current Image' stage = 'Eject Current Image'
slog(stage) slog(stage)
current_image = self.get_key_value('Image') if self.make_request(operation=GET, path=self.vm_url) is False:
if current_image: elog("Virtual media status query failed (%s)" % self.vm_url)
vm_eject = self.vm_actions.get('#VirtualMedia.EjectMedia') self._exit(1)
if vm_eject:
vm_eject_url = vm_eject.get('target') if self.get_key_value('Inserted') is False:
if vm_eject_url: return
ilog("Eject CD/DVD", False)
# Ensure there is no image inserted and handle the case where
# one might be in the process of loading.
MAX_EJECT_RETRY_COUNT = 10
eject_retry_count = 0
ejecting = True
eject_media_label = '#VirtualMedia.EjectMedia'
while eject_retry_count < MAX_EJECT_RETRY_COUNT and ejecting:
eject_retry_count = eject_retry_count + 1
vm_eject = self.vm_actions.get(eject_media_label)
if not vm_eject:
elog("Failed to get eject target (%s)" % eject_media_label)
self._exit(1)
self.vm_eject_url = vm_eject.get('target')
if self.vm_eject_url:
if self.get_key_value('Image'):
ilog("Eject Request Image %s" %
(self.get_key_value('Image')))
else:
dlog1("Eject Request")
if self.make_request(operation=POST, if self.make_request(operation=POST,
payload={}, payload={},
path=vm_eject_url) is False: path=self.vm_eject_url) is False:
elog("Failed to GET Virtual Media Eject URL from %s" % elog("Eject request failed (%s)" % self.vm_eject_url)
vm_eject_url) # accept this and continue to poll
time.sleep(DELAY_2_SECS)
poll_count = 0
while poll_count < MAX_POLL_COUNT and ejecting:
# verify the image is not in inserted
poll_count = poll_count + 1
vm_eject = self.vm_actions.get(eject_media_label)
if self.make_request(operation=GET,
path=self.vm_url) is True:
if self.get_key_value('Inserted') is False:
ilog("Ejected")
ejecting = False
elif self.get_key_value('Image'):
# if image is present then its ready to
# retry the eject, break out of poll loop
dlog1("Image Present ; %s" %
self.get_key_value('Image'))
break
else:
dlog1("Eject Wait ; Image: %s" %
self.get_key_value('Image'))
time.sleep(RETRY_DELAY_SECS)
else:
elog("Failed to query vm state (%s)" % self.vm_url)
self._exit(1)
if ejecting is True:
elog("%s wait timeout" % stage)
self._exit(1)
###################################################################### ######################################################################
# Insert Image into Virtual Media CD/DVD # Insert Image into Virtual Media CD/DVD
@ -1233,8 +1342,6 @@ class VmcObject(object):
stage = 'Insert Image into Virtual Media CD/DVD' stage = 'Insert Image into Virtual Media CD/DVD'
slog(stage) slog(stage)
ilog("Insert Image", False)
vm_insert_url = None vm_insert_url = None
vm_insert_act = self.vm_actions.get('#VirtualMedia.InsertMedia') vm_insert_act = self.vm_actions.get('#VirtualMedia.InsertMedia')
if vm_insert_act: if vm_insert_act:
@ -1254,12 +1361,34 @@ class VmcObject(object):
elog("Failed to Insert Media") elog("Failed to Insert Media")
self._exit(1) self._exit(1)
dlog2("Verify : %s" % stage) # Handle case where the BMC loads the iso image during the insertion.
# In that case the 'Inserted' is True but the Image is not immediately
# mounted.
poll_count = 0
ImageInserting = True
while poll_count < MAX_POLL_COUNT and ImageInserting:
if self.make_request(operation=GET, path=self.vm_url) is False: if self.make_request(operation=GET, path=self.vm_url) is False:
elog("Unable to verify Image insertion" % self.vm_url) elog("Unable to verify Image insertion (%s)" % self.vm_url)
self._exit(1) self._exit(1)
if self.get_key_value('Image') == self.img:
ilog("Image Insertion (took %i seconds)" %
(poll_count * RETRY_DELAY_SECS))
ImageInserting = False
else:
time.sleep(RETRY_DELAY_SECS)
poll_count = poll_count + 1
dlog1("Image Insertion Wait ; %3d secs (%3d of %3d)" %
(poll_count * RETRY_DELAY_SECS,
poll_count, MAX_POLL_COUNT))
if ImageInserting is True:
elog("Image insertion timeout")
self._exit(1)
else:
ilog("%s verified (took %i seconds)" %
(stage, poll_count * RETRY_DELAY_SECS))
if self.get_key_value('Image') != self.img: if self.get_key_value('Image') != self.img:
elog("Insertion verification failed.") elog("Insertion verification failed.")
ilog("Expected Image: %s" % self.img) ilog("Expected Image: %s" % self.img)
@ -1283,11 +1412,9 @@ class VmcObject(object):
Set Next Boot Override to CD/DVD Set Next Boot Override to CD/DVD
""" """
stage = 'Set Next Boot Override to CD/DVD"' stage = 'Set Next Boot Override to CD/DVD'
slog(stage) slog(stage)
ilog("Set CD/DVD Boot", False)
# Walk the Systems Members list looking for Boot support. # Walk the Systems Members list looking for Boot support.
# #
# "Members": [ { "@odata.id": "/redfish/v1/Systems/1/" } ], # "Members": [ { "@odata.id": "/redfish/v1/Systems/1/" } ],
@ -1319,20 +1446,52 @@ class VmcObject(object):
if self.boot_control_dict is None: if self.boot_control_dict is None:
elog("Unable to get %s from %s" % (info, self.systems_member_url)) elog("Unable to get %s from %s" % (info, self.systems_member_url))
self._exit(1) self._exit(1)
else:
allowable_label = 'BootSourceOverrideMode@Redfish.AllowableValues'
mode_list = self.get_key_value('Boot', allowable_label)
if mode_list is None:
payload = {"Boot": {"BootSourceOverrideEnabled": "Once", payload = {"Boot": {"BootSourceOverrideEnabled": "Once",
"BootSourceOverrideTarget": "Cd"}} "BootSourceOverrideTarget": "Cd"}}
else:
dlog1("Boot Override Modes: %s" % mode_list)
# Prioritize UEFI over Legacy
if "UEFI" in mode_list:
payload = {"Boot": {"BootSourceOverrideEnabled": "Once",
"BootSourceOverrideMode": "UEFI",
"BootSourceOverrideTarget": "Cd"}}
elif "Legacy" in mode_list:
payload = {"Boot": {"BootSourceOverrideEnabled": "Once",
"BootSourceOverrideMode": "Legacy",
"BootSourceOverrideTarget": "Cd"}}
else:
elog("BootSourceOverrideModes %s not supported" %
mode_list)
self._exit(0)
dlog2("Boot Override Payload: %s" % payload)
if self.make_request(operation=PATCH, if self.make_request(operation=PATCH,
path=self.systems_member_url, path=self.systems_member_url,
payload=payload) is False: payload=payload) is False:
elog("Unable to verify Image insertion" % self.vm_url) elog("Unable to Set Boot Override (%s)" % self.vm_url)
self._exit(1) self._exit(1)
dlog2("Verify : %s" % stage)
if self.make_request(operation=GET, if self.make_request(operation=GET,
path=self.systems_member_url) is False: path=self.systems_member_url) is False:
elog("Unable to verify Boot Override insertion" % self.vm_url) elog("Unable to verify Set Boot Override (%s)" % self.vm_url)
self._exit(1)
else:
enabled = self.get_key_value('Boot', 'BootSourceOverrideEnabled')
device = self.get_key_value('Boot', 'BootSourceOverrideTarget')
mode = self.get_key_value('Boot', 'BootSourceOverrideMode')
if enabled == "Once" and \
supported_device(self.vm_media_types) is True:
ilog("%s verified [%s:%s:%s]" %
(stage, enabled, device, mode))
else:
elog("Unable to verify Set Boot Override [%s:%s:%s]" %
(enabled, device, mode))
self._exit(1) self._exit(1)
###################################################################### ######################################################################
@ -1362,7 +1521,7 @@ class VmcObject(object):
self._redfish_set_boot_override() self._redfish_set_boot_override()
self._redfish_poweron_host() self._redfish_poweron_host()
ilog("Done", False) ilog("Done")
if self.redfish_obj is not None and self.session is True: if self.redfish_obj is not None and self.session is True:
self.redfish_obj.logout() self.redfish_obj.logout()
@ -1407,7 +1566,7 @@ for section in cfg:
dlog2("VM Iso Label: %s" % cfg[section]) dlog2("VM Iso Label: %s" % cfg[section])
found = True found = True
if targets: if targets:
dlog1("Using specified target(s): %s" % targets) dlog2("Using specified target(s): %s" % targets)
else: else:
for target in cfg[section]: for target in cfg[section]:
targets.append(target) targets.append(target)