Recover service failure after success audit
This is a quick fix for a rare situation in which a service changes state in the following sequence: enabled-active --> enabled-active-failed --> disabled-failed --> enabled-active-failed. When the service state changes directly from disabled-failed to enabled-active-failed, the failed state will not recover. Also, in a separate situation, a service's failed state is cleared prematurely when the service enters the enabled-active state from the enabling state because of a failed/timed-out audit. Since a service can get into the failed state because of an audit mismatch, the service should be recovered to normal from the failed state after a successful audit. The fix ensures that the service's failed state is cleared after a successful audit in the enabled-active state. No changes in other service states. Change-Id: Ie4052fec9b1579e6da97e9e1486d7f38eafa74ea Closes-Bug: 1829880 Signed-off-by: Bin Qian <bin.qian@windriver.com>
This commit is contained in:
parent
0259011983
commit
cf92d7d64d
|
@ -183,7 +183,6 @@ static bool sm_service_audit_timeout( SmTimerIdT timer_id, int64_t user_data )
|
|||
}
|
||||
|
||||
service->action_running = SM_SERVICE_ACTION_NONE;
|
||||
service->action_pid = -1;
|
||||
service->action_timer_id = SM_TIMER_ID_INVALID;
|
||||
|
||||
error = service_audit_result_handler( service, action_running,
|
||||
|
|
|
@ -176,12 +176,9 @@ SmErrorT sm_service_enabled_active_state_exit( SmServiceT* service )
|
|||
SmErrorT sm_service_enabled_active_state_transition( SmServiceT* service,
|
||||
SmServiceStateT from_state )
|
||||
{
|
||||
if( SM_SERVICE_STATE_ENABLING == from_state )
|
||||
{
|
||||
service->status = SM_SERVICE_STATUS_NONE;
|
||||
service->condition = SM_SERVICE_CONDITION_NONE;
|
||||
}
|
||||
|
||||
// do not clear the failure condition here. If the failure is triggered
|
||||
// by an audit state mismatch, a full recovery cycle ends at the first
|
||||
// audit success
|
||||
return( SM_OKAY );
|
||||
}
|
||||
// ****************************************************************************
|
||||
|
@ -290,7 +287,10 @@ SmErrorT sm_service_enabled_active_state_event_handler( SmServiceT* service,
|
|||
break;
|
||||
|
||||
case SM_SERVICE_EVENT_AUDIT_SUCCESS:
|
||||
DPRINTFD( "Service (%s) audit success.", service->name );
|
||||
if(sm_service_clear_failure_state(service))
|
||||
{
|
||||
DPRINTFI( "Service (%s) audit success as recovered.", service->name );
|
||||
}
|
||||
break;
|
||||
|
||||
case SM_SERVICE_EVENT_AUDIT_MISMATCH:
|
||||
|
|
|
@ -29,6 +29,48 @@ static SmListT* _services = NULL;
|
|||
static SmDbHandleT* _sm_db_handle = NULL;
|
||||
|
||||
static SmErrorT sm_service_table_add( void* user_data[], void* record );
|
||||
|
||||
// ****************************************************************************
|
||||
// Service Table - clear failure state
|
||||
// returns true if service is in a failure state
|
||||
// ====================
|
||||
bool sm_service_clear_failure_state(SmServiceT* service)
|
||||
{
|
||||
SmErrorT error;
|
||||
bool prev_failure_condition;
|
||||
prev_failure_condition =
|
||||
service->recover ||
|
||||
service->fail_count > 0 ||
|
||||
service->action_fail_count > 0 ||
|
||||
service->transition_fail_count > 0 ||
|
||||
service->status == SM_SERVICE_STATUS_FAILED ||
|
||||
service->condition == SM_SERVICE_CONDITION_RECOVERY_FAILURE ||
|
||||
service->condition == SM_SERVICE_CONDITION_ACTION_FAILURE ||
|
||||
service->condition == SM_SERVICE_CONDITION_FATAL_FAILURE;
|
||||
if( prev_failure_condition )
|
||||
{
|
||||
service->recover = false;
|
||||
service->fail_count = 0;
|
||||
service->action_fail_count = 0;
|
||||
service->transition_fail_count = 0;
|
||||
service->status = SM_SERVICE_STATUS_NONE;
|
||||
service->condition = SM_SERVICE_CONDITION_NONE;
|
||||
|
||||
error = sm_service_table_persist( service );
|
||||
if( SM_OKAY != error )
|
||||
{
|
||||
DPRINTFE( "Failed to persist service (%s) data, error=%s.",
|
||||
service->name, sm_error_str(error) );
|
||||
}
|
||||
|
||||
DPRINTFI( "Cleared previous failure condition for service (%s) "
|
||||
"in % state.", service->name, sm_service_state_str(service->state) );
|
||||
|
||||
}
|
||||
|
||||
return prev_failure_condition;
|
||||
}
|
||||
|
||||
// ****************************************************************************
|
||||
// Service Table - Read
|
||||
// ====================
|
||||
|
|
|
@ -60,9 +60,15 @@ typedef struct
|
|||
bool provisioned;
|
||||
} SmServiceT;
|
||||
|
||||
typedef void (*SmServiceTableForEachCallbackT)
|
||||
typedef void (*SmServiceTableForEachCallbackT)
|
||||
(void* user_data[], SmServiceT* service);
|
||||
|
||||
// ****************************************************************************
|
||||
// Service Table - clear failure state
|
||||
// ====================
|
||||
// Clears the service's recover flag, fail counters, status and condition
// when any of them indicates a failure. Returns true if such a failure
// indication was present before the call, false otherwise.
extern bool sm_service_clear_failure_state(SmServiceT* service);
|
||||
// ****************************************************************************
|
||||
|
||||
// ****************************************************************************
|
||||
// Service Table - Read
|
||||
// ====================
|
||||
|
|
Loading…
Reference in New Issue