2383 lines
86 KiB
YAML
2383 lines
86 KiB
YAML
---
|
|
|
|
############################################################################
|
|
#
|
|
# events.yaml file unit testing - this is not for production!
|
|
#
|
|
############################################################################
|
|
|
|
############################################################################
#
# Record Format ... for documentation
#
# 100.001:
#    Type: < Alarm | Log >
#    Description: < yaml string >
#                 OR
#                 [< yaml string >,    // list of yaml strings
#                  < yaml string >]
#                 OR
#                 critical: < yaml string >    // i.e. dictionary of yaml strings indexed by severity
#                 major: < yaml string >
#                 minor: < yaml string >
#                 warning: < yaml string >
#    Entity_Instance_ID: < yaml string ... e.g. host=<hostname>.interface=<ifname> >
#                        OR
#                        [< yaml string >,    // list of yaml strings
#                         < yaml string >]
#    Severity: < critical | major | minor | warning >
#              OR
#              [critical, major]    // list of severity values
#    Proposed_Repair_Action: < yaml string >    // NOTE ALARM ONLY FIELD
#                            OR
#                            critical: < yaml string >    // i.e. dictionary of yaml strings indexed by severity
#                            major: < yaml string >
#                            minor: < yaml string >
#                            warning: < yaml string >
#    Maintenance_Action: < yaml string >    // NOTE ALARM ONLY FIELD
#                        OR
#                        critical: < yaml string >    // i.e. dictionary of yaml strings indexed by severity
#                        major: < yaml string >
#                        minor: < yaml string >
#                        warning: < yaml string >
#    Inhibit_Alarms: < true | false >    // NOTE ALARM ONLY FIELD
#    Alarm_Type: < operational-violation | ... >
#    Probable_Cause: < timing-problem | ... >
#                    OR
#                    [< timing-problem | ... >,    // list of probable-causes
#                     < timing-problem | ... >]
#    Service_Affecting: < true | false >
#    Suppression: < true | false >    // NOTE ALARM ONLY FIELD
#
#
# Other Notes:
#    - use the general record format above
#    - the only dictionaries allowed are ones indexed by severity
#    - if there are multiple lists in a record,
#      then they should all have the same number of items, and corresponding
#      list items together represent one instance of the alarm
#    - if you can't describe the alarm/log based on the above rules,
#      then you can use a multi-line string format
#
############################################################################
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# RMON
|
|
# ---------------------------------------------------------------------------
|
|
|
|
# Platform CPU usage threshold alarm, raised per host.
100.101:
  Type: Alarm
  Description: 'Platform CPU threshold exceeded; threshold x%, actual y% .'
  Entity_Instance_ID: 'host=<hostname>'
  Severity:
    - critical
    - major
    - minor
  Proposed_Repair_Action: 'Monitor and if condition persists, contact next level of support.'
  # Only the critical and major severities list a maintenance action.
  Maintenance_Action:
    critical: degrade
    major: degrade
  # No value was supplied for this field; it loads as null.
  Inhibit_Alarms: null
  Alarm_Type: operational-violation
  Probable_Cause: threshold-crossed
  Service_Affecting: false
  Suppression: true
|
|
|
|
# VSwitch CPU usage threshold alarm, raised per host.
100.102:
  Type: Alarm
  Description: 'VSwitch CPU threshold exceeded; threshold x%, actual y% .'
  Entity_Instance_ID: 'host=<hostname>'
  Severity:
    - critical
    - major
    - minor
  Proposed_Repair_Action: 'Monitor and if condition persists, contact next level of support.'
  # Only the critical and major severities list a maintenance action.
  Maintenance_Action:
    critical: degrade
    major: degrade
  # No value was supplied for this field; it loads as null.
  Inhibit_Alarms: null
  Alarm_Type: operational-violation
  Probable_Cause: threshold-crossed
  Service_Affecting: false
  Suppression: true
|
|
|
|
# Memory usage threshold alarm, raised per host.
100.103:
  Type: Alarm
  Description: 'Memory threshold exceeded; threshold x%, actual y% .'
  Entity_Instance_ID: 'host=<hostname>'
  Severity:
    - critical
    - major
    - minor
  Proposed_Repair_Action: 'Monitor and if condition persists, contact next level of support; may require additional memory on Host.'
  # Only the critical and major severities list a maintenance action.
  Maintenance_Action:
    critical: degrade
    major: degrade
  # No value was supplied for this field; it loads as null.
  Inhibit_Alarms: null
  Alarm_Type: operational-violation
  Probable_Cause: threshold-crossed
  Service_Affecting: false
  Suppression: true
|
|
|
|
# File system / volume group usage threshold alarm.
# Description and Entity_Instance_ID use the multi-line string format to
# describe two alternative instances (filesystem OR volume group).
100.104:  # NOTE This should really be split into two different Alarms.
  Type: Alarm
  # NOTE(review): the text under the volumegroup alternative reads like a
  # repair action rather than a description - confirm it is intended.
  Description: |-
    host=<hostname>.filesystem=<mount-dir>
    File System threshold exceeded; threshold x%, actual y% .
    OR
    host=<hostname>.volumegroup=<volumegroup-name>
    Monitor and if condition persists, consider adding additional physical volumes to the volume group.
  Entity_Instance_ID: |-
    host=<hostname>.filesystem=<mount-dir>
    OR
    host=<hostname>.volumegroup=<volumegroup-name>
  Severity:
    - critical
    - major
    - minor
  Proposed_Repair_Action: 'Monitor and if condition persists, contact next level of support.'
  # Only the critical and major severities list a maintenance action.
  Maintenance_Action:
    critical: degrade
    major: degrade
  # No value was supplied for this field; it loads as null.
  Inhibit_Alarms: null
  Alarm_Type: operational-violation
  Probable_Cause: threshold-crossed
  Service_Affecting: false
  Suppression: true
|
|
|
|
# Loss of access to remote VM volumes, raised per host.
100.105:
  Type: Alarm
  Description: 'No access to remote VM volumes.'
  Entity_Instance_ID: 'host=<hostname>'
  Severity: major
  Proposed_Repair_Action: 'Check Management and Infrastructure Networks and Controller or Storage Nodes.'
  Maintenance_Action: degrade
  # No value was supplied for this field; it loads as null.
  Inhibit_Alarms: null
  Alarm_Type: operational-violation
  Probable_Cause: unknown
  Service_Affecting: true
  Suppression: true
|
|
|
|
# 'OAM' port failure, raised per host port.
100.106:
  Type: Alarm
  Description: "'OAM' Port failed."
  Entity_Instance_ID: 'host=<hostname>.port=<port-name>'
  Severity: major
  Proposed_Repair_Action: 'Check cabling and far-end port configuration and status on adjacent equipment.'
  Maintenance_Action: degrade
  # No value was supplied for this field; it loads as null.
  Inhibit_Alarms: null
  Alarm_Type: operational-violation
  Probable_Cause: unknown
  Service_Affecting: true
  Suppression: true
|
|
|
|
# 'OAM' interface degraded/failed, raised per host interface.
100.107:
  Type: Alarm
  # Multi-line string format: two alternative descriptions separated by OR.
  Description: |-
    'OAM' Interface degraded.
    OR
    'OAM' Interface failed.
  Entity_Instance_ID: 'host=<hostname>.interface=<if-name>'
  Severity:
    - critical
    - major
  Proposed_Repair_Action: 'Check cabling and far-end port configuration and status on adjacent equipment.'
  Maintenance_Action:
    critical: degrade
    major: degrade
  # No value was supplied for this field; it loads as null.
  Inhibit_Alarms: null
  Alarm_Type: operational-violation
  Probable_Cause: unknown
  Service_Affecting: true
  Suppression: true
|
|
|
|
# 'MGMT' port failure, raised per host port.
100.108:
  Type: Alarm
  Description: "'MGMT' Port failed."
  Entity_Instance_ID: 'host=<hostname>.port=<port-name>'
  Severity: major
  Proposed_Repair_Action: 'Check cabling and far-end port configuration and status on adjacent equipment.'
  Maintenance_Action: degrade
  # No value was supplied for this field; it loads as null.
  Inhibit_Alarms: null
  Alarm_Type: operational-violation
  Probable_Cause: unknown
  Service_Affecting: true
  Suppression: true
|
|
|
|
# 'MGMT' interface degraded/failed, raised per host interface.
100.109:
  Type: Alarm
  # Multi-line string format: two alternative descriptions separated by OR.
  Description: |-
    'MGMT' Interface degraded.
    OR
    'MGMT' Interface failed.
  Entity_Instance_ID: 'host=<hostname>.interface=<if-name>'
  Severity:
    - critical
    - major
  Proposed_Repair_Action: 'Check cabling and far-end port configuration and status on adjacent equipment.'
  Maintenance_Action:
    critical: degrade
    major: degrade
  # No value was supplied for this field; it loads as null.
  Inhibit_Alarms: null
  Alarm_Type: operational-violation
  Probable_Cause: unknown
  Service_Affecting: true
  Suppression: true
|
|
|
|
# 'INFRA' port failure, raised per host port.
# NOTE(review): the unquoted key is resolved as the float 100.11 by YAML
# loaders, so the trailing zero is lost unless the consumer re-formats the
# ID to three decimals - confirm against the loader before quoting it.
100.110:
  Type: Alarm
  Description: "'INFRA' Port failed."
  Entity_Instance_ID: 'host=<hostname>.port=<port-name>'
  Severity: major
  Proposed_Repair_Action: 'Check cabling and far-end port configuration and status on adjacent equipment.'
  Maintenance_Action: degrade
  # No value was supplied for this field; it loads as null.
  Inhibit_Alarms: null
  Alarm_Type: operational-violation
  Probable_Cause: unknown
  Service_Affecting: true
  Suppression: true
|
|
|
|
# 'INFRA' interface degraded/failed, raised per host interface.
100.111:
  Type: Alarm
  # Multi-line string format: two alternative descriptions separated by OR.
  Description: |-
    'INFRA' Interface degraded.
    OR
    'INFRA' Interface failed.
  Entity_Instance_ID: 'host=<hostname>.interface=<if-name>'
  Severity:
    - critical
    - major
  Proposed_Repair_Action: 'Check cabling and far-end port configuration and status on adjacent equipment.'
  Maintenance_Action:
    critical: degrade
    major: degrade
  # No value was supplied for this field; it loads as null.
  Inhibit_Alarms: null
  Alarm_Type: operational-violation
  Probable_Cause: unknown
  Service_Affecting: true
  Suppression: true
|
|
|
|
# 'DATA-VRS' port down, raised per host port.
100.112:
  Type: Alarm
  Description: "'DATA-VRS' Port down."
  Entity_Instance_ID: 'host=<hostname>.port=<port-name>'
  Severity: major
  Proposed_Repair_Action: 'Check cabling and far-end port configuration and status on adjacent equipment.'
  Maintenance_Action: degrade
  # No value was supplied for this field; it loads as null.
  Inhibit_Alarms: null
  Alarm_Type: operational-violation
  Probable_Cause: unknown
  Service_Affecting: true
  Suppression: true
|
|
|
|
# 'DATA-VRS' interface degraded/down, raised per host interface.
100.113:
  Type: Alarm
  # Multi-line string format: two alternative descriptions separated by OR.
  Description: |-
    'DATA-VRS' Interface degraded.
    OR
    'DATA-VRS' Interface down.
  Entity_Instance_ID: 'host=<hostname>.interface=<if-name>'
  Severity:
    - critical
    - major
  Proposed_Repair_Action: 'Check cabling and far-end port configuration and status on adjacent equipment.'
  # NOTE(review): only 'major' has an action here, while the OAM/MGMT/INFRA
  # interface alarms also list 'critical: degrade' - confirm this is intended.
  Maintenance_Action:
    major: degrade
  # No value was supplied for this field; it loads as null.
  Inhibit_Alarms: null
  Alarm_Type: operational-violation
  Probable_Cause: unknown
  Service_Affecting: true
  Suppression: true
|
|
|
|
# NTP server reachability alarm.
# Description and Entity_Instance_ID are dictionaries indexed by severity.
100.114:
  Type: Alarm
  Description:
    major: 'NTP configuration does not contain any valid or reachable NTP servers.'
    minor: 'NTP address <IP address> is not a valid or a reachable NTP server.'
  Entity_Instance_ID:
    major: 'host=<hostname>.ntp'
    minor: 'host=<hostname>.ntp=<IP address>'
  Severity:
    - major
    - minor
  Proposed_Repair_Action: 'Monitor and if condition persists, contact next level of support.'
  Maintenance_Action: none
  # No value was supplied for this field; it loads as null.
  Inhibit_Alarms: null
  Alarm_Type: communication
  Probable_Cause: unknown
  Service_Affecting: false
  Suppression: false
|
|
|
|
# VSwitch memory usage threshold alarm, raised per host processor.
100.115:
  Type: Alarm
  Description: 'VSwitch Memory Usage, processor <processor> threshold exceeded; threshold x%, actual y% .'
  Entity_Instance_ID: 'host=<hostname>.processor=<processor>'
  Severity:
    - critical
    - major
    - minor
  Proposed_Repair_Action: 'Monitor and if condition persists, contact next level of support.'
  # Only the critical and major severities list a maintenance action.
  Maintenance_Action:
    critical: degrade
    major: degrade
  # No value was supplied for this field; it loads as null.
  Inhibit_Alarms: null
  Alarm_Type: operational-violation
  Probable_Cause: threshold-crossed
  Service_Affecting: false
  Suppression: true
|
|
|
|
# Cinder LVM thinpool usage threshold alarm, raised per host volume group.
100.116:
  Type: Alarm
  Description: 'Cinder LVM Thinpool Usage threshold exceeded; threshold x%, actual y% .'
  Entity_Instance_ID: 'host=<hostname>.volumegroup=<volumegroup>'
  Severity:
    - critical
    - major
    - minor
  Proposed_Repair_Action: 'Monitor and if condition persists, contact next level of support.'
  # Only the critical and major severities list a maintenance action.
  Maintenance_Action:
    critical: degrade
    major: degrade
  # No value was supplied for this field; it loads as null.
  Inhibit_Alarms: null
  Alarm_Type: operational-violation
  Probable_Cause: threshold-crossed
  Service_Affecting: false
  Suppression: true
|
|
|
|
# Nova thinpool usage threshold alarm, raised per host volume group.
100.117:
  Type: Alarm
  Description: 'Nova Thinpool Usage threshold exceeded; threshold x%, actual y% .'
  Entity_Instance_ID: 'host=<hostname>.volumegroup=<volumegroup>'
  Severity:
    - critical
    - major
    - minor
  Proposed_Repair_Action: 'Monitor and if condition persists, contact next level of support.'
  # Only the critical and major severities list a maintenance action.
  Maintenance_Action:
    critical: degrade
    major: degrade
  # No value was supplied for this field; it loads as null.
  Inhibit_Alarms: null
  Alarm_Type: operational-violation
  Probable_Cause: threshold-crossed
  Service_Affecting: false
  Suppression: true
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# MAINTENANCE
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
# Host administratively locked; this alarm sets Inhibit_Alarms to true.
200.001:
  Type: Alarm
  Description: '<hostname> was administratively locked to take it out-of-service.'
  Entity_Instance_ID: 'host=<hostname>'
  Severity: warning
  Proposed_Repair_Action: 'Administratively unlock Host to bring it back in-service.'
  Maintenance_Action: none
  Inhibit_Alarms: true
  Alarm_Type: operational-violation
  Probable_Cause: out-of-service
  Service_Affecting: true
  Suppression: false
|
|
|
|
# Service-affecting host failure; host is auto-recovered by reboot.
200.004:
  Type: Alarm
  # Literal block: the two sentences are kept on separate lines.
  Description: |-
    <hostname> experienced a service-affecting failure.
    Host is being auto recovered by Reboot.
  Entity_Instance_ID: 'host=<hostname>'
  Severity: critical
  Proposed_Repair_Action: 'If auto-recovery is consistently unable to recover host to the unlocked-enabled state contact next level of support or lock and replace failing host.'
  Maintenance_Action: 'auto recover'
  Inhibit_Alarms: false
  Alarm_Type: operational-violation
  Probable_Cause: application-subsystem-failure
  Service_Affecting: true
  Suppression: true
|
|
|
|
# Host configuration failure during initialization; re-configured by reboot.
200.011:
  Type: Alarm
  Description: '<hostname> experienced a configuration failure during initialization. Host is being re-configured by Reboot.'
  Entity_Instance_ID: 'host=<hostname>'
  Severity: critical
  Proposed_Repair_Action: 'If auto-recovery is consistently unable to recover host to the unlocked-enabled state contact next level of support or lock and replace failing host.'
  Maintenance_Action: auto-recover
  Inhibit_Alarms: false
  Alarm_Type: operational-violation
  Probable_Cause: configuration-or-customization-error
  Service_Affecting: true
  Suppression: true
|
|
|
|
# Board management module access failure, raised per host.
# NOTE(review): the unquoted key is resolved as the float 200.01 by YAML
# loaders, so the trailing zero is lost unless the consumer re-formats the
# ID to three decimals - confirm against the loader before quoting it.
200.010:
  Type: Alarm
  Description: '<hostname> access to board management module has failed.'
  Entity_Instance_ID: 'host=<hostname>'
  Severity: warning
  Proposed_Repair_Action: "Check Host's board management configuration and connectivity."
  Maintenance_Action: 'auto recover'
  Inhibit_Alarms: false
  Alarm_Type: operational-violation
  Probable_Cause: communication-subsystem-failure
  Service_Affecting: false
  Suppression: false
|
|
|
|
# Controller function in-service failure while compute services stay healthy.
200.012:
  Type: Alarm
  Description: '<hostname> controller function has in-service failure while compute services remain healthy.'
  Entity_Instance_ID: 'host=<hostname>'
  Severity: major
  # Literal block: one instruction per line, newlines preserved.
  Proposed_Repair_Action: |-
    Lock and then Unlock host to recover.
    Avoid using 'Force Lock' action as that will impact compute services running on this host,
    If lock action fails then contact next level of support to investigate and recover.
  Maintenance_Action: 'degrade - requires manual action'
  Inhibit_Alarms: false
  Alarm_Type: operational-violation
  Probable_Cause: communication-subsystem-failure
  Service_Affecting: true
  Suppression: true
|
|
|
|
# Compute service failure on the only available controller; the host is
# degraded because auto-recovery is disabled.
# Fix: corrected the typos "poperational" -> "operational" and
# "Deggrading" -> "Degrading" in the Description text.
200.013:
  Type: Alarm
  Description: '<hostname> compute service of the only available controller is not operational. Auto-recovery is disabled. Degrading host instead.'
  Entity_Instance_ID: 'host=<hostname>'
  Severity: major
  Proposed_Repair_Action: 'Enable second controller and Switch Activity (Swact) over to it as soon as possible. Then Lock and Unlock host to recover its local compute service.'
  Maintenance_Action: 'degrade - requires manual action'
  Inhibit_Alarms: false
  Alarm_Type: operational-violation
  Probable_Cause: communication-subsystem-failure
  Service_Affecting: true
  Suppression: true
|
|
|
|
# 'Management Network' communication failure (degrade or failure variant).
# Fix: removed stray double-quote characters from the literal block
# scalars. Inside a `|-` block a `"` is ordinary text, so the old values
# carried a trailing quote on the Description and wrapping quotes on the
# Proposed_Repair_Action.
200.005:
  Type: Alarm
  # Multi-line string format: 'Degrade' and 'Failure' variants separated by
  # a blank line.
  Description: |-
    Degrade:
    <hostname> is experiencing an intermittent 'Management Network' communication failures that have exceeded its lower alarming threshold.

    Failure:
    <hostname> is experiencing a persistent critical 'Management Network' communication failure.
  Entity_Instance_ID: 'host=<hostname>'
  Severity:
    - critical
    - major
  Proposed_Repair_Action: |-
    Check 'Management Network' connectivity and support for multicast messaging.
    If problem consistently occurs after that and Host is reset, then contact next level of support or lock and replace failing host.
  Maintenance_Action: 'auto recover'
  Inhibit_Alarms: false
  Alarm_Type: communication
  Probable_Cause: unknown
  Service_Affecting: true
  Suppression: true
|
|
|
|
# 'Infrastructure Network' communication failure (degrade or failure variant).
# Fix: removed stray double-quote characters from the literal block
# scalars. Inside a `|-` block a `"` is ordinary text, so the old values
# carried a trailing quote on the Description and wrapping quotes on the
# Proposed_Repair_Action.
200.009:
  Type: Alarm
  # Multi-line string format: 'Degrade' and 'Failure' variants separated by
  # a blank line.
  Description: |-
    Degrade:
    <hostname> is experiencing an intermittent 'Infrastructure Network' communication failures that have exceeded its lower alarming threshold.

    Failure:
    <hostname> is experiencing a persistent critical 'Infrastructure Network' communication failure.
  Entity_Instance_ID: 'host=<hostname>'
  Severity:
    - critical
    - major
  Proposed_Repair_Action: |-
    Check 'Infrastructure Network' connectivity and support for multicast messaging.
    If problem consistently occurs after that and Host is reset, then contact next level of support or lock and replace failing host.
  Maintenance_Action: 'auto recover'
  Inhibit_Alarms: false
  Alarm_Type: communication
  Probable_Cause: unknown
  Service_Affecting: true
  Suppression: true
|
|
|
|
|
|
# Process failure alarm: covers failure of the Process Monitor daemon
# (pmond) itself and failures of processes it monitors.
# Fixes: removed the stray trailing double-quote from the
# Proposed_Repair_Action block scalar (it was literal text inside `|-`) and
# corrected the typo "analysys" -> "analysis".
# NOTE(review): any column alignment inside the original block scalars
# could not be recovered from the visible text - confirm against the
# canonical file if exact whitespace matters.
200.006:
  Type: Alarm
  Description: |-
    Main Process Monitor Daemon Failure (major):
    <hostname> 'Process Monitor' (pmond) process is not running or functioning properly. The system is trying to recover this process.

    Monitored Process Failure (critical/major/minor):
    Critical: <hostname> critical '<processname>' process has failed and could not be auto-recovered gracefully.
    Auto-recovery progression by host reboot is required and in progress.
    Major: <hostname> is degraded due to the failure of its '<processname>' process. Auto recovery of this major process is in progress.
    Minor: <hostname> '<processname>' process has failed. Auto recovery of this minor process is in progress.
    OR
    <hostname> '<processname>' process has failed. Manual recovery is required.
  Entity_Instance_ID: 'host=<hostname>.process=<processname>'
  Severity:
    - critical
    - major
    - minor
  Proposed_Repair_Action: |-
    If this alarm does not automatically clear after some time and continues to be asserted after Host is locked and unlocked
    then contact next level of support for root cause analysis and recovery.

    If problem consistently occurs after Host is locked and unlocked then contact next level of support for root cause analysis and recovery.
  Maintenance_Action:
    critical: auto-recover
    major: degrade
    # No action was supplied for the minor severity; it loads as null.
    minor: null
  Inhibit_Alarms: false
  Alarm_Type: operational-violation
  Probable_Cause: unknown
  # Indexed by severity: only the minor variant is not service affecting.
  Service_Affecting:
    critical: true
    major: true
    minor: false
  Suppression: true
|
|
|
|
|
|
# 200.006: // NOTE using duplicate ID of a completely analogous Alarm for this
|
|
# Type: Log
|
|
# Description: |-
|
|
# Main Process Monitor Daemon Failure (major)
|
|
# <hostname> 'Process Monitor' (pmond) process is not running or functioning properly.
|
|
# The system is trying to recover this process.
|
|
#
|
|
# Monitored Process Failure (critical/major/minor)
|
|
# critical: <hostname> critical '<processname>' process has failed and could not be auto-recovered gracefully.
|
|
# Auto-recovery progression by host reboot is required and in progress.
|
|
# major: <hostname> is degraded due to the failure of its '<processname>' process. Auto recovery of this major process is in progress.
|
|
# minor: <hostname> '<processname>' process has failed. Auto recovery of this minor process is in progress.
|
|
# OR
|
|
# <hostname> '<processname>' process has failed. Manual recovery is required.
|
|
# Entity_Instance_ID: host=<hostname>.process=<process-name>
|
|
# Severity: minor
|
|
# Alarm_Type: other
|
|
# Probable_Cause: unspecified-reason
|
|
# Service_Affecting: true
|
|
|
|
|
|
# Sensor out-of-tolerance alarm, raised per host sensor.
# Description, Maintenance_Action and Service_Affecting are dictionaries
# indexed by severity.
200.007:
  Type: Alarm
  Description:
    critical: "Host is degraded due to a 'critical' out-of-tolerance reading from the '<sensorname>' sensor"
    major: "Host is degraded due to a 'major' out-of-tolerance reading from the '<sensorname>' sensor"
    minor: "Host is reporting a 'minor' out-of-tolerance reading from the '<sensorname>' sensor"
  Entity_Instance_ID: 'host=<hostname>.sensor=<sensorname>'
  Severity:
    - critical
    - major
    - minor
  Proposed_Repair_Action: 'If problem consistently occurs after Host is power cycled and or reset, contact next level of support or lock and replace failing host.'
  Maintenance_Action:
    critical: degrade
    major: degrade
    minor: 'auto-recover (polling)'
  # No value was supplied for this field; it loads as null.
  Inhibit_Alarms: null
  Alarm_Type: operational-violation
  Probable_Cause: unspecified-reason
  Service_Affecting:
    critical: true
    major: false
    minor: false
  Suppression: true
|
|
|
|
# Hardware Monitor could not load/configure/monitor hardware sensors.
# Fix: Maintenance_Action was the capitalised string 'None'; the other
# records in this file that have no action (100.114, 200.001) spell it
# 'none', so it is normalised to lowercase for consistency.
200.014:
  Type: Alarm
  Description: 'The Hardware Monitor was unable to load, configure and monitor one or more hardware sensors.'
  Entity_Instance_ID: 'host=<hostname>'
  Severity: minor
  # Literal block: one escalation step per line, newlines preserved.
  Proposed_Repair_Action: |-
    Check Board Management Controller provisioning. Try reprovisioning the BMC.
    If problem persists try power cycling the host and then the entire server including the BMC power.
    If problem persists then contact next level of support.
  Maintenance_Action: none
  Inhibit_Alarms: false
  Alarm_Type: operational-violation
  Probable_Cause: unknown
  Service_Affecting: false
  Suppression: true
|
|
|
|
# Sensor groups unreadable from the host's board management controller.
# Fix: Maintenance_Action was the capitalised string 'None'; the other
# records in this file that have no action (100.114, 200.001) spell it
# 'none', so it is normalised to lowercase for consistency.
200.015:
  Type: Alarm
  Description: "Unable to read one or more sensor groups from this host's board management controller"
  Entity_Instance_ID: 'host=<hostname>'
  Severity: major
  Proposed_Repair_Action: 'Check board management connectivity and try rebooting the board management controller. If problem persists contact next level of support or lock and replace failing host.'
  Maintenance_Action: none
  Inhibit_Alarms: false
  Alarm_Type: operational-violation
  Probable_Cause: unknown
  Service_Affecting: false
  Suppression: false
|
|
|
|
|
|
# Host discovery / add / multi-node failure avoidance event logs.
# Description and Entity_Instance_ID are parallel lists: item N of one
# corresponds to item N of the other.
# NOTE(review): the unquoted key is resolved as the float 200.02 by YAML
# loaders, so the trailing zero is lost unless the consumer re-formats the
# ID to three decimals - confirm against the loader before quoting it.
200.020:
  Type: Log
  Description:
    - "<hostname> has been 'discovered' on the network"
    - "<hostname> has been 'added' to the system"
    - "<hostname> has 'entered' multi-node failure avoidance"
    - "<hostname> has 'exited' multi-node failure avoidance"
  Entity_Instance_ID:
    - 'host=<hostname>.event=discovered'
    - 'host=<hostname>.event=add'
    - 'host=<hostname>.event=mnfa_enter'
    - 'host=<hostname>.event=mnfa_exit'
  Severity: warning
  Alarm_Type: other
  Probable_Cause: unspecified-reason
  Service_Affecting: true
|
|
|
|
|
|
# Board management provisioning and manual host command request logs.
# Description and Entity_Instance_ID are parallel lists: item N of one
# corresponds to item N of the other.
200.021:
  Type: Log
  Description:
    - "<hostname> board management controller has been 'provisioned'"
    - "<hostname> board management controller has been 're-provisioned'"
    - "<hostname> board management controller has been 'de-provisioned'"
    - "<hostname> manual 'unlock' request"
    - "<hostname> manual 'reboot' request"
    - "<hostname> manual 'reset' request"
    - "<hostname> manual 'power-off' request"
    - "<hostname> manual 'power-on' request"
    - "<hostname> manual 'reinstall' request"
    - "<hostname> manual 'force-lock' request"
    - "<hostname> manual 'delete' request"
    - "<hostname> manual 'controller switchover' request"
  Entity_Instance_ID:
    - 'host=<hostname>.command=provision'
    - 'host=<hostname>.command=reprovision'
    - 'host=<hostname>.command=deprovision'
    - 'host=<hostname>.command=unlock'
    - 'host=<hostname>.command=reboot'
    - 'host=<hostname>.command=reset'
    - 'host=<hostname>.command=power-off'
    - 'host=<hostname>.command=power-on'
    - 'host=<hostname>.command=reinstall'
    - 'host=<hostname>.command=force-lock'
    - 'host=<hostname>.command=delete'
    - 'host=<hostname>.command=swact'
  Severity: warning
  Alarm_Type: other
  Probable_Cause: unspecified-reason
  Service_Affecting: false
|
|
|
|
|
|
# Host state/status change logs.
# Description and Entity_Instance_ID are parallel lists: item N of one
# corresponds to item N of the other.
200.022:
  Type: Log
  Description:
    - "<hostname> is now 'disabled'"
    - "<hostname> is now 'enabled'"
    - "<hostname> is now 'online'"
    - "<hostname> is now 'offline'"
    - "<hostname> is 'disabled-failed' to the system"
  Entity_Instance_ID:
    - 'host=<hostname>.state=disabled'
    - 'host=<hostname>.state=enabled'
    - 'host=<hostname>.status=online'
    - 'host=<hostname>.status=offline'
    - 'host=<hostname>.status=failed'
  Severity: warning
  Alarm_Type: other
  Probable_Cause: unspecified-reason
  Service_Affecting: true
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# BACKUP AND RESTORE
|
|
# ---------------------------------------------------------------------------
|
|
|
|
# System backup in progress; entity is fixed to host=controller.
210.001:
  Type: Alarm
  Description: 'System Backup in progress.'
  Entity_Instance_ID: 'host=controller'
  Severity: minor
  Proposed_Repair_Action: 'No action required.'
  # No values were supplied for the next two fields; they load as null.
  Maintenance_Action: null
  Inhibit_Alarms: null
  Alarm_Type: operational-violation
  Probable_Cause: unspecified-reason
  Service_Affecting: false
  Suppression: false
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# SYSTEM CONFIGURATION
|
|
# ---------------------------------------------------------------------------
|
|
|
|
# Host configuration is out-of-date, raised per host.
# Fix: corrected the typo "Configuation" -> "Configuration" in Description.
250.001:
  Type: Alarm
  Description: '<hostname> Configuration is out-of-date.'
  Entity_Instance_ID: 'host=<hostname>'
  Severity: major
  Proposed_Repair_Action: 'Administratively lock and unlock <hostname> to update config.'
  # No values were supplied for the next two fields; they load as null.
  Maintenance_Action: null
  Inhibit_Alarms: null
  Alarm_Type: operational-violation
  Probable_Cause: unspecified-reason
  Service_Affecting: true
  Suppression: false
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# VM Compute Services
|
|
# ---------------------------------------------------------------------------
|
|
# Host compute services failure alarm.
270.001:
  Type: Alarm
  Description: 'Host <host_name> compute services failure[, reason = <reason_text>]'
  Entity_Instance_ID: 'host=<host_name>.services=compute'
  Severity: critical
  Proposed_Repair_Action: 'Wait for host services recovery to complete; if problem persists contact next level of support'
  # No values were supplied for the next two fields; they load as null.
  Maintenance_Action: null
  Inhibit_Alarms: null
  Alarm_Type: processing-error
  Probable_Cause: unspecified-reason
  Service_Affecting: true
  Suppression: true
|
|
|
|
# Log record for a host compute services failure.
270.101:
  Type: Log
  Description: 'Host <host_name> compute services failure[, reason = <reason_text>]'
  Entity_Instance_ID: 'tenant=<tenant-uuid>.instance=<instance-uuid>'
  Severity: critical
  Alarm_Type: equipment
  Probable_Cause: unspecified-reason
  Service_Affecting: false
|
|
|
|
# Log record for host compute services being enabled.
270.102:
  Type: Log
  Description: 'Host <host_name> compute services enabled'
  Entity_Instance_ID: 'tenant=<tenant-uuid>.instance=<instance-uuid>'
  Severity: critical
  Alarm_Type: equipment
  Probable_Cause: unspecified-reason
  Service_Affecting: false
|
|
|
|
# Log record for host compute services being disabled.
270.103:
  Type: Log
  Description: 'Host <host_name> compute services disabled'
  Entity_Instance_ID: 'tenant=<tenant-uuid>.instance=<instance-uuid>'
  Severity: critical
  Alarm_Type: equipment
  Probable_Cause: unspecified-reason
  Service_Affecting: false
|
|
|
|
|
|
# Log record for a hypervisor administrative/operational state change.
275.001:
  Type: Log
  Description: 'Host <host_name> hypervisor is now <administrative_state>-<operational_state>'
  Entity_Instance_ID: 'tenant=<tenant-uuid>.instance=<instance-uuid>'
  Severity: critical
  Alarm_Type: equipment
  Probable_Cause: unspecified-reason
  Service_Affecting: false
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# NETWORK
|
|
# ---------------------------------------------------------------------------
|
|
|
|
# 'Data' port failure, raised per host port.
300.001:
  Type: Alarm
  Description: "'Data' Port failed."
  Entity_Instance_ID: 'host=<hostname>.port=<port-uuid>'
  Severity: major
  Proposed_Repair_Action: 'Check cabling and far-end port configuration and status on adjacent equipment.'
  # No values were supplied for the next two fields; they load as null.
  Maintenance_Action: null
  Inhibit_Alarms: null
  Alarm_Type: equipment
  Probable_Cause: loss-of-signal
  Service_Affecting: true
  Suppression: false
|
|
|
|
|
|
# 'Data' interface degraded/failed, raised per host interface.
300.002:
  Type: Alarm
  # Multi-line string format: two alternative descriptions separated by OR.
  Description: |-
    'Data' Interface degraded.
    OR
    'Data' Interface failed.
  Entity_Instance_ID: 'host=<hostname>.interface=<if-uuid>'
  Severity:
    - critical
    - major
  Proposed_Repair_Action: 'Check cabling and far-end port configuration and status on adjacent equipment.'
  # No values were supplied for the next two fields; they load as null.
  Maintenance_Action: null
  Inhibit_Alarms: null
  Alarm_Type: equipment
  Probable_Cause: loss-of-signal
  Service_Affecting: true
  Suppression: false
|
|
|
|
|
|
# Networking agent not responding, raised per host agent.
300.003:
  Type: Alarm
  Description: 'Networking Agent not responding.'
  Entity_Instance_ID: 'host=<hostname>.agent=<agent-uuid>'
  Severity: major
  Proposed_Repair_Action: 'If condition persists, attempt to clear issue by administratively locking and unlocking the Host.'
  # No values were supplied for the next two fields; they load as null.
  Maintenance_Action: null
  Inhibit_Alarms: null
  Alarm_Type: operational-violation
  Probable_Cause: underlying-resource-unavailable
  Service_Affecting: true
  Suppression: false
|
|
|
|
|
|
# No enabled compute host has connectivity to a provider network.
300.004:
  Type: Alarm
  Description: 'No enabled compute host with connectivity to provider network.'
  Entity_Instance_ID: 'host=<hostname>.providernet=<pnet-uuid>'
  Severity: major
  Proposed_Repair_Action: 'Enable compute hosts with required provider network connectivity.'
  # No values were supplied for the next two fields; they load as null.
  Maintenance_Action: null
  Inhibit_Alarms: null
  Alarm_Type: operational-violation
  Probable_Cause: underlying-resource-unavailable
  Service_Affecting: true
  Suppression: false
|
|
|
|
|
|
# Provider network communication failure, raised per providernet/host pair.
300.005:
  Type: Alarm
  # Multi-line string format: two alternative descriptions separated by OR.
  Description: |-
    Communication failure detected over provider network x% for ranges y% on host z%.
    OR
    Communication failure detected over provider network x% on host z%.
  Entity_Instance_ID: 'providernet=<pnet-uuid>.host=<hostname>'
  Severity: major
  Proposed_Repair_Action: 'Check neighbour switch port VLAN assignments.'
  # No values were supplied for the next two fields; they load as null.
  Maintenance_Action: null
  Inhibit_Alarms: null
  Alarm_Type: operational-violation
  Probable_Cause: underlying-resource-unavailable
  Service_Affecting: true
  Suppression: false
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# HIGH AVAILABILITY
|
|
# ---------------------------------------------------------------------------
|
|
|
|
# Service group failure / degraded / warning alarm.
400.001:
  Type: Alarm
  # Multi-line string format: three alternative descriptions separated by OR.
  Description: |-
    Service group failure; <list of affected services>.
    OR
    Service group degraded; <list of affected services>.
    OR
    Service group warning; <list of affected services>.
  Entity_Instance_ID: 'service_domain=<domain_name>.service_group=<group_name>.host=<hostname>'
  Severity:
    - critical
    - major
    - minor
  Proposed_Repair_Action: 'Contact next level of support.'
  # No value was supplied for this field; it loads as null.
  Maintenance_Action: null
  Inhibit_Alarms: false
  Alarm_Type: processing-error
  Probable_Cause: underlying-resource-unavailable
  Service_Affecting: true
  Suppression: true
|
|
|
|
|
|
# Service group loss-of-redundancy alarm.
# Fix: the first two Description alternatives were byte-identical ("but
# only <num> standby member<s> available"). The first now describes the
# "no standby members available" case, mirroring the existing "no active
# members available" alternative and the variants listed by log 401.002.
400.002:
  Type: Alarm
  # Multi-line string format: four alternative descriptions separated by OR.
  Description: |-
    Service group loss of redundancy; expected <num> standby member<s> but no standby members available.
    OR
    Service group loss of redundancy; expected <num> standby member<s> but only <num> standby member<s> available.
    OR
    Service group loss of redundancy; expected <num> active member<s> but no active members available.
    OR
    Service group loss of redundancy; expected <num> active member<s> but only <num> active member<s> available.
  Entity_Instance_ID: 'service_domain=<domain_name>.service_group=<group_name>'
  Severity: major
  Proposed_Repair_Action: 'Bring a controller node back in to service, otherwise contact next level of support.'
  # No value was supplied for this field; it loads as null.
  Maintenance_Action: null
  Inhibit_Alarms: false
  Alarm_Type: processing-error
  Probable_Cause: underlying-resource-unavailable
  Service_Affecting: true
  Suppression: true
|
|
|
|
|
|
# License key missing / expired / evaluation-expiring alarm, raised per host.
400.003:
  Type: Alarm
  # Multi-line string format: four alternative descriptions separated by OR.
  Description: |-
    License key is not installed; a valid license key is required for operation.
    OR
    License key has expired or is invalid; a valid license key is required for operation.
    OR
    Evaluation license key will expire on <date>; there are <num_days> days remaining in this evaluation.
    OR
    Evaluation license key will expire on <date>; there is only 1 day remaining in this evaluation.
  Entity_Instance_ID: 'host=<hostname>'
  Severity: critical
  Proposed_Repair_Action: 'Contact next level of support to obtain a new license key.'
  # No value was supplied for this field; it loads as null.
  Maintenance_Action: null
  Inhibit_Alarms: false
  Alarm_Type: processing-error
  Probable_Cause: key-expired
  Service_Affecting: true
  Suppression: false
|
|
|
|
|
|
# 400.004: // NOTE Removed
|
|
# Type: Alarm
|
|
# Description: Service group software modification detected; <list of affected files>.
|
|
# Entity_Instance_ID: host=<hostname>
|
|
# Severity: major
|
|
# Proposed_Repair_Action: Contact next level of support.
|
|
# Maintenance_Action:
|
|
# Inhibit_Alarms: false
|
|
# Alarm_Type: processing-error
|
|
# Probable_Cause: software-program-error
|
|
# Service_Affecting: true
|
|
# Suppression: false
|
|
|
|
|
|
400.005:
|
|
Type: Alarm
|
|
Description: |-
|
|
Communication failure detected with peer over port <linux-ifname>.
|
|
OR
|
|
Communication failure detected with peer over port <linux-ifname> within the last 30 seconds.
|
|
Entity_Instance_ID: host=<hostname>.network=<mgmt | oam | infra>
|
|
Severity: major
|
|
Proposed_Repair_Action: Check cabling and far-end port configuration and status on adjacent equipment.
|
|
Maintenance_Action:
|
|
Inhibit_Alarms: false
|
|
Alarm_Type: communication
|
|
Probable_Cause: underlying-resource-unavailable
|
|
Service_Affecting: true
|
|
Suppression: true
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# SM
|
|
# ---------------------------------------------------------------------------
|
|
|
|
401.001:
|
|
Type: Log
|
|
Description: Service group <group> state change from <state> to <state> on host <host_name>
|
|
Entity_Instance_ID: service_domain=<domain>.service_group=<group>.host=<host_name>
|
|
Severity: critical
|
|
Alarm_Type: processing-error
|
|
Probable_Cause: unspecified-reason
|
|
Service_Affecting: true
|
|
|
|
401.002:
|
|
Type: Log
|
|
Description: |-
|
|
Service group <group> loss of redundancy; expected <X> standby member but no standby members available
|
|
or
|
|
Service group <group> loss of redundancy; expected <X> standby member but only <Y> standby member(s) available
|
|
or
|
|
Service group <group> has no active members available; expected <X> active member(s)
|
|
or
|
|
Service group <group> loss of redundancy; expected <X> active member(s) but only <Y> active member(s) available
|
|
Entity_Instance_ID: service_domain=<domain>.service_group=<group>
|
|
Severity: critical
|
|
Alarm_Type: processing-error
|
|
Probable_Cause: unspecified-reason
|
|
Service_Affecting: true
|
|
|
|
401.003:
|
|
Type: Log
|
|
Description: |-
|
|
License key has expired or is invalid
|
|
or
|
|
Evaluation license key will expire on <date>
|
|
or
|
|
License key is valid
|
|
Entity_Instance_ID: host=<host_name>
|
|
Severity: critical
|
|
Alarm_Type: processing-error
|
|
Probable_Cause: unspecified-reason
|
|
Service_Affecting: true
|
|
|
|
401.005:
|
|
Type: Log
|
|
Description: |-
|
|
Communication failure detected with peer over port <port> on host <host name>
|
|
or
|
|
Communication failure detected with peer over port <port> on host <host name> within the last <X> seconds
|
|
or
|
|
Communication established with peer over port <port> on host <host name>
|
|
Entity_Instance_ID: host=<host_name>.network=<network>
|
|
Severity: critical
|
|
Alarm_Type: processing-error
|
|
Probable_Cause: unspecified-reason
|
|
Service_Affecting: true
|
|
|
|
401.007:
|
|
Type: Log
|
|
Description: Swact or swact-force
|
|
Entity_Instance_ID: host=<host_name>
|
|
Severity: critical
|
|
Alarm_Type: processing-error
|
|
Probable_Cause: unspecified-reason
|
|
Service_Affecting: true
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# VM
|
|
# ---------------------------------------------------------------------------
|
|
|
|
700.001:
|
|
Type: Alarm
|
|
Description: |-
|
|
Instance <instance_name> owned by <tenant_name> has failed on host <host_name>
|
|
Instance <instance_name> owned by <tenant_name> has failed to schedule
|
|
Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
|
|
Severity: critical
|
|
Proposed_Repair_Action: The system will attempt recovery; no repair action required
|
|
Maintenance_Action:
|
|
Inhibit_Alarms:
|
|
Alarm_Type: processing-error
|
|
Probable_Cause: software-error
|
|
Service_Affecting: true
|
|
Suppression: true
|
|
|
|
700.002:
|
|
Type: Alarm
|
|
Description: Instance <instance_name> owned by <tenant_name> is paused on host <host_name>
|
|
Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
|
|
Severity: critical
|
|
Proposed_Repair_Action: Unpause the instance
|
|
Maintenance_Action:
|
|
Inhibit_Alarms:
|
|
Alarm_Type: processing-error
|
|
Probable_Cause: procedural-error
|
|
Service_Affecting: true
|
|
Suppression: true
|
|
|
|
700.003:
|
|
Type: Alarm
|
|
Description: Instance <instance_name> owned by <tenant_name> is suspended on host <host_name>
|
|
Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
|
|
Severity: critical
|
|
Proposed_Repair_Action: Resume the instance
|
|
Maintenance_Action:
|
|
Inhibit_Alarms:
|
|
Alarm_Type: processing-error
|
|
Probable_Cause: procedural-error
|
|
Service_Affecting: true
|
|
Suppression: true
|
|
|
|
700.005:
|
|
Type: Alarm
|
|
Description: Instance <instance_name> owned by <tenant_name> is rebooting on host <host_name>
|
|
Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
|
|
Severity: critical
|
|
Proposed_Repair_Action: Wait for reboot to complete; if problem persists contact next level of support
|
|
Maintenance_Action:
|
|
Inhibit_Alarms:
|
|
Alarm_Type: processing-error
|
|
Probable_Cause: unspecified-reason
|
|
Service_Affecting: true
|
|
Suppression: true
|
|
|
|
700.006:
|
|
Type: Alarm
|
|
Description: Instance <instance_name> owned by <tenant_name> is rebuilding on host <host_name>
|
|
Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
|
|
Severity: critical
|
|
Proposed_Repair_Action: Wait for rebuild to complete; if problem persists contact next level of support
|
|
Maintenance_Action:
|
|
Inhibit_Alarms:
|
|
Alarm_Type: processing-error
|
|
Probable_Cause: underlying-resource-unavailable
|
|
Service_Affecting: true
|
|
Suppression: true
|
|
|
|
700.007:
|
|
Type: Alarm
|
|
Description: Instance <instance_name> owned by <tenant_name> is evacuating from host <host_name>
|
|
Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
|
|
Severity: critical
|
|
Proposed_Repair_Action: Wait for evacuate to complete; if problem persists contact next level of support
|
|
Maintenance_Action:
|
|
Inhibit_Alarms:
|
|
Alarm_Type: processing-error
|
|
Probable_Cause: underlying-resource-unavailable
|
|
Service_Affecting: true
|
|
Suppression: true
|
|
|
|
700.008:
|
|
Type: Alarm
|
|
Description: Instance <instance_name> owned by <tenant_name> is live migrating from host <host_name>
|
|
Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
|
|
Severity: warning
|
|
Proposed_Repair_Action: Wait for live migration to complete; if problem persists contact next level of support
|
|
Maintenance_Action:
|
|
Inhibit_Alarms:
|
|
Alarm_Type: processing-error
|
|
Probable_Cause: unspecified-reason
|
|
Service_Affecting: true
|
|
Suppression: true
|
|
|
|
700.009:
|
|
Type: Alarm
|
|
Description: Instance <instance_name> owned by <tenant_name> is cold migrating from host <host_name>
|
|
Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
|
|
Severity: critical
|
|
Proposed_Repair_Action: Wait for cold migration to complete; if problem persists contact next level of support
|
|
Maintenance_Action:
|
|
Inhibit_Alarms:
|
|
Alarm_Type: processing-error
|
|
Probable_Cause: unspecified-reason
|
|
Service_Affecting: true
|
|
Suppression: true
|
|
|
|
700.010:
|
|
Type: Alarm
|
|
Description: Instance <instance_name> owned by <tenant_name> has been cold-migrated to host <host_name> waiting for confirmation
|
|
Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
|
|
Severity: critical
|
|
Proposed_Repair_Action: Confirm or revert cold-migrate of instance
|
|
Maintenance_Action:
|
|
Inhibit_Alarms:
|
|
Alarm_Type: processing-error
|
|
Probable_Cause: unspecified-reason
|
|
Service_Affecting: true
|
|
Suppression: true
|
|
|
|
700.011:
|
|
Type: Alarm
|
|
Description: Instance <instance_name> owned by <tenant_name> is reverting cold migrate to host <host_name>
|
|
Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
|
|
Severity: critical
|
|
Proposed_Repair_Action: "Wait for cold migration revert to complete; if problem persists contact next level of support"
|
|
Maintenance_Action:
|
|
Inhibit_Alarms:
|
|
Alarm_Type: other
|
|
Probable_Cause: unspecified-reason
|
|
Service_Affecting: true
|
|
Suppression: true
|
|
|
|
700.012:
|
|
Type: Alarm
|
|
Description: Instance <instance_name> owned by <tenant_name> is resizing on host <host_name>
|
|
Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
|
|
Severity: critical
|
|
Proposed_Repair_Action: Wait for resize to complete; if problem persists contact next level of support
|
|
Maintenance_Action:
|
|
Inhibit_Alarms:
|
|
Alarm_Type: processing-error
|
|
Probable_Cause: unspecified-reason
|
|
Service_Affecting: true
|
|
Suppression: true
|
|
|
|
700.013:
|
|
Type: Alarm
|
|
Description: Instance <instance_name> owned by <tenant_name> has been resized on host <host_name> waiting for confirmation
|
|
Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
|
|
Severity: critical
|
|
Proposed_Repair_Action: Confirm or revert resize of instance
|
|
Maintenance_Action:
|
|
Inhibit_Alarms:
|
|
Alarm_Type: processing-error
|
|
Probable_Cause: unspecified-reason
|
|
Service_Affecting: true
|
|
Suppression: true
|
|
|
|
700.014:
|
|
Type: Alarm
|
|
Description: Instance <instance_name> owned by <tenant_name> is reverting resize on host <host_name>
|
|
Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
|
|
Severity: critical
|
|
Proposed_Repair_Action: "Wait for resize revert to complete; if problem persists contact next level of support"
|
|
Maintenance_Action:
|
|
Inhibit_Alarms:
|
|
Alarm_Type: other
|
|
Probable_Cause: unspecified-reason
|
|
Service_Affecting: true
|
|
Suppression: true
|
|
|
|
700.015:
|
|
Type: Alarm
|
|
Description: Guest Heartbeat not established for instance <instance_name> owned by <tenant_name> on host <host_name>
|
|
Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
|
|
Severity: major
|
|
Proposed_Repair_Action: "Verify that the instance is running the Guest-Client daemon, or disable Guest Heartbeat for the instance if no longer needed, otherwise contact next level of support"
|
|
Maintenance_Action:
|
|
Inhibit_Alarms:
|
|
Alarm_Type: communication
|
|
Probable_Cause: procedural-error
|
|
Service_Affecting: true
|
|
Suppression: true
|
|
|
|
700.016:
|
|
Type: Alarm
|
|
Description: Multi-Node Recovery Mode
|
|
Entity_Instance_ID: subsystem=vim
|
|
Severity: major
|
|
Proposed_Repair_Action: "Wait for the system to exit out of this mode"
|
|
Maintenance_Action:
|
|
Inhibit_Alarms:
|
|
Alarm_Type: equipment
|
|
Probable_Cause: unspecified-reason
|
|
Service_Affecting: true
|
|
Suppression: true
|
|
|
|
700.101:
|
|
Type: Log
|
|
Description: Instance <instance_name> is enabled on host <host_name>
|
|
Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
|
|
Severity: critical
|
|
Alarm_Type: equipment
|
|
Probable_Cause: unspecified-reason
|
|
Service_Affecting: false
|
|
|
|
700.102:
|
|
Type: Log
|
|
Description: |-
|
|
Instance <instance_name> owned by <tenant_name> has failed[, reason = <reason_text>]
|
|
Instance <instance_name> owned by <tenant_name> has failed to schedule[, reason = <reason_text>]
|
|
Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
|
|
Severity: critical
|
|
Alarm_Type: equipment
|
|
Probable_Cause: unspecified-reason
|
|
Service_Affecting: false
|
|
|
|
700.103:
|
|
Type: Log
|
|
Description: Create issued <by <tenant_name>|by the system> against <instance_name> owned by <tenant_name>
|
|
Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
|
|
Severity: critical
|
|
Alarm_Type: equipment
|
|
Probable_Cause: unspecified-reason
|
|
Service_Affecting: false
|
|
|
|
700.104:
|
|
Type: Log
|
|
Description: Creating instance <instance_name> owned by <tenant_name>
|
|
Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
|
|
Severity: critical
|
|
Alarm_Type: equipment
|
|
Probable_Cause: unspecified-reason
|
|
Service_Affecting: false
|
|
|
|
700.105:
|
|
Type: Log
|
|
Description: "Create rejected for instance <instance_name>[, reason = <reason_text>]"
|
|
Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
|
|
Severity: critical
|
|
Alarm_Type: equipment
|
|
Probable_Cause: unspecified-reason
|
|
Service_Affecting: false
|
|
|
|
700.106:
|
|
Type: Log
|
|
Description: "Create cancelled for instance <instance_name>[, reason = <reason_text>]"
|
|
Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
|
|
Severity: critical
|
|
Alarm_Type: equipment
|
|
Probable_Cause: unspecified-reason
|
|
Service_Affecting: false
|
|
|
|
700.107:
|
|
Type: Log
|
|
Description: "Create failed for instance <instance_name>[, reason = <reason_text>]"
|
|
Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
|
|
Severity: critical
|
|
Alarm_Type: equipment
|
|
Probable_Cause: unspecified-reason
|
|
Service_Affecting: false
|
|
|
|
700.108:
|
|
Type: Log
|
|
Description: Instance <instance_name> owned by <tenant_name> has been created
|
|
Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
|
|
Severity: critical
|
|
Alarm_Type: equipment
|
|
Probable_Cause: unspecified-reason
|
|
Service_Affecting: false
|
|
|
|
700.109:
|
|
Type: Log
|
|
Description: "Delete issued <by <tenant_name>|by the system> against instance <instance_name> owned by <tenant_name> on host <host_name>[, reason = <reason_text>]"
|
|
Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
|
|
Severity: critical
|
|
Alarm_Type: equipment
|
|
Probable_Cause: unspecified-reason
|
|
Service_Affecting: false
|
|
|
|
700.110:
|
|
Type: Log
|
|
Description: Deleting instance <instance_name> owned by <tenant_name>
|
|
Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
|
|
Severity: critical
|
|
Alarm_Type: equipment
|
|
Probable_Cause: unspecified-reason
|
|
Service_Affecting: false
|
|
|
|
700.111:
|
|
Type: Log
|
|
Description: "Delete rejected for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
|
|
Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
|
|
Severity: critical
|
|
Alarm_Type: equipment
|
|
Probable_Cause: unspecified-reason
|
|
Service_Affecting: false
|
|
|
|
700.112:
|
|
Type: Log
|
|
Description: "Delete cancelled for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
|
|
Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
|
|
Severity: critical
|
|
Alarm_Type: equipment
|
|
Probable_Cause: unspecified-reason
|
|
Service_Affecting: false
|
|
|
|
700.113:
|
|
Type: Log
|
|
Description: "Delete failed for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
|
|
Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
|
|
Severity: critical
|
|
Alarm_Type: equipment
|
|
Probable_Cause: unspecified-reason
|
|
Service_Affecting: false
|
|
|
|
700.114:
|
|
Type: Log
|
|
Description: Deleted instance <instance_name> owned by <tenant_name>
|
|
Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
|
|
Severity: critical
|
|
Alarm_Type: equipment
|
|
Probable_Cause: unspecified-reason
|
|
Service_Affecting: false
|
|
|
|
700.115:
|
|
Type: Log
|
|
Description: "Pause issued <by <tenant_name>|by the system> against instance <instance_name> owned by <tenant_name> on host <host_name>[, reason = <reason_text>]"
|
|
Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
|
|
Severity: critical
|
|
Alarm_Type: equipment
|
|
Probable_Cause: unspecified-reason
|
|
Service_Affecting: false
|
|
|
|
700.116:
|
|
Type: Log
|
|
Description: Pause inprogress for instance <instance_name> on host <host_name>
|
|
Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
|
|
Severity: critical
|
|
Alarm_Type: equipment
|
|
Probable_Cause: unspecified-reason
|
|
Service_Affecting: false
|
|
|
|
700.117:
|
|
Type: Log
|
|
Description: "Pause rejected for instance <instance_name> enabled on host <host_name>[, reason = <reason_text>]"
|
|
Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
|
|
Severity: critical
|
|
Alarm_Type: equipment
|
|
Probable_Cause: unspecified-reason
|
|
Service_Affecting: false
|
|
|
|
700.118:
|
|
Type: Log
|
|
Description: "Pause cancelled for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
|
|
Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
|
|
Severity: critical
|
|
Alarm_Type: equipment
|
|
Probable_Cause: unspecified-reason
|
|
Service_Affecting: false
|
|
|
|
700.119:
|
|
Type: Log
|
|
Description: "Pause failed for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
|
|
Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
|
|
Severity: critical
|
|
Alarm_Type: equipment
|
|
Probable_Cause: unspecified-reason
|
|
Service_Affecting: false
|
|
|
|
700.120:
|
|
Type: Log
|
|
Description: Pause complete for instance <instance_name> now paused on host <host_name>
|
|
Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
|
|
Severity: critical
|
|
Alarm_Type: equipment
|
|
Probable_Cause: unspecified-reason
|
|
Service_Affecting: false
|
|
|
|
700.121:
|
|
Type: Log
|
|
Description: "Unpause issued <by <tenant_name>|by the system> against instance <instance_name> owned by <tenant_name> on host <host_name>[, reason = <reason_text>]"
|
|
Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
|
|
Severity: critical
|
|
Alarm_Type: equipment
|
|
Probable_Cause: unspecified-reason
|
|
Service_Affecting: false
|
|
|
|
700.122:
|
|
Type: Log
|
|
Description: Unpause inprogress for instance <instance_name> on host <host_name>
|
|
Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
|
|
Severity: critical
|
|
Alarm_Type: equipment
|
|
Probable_Cause: unspecified-reason
|
|
Service_Affecting: false
|
|
|
|
700.123:
|
|
Type: Log
|
|
Description: "Unpause rejected for instance <instance_name> paused on host <host_name>[, reason = <reason_text>]"
|
|
Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
|
|
Severity: critical
|
|
Alarm_Type: equipment
|
|
Probable_Cause: unspecified-reason
|
|
Service_Affecting: false
|
|
|
|
700.124:
|
|
Type: Log
|
|
Description: "Unpause cancelled for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
|
|
Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
|
|
Severity: critical
|
|
Alarm_Type: equipment
|
|
Probable_Cause: unspecified-reason
|
|
Service_Affecting: false
|
|
|
|
700.125:
|
|
Type: Log
|
|
Description: "Unpause failed for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
|
|
Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
|
|
Severity: critical
|
|
Alarm_Type: equipment
|
|
Probable_Cause: unspecified-reason
|
|
Service_Affecting: false
|
|
|
|
700.126:
|
|
Type: Log
|
|
Description: Unpause complete for instance <instance_name> now enabled on host <host_name>
|
|
Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
|
|
Severity: critical
|
|
Alarm_Type: equipment
|
|
Probable_Cause: unspecified-reason
|
|
Service_Affecting: false
|
|
|
|
700.127:
|
|
Type: Log
|
|
Description: "Suspend issued <by <tenant_name>|by the system> against instance <instance_name> owned by <tenant_name> on host <host_name>[, reason = <reason_text>]"
|
|
Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
|
|
Severity: critical
|
|
Alarm_Type: equipment
|
|
Probable_Cause: unspecified-reason
|
|
Service_Affecting: false
|
|
|
|
700.128:
|
|
Type: Log
|
|
Description: Suspend inprogress for instance <instance_name> on host <host_name>
|
|
Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
|
|
Severity: critical
|
|
Alarm_Type: equipment
|
|
Probable_Cause: unspecified-reason
|
|
Service_Affecting: false
|
|
|
|
700.129:
|
|
Type: Log
|
|
Description: "Suspend rejected for instance <instance_name> enabled on host <host_name>[, reason = <reason_text>]"
|
|
Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
|
|
Severity: critical
|
|
Alarm_Type: equipment
|
|
Probable_Cause: unspecified-reason
|
|
Service_Affecting: false
|
|
|
|
700.130:
|
|
Type: Log
|
|
Description: "Suspend cancelled for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
|
|
Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
|
|
Severity: critical
|
|
Alarm_Type: equipment
|
|
Probable_Cause: unspecified-reason
|
|
Service_Affecting: false
|
|
|
|
700.131:
|
|
Type: Log
|
|
Description: "Suspend failed for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
|
|
Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
|
|
Severity: critical
|
|
Alarm_Type: equipment
|
|
Probable_Cause: unspecified-reason
|
|
Service_Affecting: false
|
|
|
|
700.132:
|
|
Type: Log
|
|
Description: Suspend complete for instance <instance_name> now suspended on host <host_name>
|
|
Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
|
|
Severity: critical
|
|
Alarm_Type: equipment
|
|
Probable_Cause: unspecified-reason
|
|
Service_Affecting: false
|
|
|
|
700.133:
|
|
Type: Log
|
|
Description: "Resume issued <by <tenant_name>|by the system> against instance <instance_name> owned by <tenant_name> on host <host_name>[, reason = <reason_text>]"
|
|
Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
|
|
Severity: critical
|
|
Alarm_Type: equipment
|
|
Probable_Cause: unspecified-reason
|
|
Service_Affecting: false
|
|
|
|
700.134:
|
|
Type: Log
|
|
Description: Resume inprogress for instance <instance_name> on host <host_name>
|
|
Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
|
|
Severity: critical
|
|
Alarm_Type: equipment
|
|
Probable_Cause: unspecified-reason
|
|
Service_Affecting: false
|
|
|
|
700.135:
|
|
Type: Log
|
|
Description: "Resume rejected for instance <instance_name> suspended on host <host_name>[, reason = <reason_text>]"
|
|
Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
|
|
Severity: critical
|
|
Alarm_Type: equipment
|
|
Probable_Cause: unspecified-reason
|
|
Service_Affecting: false
|
|
|
|
700.136:
|
|
Type: Log
|
|
Description: "Resume cancelled for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
|
|
Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
|
|
Severity: critical
|
|
Alarm_Type: equipment
|
|
Probable_Cause: unspecified-reason
|
|
Service_Affecting: false
|
|
|
|
700.137:
|
|
Type: Log
|
|
Description: "Resume failed for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
|
|
Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
|
|
Severity: critical
|
|
Alarm_Type: equipment
|
|
Probable_Cause: unspecified-reason
|
|
Service_Affecting: false
|
|
|
|
700.138:
|
|
Type: Log
|
|
Description: Resume complete for instance <instance_name> now enabled on host <host_name>
|
|
Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
|
|
Severity: critical
|
|
Alarm_Type: equipment
|
|
Probable_Cause: unspecified-reason
|
|
Service_Affecting: false
|
|
|
|
700.139:
|
|
Type: Log
|
|
Description: "Start issued <by <tenant_name>|by the system> against instance <instance_name> owned by <tenant_name> on host <host_name>[, reason = <reason_text>]"
|
|
Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
|
|
Severity: critical
|
|
Alarm_Type: equipment
|
|
Probable_Cause: unspecified-reason
|
|
Service_Affecting: false
|
|
|
|
700.140:
|
|
Type: Log
|
|
Description: Start inprogress for instance <instance_name> on host <host_name>
|
|
Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
|
|
Severity: critical
|
|
Alarm_Type: equipment
|
|
Probable_Cause: unspecified-reason
|
|
Service_Affecting: false
|
|
|
|
700.141:
|
|
Type: Log
|
|
Description: "Start rejected for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
|
|
Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
|
|
Severity: critical
|
|
Alarm_Type: equipment
|
|
Probable_Cause: unspecified-reason
|
|
Service_Affecting: false
|
|
|
|
700.142:
|
|
Type: Log
|
|
Description: "Start cancelled for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
|
|
Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
|
|
Severity: critical
|
|
Alarm_Type: equipment
|
|
Probable_Cause: unspecified-reason
|
|
Service_Affecting: false
|
|
|
|
700.143:
|
|
Type: Log
|
|
Description: "Start failed for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
|
|
Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
|
|
Severity: critical
|
|
Alarm_Type: equipment
|
|
Probable_Cause: unspecified-reason
|
|
Service_Affecting: false
|
|
|
|
700.144:
|
|
Type: Log
|
|
Description: Start complete for instance <instance_name> now enabled on host <host_name>
|
|
Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
|
|
Severity: critical
|
|
Alarm_Type: equipment
|
|
Probable_Cause: unspecified-reason
|
|
Service_Affecting: false
|
|
|
|
700.145:
|
|
Type: Log
|
|
Description: "Stop issued <by <tenant_name>|by the system|by the instance> against instance <instance_name> owned by <tenant_name> on host <host_name>[, reason = <reason_text>]"
|
|
Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
|
|
Severity: critical
|
|
Alarm_Type: equipment
|
|
Probable_Cause: unspecified-reason
|
|
Service_Affecting: false
|
|
|
|
700.146:
|
|
Type: Log
|
|
Description: Stop inprogress for instance <instance_name> on host <host_name>
|
|
Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
|
|
Severity: critical
|
|
Alarm_Type: equipment
|
|
Probable_Cause: unspecified-reason
|
|
Service_Affecting: false
|
|
|
|
700.147:
|
|
Type: Log
|
|
Description: "Stop rejected for instance <instance_name> enabled on host <host_name>[, reason = <reason_text>]"
|
|
Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
|
|
Severity: critical
|
|
Alarm_Type: equipment
|
|
Probable_Cause: unspecified-reason
|
|
Service_Affecting: false
|
|
|
|
700.148:
|
|
Type: Log
|
|
Description: "Stop cancelled for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
|
|
Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
|
|
Severity: critical
|
|
Alarm_Type: equipment
|
|
Probable_Cause: unspecified-reason
|
|
Service_Affecting: false
|
|
|
|
700.149:
|
|
Type: Log
|
|
Description: "Stop failed for instance <instance_name> on host <host_name>[, reason = <reason_text>]"
|
|
Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
|
|
Severity: critical
|
|
Alarm_Type: equipment
|
|
Probable_Cause: unspecified-reason
|
|
Service_Affecting: false
|
|
|
|
700.150:
|
|
Type: Log
|
|
Description: Stop complete for instance <instance_name> now disabled on host <host_name>
|
|
Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
|
|
Severity: critical
|
|
Alarm_Type: equipment
|
|
Probable_Cause: unspecified-reason
|
|
Service_Affecting: false
|
|
|
|
700.151:
|
|
Type: Log
|
|
Description: "Live-Migrate issued <by <tenant_name>|by the system> against instance <instance_name> owned by <tenant_name> from host <host_name>[, reason = <reason_text>]"
|
|
Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
|
|
Severity: critical
|
|
Alarm_Type: equipment
|
|
Probable_Cause: unspecified-reason
|
|
Service_Affecting: false
|
|
|
|
700.152:
|
|
Type: Log
|
|
Description: Live-Migrate inprogress for instance <instance_name> from host <host_name>
|
|
Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
|
|
Severity: critical
|
|
Alarm_Type: equipment
|
|
Probable_Cause: unspecified-reason
|
|
Service_Affecting: false
|
|
|
|
700.153:
|
|
Type: Log
|
|
Description: "Live-Migrate rejected for instance <instance_name> now on host <host_name>[, reason = <reason_text>]"
|
|
Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
|
|
Severity: critical
|
|
Alarm_Type: equipment
|
|
Probable_Cause: unspecified-reason
|
|
Service_Affecting: false
|
|
|
|
700.154:
|
|
Type: Log
|
|
Description: "Live-Migrate cancelled for instance <instance_name> now on host <host_name>[, reason = <reason_text>]"
|
|
Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
|
|
Severity: critical
|
|
Alarm_Type: equipment
|
|
Probable_Cause: unspecified-reason
|
|
Service_Affecting: false
|
|
|
|
700.155:
|
|
Type: Log
|
|
Description: "Live-Migrate failed for instance <instance_name> now on host <host_name>[, reason = <reason_text>]"
|
|
Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
|
|
Severity: critical
|
|
Alarm_Type: equipment
|
|
Probable_Cause: unspecified-reason
|
|
Service_Affecting: false
|
|
|
|
700.156:
|
|
Type: Log
|
|
Description: Live-Migrate complete for instance <instance_name> now enabled on host <host_name>
|
|
Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
|
|
Severity: critical
|
|
Alarm_Type: equipment
|
|
Probable_Cause: unspecified-reason
|
|
Service_Affecting: false
|
|
|
|
700.157:
|
|
Type: Log
|
|
Description: "Cold-Migrate issued <by <tenant_name>|by the system> against instance <instance_name> owned by <tenant_name> from host <host_name>[, reason = <reason_text>]"
|
|
Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
|
|
Severity: critical
|
|
Alarm_Type: equipment
|
|
Probable_Cause: unspecified-reason
|
|
Service_Affecting: false
|
|
|
|
700.158:
|
|
Type: Log
|
|
Description: Cold-Migrate inprogress for instance <instance_name> from host <host_name>
|
|
Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
|
|
Severity: critical
|
|
Alarm_Type: equipment
|
|
Probable_Cause: unspecified-reason
|
|
Service_Affecting: false
|
|
|
|
700.159:
|
|
Type: Log
|
|
Description: "Cold-Migrate rejected for instance <instance_name> now on host <host_name>[, reason = <reason_text>]"
|
|
Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
|
|
Severity: critical
|
|
Alarm_Type: equipment
|
|
Probable_Cause: unspecified-reason
|
|
Service_Affecting: false
|
|
|
|
700.160:
|
|
Type: Log
|
|
Description: "Cold-Migrate cancelled for instance <instance_name> now on host <host_name>[, reason = <reason_text>]"
|
|
Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
|
|
Severity: critical
|
|
Alarm_Type: equipment
|
|
Probable_Cause: unspecified-reason
|
|
Service_Affecting: false
|
|
|
|
700.161:
|
|
Type: Log
|
|
Description: "Cold-Migrate failed for instance <instance_name> now on host <host_name>[, reason = <reason_text>]"
|
|
Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
|
|
Severity: critical
|
|
Alarm_Type: equipment
|
|
Probable_Cause: unspecified-reason
|
|
Service_Affecting: false
|
|
|
|
700.162:
|
|
Type: Log
|
|
Description: Cold-Migrate complete for instance <instance_name> now enabled on host <host_name>
|
|
Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
|
|
Severity: critical
|
|
Alarm_Type: equipment
|
|
Probable_Cause: unspecified-reason
|
|
Service_Affecting: false
|
|
|
|
700.163:
|
|
Type: Log
|
|
Description: "Cold-Migrate-Confirm issued <by <tenant_name>|by the system> against instance <instance_name> owned by <tenant_name> on host <host_name>[, reason = <reason_text>]"
|
|
Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
|
|
Severity: critical
|
|
Alarm_Type: equipment
|
|
Probable_Cause: unspecified-reason
|
|
Service_Affecting: false
|
|
|
|
# Log record: a cold-migrate-confirm is in progress for an instance.
700.164:
    Type: Log
    Description: 'Cold-Migrate-Confirm inprogress for instance <instance_name> on host <host_name>'
    Entity_Instance_ID: 'tenant=<tenant-uuid>.instance=<instance-uuid>'
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: false
|
|
|
|
# Log record: a cold-migrate-confirm was rejected; the message template
# ends with a bracketed reason field.
700.165:
    Type: Log
    Description: 'Cold-Migrate-Confirm rejected for instance <instance_name> now enabled on host <host_name>[, reason = <reason_text>]'
    Entity_Instance_ID: 'tenant=<tenant-uuid>.instance=<instance-uuid>'
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: false
|
|
|
|
# Log record: a cold-migrate-confirm was cancelled; the message template
# ends with a bracketed reason field.
700.166:
    Type: Log
    Description: 'Cold-Migrate-Confirm cancelled for instance <instance_name> on host <host_name>[, reason = <reason_text>]'
    Entity_Instance_ID: 'tenant=<tenant-uuid>.instance=<instance-uuid>'
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: false
|
|
|
|
# Log record: a cold-migrate-confirm failed; the message template ends
# with a bracketed reason field.
700.167:
    Type: Log
    Description: 'Cold-Migrate-Confirm failed for instance <instance_name> on host <host_name>[, reason = <reason_text>]'
    Entity_Instance_ID: 'tenant=<tenant-uuid>.instance=<instance-uuid>'
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: false
|
|
|
|
# Log record: a cold-migrate-confirm completed and the instance is enabled
# on the named host.
700.168:
    Type: Log
    Description: 'Cold-Migrate-Confirm complete for instance <instance_name> enabled on host <host_name>'
    Entity_Instance_ID: 'tenant=<tenant-uuid>.instance=<instance-uuid>'
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: false
|
|
|
|
# Log record: a cold-migrate-revert was issued (by a tenant or by the
# system) against an instance; the template ends with a bracketed reason.
700.169:
    Type: Log
    Description: 'Cold-Migrate-Revert issued <by <tenant_name>|by the system> against instance <instance_name> owned by <tenant_name> on host <host_name>[, reason = <reason_text>]'
    Entity_Instance_ID: 'tenant=<tenant-uuid>.instance=<instance-uuid>'
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: false
|
|
|
|
# Log record: a cold-migrate-revert is in progress, moving the instance
# from the named host.
# NOTE(review): unquoted, this key loads as the float 700.17 (trailing zero
# lost) - confirm consumers re-pad it to three decimals.
700.170:
    Type: Log
    Description: 'Cold-Migrate-Revert inprogress for instance <instance_name> from host <host_name>'
    Entity_Instance_ID: 'tenant=<tenant-uuid>.instance=<instance-uuid>'
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: false
|
|
|
|
# Log record: a cold-migrate-revert was rejected; the message template
# ends with a bracketed reason field.
700.171:
    Type: Log
    Description: 'Cold-Migrate-Revert rejected for instance <instance_name> now on host <host_name>[, reason = <reason_text>]'
    Entity_Instance_ID: 'tenant=<tenant-uuid>.instance=<instance-uuid>'
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: false
|
|
|
|
# Log record: a cold-migrate-revert was cancelled; the message template
# ends with a bracketed reason field.
700.172:
    Type: Log
    Description: 'Cold-Migrate-Revert cancelled for instance <instance_name> on host <host_name>[, reason = <reason_text>]'
    Entity_Instance_ID: 'tenant=<tenant-uuid>.instance=<instance-uuid>'
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: false
|
|
|
|
# Log record: a cold-migrate-revert failed; the message template ends with
# a bracketed reason field.
700.173:
    Type: Log
    Description: 'Cold-Migrate-Revert failed for instance <instance_name> on host <host_name>[, reason = <reason_text>]'
    Entity_Instance_ID: 'tenant=<tenant-uuid>.instance=<instance-uuid>'
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: false
|
|
|
|
# Log record: a cold-migrate-revert completed and the instance is now
# enabled on the named host.
700.174:
    Type: Log
    Description: 'Cold-Migrate-Revert complete for instance <instance_name> now enabled on host <host_name>'
    Entity_Instance_ID: 'tenant=<tenant-uuid>.instance=<instance-uuid>'
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: false
|
|
|
|
# Log record: an evacuate was issued (by a tenant or by the system)
# against an instance; the template ends with a bracketed reason field.
700.175:
    Type: Log
    Description: 'Evacuate issued <by <tenant_name>|by the system> against instance <instance_name> owned by <tenant_name> on host <host_name>[, reason = <reason_text>]'
    Entity_Instance_ID: 'tenant=<tenant-uuid>.instance=<instance-uuid>'
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: false
|
|
|
|
# Log record: an instance is being evacuated from the named host.
700.176:
    Type: Log
    Description: 'Evacuating instance <instance_name> owned by <tenant_name> from host <host_name>'
    Entity_Instance_ID: 'tenant=<tenant-uuid>.instance=<instance-uuid>'
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: false
|
|
|
|
# Log record: an evacuate was rejected for an instance; the message
# template ends with a bracketed reason field.
700.177:
    Type: Log
    Description: 'Evacuate rejected for instance <instance_name> on host <host_name>[, reason = <reason_text>]'
    Entity_Instance_ID: 'tenant=<tenant-uuid>.instance=<instance-uuid>'
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: false
|
|
|
|
# Log record: an evacuate was cancelled for an instance; the message
# template ends with a bracketed reason field.
700.178:
    Type: Log
    Description: 'Evacuate cancelled for instance <instance_name> on host <host_name>[, reason = <reason_text>]'
    Entity_Instance_ID: 'tenant=<tenant-uuid>.instance=<instance-uuid>'
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: false
|
|
|
|
# Log record: an evacuate failed for an instance; the message template
# ends with a bracketed reason field.
700.179:
    Type: Log
    Description: 'Evacuate failed for instance <instance_name> on host <host_name>[, reason = <reason_text>]'
    Entity_Instance_ID: 'tenant=<tenant-uuid>.instance=<instance-uuid>'
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: false
|
|
|
|
# Log record: an evacuate completed and the instance is now enabled on the
# named host.
# NOTE(review): unquoted, this key loads as the float 700.18 (trailing zero
# lost) - confirm consumers re-pad it to three decimals.
700.180:
    Type: Log
    Description: 'Evacuate complete for instance <instance_name> now enabled on host <host_name>'
    Entity_Instance_ID: 'tenant=<tenant-uuid>.instance=<instance-uuid>'
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: false
|
|
|
|
# Log record: a soft or hard reboot was issued (by a tenant, the system, or
# the instance itself) against an instance; the template ends with a
# bracketed reason field.
# The description is written as one double-quoted scalar, like the other
# "issued" records: inside a `|-` literal block scalar the double-quote
# characters and the line break would become part of the message text.
700.181:
    Type: Log
    Description: "Reboot <(soft-reboot)|(hard-reboot)> issued <by <tenant_name>|by the system|by the instance> against instance <instance_name> owned by <tenant_name> on host <host_name>[, reason = <reason_text>]"
    Entity_Instance_ID: tenant=<tenant-uuid>.instance=<instance-uuid>
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: false
|
|
|
|
# Log record: a reboot is in progress for an instance.
700.182:
    Type: Log
    Description: 'Reboot inprogress for instance <instance_name> on host <host_name>'
    Entity_Instance_ID: 'tenant=<tenant-uuid>.instance=<instance-uuid>'
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: false
|
|
|
|
# Log record: a reboot was rejected for an instance; the message template
# ends with a bracketed reason field.
700.183:
    Type: Log
    Description: 'Reboot rejected for instance <instance_name> on host <host_name>[, reason = <reason_text>]'
    Entity_Instance_ID: 'tenant=<tenant-uuid>.instance=<instance-uuid>'
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: false
|
|
|
|
# Log record: a reboot was cancelled for an instance; the message template
# ends with a bracketed reason field.
700.184:
    Type: Log
    Description: 'Reboot cancelled for instance <instance_name> on host <host_name>[, reason = <reason_text>]'
    Entity_Instance_ID: 'tenant=<tenant-uuid>.instance=<instance-uuid>'
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: false
|
|
|
|
# Log record: a reboot failed for an instance; the message template ends
# with a bracketed reason field.
700.185:
    Type: Log
    Description: 'Reboot failed for instance <instance_name> on host <host_name>[, reason = <reason_text>]'
    Entity_Instance_ID: 'tenant=<tenant-uuid>.instance=<instance-uuid>'
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: false
|
|
|
|
# Log record: a reboot completed and the instance is now enabled on the
# named host.
700.186:
    Type: Log
    Description: 'Reboot complete for instance <instance_name> now enabled on host <host_name>'
    Entity_Instance_ID: 'tenant=<tenant-uuid>.instance=<instance-uuid>'
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: false
|
|
|
|
# Log record: a rebuild using a named image was issued (by a tenant or by
# the system) against an instance; the template ends with a bracketed
# reason field.
700.187:
    Type: Log
    Description: 'Rebuild issued <by <tenant_name>|by the system> against instance <instance_name> using image <image_name> on host <host_name>[, reason = <reason_text>]'
    Entity_Instance_ID: 'tenant=<tenant-uuid>.instance=<instance-uuid>'
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: false
|
|
|
|
# Log record: a rebuild is in progress for an instance.
700.188:
    Type: Log
    Description: 'Rebuild inprogress for instance <instance_name> on host <host_name>'
    Entity_Instance_ID: 'tenant=<tenant-uuid>.instance=<instance-uuid>'
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: false
|
|
|
|
# Log record: a rebuild was rejected for an instance; the message template
# ends with a bracketed reason field.
700.189:
    Type: Log
    Description: 'Rebuild rejected for instance <instance_name> on host <host_name>[, reason = <reason_text>]'
    Entity_Instance_ID: 'tenant=<tenant-uuid>.instance=<instance-uuid>'
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: false
|
|
|
|
# Log record: a rebuild was cancelled for an instance; the message template
# ends with a bracketed reason field.
# NOTE(review): unquoted, this key loads as the float 700.19 (trailing zero
# lost) - confirm consumers re-pad it to three decimals.
700.190:
    Type: Log
    Description: 'Rebuild cancelled for instance <instance_name> on host <host_name>[, reason = <reason_text>]'
    Entity_Instance_ID: 'tenant=<tenant-uuid>.instance=<instance-uuid>'
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: false
|
|
|
|
# Log record: a rebuild failed for an instance; the message template ends
# with a bracketed reason field.
700.191:
    Type: Log
    Description: 'Rebuild failed for instance <instance_name> on host <host_name>[, reason = <reason_text>]'
    Entity_Instance_ID: 'tenant=<tenant-uuid>.instance=<instance-uuid>'
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: false
|
|
|
|
# Log record: a rebuild completed and the instance is now enabled on the
# named host.
700.192:
    Type: Log
    Description: 'Rebuild complete for instance <instance_name> now enabled on host <host_name>'
    Entity_Instance_ID: 'tenant=<tenant-uuid>.instance=<instance-uuid>'
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: false
|
|
|
|
# Log record: a resize was issued (by a tenant or by the system) against
# an instance; the template ends with a bracketed reason field.
700.193:
    Type: Log
    Description: 'Resize issued <by <tenant_name>|by the system> against instance <instance_name> owned by <tenant_name> on host <host_name>[, reason = <reason_text>]'
    Entity_Instance_ID: 'tenant=<tenant-uuid>.instance=<instance-uuid>'
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: false
|
|
|
|
# Log record: a resize is in progress for an instance.
700.194:
    Type: Log
    Description: 'Resize inprogress for instance <instance_name> on host <host_name>'
    Entity_Instance_ID: 'tenant=<tenant-uuid>.instance=<instance-uuid>'
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: false
|
|
|
|
# Log record: a resize was rejected for an instance; the message template
# ends with a bracketed reason field.
700.195:
    Type: Log
    Description: 'Resize rejected for instance <instance_name> on host <host_name>[, reason = <reason_text>]'
    Entity_Instance_ID: 'tenant=<tenant-uuid>.instance=<instance-uuid>'
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: false
|
|
|
|
# Log record: a resize was cancelled for an instance; the message template
# ends with a bracketed reason field.
700.196:
    Type: Log
    Description: 'Resize cancelled for instance <instance_name> on host <host_name>[, reason = <reason_text>]'
    Entity_Instance_ID: 'tenant=<tenant-uuid>.instance=<instance-uuid>'
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: false
|
|
|
|
# Log record: a resize failed for an instance; the message template ends
# with a bracketed reason field.
700.197:
    Type: Log
    Description: 'Resize failed for instance <instance_name> on host <host_name>[, reason = <reason_text>]'
    Entity_Instance_ID: 'tenant=<tenant-uuid>.instance=<instance-uuid>'
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: false
|
|
|
|
# Log record: a resize completed; the instance is enabled on the named
# host and is waiting for confirmation.
700.198:
    Type: Log
    Description: 'Resize complete for instance <instance_name> enabled on host <host_name> waiting for confirmation'
    Entity_Instance_ID: 'tenant=<tenant-uuid>.instance=<instance-uuid>'
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: false
|
|
|
|
# Log record: a resize-confirm was issued (by a tenant or by the system)
# against an instance; the template ends with a bracketed reason field.
700.199:
    Type: Log
    Description: 'Resize-Confirm issued <by <tenant_name>|by the system> against instance <instance_name> owned by <tenant_name> on host <host_name>[, reason = <reason_text>]'
    Entity_Instance_ID: 'tenant=<tenant-uuid>.instance=<instance-uuid>'
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: false
|
|
|
|
# Log record: a resize-confirm is in progress for an instance.
# NOTE(review): unquoted, this key loads as the float 700.2 (trailing zeros
# lost) - confirm consumers re-pad it to three decimals.
700.200:
    Type: Log
    Description: 'Resize-Confirm inprogress for instance <instance_name> on host <host_name>'
    Entity_Instance_ID: 'tenant=<tenant-uuid>.instance=<instance-uuid>'
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: false
|
|
|
|
# Log record: a resize-confirm was rejected for an instance; the message
# template ends with a bracketed reason field.
700.201:
    Type: Log
    Description: 'Resize-Confirm rejected for instance <instance_name> on host <host_name>[, reason = <reason_text>]'
    Entity_Instance_ID: 'tenant=<tenant-uuid>.instance=<instance-uuid>'
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: false
|
|
|
|
# Log record: a resize-confirm was cancelled for an instance; the message
# template ends with a bracketed reason field.
700.202:
    Type: Log
    Description: 'Resize-Confirm cancelled for instance <instance_name> on host <host_name>[, reason = <reason_text>]'
    Entity_Instance_ID: 'tenant=<tenant-uuid>.instance=<instance-uuid>'
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: false
|
|
|
|
# Log record: a resize-confirm failed for an instance; the message
# template ends with a bracketed reason field.
700.203:
    Type: Log
    Description: 'Resize-Confirm failed for instance <instance_name> on host <host_name>[, reason = <reason_text>]'
    Entity_Instance_ID: 'tenant=<tenant-uuid>.instance=<instance-uuid>'
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: false
|
|
|
|
# Log record: a resize-confirm completed and the instance is enabled on
# the named host.
700.204:
    Type: Log
    Description: 'Resize-Confirm complete for instance <instance_name> enabled on host <host_name>'
    Entity_Instance_ID: 'tenant=<tenant-uuid>.instance=<instance-uuid>'
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: false
|
|
|
|
# Log record: a resize-revert was issued (by a tenant or by the system)
# against an instance; the template ends with a bracketed reason field.
700.205:
    Type: Log
    Description: 'Resize-Revert issued <by <tenant_name>|by the system> against instance <instance_name> owned by <tenant_name> on host <host_name>[, reason = <reason_text>]'
    Entity_Instance_ID: 'tenant=<tenant-uuid>.instance=<instance-uuid>'
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: false
|
|
|
|
# Log record: a resize-revert is in progress for an instance.
700.206:
    Type: Log
    Description: 'Resize-Revert inprogress for instance <instance_name> on host <host_name>'
    Entity_Instance_ID: 'tenant=<tenant-uuid>.instance=<instance-uuid>'
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: false
|
|
|
|
# Log record: a resize-revert was rejected for an instance (the template
# also names the owning tenant); it ends with a bracketed reason field.
700.207:
    Type: Log
    Description: 'Resize-Revert rejected for instance <instance_name> owned by <tenant_name> on host <host_name>[, reason = <reason_text>]'
    Entity_Instance_ID: 'tenant=<tenant-uuid>.instance=<instance-uuid>'
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: false
|
|
|
|
# Log record: a resize-revert was cancelled for an instance; the message
# template ends with a bracketed reason field.
700.208:
    Type: Log
    Description: 'Resize-Revert cancelled for instance <instance_name> on host <host_name>[, reason = <reason_text>]'
    Entity_Instance_ID: 'tenant=<tenant-uuid>.instance=<instance-uuid>'
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: false
|
|
|
|
# Log record: a resize-revert failed for an instance; the message template
# ends with a bracketed reason field.
700.209:
    Type: Log
    Description: 'Resize-Revert failed for instance <instance_name> on host <host_name>[, reason = <reason_text>]'
    Entity_Instance_ID: 'tenant=<tenant-uuid>.instance=<instance-uuid>'
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: false
|
|
|
|
# Log record: a resize-revert completed and the instance is enabled on the
# named host.
# NOTE(review): unquoted, this key loads as the float 700.21 (trailing zero
# lost) - confirm consumers re-pad it to three decimals.
700.210:
    Type: Log
    Description: 'Resize-Revert complete for instance <instance_name> enabled on host <host_name>'
    Entity_Instance_ID: 'tenant=<tenant-uuid>.instance=<instance-uuid>'
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: false
|
|
|
|
# Log record (severity major): guest heartbeat was established for an
# instance on the named host.
700.211:
    Type: Log
    Description: 'Guest Heartbeat established for instance <instance_name> on host <host_name>'
    Entity_Instance_ID: 'tenant=<tenant-uuid>.instance=<instance-uuid>'
    Severity: major
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: false
|
|
|
|
# Log record (severity major): guest heartbeat was disconnected for an
# instance on the named host.
700.212:
    Type: Log
    Description: 'Guest Heartbeat disconnected for instance <instance_name> on host <host_name>'
    Entity_Instance_ID: 'tenant=<tenant-uuid>.instance=<instance-uuid>'
    Severity: major
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: false
|
|
|
|
# Log record: guest heartbeat failed for an instance; the message template
# ends with a bracketed reason field.
700.213:
    Type: Log
    Description: 'Guest Heartbeat failed for instance <instance_name>[, reason = <reason_text>]'
    Entity_Instance_ID: 'tenant=<tenant-uuid>.instance=<instance-uuid>'
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: false
|
|
|
|
# Log record: an instance was renamed; the template carries both the old
# and the new instance name.
700.214:
    Type: Log
    Description: 'Instance <instance_name> has been renamed to <new_instance_name> owned by <tenant_name> on host <host_name>'
    Entity_Instance_ID: 'tenant=<tenant-uuid>.instance=<instance-uuid>'
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: false
|
|
|
|
# Log record: a guest health check failed for an instance; the message
# template ends with a bracketed reason field.
700.215:
    Type: Log
    Description: 'Guest Health Check failed for instance <instance_name>[, reason = <reason_text>]'
    Entity_Instance_ID: 'tenant=<tenant-uuid>.instance=<instance-uuid>'
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: false
|
|
|
|
# Log record: multi-node recovery mode was entered; reported against the
# vim subsystem rather than a tenant instance.
700.216:
    Type: Log
    Description: 'Entered Multi-Node Recovery Mode'
    Entity_Instance_ID: 'subsystem=vim'
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: false
|
|
|
|
|
|
# Log record: multi-node recovery mode was exited; reported against the
# vim subsystem rather than a tenant instance.
700.217:
    Type: Log
    Description: 'Exited Multi-Node Recovery Mode'
    Entity_Instance_ID: 'subsystem=vim'
    Severity: critical
    Alarm_Type: equipment
    Probable_Cause: unspecified-reason
    Service_Affecting: false
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# STORAGE
# ---------------------------------------------------------------------------
|
|
|
|
# Alarm: storage alarm condition reported against the cluster. Severity may
# be critical or major; Service_Affecting is keyed by severity and is true
# only for critical.
# Maintenance_Action and Inhibit_Alarms carry no value (explicit null).
800.001:
    Type: Alarm
    Description: |-
        Storage Alarm Condition:
        1 mons down, quorum 1,2 controller-1,storage-0
    Entity_Instance_ID: 'cluster=<dist-fs-uuid>'
    Severity:
        - critical
        - major
    Proposed_Repair_Action: 'If problem persists, contact next level of support.'
    Maintenance_Action: null
    Inhibit_Alarms: null
    Alarm_Type: equipment
    Probable_Cause: equipment-malfunction
    Service_Affecting:
        critical: true
        major: false
    Suppression: false
|
|
|
|
# Alarm (critical only): potential data loss because a storage replication
# group has no available OSDs; reported against a cluster peer group.
# The repair action is a folded scalar, so it reads as one line of text.
# Maintenance_Action and Inhibit_Alarms carry no value (explicit null).
# NOTE(review): unquoted, this key loads as the float 800.01 (trailing zero
# lost) - confirm consumers re-pad it to three decimals.
800.010:
    Type: Alarm
    Description: 'Potential data loss. No available OSDs in storage replication group.'
    Entity_Instance_ID: 'cluster=<dist-fs-uuid>.peergroup=<group-x>'
    Severity:
        - critical
    Proposed_Repair_Action: >-
        Ensure storage hosts from replication group are unlocked and available.
        Check if OSDs of each storage host are up and running.
        If problem persists contact next level of support.
    Maintenance_Action: null
    Inhibit_Alarms: null
    Alarm_Type: equipment
    Probable_Cause: equipment-malfunction
    Service_Affecting:
        critical: true
    Suppression: false
|
|
|
|
# Alarm (major only): loss of replication in a storage peer group.
# The repair action is a folded scalar, so it reads as one line of text.
# Maintenance_Action and Inhibit_Alarms carry no value (explicit null).
800.011:
    Type: Alarm
    Description: 'Loss of replication in peergroup.'
    Entity_Instance_ID: 'cluster=<dist-fs-uuid>.peergroup=<group-x>'
    Severity:
        - major
    Proposed_Repair_Action: >-
        Ensure storage hosts from replication group are unlocked and available.
        Check if OSDs of each storage host are up and running.
        If problem persists contact next level of support.
    Maintenance_Action: null
    Inhibit_Alarms: null
    Alarm_Type: equipment
    Probable_Cause: equipment-malfunction
    Service_Affecting:
        major: true
    Suppression: false
|
|
|
|
# Log record (severity warning) covering image storage and upload problems.
# Description, Entity_Instance_ID and Alarm_Type are lists of eleven entries
# each.
# NOTE(review): the three lists look positionally paired (entry N of each
# belongs together) - confirm against the consumer before reordering.
800.002:
    Type: Log
    Description:
        - "Image storage media is full: There is not enough disk space on the image storage media."
        - "Instance <instance name> snapshot failed: There is not enough disk space on the image storage media."
        - "Supplied <attrs> (<supplied>) and <attrs> generated from uploaded image (<actual>) did not match. Setting image status to 'killed'."
        - "Error in store configuration. Adding images to store is disabled."
        - "Forbidden upload attempt: <exception>"
        - "Insufficient permissions on image storage media: <exception>"
        - "Denying attempt to upload image larger than <size> bytes."
        - "Denying attempt to upload image because it exceeds the quota: <exception>"
        - "Received HTTP error while uploading image <image_id>"
        - "Client disconnected before sending all data to backend"
        - "Failed to upload image <image_id>"
    Entity_Instance_ID:
        - "image=<image-uuid>, instance=<instance-uuid>"
        - "tenant=<tenant-uuid>, instance=<instance-uuid>"
        - "image=<image-uuid>, instance=<instance-uuid>"
        - "image=<image-uuid>, instance=<instance-uuid>"
        - "image=<image-uuid>, instance=<instance-uuid>"
        - "image=<image-uuid>, instance=<instance-uuid>"
        - "image=<image-uuid>, instance=<instance-uuid>"
        - "image=<image-uuid>, instance=<instance-uuid>"
        - "image=<image-uuid>, instance=<instance-uuid>"
        - "image=<image-uuid>, instance=<instance-uuid>"
        - "image=<image-uuid>, instance=<instance-uuid>"
    Alarm_Type:
        - physical-violation
        - physical-violation
        - integrity-violation
        - integrity-violation
        - security-service-or-mechanism-violation
        - security-service-or-mechanism-violation
        - security-service-or-mechanism-violation
        - security-service-or-mechanism-violation
        - communication
        - communication
        - operational-violation
    Severity: warning
    Probable_Cause: unspecified-reason
    Service_Affecting: false
|
|
|
|
|
|
# Alarm (minor): the total ceph cluster size is greater than the sum of the
# individual pool quotas; the repair action is to update the pool quotas.
# Maintenance_Action and Inhibit_Alarms carry no value (explicit null).
800.003:
    Type: Alarm
    Description: |-
        Storage Alarm Condition:
        total ceph cluster size greater than sum of individual pool quotas
    Entity_Instance_ID: 'cluster=<dist-fs-uuid>'
    Severity: minor
    Proposed_Repair_Action: 'Update ceph storage pool quotas to use all available cluster space.'
    Maintenance_Action: null
    Inhibit_Alarms: null
    Alarm_Type: operational-violation
    Probable_Cause: configuration-out-of-date
    Service_Affecting: false
    Suppression: false
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# SOFTWARE
# ---------------------------------------------------------------------------
|
|
|
|
# Alarm (minor): a patching operation is in progress; raised against
# host=controller.
# Maintenance_Action and Inhibit_Alarms carry no value (explicit null).
900.001:
    Type: Alarm
    Description: 'Patching operation in progress.'
    Entity_Instance_ID: 'host=controller'
    Severity: minor
    Proposed_Repair_Action: 'Complete reboots of affected hosts.'
    Maintenance_Action: null
    Inhibit_Alarms: null
    Alarm_Type: environmental
    Probable_Cause: unspecified-reason
    Service_Affecting: false
    Suppression: false
|
|
|
|
# Alarm (warning): an obsolete patch is present in the system; raised
# against host=controller.
# Maintenance_Action and Inhibit_Alarms carry no value (explicit null).
900.002:
    Type: Alarm
    Description: 'Obsolete patch in system.'
    Entity_Instance_ID: 'host=controller'
    Severity: warning
    Proposed_Repair_Action: 'Remove and delete obsolete patches.'
    Maintenance_Action: null
    Inhibit_Alarms: null
    Alarm_Type: environmental
    Probable_Cause: unspecified-reason
    Service_Affecting: false
    Suppression: false
|
|
|
|
# Alarm (major): patch installation failed on a host; raised against the
# affected host name.
# Maintenance_Action and Inhibit_Alarms carry no value (explicit null).
900.003:
    Type: Alarm
    Description: 'Patch host install failure.'
    Entity_Instance_ID: 'host=<hostname>'
    Severity: major
    Proposed_Repair_Action: 'Undo patching operation.'
    Maintenance_Action: null
    Inhibit_Alarms: null
    Alarm_Type: environmental
    Probable_Cause: unspecified-reason
    Service_Affecting: false
    Suppression: false
|
|
|
|
...
|