LBAAS-843 - Add a mechanism to save failed load balancers for forensics
Change-Id: Iae6f9677801fba5d68cd42212111ef9c9b9cf22f

@@ -140,6 +140,12 @@ pid = /var/run/libra/libra_admin_api.pid
 #stats_enable = False
 #stats_purge_enable = False
 
+# These options set the number of instances to save for forensic analysis
+# when a device fails. A value of 0 disables saving any instances.
+# Saved devices get a status of "SAVED-OFFLINE" or "SAVED-ONLINE".
+#offline_failed_save = 0
+#online_failed_save = 0
+
 # The following are the seconds of each minute
 # that the timers will run. The defaults should
 # not need to be changed.
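
The new options in the sample config above ship commented out, so saving is
disabled by default. A minimal sketch of what an operator would uncomment to
enable the feature (the [admin_api] section name follows the
cfg.CONF['admin_api'] lookups below; the values are illustrative):

    [admin_api]
    # Keep up to two failed devices of each kind for forensic analysis
    offline_failed_save = 2
    online_failed_save = 2
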
@@ -139,6 +139,14 @@ cfg.CONF.register_opts(
         cfg.IntOpt('exists_timer_seconds',
                    default=55,
                    help='Second of each minute exists timer should run'),
-    ],
+        cfg.IntOpt('offline_failed_save',
+                   default=0,
+                   help='Number of failed offline instances to save '
+                        'for forensic analysis'),
+        cfg.IntOpt('online_failed_save',
+                   default=0,
+                   help='Number of failed online instances to save '
+                        'for forensic analysis'),
+    ],
     group=adminapi_group
 )
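
For readers unfamiliar with oslo.config: options registered this way carry
their defaults immediately and are overridden when the daemon parses its
config file at startup. A self-contained sketch of the pattern, assuming only
that oslo.config is installed (the group title and the final print are
illustrative, not part of this change):

    from oslo.config import cfg

    # Reproduce the registration pattern from the hunk above.
    group = cfg.OptGroup('admin_api', 'Admin API options')
    cfg.CONF.register_group(group)
    cfg.CONF.register_opts(
        [cfg.IntOpt('online_failed_save', default=0,
                    help='Number of failed online instances to save')],
        group=group
    )

    # Modules read the value back through the group name:
    print(cfg.CONF['admin_api'].online_failed_save)  # 0 until overridden
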
@@ -17,6 +17,7 @@ from libra.common.api.lbaas import loadbalancers_devices, Vip, Counters
 from libra.common.api.lbaas import Device, LoadBalancer, db_session
 from libra.common.api.gearman_client import submit_job, submit_vip_job
 from libra.openstack.common import log
+from oslo.config import cfg
 
 
 LOG = log.getLogger(__name__)

@@ -25,6 +26,7 @@ LOG = log.getLogger(__name__)
 def rebuild_device(device_id):
     new_device_id = None
     new_device_name = None
+    ONLINE_FAILED_SAVE = cfg.CONF['admin_api'].online_failed_save
     with db_session() as session:
         new_device = session.query(Device).\
             filter(~Device.id.in_(

@@ -72,9 +74,14 @@ def rebuild_device(device_id):
         vip = session.query(Vip).filter(Vip.device == device_id).first()
         if vip:
             vip.device = new_device_id
+        saved_count = session.query(Device).\
+            filter(Device.status == 'SAVED-ONLINE').count()
         device = session.query(Device).\
             filter(Device.id == device_id).first()
-        device.status = 'DELETED'
+        if ONLINE_FAILED_SAVE > 0 and saved_count < ONLINE_FAILED_SAVE:
+            device.status = 'SAVED-ONLINE'
+        else:
+            device.status = 'DELETED'
         lbs = session.query(LoadBalancer).\
             join(LoadBalancer.devices).\
             filter(Device.id == new_device_id).all()
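
The status decision above, and its offline twin in the driver below, reduce
to a single rule: keep the failed device while the count of already-saved
devices is below the configured cap, otherwise fall back to the previous
DELETED behaviour. A minimal sketch of that rule (next_status is a
hypothetical helper, not part of this change):

    def next_status(saved_count, cap, saved_status):
        """Status for a failed device: save until the cap is hit.

        A cap of 0 (the default) disables saving entirely.
        """
        if cap > 0 and saved_count < cap:
            return saved_status  # 'SAVED-ONLINE' or 'SAVED-OFFLINE'
        return 'DELETED'

    assert next_status(0, 0, 'SAVED-ONLINE') == 'DELETED'       # feature off
    assert next_status(1, 2, 'SAVED-ONLINE') == 'SAVED-ONLINE'  # room left
    assert next_status(2, 2, 'SAVED-OFFLINE') == 'DELETED'      # cap reached
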
@@ -16,6 +16,7 @@ from libra.common.api.lbaas import Device, LoadBalancer, db_session
 from libra.common.api.lbaas import loadbalancers_devices
 from libra.admin_api.library.rebuild import rebuild_device
 from libra.openstack.common import log
+from oslo.config import cfg
 
 
 LOG = log.getLogger(__name__)

@@ -50,10 +51,19 @@ class DbDriver(AlertDriver):
         self._rebuild_device(device_id)
 
     def send_delete(self, message, device_id, device_ip, device_name):
+        OFFLINE_FAILED_SAVE = cfg.CONF['admin_api'].offline_failed_save
         with db_session() as session:
-            session.query(Device).\
-                filter(Device.id == device_id).\
-                update({"status": "DELETED"}, synchronize_session='fetch')
+            saved_count = session.query(Device).\
+                filter(Device.status == 'SAVED-OFFLINE').count()
+            if OFFLINE_FAILED_SAVE > 0 and saved_count < OFFLINE_FAILED_SAVE:
+                session.query(Device).\
+                    filter(Device.id == device_id).\
+                    update({"status": "SAVED-OFFLINE"},
+                           synchronize_session='fetch')
+            else:
+                session.query(Device).\
+                    filter(Device.id == device_id).\
+                    update({"status": "DELETED"}, synchronize_session='fetch')
             session.commit()
 
     def send_node_change(self, message, lbid, degraded):
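
Saved devices carry a distinct status rather than DELETED, so the forensic
follow-up is a plain status query. A sketch assuming Device and db_session
behave as in the imports above (the name column is inferred from the
device_name parameters in this change):

    from libra.common.api.lbaas import Device, db_session

    # List every device currently held for forensic analysis.
    with db_session() as session:
        saved = session.query(Device).\
            filter(Device.status.in_(['SAVED-OFFLINE', 'SAVED-ONLINE'])).all()
        for device in saved:
            print(device.id, device.name, device.status)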