LBAAS-843 - Add a mechanism to save failed load balancers for forensics

Change-Id: Iae6f9677801fba5d68cd42212111ef9c9b9cf22f
Michael Johnson
2014-10-23 23:09:05 +00:00
parent 034614105b
commit dd4484a7f1
4 changed files with 36 additions and 5 deletions

@@ -140,6 +140,12 @@ pid = /var/run/libra/libra_admin_api.pid
 #stats_enable = False
 #stats_purge_enable = False
+# These are the number of instances to save for forensic analysis on failure
+# A value of 0 disables saving any instances
+# Failed devices will have a status of "SAVED-OFFLINE" and "SAVED-ONLINE"
+#offline_failed_save = 0
+#online_failed_save = 0
 # The following are the seconds of each minute
 # that the timers will run. The defaults should
 # not need to be changed..
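
Both options ship commented out and default to 0, so no failed instances are saved unless an operator opts in. As an illustration only (the values below are not part of this change), keeping up to two failed devices of each type would look like:

offline_failed_save = 2
online_failed_save = 2

Saved devices count against these limits, because the code below only preserves another failure while the number of already-saved devices is below the configured value.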

@@ -139,6 +139,14 @@ cfg.CONF.register_opts(
         cfg.IntOpt('exists_timer_seconds',
                    default=55,
                    help='Second of each minute exists timer should run'),
-    ],
+        cfg.IntOpt('offline_failed_save',
+                   default=0,
+                   help='Number of failed offline instances to save '
+                        'for forensic analysis'),
+        cfg.IntOpt('online_failed_save',
+                   default=0,
+                   help='Number of failed online instances to save '
+                        'for forensic analysis'),
+    ],
     group=adminapi_group
 )
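
For reference, a minimal sketch (not part of the commit) of how these limits are read back at runtime, assuming the options have been registered under the 'admin_api' group as above:

from oslo.config import cfg

# Both limits default to 0, which disables saving failed instances.
offline_limit = cfg.CONF['admin_api'].offline_failed_save
online_limit = cfg.CONF['admin_api'].online_failed_save

This is the same cfg.CONF['admin_api'] lookup the rebuild and alert-driver changes below rely on.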

@@ -17,6 +17,7 @@ from libra.common.api.lbaas import loadbalancers_devices, Vip, Counters
 from libra.common.api.lbaas import Device, LoadBalancer, db_session
 from libra.common.api.gearman_client import submit_job, submit_vip_job
 from libra.openstack.common import log
+from oslo.config import cfg
 
 LOG = log.getLogger(__name__)
@@ -25,6 +26,7 @@ LOG = log.getLogger(__name__)
 def rebuild_device(device_id):
     new_device_id = None
     new_device_name = None
+    ONLINE_FAILED_SAVE = cfg.CONF['admin_api'].online_failed_save
     with db_session() as session:
         new_device = session.query(Device).\
             filter(~Device.id.in_(
@@ -72,9 +74,14 @@ def rebuild_device(device_id):
         vip = session.query(Vip).filter(Vip.device == device_id).first()
         if vip:
             vip.device = new_device_id
+        saved_count = session.query(Device).\
+            filter(Device.status == 'SAVED-ONLINE').count()
         device = session.query(Device).\
             filter(Device.id == device_id).first()
-        device.status = 'DELETED'
+        if ONLINE_FAILED_SAVE > 0 and saved_count < ONLINE_FAILED_SAVE:
+            device.status = 'SAVED-ONLINE'
+        else:
+            device.status = 'DELETED'
         lbs = session.query(LoadBalancer).\
             join(LoadBalancer.devices).\
             filter(Device.id == new_device_id).all()
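
The new status decision is a capped-retention rule: hold a failed device for forensics only while fewer than the configured number are already being held, otherwise mark it DELETED as before. A hypothetical standalone helper (name and signature are illustrative, not part of the change) that captures the same rule:

def choose_failed_status(saved_count, save_limit, saved_status):
    # saved_count: devices already held in the given 'SAVED-*' status
    # save_limit: the offline_failed_save / online_failed_save setting; 0 disables saving
    # saved_status: 'SAVED-ONLINE' or 'SAVED-OFFLINE'
    if save_limit > 0 and saved_count < save_limit:
        return saved_status
    return 'DELETED'

For example, with online_failed_save = 2 and two devices already in 'SAVED-ONLINE', the next online failure is marked 'DELETED' as before.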

@@ -16,6 +16,7 @@ from libra.common.api.lbaas import Device, LoadBalancer, db_session
 from libra.common.api.lbaas import loadbalancers_devices
 from libra.admin_api.library.rebuild import rebuild_device
 from libra.openstack.common import log
+from oslo.config import cfg
 
 LOG = log.getLogger(__name__)
@@ -50,10 +51,19 @@ class DbDriver(AlertDriver):
             self._rebuild_device(device_id)
 
     def send_delete(self, message, device_id, device_ip, device_name):
+        OFFLINE_FAILED_SAVE = cfg.CONF['admin_api'].offline_failed_save
         with db_session() as session:
-            session.query(Device).\
-                filter(Device.id == device_id).\
-                update({"status": "DELETED"}, synchronize_session='fetch')
+            saved_count = session.query(Device).\
+                filter(Device.status == 'SAVED-OFFLINE').count()
+            if OFFLINE_FAILED_SAVE > 0 and saved_count < OFFLINE_FAILED_SAVE:
+                session.query(Device).\
+                    filter(Device.id == device_id).\
+                    update({"status": "SAVED-OFFLINE"},\
+                           synchronize_session='fetch')
+            else:
+                session.query(Device).\
+                    filter(Device.id == device_id).\
+                    update({"status": "DELETED"}, synchronize_session='fetch')
             session.commit()
 
     def send_node_change(self, message, lbid, degraded):
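
Once failures are parked in the SAVED-* states rather than deleted, they can be pulled back for analysis. A hypothetical lookup sketch (not part of the commit) that reuses the Device model and db_session helper imported above:

from libra.common.api.lbaas import Device, db_session

# Collect the ids of devices preserved for forensic analysis.
with db_session() as session:
    saved = session.query(Device).\
        filter(Device.status.in_(['SAVED-OFFLINE', 'SAVED-ONLINE'])).all()
    saved_ids = [device.id for device in saved]

Nothing in this change appears to age saved devices out automatically, so returning them to DELETED after analysis is left to the operator.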