LBAAS-843 - Add a mechanism to save failed load balancers for forensics
Change-Id: Iae6f9677801fba5d68cd42212111ef9c9b9cf22f
This commit is contained in:
@@ -140,6 +140,12 @@ pid = /var/run/libra/libra_admin_api.pid
|
|||||||
#stats_enable = False
|
#stats_enable = False
|
||||||
#stats_purge_enable = False
|
#stats_purge_enable = False
|
||||||
|
|
||||||
|
# These are the numbers of instances to save for forensic analysis on failure
|
||||||
|
# A value of 0 disables saving any instances
|
||||||
|
# Saved failed devices will have a status of "SAVED-OFFLINE" or "SAVED-ONLINE"
|
||||||
|
#offline_failed_save = 0
|
||||||
|
#online_failed_save = 0
|
||||||
|
|
||||||
# The following are the seconds of each minute
|
# The following are the seconds of each minute
|
||||||
# that the timers will run. The defaults should
|
# that the timers will run. The defaults should
|
||||||
# not need to be changed.
|
# not need to be changed.
|
||||||
|
|||||||
@@ -139,6 +139,14 @@ cfg.CONF.register_opts(
|
|||||||
cfg.IntOpt('exists_timer_seconds',
|
cfg.IntOpt('exists_timer_seconds',
|
||||||
default=55,
|
default=55,
|
||||||
help='Second of each minute exists timer should run'),
|
help='Second of each minute exists timer should run'),
|
||||||
],
|
cfg.IntOpt('offline_failed_save',
|
||||||
|
default=0,
|
||||||
|
help='Number of failed offline instances to save '
|
||||||
|
'for forensic analysis'),
|
||||||
|
cfg.IntOpt('online_failed_save',
|
||||||
|
default=0,
|
||||||
|
help='Number of failed online instances to save '
|
||||||
|
'for forensic analysis'),
|
||||||
|
],
|
||||||
group=adminapi_group
|
group=adminapi_group
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -17,6 +17,7 @@ from libra.common.api.lbaas import loadbalancers_devices, Vip, Counters
|
|||||||
from libra.common.api.lbaas import Device, LoadBalancer, db_session
|
from libra.common.api.lbaas import Device, LoadBalancer, db_session
|
||||||
from libra.common.api.gearman_client import submit_job, submit_vip_job
|
from libra.common.api.gearman_client import submit_job, submit_vip_job
|
||||||
from libra.openstack.common import log
|
from libra.openstack.common import log
|
||||||
|
from oslo.config import cfg
|
||||||
|
|
||||||
|
|
||||||
LOG = log.getLogger(__name__)
|
LOG = log.getLogger(__name__)
|
||||||
@@ -25,6 +26,7 @@ LOG = log.getLogger(__name__)
|
|||||||
def rebuild_device(device_id):
|
def rebuild_device(device_id):
|
||||||
new_device_id = None
|
new_device_id = None
|
||||||
new_device_name = None
|
new_device_name = None
|
||||||
|
ONLINE_FAILED_SAVE = cfg.CONF['admin_api'].online_failed_save
|
||||||
with db_session() as session:
|
with db_session() as session:
|
||||||
new_device = session.query(Device).\
|
new_device = session.query(Device).\
|
||||||
filter(~Device.id.in_(
|
filter(~Device.id.in_(
|
||||||
@@ -72,9 +74,14 @@ def rebuild_device(device_id):
|
|||||||
vip = session.query(Vip).filter(Vip.device == device_id).first()
|
vip = session.query(Vip).filter(Vip.device == device_id).first()
|
||||||
if vip:
|
if vip:
|
||||||
vip.device = new_device_id
|
vip.device = new_device_id
|
||||||
|
saved_count = session.query(Device).\
|
||||||
|
filter(Device.status == 'SAVED-ONLINE').count()
|
||||||
device = session.query(Device).\
|
device = session.query(Device).\
|
||||||
filter(Device.id == device_id).first()
|
filter(Device.id == device_id).first()
|
||||||
device.status = 'DELETED'
|
if ONLINE_FAILED_SAVE > 0 and saved_count < ONLINE_FAILED_SAVE:
|
||||||
|
device.status = 'SAVED-ONLINE'
|
||||||
|
else:
|
||||||
|
device.status = 'DELETED'
|
||||||
lbs = session.query(LoadBalancer).\
|
lbs = session.query(LoadBalancer).\
|
||||||
join(LoadBalancer.devices).\
|
join(LoadBalancer.devices).\
|
||||||
filter(Device.id == new_device_id).all()
|
filter(Device.id == new_device_id).all()
|
||||||
|
|||||||
@@ -16,6 +16,7 @@ from libra.common.api.lbaas import Device, LoadBalancer, db_session
|
|||||||
from libra.common.api.lbaas import loadbalancers_devices
|
from libra.common.api.lbaas import loadbalancers_devices
|
||||||
from libra.admin_api.library.rebuild import rebuild_device
|
from libra.admin_api.library.rebuild import rebuild_device
|
||||||
from libra.openstack.common import log
|
from libra.openstack.common import log
|
||||||
|
from oslo.config import cfg
|
||||||
|
|
||||||
|
|
||||||
LOG = log.getLogger(__name__)
|
LOG = log.getLogger(__name__)
|
||||||
@@ -50,10 +51,19 @@ class DbDriver(AlertDriver):
|
|||||||
self._rebuild_device(device_id)
|
self._rebuild_device(device_id)
|
||||||
|
|
||||||
def send_delete(self, message, device_id, device_ip, device_name):
|
def send_delete(self, message, device_id, device_ip, device_name):
|
||||||
|
OFFLINE_FAILED_SAVE = cfg.CONF['admin_api'].offline_failed_save
|
||||||
with db_session() as session:
|
with db_session() as session:
|
||||||
session.query(Device).\
|
saved_count = session.query(Device).\
|
||||||
filter(Device.id == device_id).\
|
filter(Device.status == 'SAVED-OFFLINE').count()
|
||||||
update({"status": "DELETED"}, synchronize_session='fetch')
|
if OFFLINE_FAILED_SAVE > 0 and saved_count < OFFLINE_FAILED_SAVE:
|
||||||
|
session.query(Device).\
|
||||||
|
filter(Device.id == device_id).\
|
||||||
|
update({"status": "SAVED-OFFLINE"},\
|
||||||
|
synchronize_session='fetch')
|
||||||
|
else:
|
||||||
|
session.query(Device).\
|
||||||
|
filter(Device.id == device_id).\
|
||||||
|
update({"status": "DELETED"}, synchronize_session='fetch')
|
||||||
session.commit()
|
session.commit()
|
||||||
|
|
||||||
def send_node_change(self, message, lbid, degraded):
|
def send_node_change(self, message, lbid, degraded):
|
||||||
|
|||||||
Reference in New Issue
Block a user