LBAAS-843 - Add a mechanism to save failed load balancers for forensics
Change-Id: Iae6f9677801fba5d68cd42212111ef9c9b9cf22f
This commit is contained in:
		| @@ -140,6 +140,12 @@ pid = /var/run/libra/libra_admin_api.pid | |||||||
| #stats_enable = False | #stats_enable = False | ||||||
| #stats_purge_enable = False | #stats_purge_enable = False | ||||||
|  |  | ||||||
|  | # The maximum number of failed instances to keep for forensic analysis | ||||||
|  | # A value of 0 disables saving any instances | ||||||
|  | # Saved devices will have a status of "SAVED-OFFLINE" or "SAVED-ONLINE" | ||||||
|  | #offline_failed_save = 0 | ||||||
|  | #online_failed_save = 0 | ||||||
|  |  | ||||||
| # The following are the seconds of each minute | # The following are the seconds of each minute | ||||||
| # that the timers will run.  The defaults should | # that the timers will run.  The defaults should | ||||||
| # not need to be changed. | # not need to be changed. | ||||||
|   | |||||||
| @@ -139,6 +139,14 @@ cfg.CONF.register_opts( | |||||||
|         cfg.IntOpt('exists_timer_seconds', |         cfg.IntOpt('exists_timer_seconds', | ||||||
|                    default=55, |                    default=55, | ||||||
|                    help='Second of each minute exists timer should run'), |                    help='Second of each minute exists timer should run'), | ||||||
|     ], |         cfg.IntOpt('offline_failed_save', | ||||||
|  |                    default=0, | ||||||
|  |                    help='Number of failed offline instances to save ' | ||||||
|  |                         'for forensic analysis'), | ||||||
|  |          cfg.IntOpt('online_failed_save', | ||||||
|  |                    default=0, | ||||||
|  |                    help='Number of failed online instances to save ' | ||||||
|  |                         'for forensic analysis'), | ||||||
|  |      ], | ||||||
|     group=adminapi_group |     group=adminapi_group | ||||||
| ) | ) | ||||||
|   | |||||||
| @@ -17,6 +17,7 @@ from libra.common.api.lbaas import loadbalancers_devices, Vip, Counters | |||||||
| from libra.common.api.lbaas import Device, LoadBalancer, db_session | from libra.common.api.lbaas import Device, LoadBalancer, db_session | ||||||
| from libra.common.api.gearman_client import submit_job, submit_vip_job | from libra.common.api.gearman_client import submit_job, submit_vip_job | ||||||
| from libra.openstack.common import log | from libra.openstack.common import log | ||||||
|  | from oslo.config import cfg | ||||||
|  |  | ||||||
|  |  | ||||||
| LOG = log.getLogger(__name__) | LOG = log.getLogger(__name__) | ||||||
| @@ -25,6 +26,7 @@ LOG = log.getLogger(__name__) | |||||||
| def rebuild_device(device_id): | def rebuild_device(device_id): | ||||||
|     new_device_id = None |     new_device_id = None | ||||||
|     new_device_name = None |     new_device_name = None | ||||||
|  |     ONLINE_FAILED_SAVE = cfg.CONF['admin_api'].online_failed_save | ||||||
|     with db_session() as session: |     with db_session() as session: | ||||||
|         new_device = session.query(Device).\ |         new_device = session.query(Device).\ | ||||||
|             filter(~Device.id.in_( |             filter(~Device.id.in_( | ||||||
| @@ -72,9 +74,14 @@ def rebuild_device(device_id): | |||||||
|         vip = session.query(Vip).filter(Vip.device == device_id).first() |         vip = session.query(Vip).filter(Vip.device == device_id).first() | ||||||
|         if vip: |         if vip: | ||||||
|             vip.device = new_device_id |             vip.device = new_device_id | ||||||
|  |         saved_count = session.query(Device).\ | ||||||
|  |             filter(Device.status == 'SAVED-ONLINE').count() | ||||||
|         device = session.query(Device).\ |         device = session.query(Device).\ | ||||||
|             filter(Device.id == device_id).first() |             filter(Device.id == device_id).first() | ||||||
|         device.status = 'DELETED' |         if ONLINE_FAILED_SAVE > 0 and saved_count < ONLINE_FAILED_SAVE: | ||||||
|  |             device.status = 'SAVED-ONLINE' | ||||||
|  |         else: | ||||||
|  |             device.status = 'DELETED' | ||||||
|         lbs = session.query(LoadBalancer).\ |         lbs = session.query(LoadBalancer).\ | ||||||
|             join(LoadBalancer.devices).\ |             join(LoadBalancer.devices).\ | ||||||
|             filter(Device.id == new_device_id).all() |             filter(Device.id == new_device_id).all() | ||||||
|   | |||||||
| @@ -16,6 +16,7 @@ from libra.common.api.lbaas import Device, LoadBalancer, db_session | |||||||
| from libra.common.api.lbaas import loadbalancers_devices | from libra.common.api.lbaas import loadbalancers_devices | ||||||
| from libra.admin_api.library.rebuild import rebuild_device | from libra.admin_api.library.rebuild import rebuild_device | ||||||
| from libra.openstack.common import log | from libra.openstack.common import log | ||||||
|  | from oslo.config import cfg | ||||||
|  |  | ||||||
|  |  | ||||||
| LOG = log.getLogger(__name__) | LOG = log.getLogger(__name__) | ||||||
| @@ -50,10 +51,19 @@ class DbDriver(AlertDriver): | |||||||
|             self._rebuild_device(device_id) |             self._rebuild_device(device_id) | ||||||
|  |  | ||||||
|     def send_delete(self, message, device_id, device_ip, device_name): |     def send_delete(self, message, device_id, device_ip, device_name): | ||||||
|  |         OFFLINE_FAILED_SAVE = cfg.CONF['admin_api'].offline_failed_save | ||||||
|         with db_session() as session: |         with db_session() as session: | ||||||
|             session.query(Device).\ |             saved_count = session.query(Device).\ | ||||||
|                 filter(Device.id == device_id).\ |                 filter(Device.status == 'SAVED-OFFLINE').count() | ||||||
|                 update({"status": "DELETED"}, synchronize_session='fetch') |             if OFFLINE_FAILED_SAVE > 0 and saved_count < OFFLINE_FAILED_SAVE: | ||||||
|  |                 session.query(Device).\ | ||||||
|  |                     filter(Device.id == device_id).\ | ||||||
|  |                     update({"status": "SAVED-OFFLINE"},\ | ||||||
|  |                            synchronize_session='fetch') | ||||||
|  |             else: | ||||||
|  |                 session.query(Device).\ | ||||||
|  |                     filter(Device.id == device_id).\ | ||||||
|  |                     update({"status": "DELETED"}, synchronize_session='fetch') | ||||||
|             session.commit() |             session.commit() | ||||||
|  |  | ||||||
|     def send_node_change(self, message, lbid, degraded): |     def send_node_change(self, message, lbid, degraded): | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Michael Johnson
					Michael Johnson