diff --git a/libra/admin_api/device_pool/manage_pool.py b/libra/admin_api/device_pool/manage_pool.py
index 768636b3..ce21e9e2 100644
--- a/libra/admin_api/device_pool/manage_pool.py
+++ b/libra/admin_api/device_pool/manage_pool.py
@@ -14,6 +14,7 @@
 
 import ipaddress
 import threading
+import uuid
 from datetime import datetime
 
 from gearman.constants import JOB_UNKNOWN
@@ -76,7 +77,10 @@ class Pool(object):
                         'action': 'DELETE_DEVICE',
                         'name': device.name
                     }
-                    message.append(dict(task='libra_pool_mgm', data=job_data))
+                    unique_uuid = str(uuid.uuid4())
+                    message.append(dict(task='libra_pool_mgm',
+                                        data=job_data,
+                                        unique=unique_uuid))
 
                 counter = session.query(Counters).\
                     filter(Counters.name == 'devices_deleted').first()
@@ -177,7 +181,10 @@ class Pool(object):
         it = 0
         job_data = {'action': 'BUILD_DEVICE'}
         while it < count:
-            message.append(dict(task='libra_pool_mgm', data=job_data))
+            unique_uuid = str(uuid.uuid4())
+            message.append(dict(task='libra_pool_mgm',
+                                data=job_data,
+                                unique=unique_uuid))
             it += 1
         gear = GearmanWork()
         gear.send_create_message(message)
@@ -187,7 +194,10 @@ class Pool(object):
         it = 0
         job_data = {'action': 'BUILD_IP'}
         while it < count:
-            message.append(dict(task='libra_pool_mgm', data=job_data))
+            unique_uuid = str(uuid.uuid4())
+            message.append(dict(task='libra_pool_mgm',
+                                data=job_data,
+                                unique=unique_uuid))
             it += 1
         gear = GearmanWork()
         gear.send_vips_message(message)
diff --git a/libra/mgm/controllers/vip.py b/libra/mgm/controllers/vip.py
index f94771e9..ce386435 100644
--- a/libra/mgm/controllers/vip.py
+++ b/libra/mgm/controllers/vip.py
@@ -85,6 +85,9 @@ class AssignIpController(object):
                 .format(self.msg['name'], node_id)
             )
             nova.vip_assign(node_id, self.msg['ip'])
+
+            self._wait_until_ip_assigned(nova, node_id, self.msg['ip'])
+
             if cfg.CONF['mgm']['tcp_check_port']:
                 self.check_ip(self.msg['ip'],
                               cfg.CONF['mgm']['tcp_check_port'])
@@ -123,6 +126,44 @@ class AssignIpController(object):
                     raise
                 time.sleep(2)
 
+    def _wait_until_ip_assigned(self, nova, node_id, vip):
+        """ wait until nova reports the vip on the expected instance
+
+        Polls nova.vip_get_instance_id(vip) up to five times, sleeping
+        five seconds between polls, and returns as soon as the reported
+        instance ID equals node_id. A failed lookup is logged and then
+        treated like a mismatch, so it is retried on the next poll.
+
+        Raises Exception if the IDs still differ after the last poll.
+        """
+        current_instance_id = None
+        # The assign is attempted five times and all attempts must finish
+        # before the Gearman message times out at two minutes, which leaves
+        # up to 24 seconds per attempt. Aim for five polls spread over
+        # ~20 secs in each of those attempts to stay inside that budget.
+        for x in range(1, 6):
+            try:
+                current_instance_id = nova.vip_get_instance_id(vip)
+            except Exception as exc:
+                # Best effort: keep polling, but leave a trace of why the
+                # lookup failed instead of silently swallowing the error.
+                LOG.debug("Unable to get instance ID for VIP {0}: {1}"
+                          .format(vip, exc))
+            else:
+                if current_instance_id == node_id:
+                    LOG.debug(
+                        "Confirmed VIP {0} is assigned to instance ID {1}"
+                        .format(vip, current_instance_id)
+                    )
+                    return
+            LOG.debug("VIP has instance ID {0} but was assigned to "
+                      "instance {1}, sleeping"
+                      .format(current_instance_id, node_id)
+            )
+            if x < 5:
+                time.sleep(5)
+        raise Exception('VIP instance ID did not match assigned '
+                        'instance ID after 20 secs. Failing assignment')
 
 
 class RemoveIpController(object):
diff --git a/libra/mgm/nova.py b/libra/mgm/nova.py
index 5d9ae5e3..d53d2a47 100644
--- a/libra/mgm/nova.py
+++ b/libra/mgm/nova.py
@@ -135,6 +135,19 @@ class Node(object):
         except exceptions.ClientException:
             resp, body = self.nova.delete(url)
 
+    def vip_get_instance_id(self, vip):
+        """ get the instance id owning the vip """
+        vip_id = self._find_vip_id(vip)
+        url = '/os-floating-ips/{0}'.format(vip_id)
+        resp, body = self.nova.get(url)
+        if resp.status_code != 200:
+            raise Exception(
+                'Response code {0}, message {1} when getting ' \
+                'floating IP {2} details'
+                .format(resp.status_code, body, vip)
+            )
+        return body['floating_ip']['instance_id']
+
     def _find_vip_id(self, vip):
         url = '/os-floating-ips'
         resp, body = self.nova.get(url)