LBAAS-825 - Better OFFLINE pool failure logging
Change-Id: Ie2b40a1ab50150f782eee405a93d450d22db28e7
This commit is contained in:
@@ -63,6 +63,8 @@ class GearJobs(object):
|
||||
continue
|
||||
if ping.timed_out:
|
||||
# Ping timeout
|
||||
LOG.warn("Load balancer %s ping timed out. Retrying",
|
||||
ping.job.task);
|
||||
retry_list.append(ping.job.task)
|
||||
continue
|
||||
if ping.result['hpcs_response'] == 'FAIL':
|
||||
@@ -101,6 +103,8 @@ class GearJobs(object):
|
||||
continue
|
||||
if ping.timed_out:
|
||||
# Ping timeout
|
||||
LOG.error('Load balancer %s ping timed out again. ' \
|
||||
'Marking failed.', ping.job.task);
|
||||
failed_list.append(ping.job.task)
|
||||
continue
|
||||
if ping.result['hpcs_response'] == 'FAIL':
|
||||
@@ -109,7 +113,12 @@ class GearJobs(object):
|
||||
ping.result['status'] == 'DELETED'
|
||||
):
|
||||
continue
|
||||
# Error returned by Gearman
|
||||
# Error returned by worker via Gearman
|
||||
LOG.error('Load balancer %s reported failed by the ' \
|
||||
'worker due to: %s',
|
||||
ping.job.task,
|
||||
ping.result['hpcs_error']
|
||||
)
|
||||
failed_list.append(ping.job.task)
|
||||
continue
|
||||
else:
|
||||
@@ -135,8 +144,12 @@ class GearJobs(object):
|
||||
format(ping.job.task)
|
||||
)
|
||||
elif ping.timed_out:
|
||||
LOG.error('OFFLINE load balancer %s ping timed out. ' \
|
||||
'Marking failed.', ping.job.task);
|
||||
failed_list.append(ping.job.task)
|
||||
elif ping.result['network'] == 'FAIL':
|
||||
LOG.error('OFFLINE load balancer %s internet HTTP connect ' \
|
||||
'test failed. Marking failed.', ping.job.task);
|
||||
failed_list.append(ping.job.task)
|
||||
else:
|
||||
gearman_count = 0
|
||||
@@ -144,10 +157,20 @@ class GearJobs(object):
|
||||
for gearman_test in ping.result['gearman']:
|
||||
gearman_count += 1
|
||||
if gearman_test['status'] == 'FAIL':
|
||||
LOG.error('OFFLINE load balancer %s unable to ' \
|
||||
'contact gearman server %s.',
|
||||
ping.job.task,
|
||||
gearman_test['host']
|
||||
);
|
||||
gearman_fail += 1
|
||||
# Need 2/3rds gearman up
|
||||
max_fail_count = gearman_count / 3
|
||||
if gearman_fail > max_fail_count:
|
||||
LOG.error('OFFLINE load balancer %s failed to reach ' \
|
||||
'%d gearman servers. Marking failed.',
|
||||
ping.job.task,
|
||||
gearman_fail
|
||||
);
|
||||
failed_list.append(ping.job.task)
|
||||
return failed_list
|
||||
|
||||
@@ -185,12 +208,20 @@ class GearJobs(object):
|
||||
for stats in submitted_stats:
|
||||
if stats.state == JOB_UNKNOWN:
|
||||
# TODO: Gearman server failed, ignoring for now
|
||||
LOG.warn(
|
||||
"Gearman Job server failed during METRICS check of {0}. " \
|
||||
"Retrying.".format(ping.job.task)
|
||||
)
|
||||
retry_list.append(stats.job.task)
|
||||
elif stats.timed_out:
|
||||
# Timeout
|
||||
LOG.warn('Load balancer %s METRICS timed out. ' \
|
||||
'Retrying.', ping.job.task);
|
||||
retry_list.append(stats.job.task)
|
||||
elif stats.result['hpcs_response'] == 'FAIL':
|
||||
# Error returned by worker via Gearman
|
||||
LOG.error('Load balancer %s METRICS response FAIL. ' \
|
||||
'Marking failed.', ping.job.task);
|
||||
failed_list.append(stats.job.task)
|
||||
else:
|
||||
# Success
|
||||
@@ -218,9 +249,13 @@ class GearJobs(object):
|
||||
failed_list.append(stats.job.task)
|
||||
elif stats.timed_out:
|
||||
# Timeout
|
||||
LOG.error('Load balancer %s METRICS timed out again. ' \
|
||||
'Marking failed.', ping.job.task);
|
||||
failed_list.append(stats.job.task)
|
||||
elif stats.result['hpcs_response'] == 'FAIL':
|
||||
# Error returned by worker via Gearman
|
||||
LOG.error('Load balancer %s METRICS response FAIL. ' \
|
||||
'Marking failed.', ping.job.task);
|
||||
failed_list.append(stats.job.task)
|
||||
else:
|
||||
# Success
|
||||
|
@@ -112,7 +112,7 @@ class Node(object):
|
||||
resp, body = self.nova.post(url, body=body)
|
||||
except Exception as novaexcept:
|
||||
if "timed out" in str(novaexcept):
|
||||
LOG.error('Nova assign floating IP %s %s' \
|
||||
LOG.error('Nova assign floating IP %s %s ' \
|
||||
'POST call timed out after %d seconds.' \
|
||||
% (url, body, cfg.CONF['mgm']['nova_timeout']))
|
||||
raise
|
||||
@@ -139,7 +139,7 @@ class Node(object):
|
||||
raise
|
||||
except Exception as novaexcept:
|
||||
if "timed out" in str(novaexcept):
|
||||
LOG.error('Nova remove floating IP %s %s' \
|
||||
LOG.error('Nova remove floating IP %s %s ' \
|
||||
'POST call timed out after %d seconds.' \
|
||||
% (url, body, cfg.CONF['mgm']['nova_timeout']))
|
||||
raise
|
||||
@@ -161,7 +161,7 @@ class Node(object):
|
||||
resp, body = self.nova.delete(url)
|
||||
except Exception as novaexcept:
|
||||
if "timed out" in str(novaexcept):
|
||||
LOG.error('Nova delete floating IP %s %s' \
|
||||
LOG.error('Nova delete floating IP %s %s ' \
|
||||
'DELETE call timed out after %d seconds.' \
|
||||
% (url, body, cfg.CONF['mgm']['nova_timeout']))
|
||||
raise
|
||||
@@ -174,7 +174,7 @@ class Node(object):
|
||||
resp, body = self.nova.get(url)
|
||||
except Exception as novaexcept:
|
||||
if "timed out" in str(novaexcept):
|
||||
LOG.error('Nova get instance id %s' \
|
||||
LOG.error('Nova get instance id %s ' \
|
||||
'GET call timed out after %d seconds.' \
|
||||
% (url, cfg.CONF['mgm']['nova_timeout']))
|
||||
raise
|
||||
@@ -192,7 +192,7 @@ class Node(object):
|
||||
resp, body = self.nova.get(url)
|
||||
except Exception as novaexcept:
|
||||
if "timed out" in str(novaexcept):
|
||||
LOG.error('Nova get floating IP id %s' \
|
||||
LOG.error('Nova get floating IP id %s ' \
|
||||
'GET call timed out after %d seconds.' \
|
||||
% (url, cfg.CONF['mgm']['nova_timeout']))
|
||||
raise
|
||||
@@ -248,7 +248,7 @@ class Node(object):
|
||||
resp, body = self.nova.post(url, body=body)
|
||||
except Exception as novaexcept:
|
||||
if "timed out" in str(novaexcept):
|
||||
LOG.error('Nova create node %s %s' \
|
||||
LOG.error('Nova create node %s %s ' \
|
||||
'POST call timed out after %d seconds.' \
|
||||
% (url, body, cfg.CONF['mgm']['nova_timeout']))
|
||||
raise
|
||||
@@ -265,7 +265,7 @@ class Node(object):
|
||||
raise NotFound(msg)
|
||||
except Exception as novaexcept:
|
||||
if "timed out" in str(novaexcept):
|
||||
LOG.error('Nova node status %s' \
|
||||
LOG.error('Nova node status %s ' \
|
||||
'GET call timed out after %d seconds.' \
|
||||
% (url, cfg.CONF['mgm']['nova_timeout']))
|
||||
raise
|
||||
@@ -279,7 +279,7 @@ class Node(object):
|
||||
resp, body = self.nova.delete(url)
|
||||
except Exception as novaexcept:
|
||||
if "timed out" in str(novaexcept):
|
||||
LOG.error('Nova node delete %s' \
|
||||
LOG.error('Nova node delete %s ' \
|
||||
'DELETE call timed out after %d seconds.' \
|
||||
% (url, cfg.CONF['mgm']['nova_timeout']))
|
||||
raise
|
||||
@@ -299,7 +299,7 @@ class Node(object):
|
||||
raise NotFound(msg)
|
||||
except Exception as novaexcept:
|
||||
if "timed out" in str(novaexcept):
|
||||
LOG.error('Nova get node %s' \
|
||||
LOG.error('Nova get node %s ' \
|
||||
'GET call timed out after %d seconds.' \
|
||||
% (url, cfg.CONF['mgm']['nova_timeout']))
|
||||
raise
|
||||
@@ -322,7 +322,7 @@ class Node(object):
|
||||
resp, body = self.nova.get(url)
|
||||
except Exception as novaexcept:
|
||||
if "timed out" in str(novaexcept):
|
||||
LOG.error('Nova get image %s' \
|
||||
LOG.error('Nova get image %s ' \
|
||||
'GET call timed out after %d seconds.' \
|
||||
% (url, cfg.CONF['mgm']['nova_timeout']))
|
||||
raise
|
||||
@@ -344,7 +344,7 @@ class Node(object):
|
||||
resp, body = self.nova.get(url)
|
||||
except Exception as novaexcept:
|
||||
if "timed out" in str(novaexcept):
|
||||
LOG.error('Nova get flavors %s' \
|
||||
LOG.error('Nova get flavors %s ' \
|
||||
'GET call timed out after %d seconds.' \
|
||||
% (url, cfg.CONF['mgm']['nova_timeout']))
|
||||
raise
|
||||
|
Reference in New Issue
Block a user