LBAAS-825 - Better OFFLINE pool failure logging
Change-Id: Ie2b40a1ab50150f782eee405a93d450d22db28e7
This commit is contained in:
@@ -63,6 +63,8 @@ class GearJobs(object):
|
|||||||
continue
|
continue
|
||||||
if ping.timed_out:
|
if ping.timed_out:
|
||||||
# Ping timeout
|
# Ping timeout
|
||||||
|
LOG.warn("Load balancer %s ping timed out. Retrying",
|
||||||
|
ping.job.task);
|
||||||
retry_list.append(ping.job.task)
|
retry_list.append(ping.job.task)
|
||||||
continue
|
continue
|
||||||
if ping.result['hpcs_response'] == 'FAIL':
|
if ping.result['hpcs_response'] == 'FAIL':
|
||||||
@@ -101,6 +103,8 @@ class GearJobs(object):
|
|||||||
continue
|
continue
|
||||||
if ping.timed_out:
|
if ping.timed_out:
|
||||||
# Ping timeout
|
# Ping timeout
|
||||||
|
LOG.error('Load balancer %s ping timed out again. ' \
|
||||||
|
'Marking failed.', ping.job.task);
|
||||||
failed_list.append(ping.job.task)
|
failed_list.append(ping.job.task)
|
||||||
continue
|
continue
|
||||||
if ping.result['hpcs_response'] == 'FAIL':
|
if ping.result['hpcs_response'] == 'FAIL':
|
||||||
@@ -109,7 +113,12 @@ class GearJobs(object):
|
|||||||
ping.result['status'] == 'DELETED'
|
ping.result['status'] == 'DELETED'
|
||||||
):
|
):
|
||||||
continue
|
continue
|
||||||
# Error returned by Gearman
|
# Error returned by worker via Gearman
|
||||||
|
LOG.error('Load balancer %s reported failed by the ' \
|
||||||
|
'worker due to: %s',
|
||||||
|
ping.job.task,
|
||||||
|
ping.result['hpcs_error']
|
||||||
|
)
|
||||||
failed_list.append(ping.job.task)
|
failed_list.append(ping.job.task)
|
||||||
continue
|
continue
|
||||||
else:
|
else:
|
||||||
@@ -135,8 +144,12 @@ class GearJobs(object):
|
|||||||
format(ping.job.task)
|
format(ping.job.task)
|
||||||
)
|
)
|
||||||
elif ping.timed_out:
|
elif ping.timed_out:
|
||||||
|
LOG.error('OFFLINE load balancer %s ping timed out. ' \
|
||||||
|
'Marking failed.', ping.job.task);
|
||||||
failed_list.append(ping.job.task)
|
failed_list.append(ping.job.task)
|
||||||
elif ping.result['network'] == 'FAIL':
|
elif ping.result['network'] == 'FAIL':
|
||||||
|
LOG.error('OFFLINE load balancer %s internet HTTP connect ' \
|
||||||
|
'test failed. Marking failed.', ping.job.task);
|
||||||
failed_list.append(ping.job.task)
|
failed_list.append(ping.job.task)
|
||||||
else:
|
else:
|
||||||
gearman_count = 0
|
gearman_count = 0
|
||||||
@@ -144,10 +157,20 @@ class GearJobs(object):
|
|||||||
for gearman_test in ping.result['gearman']:
|
for gearman_test in ping.result['gearman']:
|
||||||
gearman_count += 1
|
gearman_count += 1
|
||||||
if gearman_test['status'] == 'FAIL':
|
if gearman_test['status'] == 'FAIL':
|
||||||
|
LOG.error('OFFLINE load balancer %s unable to ' \
|
||||||
|
'contact gearman server %s.',
|
||||||
|
ping.job.task,
|
||||||
|
gearman_test['host']
|
||||||
|
);
|
||||||
gearman_fail += 1
|
gearman_fail += 1
|
||||||
# Need 2/3rds gearman up
|
# Need 2/3rds gearman up
|
||||||
max_fail_count = gearman_count / 3
|
max_fail_count = gearman_count / 3
|
||||||
if gearman_fail > max_fail_count:
|
if gearman_fail > max_fail_count:
|
||||||
|
LOG.error('OFFLINE load balancer %s failed to reach ' \
|
||||||
|
'%d gearman servers. Marking failed.',
|
||||||
|
ping.job.task,
|
||||||
|
gearman_fail
|
||||||
|
);
|
||||||
failed_list.append(ping.job.task)
|
failed_list.append(ping.job.task)
|
||||||
return failed_list
|
return failed_list
|
||||||
|
|
||||||
@@ -185,12 +208,20 @@ class GearJobs(object):
|
|||||||
for stats in submitted_stats:
|
for stats in submitted_stats:
|
||||||
if stats.state == JOB_UNKNOWN:
|
if stats.state == JOB_UNKNOWN:
|
||||||
# TODO: Gearman server failed, ignoring for now
|
# TODO: Gearman server failed, ignoring for now
|
||||||
|
LOG.warn(
|
||||||
|
"Gearman Job server failed during METRICS check of {0}. " \
|
||||||
|
"Retrying.".format(ping.job.task)
|
||||||
|
)
|
||||||
retry_list.append(stats.job.task)
|
retry_list.append(stats.job.task)
|
||||||
elif stats.timed_out:
|
elif stats.timed_out:
|
||||||
# Timeout
|
# Timeout
|
||||||
|
LOG.warn('Load balancer %s METRICS timed out. ' \
|
||||||
|
'Retrying.', ping.job.task);
|
||||||
retry_list.append(stats.job.task)
|
retry_list.append(stats.job.task)
|
||||||
elif stats.result['hpcs_response'] == 'FAIL':
|
elif stats.result['hpcs_response'] == 'FAIL':
|
||||||
# Error returned by Gearman
|
# Error returned by Gearman
|
||||||
|
LOG.error('Load balancer %s METRICS response FAIL. ' \
|
||||||
|
'Marking failed.', ping.job.task);
|
||||||
failed_list.append(stats.job.task)
|
failed_list.append(stats.job.task)
|
||||||
else:
|
else:
|
||||||
# Success
|
# Success
|
||||||
@@ -218,9 +249,13 @@ class GearJobs(object):
|
|||||||
failed_list.append(stats.job.task)
|
failed_list.append(stats.job.task)
|
||||||
elif stats.timed_out:
|
elif stats.timed_out:
|
||||||
# Timeout
|
# Timeout
|
||||||
|
LOG.error('Load balancer %s METRICS timed out again. ' \
|
||||||
|
'Marking failed.', ping.job.task);
|
||||||
failed_list.append(stats.job.task)
|
failed_list.append(stats.job.task)
|
||||||
elif stats.result['hpcs_response'] == 'FAIL':
|
elif stats.result['hpcs_response'] == 'FAIL':
|
||||||
# Error returned by Gearman
|
# Error returned by Gearman
|
||||||
|
LOG.error('Load balancer %s METRICS response FAIL. ' \
|
||||||
|
'Marking failed.', ping.job.task);
|
||||||
failed_list.append(stats.job.task)
|
failed_list.append(stats.job.task)
|
||||||
else:
|
else:
|
||||||
# Success
|
# Success
|
||||||
|
@@ -112,7 +112,7 @@ class Node(object):
|
|||||||
resp, body = self.nova.post(url, body=body)
|
resp, body = self.nova.post(url, body=body)
|
||||||
except Exception as novaexcept:
|
except Exception as novaexcept:
|
||||||
if "timed out" in str(novaexcept):
|
if "timed out" in str(novaexcept):
|
||||||
LOG.error('Nova assign floating IP %s %s' \
|
LOG.error('Nova assign floating IP %s %s ' \
|
||||||
'POST call timed out after %d seconds.' \
|
'POST call timed out after %d seconds.' \
|
||||||
% (url, body, cfg.CONF['mgm']['nova_timeout']))
|
% (url, body, cfg.CONF['mgm']['nova_timeout']))
|
||||||
raise
|
raise
|
||||||
@@ -139,7 +139,7 @@ class Node(object):
|
|||||||
raise
|
raise
|
||||||
except Exception as novaexcept:
|
except Exception as novaexcept:
|
||||||
if "timed out" in str(novaexcept):
|
if "timed out" in str(novaexcept):
|
||||||
LOG.error('Nova remove floating IP %s %s' \
|
LOG.error('Nova remove floating IP %s %s ' \
|
||||||
'POST call timed out after %d seconds.' \
|
'POST call timed out after %d seconds.' \
|
||||||
% (url, body, cfg.CONF['mgm']['nova_timeout']))
|
% (url, body, cfg.CONF['mgm']['nova_timeout']))
|
||||||
raise
|
raise
|
||||||
@@ -161,7 +161,7 @@ class Node(object):
|
|||||||
resp, body = self.nova.delete(url)
|
resp, body = self.nova.delete(url)
|
||||||
except Exception as novaexcept:
|
except Exception as novaexcept:
|
||||||
if "timed out" in str(novaexcept):
|
if "timed out" in str(novaexcept):
|
||||||
LOG.error('Nova delete floating IP %s %s' \
|
LOG.error('Nova delete floating IP %s %s ' \
|
||||||
'DELETE call timed out after %d seconds.' \
|
'DELETE call timed out after %d seconds.' \
|
||||||
% (url, body, cfg.CONF['mgm']['nova_timeout']))
|
% (url, body, cfg.CONF['mgm']['nova_timeout']))
|
||||||
raise
|
raise
|
||||||
@@ -174,7 +174,7 @@ class Node(object):
|
|||||||
resp, body = self.nova.get(url)
|
resp, body = self.nova.get(url)
|
||||||
except Exception as novaexcept:
|
except Exception as novaexcept:
|
||||||
if "timed out" in str(novaexcept):
|
if "timed out" in str(novaexcept):
|
||||||
LOG.error('Nova get instance id %s' \
|
LOG.error('Nova get instance id %s ' \
|
||||||
'GET call timed out after %d seconds.' \
|
'GET call timed out after %d seconds.' \
|
||||||
% (url, cfg.CONF['mgm']['nova_timeout']))
|
% (url, cfg.CONF['mgm']['nova_timeout']))
|
||||||
raise
|
raise
|
||||||
@@ -192,7 +192,7 @@ class Node(object):
|
|||||||
resp, body = self.nova.get(url)
|
resp, body = self.nova.get(url)
|
||||||
except Exception as novaexcept:
|
except Exception as novaexcept:
|
||||||
if "timed out" in str(novaexcept):
|
if "timed out" in str(novaexcept):
|
||||||
LOG.error('Nova get floating IP id %s' \
|
LOG.error('Nova get floating IP id %s ' \
|
||||||
'GET call timed out after %d seconds.' \
|
'GET call timed out after %d seconds.' \
|
||||||
% (url, cfg.CONF['mgm']['nova_timeout']))
|
% (url, cfg.CONF['mgm']['nova_timeout']))
|
||||||
raise
|
raise
|
||||||
@@ -248,7 +248,7 @@ class Node(object):
|
|||||||
resp, body = self.nova.post(url, body=body)
|
resp, body = self.nova.post(url, body=body)
|
||||||
except Exception as novaexcept:
|
except Exception as novaexcept:
|
||||||
if "timed out" in str(novaexcept):
|
if "timed out" in str(novaexcept):
|
||||||
LOG.error('Nova create node %s %s' \
|
LOG.error('Nova create node %s %s ' \
|
||||||
'POST call timed out after %d seconds.' \
|
'POST call timed out after %d seconds.' \
|
||||||
% (url, body, cfg.CONF['mgm']['nova_timeout']))
|
% (url, body, cfg.CONF['mgm']['nova_timeout']))
|
||||||
raise
|
raise
|
||||||
@@ -265,7 +265,7 @@ class Node(object):
|
|||||||
raise NotFound(msg)
|
raise NotFound(msg)
|
||||||
except Exception as novaexcept:
|
except Exception as novaexcept:
|
||||||
if "timed out" in str(novaexcept):
|
if "timed out" in str(novaexcept):
|
||||||
LOG.error('Nova node status %s' \
|
LOG.error('Nova node status %s ' \
|
||||||
'GET call timed out after %d seconds.' \
|
'GET call timed out after %d seconds.' \
|
||||||
% (url, cfg.CONF['mgm']['nova_timeout']))
|
% (url, cfg.CONF['mgm']['nova_timeout']))
|
||||||
raise
|
raise
|
||||||
@@ -279,7 +279,7 @@ class Node(object):
|
|||||||
resp, body = self.nova.delete(url)
|
resp, body = self.nova.delete(url)
|
||||||
except Exception as novaexcept:
|
except Exception as novaexcept:
|
||||||
if "timed out" in str(novaexcept):
|
if "timed out" in str(novaexcept):
|
||||||
LOG.error('Nova node delete %s' \
|
LOG.error('Nova node delete %s ' \
|
||||||
'DELETE call timed out after %d seconds.' \
|
'DELETE call timed out after %d seconds.' \
|
||||||
% (url, cfg.CONF['mgm']['nova_timeout']))
|
% (url, cfg.CONF['mgm']['nova_timeout']))
|
||||||
raise
|
raise
|
||||||
@@ -299,7 +299,7 @@ class Node(object):
|
|||||||
raise NotFound(msg)
|
raise NotFound(msg)
|
||||||
except Exception as novaexcept:
|
except Exception as novaexcept:
|
||||||
if "timed out" in str(novaexcept):
|
if "timed out" in str(novaexcept):
|
||||||
LOG.error('Nova get node %s' \
|
LOG.error('Nova get node %s ' \
|
||||||
'GET call timed out after %d seconds.' \
|
'GET call timed out after %d seconds.' \
|
||||||
% (url, cfg.CONF['mgm']['nova_timeout']))
|
% (url, cfg.CONF['mgm']['nova_timeout']))
|
||||||
raise
|
raise
|
||||||
@@ -322,7 +322,7 @@ class Node(object):
|
|||||||
resp, body = self.nova.get(url)
|
resp, body = self.nova.get(url)
|
||||||
except Exception as novaexcept:
|
except Exception as novaexcept:
|
||||||
if "timed out" in str(novaexcept):
|
if "timed out" in str(novaexcept):
|
||||||
LOG.error('Nova get image %s' \
|
LOG.error('Nova get image %s ' \
|
||||||
'GET call timed out after %d seconds.' \
|
'GET call timed out after %d seconds.' \
|
||||||
% (url, cfg.CONF['mgm']['nova_timeout']))
|
% (url, cfg.CONF['mgm']['nova_timeout']))
|
||||||
raise
|
raise
|
||||||
@@ -344,7 +344,7 @@ class Node(object):
|
|||||||
resp, body = self.nova.get(url)
|
resp, body = self.nova.get(url)
|
||||||
except Exception as novaexcept:
|
except Exception as novaexcept:
|
||||||
if "timed out" in str(novaexcept):
|
if "timed out" in str(novaexcept):
|
||||||
LOG.error('Nova get flavors %s' \
|
LOG.error('Nova get flavors %s ' \
|
||||||
'GET call timed out after %d seconds.' \
|
'GET call timed out after %d seconds.' \
|
||||||
% (url, cfg.CONF['mgm']['nova_timeout']))
|
% (url, cfg.CONF['mgm']['nova_timeout']))
|
||||||
raise
|
raise
|
||||||
|
Reference in New Issue
Block a user