diff --git a/libra/admin_api/stats/stats_gearman.py b/libra/admin_api/stats/stats_gearman.py index b82f9a2f..6e2403b8 100644 --- a/libra/admin_api/stats/stats_gearman.py +++ b/libra/admin_api/stats/stats_gearman.py @@ -63,6 +63,8 @@ class GearJobs(object): continue if ping.timed_out: # Ping timeout + LOG.warn("Load balancer %s ping timed out. Retrying", + ping.job.task); retry_list.append(ping.job.task) continue if ping.result['hpcs_response'] == 'FAIL': @@ -101,6 +103,8 @@ class GearJobs(object): continue if ping.timed_out: # Ping timeout + LOG.error('Load balancer %s ping timed out again. ' \ + 'Marking failed.', ping.job.task); failed_list.append(ping.job.task) continue if ping.result['hpcs_response'] == 'FAIL': @@ -109,7 +113,12 @@ class GearJobs(object): ping.result['status'] == 'DELETED' ): continue - # Error returned by Gearman + # Error returned by worker via Gearman + LOG.error('Load balancer %s reported failed by the ' \ + 'worker due to: %s', + ping.job.task, + ping.result['hpcs_error'] + ) failed_list.append(ping.job.task) continue else: @@ -135,19 +144,33 @@ class GearJobs(object): format(ping.job.task) ) elif ping.timed_out: + LOG.error('OFFLINE load balancer %s ping timed out. ' \ + 'Marking failed.', ping.job.task); failed_list.append(ping.job.task) elif ping.result['network'] == 'FAIL': - failed_list.append(ping.job.task) + LOG.error('OFFLINE load balancer %s internet HTTP connect ' \ + 'test failed. Marking failed.', ping.job.task); + failed_list.append(ping.job.task) else: gearman_count = 0 gearman_fail = 0 for gearman_test in ping.result['gearman']: gearman_count += 1 if gearman_test['status'] == 'FAIL': + LOG.error('OFFLINE load balancer %s unable to ' \ + 'contact gearman server %s.', + ping.job.task, + gearman_test['host'] + ); gearman_fail += 1 # Need 2/3rds gearman up max_fail_count = gearman_count / 3 if gearman_fail > max_fail_count: + LOG.error('OFFLINE load balancer %s failed to reach ' \ + '%d gearman servers. Marking failed.', + ping.job.task, + gearman_fail + ); failed_list.append(ping.job.task) return failed_list @@ -185,12 +208,20 @@ class GearJobs(object): for stats in submitted_stats: if stats.state == JOB_UNKNOWN: # TODO: Gearman server failed, ignoring for now + LOG.warn( + "Gearman Job server failed during METRICS check of {0}. " \ + "Retrying.".format(ping.job.task) + ) retry_list.append(stats.job.task) elif stats.timed_out: # Timeout + LOG.warn('Load balancer %s METRICS timed out. ' \ + 'Retrying.', ping.job.task); retry_list.append(stats.job.task) elif stats.result['hpcs_response'] == 'FAIL': # Error returned by Gearman + LOG.error('Load balancer %s METRICS response FAIL. ' \ + 'Marking failed.', ping.job.task); failed_list.append(stats.job.task) else: # Success @@ -218,9 +249,13 @@ class GearJobs(object): failed_list.append(stats.job.task) elif stats.timed_out: # Timeout + LOG.error('Load balancer %s METRICS timed out again. ' \ + 'Marking failed.', ping.job.task); failed_list.append(stats.job.task) elif stats.result['hpcs_response'] == 'FAIL': # Error returned by Gearman + LOG.error('Load balancer %s METRICS response FAIL. ' \ + 'Marking failed.', ping.job.task); failed_list.append(stats.job.task) else: # Success diff --git a/libra/mgm/nova.py b/libra/mgm/nova.py index cc8ba1d5..a88588ab 100644 --- a/libra/mgm/nova.py +++ b/libra/mgm/nova.py @@ -112,7 +112,7 @@ class Node(object): resp, body = self.nova.post(url, body=body) except Exception as novaexcept: if "timed out" in str(novaexcept): - LOG.error('Nova assign floating IP %s %s' \ + LOG.error('Nova assign floating IP %s %s ' \ 'POST call timed out after %d seconds.' \ % (url, body, cfg.CONF['mgm']['nova_timeout'])) raise @@ -139,7 +139,7 @@ class Node(object): raise except Exception as novaexcept: if "timed out" in str(novaexcept): - LOG.error('Nova remove floating IP %s %s' \ + LOG.error('Nova remove floating IP %s %s ' \ 'POST call timed out after %d seconds.' \ % (url, body, cfg.CONF['mgm']['nova_timeout'])) raise @@ -161,7 +161,7 @@ class Node(object): resp, body = self.nova.delete(url) except Exception as novaexcept: if "timed out" in str(novaexcept): - LOG.error('Nova delete floating IP %s %s' \ + LOG.error('Nova delete floating IP %s %s ' \ 'DELETE call timed out after %d seconds.' \ % (url, body, cfg.CONF['mgm']['nova_timeout'])) raise @@ -174,7 +174,7 @@ class Node(object): resp, body = self.nova.get(url) except Exception as novaexcept: if "timed out" in str(novaexcept): - LOG.error('Nova get instance id %s' \ + LOG.error('Nova get instance id %s ' \ 'GET call timed out after %d seconds.' \ % (url, cfg.CONF['mgm']['nova_timeout'])) raise @@ -192,7 +192,7 @@ class Node(object): resp, body = self.nova.get(url) except Exception as novaexcept: if "timed out" in str(novaexcept): - LOG.error('Nova get floating IP id %s' \ + LOG.error('Nova get floating IP id %s ' \ 'GET call timed out after %d seconds.' \ % (url, cfg.CONF['mgm']['nova_timeout'])) raise @@ -248,7 +248,7 @@ class Node(object): resp, body = self.nova.post(url, body=body) except Exception as novaexcept: if "timed out" in str(novaexcept): - LOG.error('Nova create node %s %s' \ + LOG.error('Nova create node %s %s ' \ 'POST call timed out after %d seconds.' \ % (url, body, cfg.CONF['mgm']['nova_timeout'])) raise @@ -265,7 +265,7 @@ class Node(object): raise NotFound(msg) except Exception as novaexcept: if "timed out" in str(novaexcept): - LOG.error('Nova node status %s' \ + LOG.error('Nova node status %s ' \ 'GET call timed out after %d seconds.' \ % (url, cfg.CONF['mgm']['nova_timeout'])) raise @@ -279,7 +279,7 @@ class Node(object): resp, body = self.nova.delete(url) except Exception as novaexcept: if "timed out" in str(novaexcept): - LOG.error('Nova node delete %s' \ + LOG.error('Nova node delete %s ' \ 'DELETE call timed out after %d seconds.' \ % (url, cfg.CONF['mgm']['nova_timeout'])) raise @@ -299,7 +299,7 @@ class Node(object): raise NotFound(msg) except Exception as novaexcept: if "timed out" in str(novaexcept): - LOG.error('Nova get node %s' \ + LOG.error('Nova get node %s ' \ 'GET call timed out after %d seconds.' \ % (url, cfg.CONF['mgm']['nova_timeout'])) raise @@ -322,7 +322,7 @@ class Node(object): resp, body = self.nova.get(url) except Exception as novaexcept: if "timed out" in str(novaexcept): - LOG.error('Nova get image %s' \ + LOG.error('Nova get image %s ' \ 'GET call timed out after %d seconds.' \ % (url, cfg.CONF['mgm']['nova_timeout'])) raise @@ -344,7 +344,7 @@ class Node(object): resp, body = self.nova.get(url) except Exception as novaexcept: if "timed out" in str(novaexcept): - LOG.error('Nova get flavors %s' \ + LOG.error('Nova get flavors %s ' \ 'GET call timed out after %d seconds.' \ % (url, cfg.CONF['mgm']['nova_timeout'])) raise