Update HAProxy default timeout values

The default HAProxy timeout values are fairly strict. On a busy cloud
it is common to exceed one or more of these timeouts. The only
indication that HAProxy has exceeded a timeout and dropped the
connection is errors such as "BadStatusLine" or "EOF." These can be
very difficult to diagnose when intermittent.

This charm-helpers sync pulls in the change to update the default
timeout values to more real world settings. These values have been
extensively tested in ServerStack. Configured values will not be
overridden.

Partial Bug: #1736171

Change-Id: I312dd56ecf55ad67485305e57f2807a5ea6975cd
This commit is contained in:
David Ames 2017-12-11 11:36:27 -08:00
parent 0bd85c7380
commit edad8b6054
14 changed files with 115 additions and 31 deletions

View File

@ -176,25 +176,25 @@ options:
default:
description: |
Server timeout configuration in ms for haproxy, used in HA
configurations. If not provided, default value of 30000ms is used.
configurations. If not provided, default value of 90000ms is used.
haproxy-client-timeout:
type: int
default:
description: |
Client timeout configuration in ms for haproxy, used in HA
configurations. If not provided, default value of 30000ms is used.
configurations. If not provided, default value of 90000ms is used.
haproxy-queue-timeout:
type: int
default:
description: |
Queue timeout configuration in ms for haproxy, used in HA
configurations. If not provided, default value of 5000ms is used.
configurations. If not provided, default value of 9000ms is used.
haproxy-connect-timeout:
type: int
default:
description: |
Connect timeout configuration in ms for haproxy, used in HA
configurations. If not provided, default value of 5000ms is used.
configurations. If not provided, default value of 9000ms is used.
# Network config (by default all access is over 'private-address')
os-admin-network:
type: string
@ -281,25 +281,25 @@ options:
default:
description: |
Server timeout configuration in ms for haproxy, used in HA
configurations. If not provided, default value of 30000ms is used.
configurations. If not provided, default value of 90000ms is used.
haproxy-client-timeout:
type: int
default:
description: |
Client timeout configuration in ms for haproxy, used in HA
configurations. If not provided, default value of 30000ms is used.
configurations. If not provided, default value of 90000ms is used.
haproxy-queue-timeout:
type: int
default:
description: |
Queue timeout configuration in ms for haproxy, used in HA
configurations. If not provided, default value of 5000ms is used.
configurations. If not provided, default value of 9000ms is used.
haproxy-connect-timeout:
type: int
default:
description: |
Connect timeout configuration in ms for haproxy, used in HA
configurations. If not provided, default value of 5000ms is used.
configurations. If not provided, default value of 9000ms is used.
# External SSL Parameters
ssl_cert:

View File

@ -13,6 +13,7 @@
# limitations under the License.
import logging
import os
import re
import sys
import six
@ -185,7 +186,7 @@ class OpenStackAmuletDeployment(AmuletDeployment):
self.d.configure(service, config)
def _auto_wait_for_status(self, message=None, exclude_services=None,
include_only=None, timeout=1800):
include_only=None, timeout=None):
"""Wait for all units to have a specific extended status, except
for any defined as excluded. Unless specified via message, any
status containing any case of 'ready' will be considered a match.
@ -215,7 +216,10 @@ class OpenStackAmuletDeployment(AmuletDeployment):
:param timeout: Maximum time in seconds to wait for status match
:returns: None. Raises if timeout is hit.
"""
self.log.info('Waiting for extended status on units...')
if not timeout:
timeout = int(os.environ.get('AMULET_SETUP_TIMEOUT', 1800))
self.log.info('Waiting for extended status on units for {}s...'
''.format(timeout))
all_services = self.d.services.keys()
@ -252,9 +256,9 @@ class OpenStackAmuletDeployment(AmuletDeployment):
service_messages = {service: message for service in services}
# Check for idleness
self.d.sentry.wait()
self.d.sentry.wait(timeout=timeout)
# Check for error states and bail early
self.d.sentry.wait_for_status(self.d.juju_env, services)
self.d.sentry.wait_for_status(self.d.juju_env, services, timeout=timeout)
# Check for ready messages
self.d.sentry.wait_for_messages(service_messages, timeout=timeout)

View File

@ -858,9 +858,12 @@ class OpenStackAmuletUtils(AmuletUtils):
:returns: List of pool name, object count, kb disk space used
"""
df = self.get_ceph_df(sentry_unit)
pool_name = df['pools'][pool_id]['name']
obj_count = df['pools'][pool_id]['stats']['objects']
kb_used = df['pools'][pool_id]['stats']['kb_used']
for pool in df['pools']:
if pool['id'] == pool_id:
pool_name = pool['name']
obj_count = pool['stats']['objects']
kb_used = pool['stats']['kb_used']
self.log.debug('Ceph {} pool (ID {}): {} objects, '
'{} kb used'.format(pool_name, pool_id,
obj_count, kb_used))

View File

@ -101,6 +101,8 @@ from charmhelpers.contrib.openstack.utils import (
git_determine_python_path,
enable_memcache,
snap_install_requested,
CompareOpenStackReleases,
os_release,
)
from charmhelpers.core.unitdata import kv
@ -1566,8 +1568,18 @@ class InternalEndpointContext(OSContextGenerator):
endpoints by default so this allows admins to optionally use internal
endpoints.
"""
def __init__(self, ost_rel_check_pkg_name):
self.ost_rel_check_pkg_name = ost_rel_check_pkg_name
def __call__(self):
return {'use_internal_endpoints': config('use-internal-endpoints')}
ctxt = {'use_internal_endpoints': config('use-internal-endpoints')}
rel = os_release(self.ost_rel_check_pkg_name, base='icehouse')
if CompareOpenStackReleases(rel) >= 'pike':
ctxt['volume_api_version'] = '3'
else:
ctxt['volume_api_version'] = '2'
return ctxt
class AppArmorContext(OSContextGenerator):

View File

@ -17,22 +17,22 @@ defaults
{%- if haproxy_queue_timeout %}
timeout queue {{ haproxy_queue_timeout }}
{%- else %}
timeout queue 5000
timeout queue 9000
{%- endif %}
{%- if haproxy_connect_timeout %}
timeout connect {{ haproxy_connect_timeout }}
{%- else %}
timeout connect 5000
timeout connect 9000
{%- endif %}
{%- if haproxy_client_timeout %}
timeout client {{ haproxy_client_timeout }}
{%- else %}
timeout client 30000
timeout client 90000
{%- endif %}
{%- if haproxy_server_timeout %}
timeout server {{ haproxy_server_timeout }}
{%- else %}
timeout server 30000
timeout server 90000
{%- endif %}
listen stats

View File

@ -392,6 +392,8 @@ def get_swift_codename(version):
releases = UBUNTU_OPENSTACK_RELEASE
release = [k for k, v in six.iteritems(releases) if codename in v]
ret = subprocess.check_output(['apt-cache', 'policy', 'swift'])
if six.PY3:
ret = ret.decode('UTF-8')
if codename in ret or release[0] in ret:
return codename
elif len(codenames) == 1:

View File

@ -377,12 +377,12 @@ def get_mon_map(service):
try:
return json.loads(mon_status)
except ValueError as v:
log("Unable to parse mon_status json: {}. Error: {}".format(
mon_status, v.message))
log("Unable to parse mon_status json: {}. Error: {}"
.format(mon_status, str(v)))
raise
except CalledProcessError as e:
log("mon_status command failed with message: {}".format(
e.message))
log("mon_status command failed with message: {}"
.format(str(e)))
raise

View File

@ -12,6 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import functools
from subprocess import (
CalledProcessError,
check_call,
@ -101,3 +102,52 @@ def create_lvm_volume_group(volume_group, block_device):
:block_device: str: Full path of PV-initialized block device.
'''
check_call(['vgcreate', volume_group, block_device])
def list_logical_volumes(select_criteria=None, path_mode=False):
'''
List logical volumes
:param select_criteria: str: Limit list to those volumes matching this
criteria (see 'lvs -S help' for more details)
:param path_mode: bool: return logical volume name in 'vg/lv' format, this
format is required for some commands like lvextend
:returns: [str]: List of logical volumes
'''
lv_diplay_attr = 'lv_name'
if path_mode:
# Parsing output logic relies on the column order
lv_diplay_attr = 'vg_name,' + lv_diplay_attr
cmd = ['lvs', '--options', lv_diplay_attr, '--noheadings']
if select_criteria:
cmd.extend(['--select', select_criteria])
lvs = []
for lv in check_output(cmd).decode('UTF-8').splitlines():
if not lv:
continue
if path_mode:
lvs.append('/'.join(lv.strip().split()))
else:
lvs.append(lv.strip())
return lvs
list_thin_logical_volume_pools = functools.partial(
list_logical_volumes,
select_criteria='lv_attr =~ ^t')
list_thin_logical_volumes = functools.partial(
list_logical_volumes,
select_criteria='lv_attr =~ ^V')
def extend_logical_volume_by_device(lv_name, block_device):
'''
Extends the size of logical volume lv_name by the amount of free space on
physical volume block_device.
:param lv_name: str: name of logical volume to be extended (vg/lv format)
:param block_device: str: name of block_device to be allocated to lv_name
'''
cmd = ['lvextend', lv_name, block_device]
check_call(cmd)

View File

@ -549,6 +549,8 @@ def write_file(path, content, owner='root', group='root', perms=0o444):
with open(path, 'wb') as target:
os.fchown(target.fileno(), uid, gid)
os.fchmod(target.fileno(), perms)
if six.PY3 and isinstance(content, six.string_types):
content = content.encode('UTF-8')
target.write(content)
return
# the contents were the same, but we might still need to change the

View File

@ -20,6 +20,7 @@ UBUNTU_RELEASES = (
'yakkety',
'zesty',
'artful',
'bionic',
)

View File

@ -13,6 +13,7 @@
# limitations under the License.
import logging
import os
import re
import sys
import six
@ -185,7 +186,7 @@ class OpenStackAmuletDeployment(AmuletDeployment):
self.d.configure(service, config)
def _auto_wait_for_status(self, message=None, exclude_services=None,
include_only=None, timeout=1800):
include_only=None, timeout=None):
"""Wait for all units to have a specific extended status, except
for any defined as excluded. Unless specified via message, any
status containing any case of 'ready' will be considered a match.
@ -215,7 +216,10 @@ class OpenStackAmuletDeployment(AmuletDeployment):
:param timeout: Maximum time in seconds to wait for status match
:returns: None. Raises if timeout is hit.
"""
self.log.info('Waiting for extended status on units...')
if not timeout:
timeout = int(os.environ.get('AMULET_SETUP_TIMEOUT', 1800))
self.log.info('Waiting for extended status on units for {}s...'
''.format(timeout))
all_services = self.d.services.keys()
@ -252,9 +256,9 @@ class OpenStackAmuletDeployment(AmuletDeployment):
service_messages = {service: message for service in services}
# Check for idleness
self.d.sentry.wait()
self.d.sentry.wait(timeout=timeout)
# Check for error states and bail early
self.d.sentry.wait_for_status(self.d.juju_env, services)
self.d.sentry.wait_for_status(self.d.juju_env, services, timeout=timeout)
# Check for ready messages
self.d.sentry.wait_for_messages(service_messages, timeout=timeout)

View File

@ -858,9 +858,12 @@ class OpenStackAmuletUtils(AmuletUtils):
:returns: List of pool name, object count, kb disk space used
"""
df = self.get_ceph_df(sentry_unit)
pool_name = df['pools'][pool_id]['name']
obj_count = df['pools'][pool_id]['stats']['objects']
kb_used = df['pools'][pool_id]['stats']['kb_used']
for pool in df['pools']:
if pool['id'] == pool_id:
pool_name = pool['name']
obj_count = pool['stats']['objects']
kb_used = pool['stats']['kb_used']
self.log.debug('Ceph {} pool (ID {}): {} objects, '
'{} kb used'.format(pool_name, pool_id,
obj_count, kb_used))

View File

@ -549,6 +549,8 @@ def write_file(path, content, owner='root', group='root', perms=0o444):
with open(path, 'wb') as target:
os.fchown(target.fileno(), uid, gid)
os.fchmod(target.fileno(), perms)
if six.PY3 and isinstance(content, six.string_types):
content = content.encode('UTF-8')
target.write(content)
return
# the contents were the same, but we might still need to change the

View File

@ -20,6 +20,7 @@ UBUNTU_RELEASES = (
'yakkety',
'zesty',
'artful',
'bionic',
)