swift/etc/container-server.conf-sample
Jianjian Huo 4ed2b89cb7 Sharder: warn when sharding appears to have stalled.
This patch add a configurable timeout after which the sharder
will warn if a container DB has not completed sharding.

The new config is container_sharding_timeout with a default of
172800 seconds (2 days).

Drive-by fix: recording sharding progress will cover the case
of shard range shrinking too.

Co-Authored-By: Alistair Coles <alistairncoles@gmail.com>
Change-Id: I6ce299b5232a8f394e35f148317f9e08208a0c0f
2022-10-14 08:54:06 -07:00

543 lines
22 KiB
Plaintext

[DEFAULT]
# bind_ip = 0.0.0.0
bind_port = 6201
# keep_idle = 600
# bind_timeout = 30
# backlog = 4096
# user = swift
# swift_dir = /etc/swift
# devices = /srv/node
# mount_check = true
# disable_fallocate = false
#
# Use an integer to override the number of pre-forked processes that will
# accept connections.
# workers = auto
#
# Maximum concurrent requests per worker
# max_clients = 1024
#
# This is a comma separated list of hosts allowed in the X-Container-Sync-To
# field for containers. This is the old-style of using container sync. It is
# strongly recommended to use the new style of a separate
# container-sync-realms.conf -- see container-sync-realms.conf-sample
# allowed_sync_hosts = 127.0.0.1
#
# You can specify default log routing here if you want:
# log_name = swift
# log_facility = LOG_LOCAL0
# log_level = INFO
# log_address = /dev/log
# The following caps the length of log lines to the value given; no limit if
# set to 0, the default.
# log_max_line_length = 0
#
# Hashing algorithm for log anonymization. Must be one of algorithms supported
# by Python's hashlib.
# log_anonymization_method = MD5
#
# Salt added during log anonymization
# log_anonymization_salt =
#
# Template used to format logs. All words surrounded by curly brackets
# will be substituted with the appropriate values
# log_format = {remote_addr} - - [{time.d}/{time.b}/{time.Y}:{time.H}:{time.M}:{time.S} +0000] "{method} {path}" {status} {content_length} "{referer}" "{txn_id}" "{user_agent}" {trans_time:.4f} "{additional_info}" {pid} {policy_index}
#
# comma separated list of functions to call to setup custom log handlers.
# functions get passed: conf, name, log_to_console, log_route, fmt, logger,
# adapted_logger
# log_custom_handlers =
#
# If set, log_udp_host will override log_address
# log_udp_host =
# log_udp_port = 514
#
# You can enable StatsD logging here:
# log_statsd_host =
# log_statsd_port = 8125
# log_statsd_default_sample_rate = 1.0
# log_statsd_sample_rate_factor = 1.0
# log_statsd_metric_prefix =
#
# If you don't mind the extra disk space usage in overhead, you can turn this
# on to preallocate disk space with SQLite databases to decrease fragmentation.
# db_preallocation = off
#
# Enable this option to log all sqlite3 queries (requires python >=3.3)
# db_query_logging = off
#
# eventlet_debug = false
#
# You can set fallocate_reserve to the number of bytes or percentage of disk
# space you'd like fallocate to reserve, whether there is space for the given
# file size or not. Percentage will be used if the value ends with a '%'.
# fallocate_reserve = 1%
#
# You can set scheduling priority of processes. Niceness values range from -20
# (most favorable to the process) to 19 (least favorable to the process).
# nice_priority =
#
# You can set I/O scheduling class and priority of processes. I/O niceness
# class values are IOPRIO_CLASS_RT (realtime), IOPRIO_CLASS_BE (best-effort) and
# IOPRIO_CLASS_IDLE (idle). I/O niceness priority is a number which goes from
# 0 to 7. The higher the value, the lower the I/O priority of the process.
# Work only with ionice_class.
# ionice_class =
# ionice_priority =
[pipeline:main]
pipeline = healthcheck recon backend_ratelimit container-server
[app:container-server]
use = egg:swift#container
# You can override the default log routing for this app here:
# set log_name = container-server
# set log_facility = LOG_LOCAL0
# set log_level = INFO
# set log_requests = true
# set log_address = /dev/log
#
# node_timeout = 3
# conn_timeout = 0.5
# allow_versions = false
#
# You can disable REPLICATE handling (default is to allow it). When deploying
# a cluster with a separate replication network, you'll want multiple
# container-server processes running: one for client-driven traffic and another
# for replication traffic. The server handling client-driven traffic may set
# this to false. If there is only one container-server process, leave this as
# true.
# replication_server = true
#
# You can set scheduling priority of processes. Niceness values range from -20
# (most favorable to the process) to 19 (least favorable to the process).
# nice_priority =
#
# You can set I/O scheduling class and priority of processes. I/O niceness
# class values are IOPRIO_CLASS_RT (realtime), IOPRIO_CLASS_BE (best-effort) and
# IOPRIO_CLASS_IDLE (idle). I/O niceness priority is a number which goes from
# 0 to 7. The higher the value, the lower the I/O priority of the process.
# Work only with ionice_class.
# ionice_class =
# ionice_priority =
#
# You can set fallocate_reserve to the number of bytes or percentage
# of disk space you'd like kept free at all times. If the disk's free
# space falls below this value, then PUT, POST, and REPLICATE requests
# will be denied until the disk ha s more space available. Percentage
# will be used if the value ends with a '%'.
# fallocate_reserve = 1%
[filter:healthcheck]
use = egg:swift#healthcheck
# An optional filesystem path, which if present, will cause the healthcheck
# URL to return "503 Service Unavailable" with a body of "DISABLED BY FILE"
# disable_path =
[filter:recon]
use = egg:swift#recon
#recon_cache_path = /var/cache/swift
[filter:backend_ratelimit]
use = egg:swift#backend_ratelimit
# Set the maximum rate of requests per second per device per worker. Beyond
# this rate the server will return 529 responses and emit a 'backend.ratelimit'
# statsd metric without logging. The default value of zero causes no
# rate-limiting to be applied.
# requests_per_device_per_second = 0.0
#
# Set the number of seconds of unused rate-limiting allowance that can
# accumulate and be used to allow a subsequent burst of requests.
# requests_per_device_rate_buffer = 1.0
[container-replicator]
# You can override the default log routing for this app here (don't use set!):
# log_name = container-replicator
# log_facility = LOG_LOCAL0
# log_level = INFO
# log_address = /dev/log
#
# Maximum number of database rows that will be sync'd in a single HTTP
# replication request. Databases with less than or equal to this number of
# differing rows will always be sync'd using an HTTP replication request rather
# than using rsync.
# per_diff = 1000
#
# Maximum number of HTTP replication requests attempted on each replication
# pass for any one container. This caps how long the replicator will spend
# trying to sync a given database per pass so the other databases don't get
# starved.
# max_diffs = 100
#
# Number of replication workers to spawn.
# concurrency = 8
#
# Time in seconds to wait between replication passes
# interval = 30.0
# run_pause is deprecated, use interval instead
# run_pause = 30.0
#
# Process at most this many databases per second
# databases_per_second = 50
#
# node_timeout = 10
# conn_timeout = 0.5
#
# The replicator also performs reclamation
# reclaim_age = 604800
#
# Allow rsync to compress data which is transmitted to destination node
# during sync. However, this is applicable only when destination node is in
# a different region than the local one.
# rsync_compress = no
#
# Format of the rsync module where the replicator will send data. See
# etc/rsyncd.conf-sample for some usage examples.
# rsync_module = {replication_ip}::container
#
# recon_cache_path = /var/cache/swift
#
# You can set scheduling priority of processes. Niceness values range from -20
# (most favorable to the process) to 19 (least favorable to the process).
# nice_priority =
#
# You can set I/O scheduling class and priority of processes. I/O niceness
# class values are IOPRIO_CLASS_RT (realtime), IOPRIO_CLASS_BE (best-effort) and
# IOPRIO_CLASS_IDLE (idle). I/O niceness priority is a number which goes from
# 0 to 7. The higher the value, the lower the I/O priority of the process.
# Work only with ionice_class.
# ionice_class =
# ionice_priority =
#
# The handoffs_only and handoff_delete options are for special-case emergency
# situations such as full disks in the cluster. These options SHOULD NOT
# BE ENABLED except in emergencies. When handoffs_only mode is enabled
# the replicator will *only* replicate from handoff nodes to primary
# nodes and will not sync primary nodes with other primary nodes.
#
# This has two main effects: first, the replicator becomes much more
# effective at removing misplaced databases, thereby freeing up disk
# space at a much faster pace than normal. Second, the replicator does
# not sync data between primary nodes, so out-of-sync account and
# container listings will not resolve while handoffs_only is enabled.
#
# This mode is intended to allow operators to temporarily sacrifice
# consistency in order to gain faster rebalancing, such as during a
# capacity addition with nearly-full disks. It is not intended for
# long-term use.
#
# handoffs_only = no
#
# handoff_delete is the number of replicas which are ensured in swift.
# If the number less than the number of replicas is set, container-replicator
# could delete local handoffs even if all replicas are not ensured in the
# cluster. The replicator would remove local handoff container database after
# syncing when the number of successful responses is greater than or equal to
# this number. By default(auto), handoff partitions will be
# removed when it has successfully replicated to all the canonical nodes.
# handoff_delete = auto
[container-updater]
# You can override the default log routing for this app here (don't use set!):
# log_name = container-updater
# log_facility = LOG_LOCAL0
# log_level = INFO
# log_address = /dev/log
#
# interval = 300.0
# concurrency = 4
# node_timeout = 3
# conn_timeout = 0.5
#
# Send at most this many container updates per second
# containers_per_second = 50
#
# slowdown will sleep that amount between containers. Deprecated; use
# containers_per_second instead.
# slowdown = 0.01
#
# Seconds to suppress updating an account that has generated an error
# account_suppression_time = 60
#
# recon_cache_path = /var/cache/swift
#
# You can set scheduling priority of processes. Niceness values range from -20
# (most favorable to the process) to 19 (least favorable to the process).
# nice_priority =
#
# You can set I/O scheduling class and priority of processes. I/O niceness
# class values are IOPRIO_CLASS_RT (realtime), IOPRIO_CLASS_BE (best-effort) and
# IOPRIO_CLASS_IDLE (idle). I/O niceness priority is a number which goes from
# 0 to 7. The higher the value, the lower the I/O priority of the process.
# Work only with ionice_class.
# ionice_class =
# ionice_priority =
[container-auditor]
# You can override the default log routing for this app here (don't use set!):
# log_name = container-auditor
# log_facility = LOG_LOCAL0
# log_level = INFO
# log_address = /dev/log
#
# Will audit each container at most once per interval
# interval = 1800.0
#
# containers_per_second = 200
# recon_cache_path = /var/cache/swift
#
# You can set scheduling priority of processes. Niceness values range from -20
# (most favorable to the process) to 19 (least favorable to the process).
# nice_priority =
#
# You can set I/O scheduling class and priority of processes. I/O niceness
# class values are IOPRIO_CLASS_RT (realtime), IOPRIO_CLASS_BE (best-effort) and
# IOPRIO_CLASS_IDLE (idle). I/O niceness priority is a number which goes from
# 0 to 7. The higher the value, the lower the I/O priority of the process.
# Work only with ionice_class.
# ionice_class =
# ionice_priority =
[container-sync]
# You can override the default log routing for this app here (don't use set!):
# log_name = container-sync
# log_facility = LOG_LOCAL0
# log_level = INFO
# log_address = /dev/log
#
# If you need to use an HTTP Proxy, set it here; defaults to no proxy.
# You can also set this to a comma separated list of HTTP Proxies and they will
# be randomly used (simple load balancing).
# sync_proxy = http://10.1.1.1:8888,http://10.1.1.2:8888
#
# Will sync each container at most once per interval
# interval = 300.0
#
# Maximum amount of time to spend syncing each container per pass
# container_time = 60
#
# Maximum amount of time in seconds for the connection attempt
# conn_timeout = 5
# Server errors from requests will be retried by default
# request_tries = 3
#
# Internal client config file path
# internal_client_conf_path = /etc/swift/internal-client.conf
#
# You can set scheduling priority of processes. Niceness values range from -20
# (most favorable to the process) to 19 (least favorable to the process).
# nice_priority =
#
# You can set I/O scheduling class and priority of processes. I/O niceness
# class values are IOPRIO_CLASS_RT (realtime), IOPRIO_CLASS_BE (best-effort) and
# IOPRIO_CLASS_IDLE (idle). I/O niceness priority is a number which goes from
# 0 to 7. The higher the value, the lower the I/O priority of the process.
# Work only with ionice_class.
# ionice_class =
# ionice_priority =
# Note: Put it at the beginning of the pipeline to profile all middleware. But
# it is safer to put this after healthcheck.
[filter:xprofile]
use = egg:swift#xprofile
# This option enable you to switch profilers which should inherit from python
# standard profiler. Currently the supported value can be 'cProfile',
# 'eventlet.green.profile' etc.
# profile_module = eventlet.green.profile
#
# This prefix will be used to combine process ID and timestamp to name the
# profile data file. Make sure the executing user has permission to write
# into this path (missing path segments will be created, if necessary).
# If you enable profiling in more than one type of daemon, you must override
# it with an unique value like: /var/log/swift/profile/container.profile
# log_filename_prefix = /tmp/log/swift/profile/default.profile
#
# the profile data will be dumped to local disk based on above naming rule
# in this interval.
# dump_interval = 5.0
#
# Be careful, this option will enable profiler to dump data into the file with
# time stamp which means there will be lots of files piled up in the directory.
# dump_timestamp = false
#
# This is the path of the URL to access the mini web UI.
# path = /__profile__
#
# Clear the data when the wsgi server shutdown.
# flush_at_shutdown = false
#
# unwind the iterator of applications
# unwind = false
[container-sharder]
# You can override the default log routing for this app here (don't use set!):
# log_name = container-sharder
# log_facility = LOG_LOCAL0
# log_level = INFO
# log_address = /dev/log
#
# Container sharder specific settings
#
# If the auto_shard option is true then the sharder will automatically select
# containers to shard, scan for shard ranges, and select shards to shrink.
# The default is false.
# Warning: auto-sharding is still under development and should not be used in
# production; do not set this option to true in a production cluster.
# auto_shard = false
#
# When auto-sharding is enabled shard_container_threshold defines the object
# count at which a container with container-sharding enabled will start to
# shard. shard_container_threshold also indirectly determines the defaults for
# rows_per_shard, shrink_threshold and expansion_limit.
# shard_container_threshold = 1000000
#
# rows_per_shard determines the initial nominal size of shard containers. The
# default is shard_container_threshold // 2
# rows_per_shard = 500000
#
# Minimum size of the final shard range. If this is greater than one then the
# final shard range may be extended to more than rows_per_shard in order to
# avoid a further shard range with less than minimum_shard_size rows. The
# default value is rows_per_shard // 5.
# minimum_shard_size = 100000
#
# When auto-sharding is enabled shrink_threshold defines the object count
# below which a 'donor' shard container will be considered for shrinking into
# another 'acceptor' shard container. The default is determined by
# shard_shrink_point. If set, shrink_threshold will take precedence over
# shard_shrink_point.
# shrink_threshold =
#
# When auto-sharding is enabled shard_shrink_point defines the object count
# below which a 'donor' shard container will be considered for shrinking into
# another 'acceptor' shard container. shard_shrink_point is a percentage of
# shard_container_threshold e.g. the default value of 10 means 10% of the
# shard_container_threshold.
# Deprecated: shrink_threshold is recommended and if set will take precedence
# over shard_shrink_point.
# shard_shrink_point = 10
#
# When auto-sharding is enabled expansion_limit defines the maximum
# allowed size of an acceptor shard container after having a donor merged into
# it. The default is determined by shard_shrink_merge_point.
# If set, expansion_limit will take precedence over shard_shrink_merge_point.
# expansion_limit =
#
# When auto-sharding is enabled shard_shrink_merge_point defines the maximum
# allowed size of an acceptor shard container after having a donor merged into
# it. Shard_shrink_merge_point is a percentage of shard_container_threshold.
# e.g. the default value of 75 means that the projected sum of a donor object
# count and acceptor count must be less than 75% of shard_container_threshold
# for the donor to be allowed to merge into the acceptor.
#
# For example, if the shard_container_threshold is 1 million,
# shard_shrink_point is 10, and shard_shrink_merge_point is 75 then a shard will
# be considered for shrinking if it has less than or equal to 100 thousand
# objects but will only merge into an acceptor if the combined object count
# would be less than or equal to 750 thousand objects.
# Deprecated: expansion_limit is recommended and if set will take precedence
# over shard_shrink_merge_point.
# shard_shrink_merge_point = 75
#
# When auto-sharding is enabled shard_scanner_batch_size defines the maximum
# number of shard ranges that will be found each time the sharder daemon visits
# a sharding container. If necessary the sharder daemon will continue to search
# for more shard ranges each time it visits the container.
# shard_scanner_batch_size = 10
#
# cleave_batch_size defines the number of shard ranges that will be cleaved
# each time the sharder daemon visits a sharding container.
# cleave_batch_size = 2
#
# cleave_row_batch_size defines the size of batches of object rows read from a
# sharding container and merged to a shard container during cleaving.
# cleave_row_batch_size = 10000
#
# max_expanding defines the maximum number of shards that could be expanded in a
# single cycle of the sharder. Defaults to unlimited (-1).
# max_expanding = -1
#
# max_shrinking defines the maximum number of shards that should be shrunk into
# each expanding shard. Defaults to 1.
# NOTE: Using values greater than 1 may result in temporary gaps in object listings
# until all selected shards have shrunk.
# max_shrinking = 1
#
# Defines the number of successfully replicated shard dbs required when
# cleaving a previously uncleaved shard range before the sharder will progress
# to the next shard range. The value should be less than or equal to the
# container ring replica count. The default of 'auto' causes the container ring
# quorum value to be used. This option only applies to the container-sharder
# replication and does not affect the number of shard container replicas that
# will eventually be replicated by the container-replicator.
# shard_replication_quorum = auto
#
# Defines the number of successfully replicated shard dbs required when
# cleaving a shard range that has been previously cleaved on another node
# before the sharder will progress to the next shard range. The value should be
# less than or equal to the container ring replica count. The default of 'auto'
# causes the shard_replication_quorum value to be used. This option only
# applies to the container-sharder replication and does not affect the number
# of shard container replicas that will eventually be replicated by the
# container-replicator.
# existing_shard_replication_quorum = auto
#
# The sharder uses an internal client to create and make requests to
# containers. The absolute path to the client config file can be configured.
# internal_client_conf_path = /etc/swift/internal-client.conf
#
# The number of time the internal client will retry requests.
# request_tries = 3
#
# Each time the sharder dumps stats to the recon cache file it includes a list
# of containers that appear to need sharding but are not yet sharding. By
# default this list is limited to the top 5 containers, ordered by object
# count. The limit may be changed by setting recon_candidates_limit to an
# integer value. A negative value implies no limit.
# recon_candidates_limit = 5
#
# As the sharder visits each container that's currently sharding it dumps to
# recon their current progress. To be able to mark their progress as completed
# this in-progress check will need to monitor containers that have just
# completed sharding. The recon_sharded_timeout parameter says for how long a
# container whose just finished sharding should be checked by the in-progress
# check. This is to allow anything monitoring the sharding recon dump to have
# enough time to collate and see things complete. The time is capped at
# reclaim_age, so this parameter should be less than or equal to reclaim_age.
# The default is 12 hours (12 x 60 x 60)
# recon_sharded_timeout = 43200
#
# Maximum amount of time in seconds after sharding has been started on a shard
# container and before it's considered as timeout. After this amount of time,
# sharder will warn that a container DB has not completed sharding.
# The default is 48 hours (48 x 60 x 60)
# container_sharding_timeout = 172800
#
# Large databases tend to take a while to work with, but we want to make sure
# we write down our progress. Use a larger-than-normal broker timeout to make
# us less likely to bomb out on a LockTimeout.
# broker_timeout = 60
#
# Time in seconds to wait between emitting stats to logs
# stats_interval = 3600.0
#
# Time in seconds to wait between sharder cycles
# interval = 30.0
#
# Process at most this many databases per second
# databases_per_second = 50
#
# The container-sharder accepts the following configuration options as defined
# in the container-replicator section:
#
# per_diff = 1000
# max_diffs = 100
# concurrency = 8
# node_timeout = 10
# conn_timeout = 0.5
# reclaim_age = 604800
# rsync_compress = no
# rsync_module = {replication_ip}::container
# recon_cache_path = /var/cache/swift
#