378 lines
13 KiB
Diff
378 lines
13 KiB
Diff
From 95c0ec5cb26efbe2c5dbf45df21518d8d1776be0 Mon Sep 17 00:00:00 2001
|
|
From: Don Penney <don.penney@windriver.com>
|
|
Date: Wed, 4 Jan 2017 12:15:53 -0500
|
|
Subject: [PATCH] TIS Patches
|
|
|
|
This patch rolls up the previous TIS patches, which include:
|
|
1. CGTS-4787 Set DRBD service ensure parameter
|
|
|
|
2. Updates to fix DRBD resync-rate and engineered parameters:
|
|
|
|
There are several DRBD performance related parameters that must be set to
|
|
get reasonable resync performance, otherwise default resync throughput
|
|
is limited to 40MB/s. Note that the user community has observed this limit
|
|
when they use default settings, or up-rev DRBD from 8.3, etc. E.g., they
|
|
realize they hit this limit despite having 10G link or better and faster
|
|
disks.
|
|
|
|
The following parameters were added to puppet-drbd module for resource
|
|
file generation, in addition to: c-plan-ahead, c-fill-target, c-min-rate,
|
|
c-max-rate, currently engineered for dynamic resync-rates.
|
|
|
|
disk section:
|
|
- 'resync-rate' (aka 'rate') was missed in the CentOS port from Kilo
|
|
- 'al-extents' set to 3389, a prime number. Increasing this improves
|
|
random write throughput. Could set a bit higher, but would need a study.
|
|
|
|
net section:
|
|
- 'max-buffers' engineered to scale with supported MBps, setting too low
|
|
(e.g., default setting) is a bottleneck on 10G link. Set this to
|
|
maximum settable value of 20000. Note this parameter may be settable to
|
|
larger values in more current DRBD rev. If we need to support faster
|
|
disks, we will likely need to increase this proportionately.
|
|
- 'max-epoch-size' also set to 20000. DRBD tuning recommendation page
|
|
sets this the same as max-buffers.
|
|
- 'unplug-watermark' set to 16 based on DRBD tuning recommendations page
|
|
- 'sndbuf-size' set to 0 to auto-tune; historically default was too small
|
|
- 'rcvbuf-size' set to 0 to auto-tune
|
|
---
|
|
manifests/init.pp | 11 ++--
|
|
manifests/resource.pp | 93 +++++++++++++++++++++++++---
|
|
manifests/resource/up.pp | 2 +-
|
|
manifests/service.pp | 2 +-
|
|
templates/header.res.erb | 53 ++++++++++++++--
|
|
templates/primary-resource.res.erb | 2 +-
|
|
templates/primary-stacked-resource.res.erb | 2 +-
|
|
templates/resource.res.erb | 2 +-
|
|
templates/secondary-resource.res.erb | 2 +-
|
|
templates/secondary-stacked-resource.res.erb | 2 +-
|
|
10 files changed, 148 insertions(+), 23 deletions(-)
|
|
|
|
diff --git a/manifests/init.pp b/manifests/init.pp
|
|
index 09f7d48..76ce9c9 100644
|
|
--- a/manifests/init.pp
|
|
+++ b/manifests/init.pp
|
|
@@ -6,7 +6,8 @@
|
|
#
|
|
class drbd(
|
|
$service_enable = true,
|
|
- $package_name = 'drbd8-utils',
|
|
+ $service_ensure = 'running',
|
|
+ $package_name = 'drbd-utils',
|
|
) {
|
|
include ::drbd::service
|
|
|
|
@@ -22,7 +23,7 @@ class drbd(
|
|
}
|
|
|
|
File {
|
|
- mode => '0644',
|
|
+ mode => '0640',
|
|
owner => 'root',
|
|
group => 'root',
|
|
require => Package['drbd'],
|
|
@@ -45,8 +46,10 @@ class drbd(
|
|
# only allow files managed by puppet in this directory.
|
|
file { '/etc/drbd.d':
|
|
ensure => directory,
|
|
- mode => '0644',
|
|
- purge => true,
|
|
+ mode => '0640',
|
|
+ # Set purge to false so that it does not clear the dir
|
|
+ # when the 2nd drbd resource is added.
|
|
+ purge => false,
|
|
recurse => true,
|
|
force => true,
|
|
require => Package['drbd'],
|
|
diff --git a/manifests/resource.pp b/manifests/resource.pp
|
|
index af2ff77..10edc1a 100644
|
|
--- a/manifests/resource.pp
|
|
+++ b/manifests/resource.pp
|
|
@@ -22,6 +22,10 @@
|
|
# [ha_primary] If the resource is being applied on the primary host.
|
|
# [initial_setup] If this run is associated with the initial setup. Allows a user
|
|
# to only perform dangerous setup on the initial run.
|
|
+# [link_util] replication link network utilization percent
|
|
+# [link_speed] replication link network speed mbps
|
|
+# [num_parallel] number of parallel drbd filesystems to sync
|
|
+# [rtt_ms] round-trip-time milliseconds (i.e., ping between replication nodes)
|
|
define drbd::resource (
|
|
$host1 = undef,
|
|
$host2 = undef,
|
|
@@ -39,7 +43,10 @@ define drbd::resource (
|
|
$group = 'root',
|
|
$protocol = 'C',
|
|
$verify_alg = 'crc32c',
|
|
- $rate = false,
|
|
+ $link_util = false,
|
|
+ $link_speed = false,
|
|
+ $num_parallel = false,
|
|
+ $rtt_ms = false,
|
|
$net_parameters = false,
|
|
$manage = true,
|
|
$ha_primary = false,
|
|
@@ -47,6 +54,7 @@ define drbd::resource (
|
|
$fs_type = 'ext4',
|
|
$mkfs_opts = '',
|
|
$disk = undef,
|
|
+ $handlers = false,
|
|
) {
|
|
include ::drbd
|
|
|
|
@@ -67,6 +75,75 @@ define drbd::resource (
|
|
group => $group,
|
|
}
|
|
|
|
+ if $link_util and $link_speed and $num_parallel and $rtt_ms {
|
|
+ # Engineer drbd variable sync rate parameters based on the following:
|
|
+ # https://blogs.linbit.com/p/128/drbd-sync-rate-controller/
|
|
+ # https://blogs.linbit.com/p/443/drbd-sync-rate-controller-2/
|
|
+ # Methodology adapted to account for replication link speed and parallelism.
|
|
+
|
|
+ # Since there is no aggregate bandwidth control, prorate the drbd
|
|
+ # replication bandwidth based on parallelism.
|
|
+ # Based on experimentation, it seems generally better to set num_parallel
|
|
+ # to 1 and let DRBD auto-regulate its throughput. The end result is that
|
|
+ # multiple competing filesystems (i.e., on same disk device) already have
|
|
+ # their sync throughput reduced.
|
|
+ $mbps = $link_speed / $num_parallel
|
|
+
|
|
+ # bandwidth delay product
|
|
+ $bdp_k = $mbps * $rtt_ms
|
|
+
|
|
+ # engineer initial sync rate as percent of link bandwidth
|
|
+ $rate_M = floor($link_util * $mbps / 8 / 100)
|
|
+ $rate = "${rate_M}M"
|
|
+
|
|
+ # engineer c_plan_ahead to default value (tenths)
|
|
+ # Documentation indicates this value OK even for 200 ms RTT.
|
|
+ $c_plan_ahead = 20
|
|
+
|
|
+ # engineer c_fill_target as 1*BDP (tune within 1x to 3x BDP;
|
|
+ # choose minimum value that saturates bandwidth)
|
|
+ $fill_target_k = floor(1 * $bdp_k)
|
|
+ $c_fill_target = "${fill_target_k}k"
|
|
+
|
|
+ # engineer c_min_rate -- experimentally determined so DRBD is not
|
|
+ # throttled to a crawl even when there is minimal application IO.
|
|
+ # DRBD default is way too small.
|
|
+ $min_rate_M = 15 + floor($link_util * $mbps / 8 / 100 / 25)
|
|
+ $c_min_rate = "${min_rate_M}M"
|
|
+
|
|
+ # engineer c_max_rate as percent of link bandwidth
|
|
+ $max_rate_M = floor($link_util * $mbps / 8 / 100)
|
|
+ $c_max_rate = "${max_rate_M}M"
|
|
+
|
|
+ # various tuning settings to enable larger link bandwidth (eg, 10G)
|
|
+ # max_buffers should scale with MBps; set to maximum settable
|
|
+ $max_buffers = 20000
|
|
+ $max_epoch_size = 20000
|
|
+ $unplug_watermark = 16
|
|
+ # sndbuf_size and rcvbuf_size should scale with mbps; set 0 to auto-tune
|
|
+ $sndbuf_size = 0
|
|
+ $rcvbuf_size = 0
|
|
+ # increase al_extents to improve random write throughput; set to prime number
|
|
+ $al_extents = 3389
|
|
+ } else {
|
|
+ # disable variable sync rate
|
|
+ $c_plan_ahead = 0
|
|
+ $c_fill_target = false
|
|
+ $c_min_rate = false
|
|
+ $c_max_rate = false
|
|
+
|
|
+ # engineer fixed sync rate at 40 percent of 1G
|
|
+ $rate_M = floor(40 * 1000 / 8 / 100)
|
|
+ $rate = "${rate_M}M"
|
|
+
|
|
+ $max_buffers = false
|
|
+ $max_epoch_size = false
|
|
+ $unplug_watermark = false
|
|
+ $sndbuf_size = false
|
|
+ $rcvbuf_size = false
|
|
+ $al_extents = false
|
|
+ }
|
|
+
|
|
concat { "/etc/drbd.d/${name}.res":
|
|
mode => '0600',
|
|
require => [
|
|
@@ -94,13 +171,13 @@ define drbd::resource (
|
|
}
|
|
# Export our fragment for the clustered node
|
|
if $ha_primary and $cluster {
|
|
- @@concat::fragment { "${name} ${cluster} primary resource":
|
|
+ concat::fragment { "${name} ${cluster} primary resource":
|
|
target => "/etc/drbd.d/${name}.res",
|
|
content => template('drbd/resource.res.erb'),
|
|
order => '10',
|
|
}
|
|
} elsif $cluster {
|
|
- @@concat::fragment { "${name} ${cluster} secondary resource":
|
|
+ concat::fragment { "${name} ${cluster} secondary resource":
|
|
target => "/etc/drbd.d/${name}.res",
|
|
content => template('drbd/resource.res.erb'),
|
|
order => '20',
|
|
@@ -137,11 +214,11 @@ define drbd::resource (
|
|
order => '99',
|
|
}
|
|
|
|
- if $cluster {
|
|
- # Import cluster nodes
|
|
- Concat::Fragment <<| title == "${name} ${cluster} primary resource" |>>
|
|
- Concat::Fragment <<| title == "${name} ${cluster} secondary resource" |>>
|
|
- }
|
|
+# if $cluster {
|
|
+# # Import cluster nodes
|
|
+# Concat::Fragment <<| title == "${name} ${cluster} primary resource" |>>
|
|
+# Concat::Fragment <<| title == "${name} ${cluster} secondary resource" |>>
|
|
+# }
|
|
|
|
# Due to a bug in puppet, defined() conditionals must be in a defined
|
|
# resource to be evaluated *after* the collector instead of before.
|
|
diff --git a/manifests/resource/up.pp b/manifests/resource/up.pp
|
|
index 7668792..b626f55 100644
|
|
--- a/manifests/resource/up.pp
|
|
+++ b/manifests/resource/up.pp
|
|
@@ -70,7 +70,7 @@ define drbd::resource::up (
|
|
# ensure that the device is mounted
|
|
mount { $mountpoint:
|
|
ensure => mounted,
|
|
- atboot => false,
|
|
+ atboot => yes,
|
|
device => $device,
|
|
fstype => 'auto',
|
|
options => 'defaults,noauto',
|
|
diff --git a/manifests/service.pp b/manifests/service.pp
|
|
index de56b34..f9b217a 100644
|
|
--- a/manifests/service.pp
|
|
+++ b/manifests/service.pp
|
|
@@ -1,6 +1,6 @@
|
|
class drbd::service {
|
|
@service { 'drbd':
|
|
- ensure => running,
|
|
+ ensure => $drbd::service_ensure,
|
|
enable => $drbd::service_enable,
|
|
require => Package['drbd'],
|
|
restart => 'service drbd reload',
|
|
diff --git a/templates/header.res.erb b/templates/header.res.erb
|
|
index 2d785c4..a3256a3 100644
|
|
--- a/templates/header.res.erb
|
|
+++ b/templates/header.res.erb
|
|
@@ -5,7 +5,32 @@ resource <%= @name %> {
|
|
disk <%= @disk %>;
|
|
meta-disk internal;
|
|
|
|
+ disk {
|
|
+<% if @rate -%>
|
|
+ resync-rate <%= @rate %>;
|
|
+<% end -%>
|
|
+<% if @c_plan_ahead -%>
|
|
+ c-plan-ahead <%= @c_plan_ahead %>;
|
|
+<% end -%>
|
|
+<% if @c_fill_target -%>
|
|
+ c-fill-target <%= @c_fill_target %>;
|
|
+<% end -%>
|
|
+<% if @c_min_rate -%>
|
|
+ c-min-rate <%= @c_min_rate %>;
|
|
+<% end -%>
|
|
+<% if @c_max_rate -%>
|
|
+ c-max-rate <%= @c_max_rate %>;
|
|
+<% end -%>
|
|
+<% if @al_extents -%>
|
|
+ al-extents <%= @al_extents %>;
|
|
+<% end -%>
|
|
+ }
|
|
+
|
|
net {
|
|
+ after-sb-0pri discard-zero-changes;
|
|
+ after-sb-1pri discard-secondary;
|
|
+ after-sb-2pri disconnect;
|
|
+
|
|
cram-hmac-alg sha1;
|
|
<% if @secret -%>
|
|
shared-secret "<%= @secret %>";
|
|
@@ -16,12 +41,32 @@ resource <%= @name %> {
|
|
<%= k %> <%= v %>;
|
|
<% end -%>
|
|
<% end -%>
|
|
- }
|
|
|
|
- syncer {
|
|
+<% if @max_buffers -%>
|
|
+ max-buffers <%= @max_buffers %>;
|
|
+<% end -%>
|
|
+<% if @max_epoch_size -%>
|
|
+ max-epoch-size <%= @max_epoch_size %>;
|
|
+<% end -%>
|
|
+<% if @unplug_watermark -%>
|
|
+ unplug-watermark <%= @unplug_watermark %>;
|
|
+<% end -%>
|
|
+<% if @sndbuf_size -%>
|
|
+ sndbuf-size <%= @sndbuf_size %>;
|
|
+<% end -%>
|
|
+<% if @rcvbuf_size -%>
|
|
+ rcvbuf-size <%= @rcvbuf_size %>;
|
|
+<% end -%>
|
|
+<% if @verify_alg -%>
|
|
verify-alg <%= @verify_alg %>;
|
|
-<% if @rate -%>
|
|
- rate <%= @rate %>;
|
|
<% end -%>
|
|
}
|
|
|
|
+<% if @handlers -%>
|
|
+ handlers {
|
|
+<% @handlers.sort_by {|k, v| k}.each do |k, v| -%>
|
|
+ <%= k %> "<%= v %>";
|
|
+<% end -%>
|
|
+ }
|
|
+<% end -%>
|
|
+
|
|
diff --git a/templates/primary-resource.res.erb b/templates/primary-resource.res.erb
|
|
index f8af77e..6032fd2 100644
|
|
--- a/templates/primary-resource.res.erb
|
|
+++ b/templates/primary-resource.res.erb
|
|
@@ -1,3 +1,3 @@
|
|
on <%= @host1 %> {
|
|
- address <%= @ip1 %>:<%= @port %>;
|
|
+ address <%= IPAddr.new(@ip1).ipv6?() ? "ipv6 ["+@ip1+"]:"+@port : "ipv4 "+@ip1+":"+@port %>;
|
|
}
|
|
diff --git a/templates/primary-stacked-resource.res.erb b/templates/primary-stacked-resource.res.erb
|
|
index 7eb4dad..a22d8b3 100644
|
|
--- a/templates/primary-stacked-resource.res.erb
|
|
+++ b/templates/primary-stacked-resource.res.erb
|
|
@@ -1,3 +1,3 @@
|
|
stacked-on-top-of <%= @res1 %> {
|
|
- address <%= @ip1 %>:<%= @port %>;
|
|
+ address <%= IPAddr.new(@ip1).ipv6? ? "ipv6 [#{@ip1}]:#{@port}" : "ipv4 #{@ip1}:#{@port}" %>;
|
|
}
|
|
diff --git a/templates/resource.res.erb b/templates/resource.res.erb
|
|
index 047877e..9dd4c4d 100644
|
|
--- a/templates/resource.res.erb
|
|
+++ b/templates/resource.res.erb
|
|
@@ -1,3 +1,3 @@
|
|
on <%= @hostname %> {
|
|
- address <%= @ipaddress %>:<%= @port %>;
|
|
+ address <%= IPAddr.new(@ipaddress).ipv6? ? "ipv6 [#{@ipaddress}]:#{@port}" : "ipv4 #{@ipaddress}:#{@port}" %>;
|
|
}
|
|
diff --git a/templates/secondary-resource.res.erb b/templates/secondary-resource.res.erb
|
|
index 678640a..cf2fd96 100644
|
|
--- a/templates/secondary-resource.res.erb
|
|
+++ b/templates/secondary-resource.res.erb
|
|
@@ -1,3 +1,3 @@
|
|
on <%= @host2 %> {
|
|
- address <%= @ip2 %>:<%= @port %>;
|
|
+ address <%= IPAddr.new(@ip2).ipv6?() ? "ipv6 ["+@ip2+"]:"+@port : "ipv4 "+@ip2+":"+@port %>;
|
|
}
|
|
diff --git a/templates/secondary-stacked-resource.res.erb b/templates/secondary-stacked-resource.res.erb
|
|
index 409a705..87d28f5 100644
|
|
--- a/templates/secondary-stacked-resource.res.erb
|
|
+++ b/templates/secondary-stacked-resource.res.erb
|
|
@@ -1,3 +1,3 @@
|
|
stacked-on-top-of <%= @res2 %> {
|
|
- address <%= @ip2 %>:<%= @port %>;
|
|
+ address <%= IPAddr.new(@ip2).ipv6? ? "ipv6 [#{@ip2}]:#{@port}" : "ipv4 #{@ip2}:#{@port}" %>;
|
|
}
|
|
--
|
|
1.8.3.1
|
|
|