integ/kernel-rt/centos/patches/timer-Minimize-nohz-off-overhead.patch
Jim Somerville 50a9ff6df4 Kernel Upgrades for Meltdown and Spectre
The kernel is moved ahead to version 3.10.0-693.21.1.el7

To summarize:

CVE-2017-5753 [bounds check bypass] aka 'Spectre Variant 1'
This is fixed by load fences and is "baked in" and cannot
be turned off.

CVE-2017-5715 [branch target injection] aka 'Spectre Variant 2'
This is fixed by a combination of retpolines and IBPB, or
IBRS+IBPB if on skylake.  This requires a microcode change in
the processors.  This feature, if on, has a significant performance
impact.  It is assumed on unless turned off via the
"nospectre_v2" bootarg.

CVE-2017-5754 [rogue data cache load] aka 'Meltdown' aka 'Variant 3'
This is fixed by page table isolation using the Kaiser patches.
This feature is assumed on unless turned off via the
"nopti" bootarg.

As of the commit date, we have changed the installer kickstarts
to issue both "nopti nospectre_v2" bootargs to minimize realtime
impacts by default.  The customer will be able to optionally
sacrifice performance for extra security at datafill time.

Change-Id: Id7c99923f2ee2ee91f77c7bd9940e684eff8b476
Signed-off-by: Jim Somerville <Jim.Somerville@windriver.com>
2018-06-22 12:53:11 -04:00

169 lines
6.0 KiB
Diff

From 6d8b9f4ef4b7e1e9edbce102317a195d06f96441 Mon Sep 17 00:00:00 2001
Message-Id: <6d8b9f4ef4b7e1e9edbce102317a195d06f96441.1522795098.git.Jim.Somerville@windriver.com>
In-Reply-To: <21c11de06542297206c798b405b54a3ec9052aa4.1522795097.git.Jim.Somerville@windriver.com>
References: <21c11de06542297206c798b405b54a3ec9052aa4.1522795097.git.Jim.Somerville@windriver.com>
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 26 May 2015 22:50:35 +0000
Subject: [PATCH 29/33] timer: Minimize nohz off overhead
If nohz is disabled on the kernel command line the [hr]timer code
still calls wake_up_nohz_cpu() and tick_nohz_full_cpu(), a pretty
pointless exercise. Cache nohz_active in [hr]timer per cpu bases and
avoid the overhead.
Before:
48.10% hog [.] main
15.25% [kernel] [k] _raw_spin_lock_irqsave
9.76% [kernel] [k] _raw_spin_unlock_irqrestore
6.50% [kernel] [k] mod_timer
6.44% [kernel] [k] lock_timer_base.isra.38
3.87% [kernel] [k] detach_if_pending
3.80% [kernel] [k] del_timer
2.67% [kernel] [k] internal_add_timer
1.33% [kernel] [k] __internal_add_timer
0.73% [kernel] [k] timerfn
0.54% [kernel] [k] wake_up_nohz_cpu
After:
48.73% hog [.] main
15.36% [kernel] [k] _raw_spin_lock_irqsave
9.77% [kernel] [k] _raw_spin_unlock_irqrestore
6.61% [kernel] [k] lock_timer_base.isra.38
6.42% [kernel] [k] mod_timer
3.90% [kernel] [k] detach_if_pending
3.76% [kernel] [k] del_timer
2.41% [kernel] [k] internal_add_timer
1.39% [kernel] [k] __internal_add_timer
0.76% [kernel] [k] timerfn
We probably should have a cached value for nohz full in the per cpu
bases as well to avoid the cpumask check. The base cache line is hot
already, the cpumask not necessarily.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Paul McKenney <paulmck@linux.vnet.ibm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Viresh Kumar <viresh.kumar@linaro.org>
Cc: John Stultz <john.stultz@linaro.org>
Cc: Joonwoo Park <joonwoop@codeaurora.org>
Cc: Wenbo Wang <wenbo.wang@memblaze.com>
Link: http://lkml.kernel.org/r/20150526224512.207378134@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Alex Kozyrev <alex.kozyrev@windriver.com>
Signed-off-by: Jim Somerville <Jim.Somerville@windriver.com>
---
include/linux/hrtimer.h | 2 ++
kernel/time/tick-internal.h | 4 ++--
kernel/time/tick-sched.c | 2 +-
kernel/timer.c | 14 +++++++++++---
4 files changed, 16 insertions(+), 6 deletions(-)
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 00d4c9b..6183bf8 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -176,6 +176,7 @@ enum hrtimer_base_type {
* Note that in RHEL7 clock_was_set is upstream's
* clock_was_set_seq (KABI).
* @migration_enabled: The migration of hrtimers to other cpus is enabled
+ * @nohz_active: The nohz functionality is enabled
* @expires_next: absolute time of the next event which was scheduled
* via clock_set_next_event()
* @hres_active: State of high resolution mode
@@ -191,6 +192,7 @@ struct hrtimer_cpu_base {
unsigned int active_bases;
unsigned int clock_was_set; /* clock_was_set_seq */
bool migration_enabled;
+ bool nohz_active;
#ifdef CONFIG_HIGH_RES_TIMERS
ktime_t expires_next;
int hres_active;
diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h
index 3ebdda4..13468bd 100644
--- a/kernel/time/tick-internal.h
+++ b/kernel/time/tick-internal.h
@@ -173,9 +173,9 @@ extern unsigned long tick_nohz_active;
#endif
#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
-extern void timers_update_migration(void);
+extern void timers_update_migration(bool update_nohz);
#else
-static inline void timers_update_migration(void) { }
+static inline void timers_update_migration(bool update_nohz) { }
#endif
DECLARE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases);
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 6c92920..3ccc18c 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -980,7 +980,7 @@ static inline void tick_nohz_activate(struct tick_sched *ts, int mode)
ts->nohz_mode = mode;
/* One update is enough */
if (!test_and_set_bit(0, &tick_nohz_active))
- timers_update_migration();
+ timers_update_migration(true);
}
/**
diff --git a/kernel/timer.c b/kernel/timer.c
index 4fcb630..08c96e1 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -88,6 +88,7 @@ struct tvec_base {
unsigned long active_timers;
int cpu;
bool migration_enabled;
+ bool nohz_active;
struct tvec_root tv1;
struct tvec tv2;
struct tvec tv3;
@@ -103,7 +104,7 @@ static DEFINE_PER_CPU(struct tvec_base *, tvec_bases) = &boot_tvec_bases;
#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
unsigned int sysctl_timer_migration = 1;
-void timers_update_migration(void)
+void timers_update_migration(bool update_nohz)
{
bool on = sysctl_timer_migration && tick_nohz_active;
unsigned int cpu;
@@ -119,6 +120,10 @@ void timers_update_migration(void)
tvec_base->migration_enabled = on;
hrtimer_base = &per_cpu(hrtimer_bases, cpu);
hrtimer_base->migration_enabled = on;
+ if (!update_nohz)
+ continue;
+ tvec_base->nohz_active = true;
+ hrtimer_base->nohz_active = true;
}
}
@@ -132,7 +137,7 @@ int timer_migration_handler(struct ctl_table *table, int write,
mutex_lock(&mutex);
ret = proc_dointvec(table, write, buffer, lenp, ppos);
if (!ret && write)
- timers_update_migration();
+ timers_update_migration(false);
mutex_unlock(&mutex);
return ret;
}
@@ -482,8 +487,11 @@ static void internal_add_timer(struct tvec_base *base, struct timer_list *timer)
* require special care against races with idle_cpu(), lets deal
* with that later.
*/
- if (!tbase_get_deferrable(base) || tick_nohz_full_cpu(base->cpu))
+ if (base->nohz_active) {
+ if (!tbase_get_deferrable(base) ||
+ tick_nohz_full_cpu(base->cpu))
wake_up_nohz_cpu(base->cpu);
+ }
}
#ifdef CONFIG_TIMER_STATS
--
1.8.3.1