ccfeeef59d
Backport the patches for this issue: https://bugzilla.redhat.com/show_bug.cgi?id=1819868 We encountered the following issue: when testing a large number of pods (> 230), we occasionally observed a number of problems related to the systemd process: systemd ran continually at 90-100% CPU usage; systemd memory usage started increasing rapidly (20GB/hour); systemctl commands would always time out (Failed to get properties: Connection timed out); sm services failed and couldn't recover: open-ldap, registry-token-server, docker-distribution, etcd; new pods couldn't start and got stuck in state ContainerCreating. These patches prevent excessive /proc/1/mountinfo reparsing. It has been verified that these patches greatly improve performance in this scenario. The 16 commits for this issue are listed in sequence (from [1] to [16]) at the link below: https://github.com/systemd-rhel/rhel-8/pull/154/commits [16](10)core: prevent excessive /proc/self/mountinfo parsing [15][Dropped-6]test: add ratelimiting test [14](9)sd-event: add ability to ratelimit event sources [13](8)sd-event: increase n_enabled_child_sources just once [12](7)sd-event: update state at the end in event_source_enable [11](6)sd-event: remove earliest_index/latest_index into common part of event source objects [10][Dropped-5]sd-event: follow coding style with naming return parameter [9] [Dropped-4]sd-event: ref event loop while in sd_event_prepare() ot sd_event_run() [8] (5)sd-event: refuse running default event loops in any other thread than the one they are default for [7] [Dropped-3]sd-event: let's suffix last_run/last_log with "_usec" [6] [Dropped-2]sd-event: fix delays assert brain-o (#17790) [5] (4)sd-event: split out code to add/remove timer event sources to earliest/latest prioq [4] (3)sd-event: split clock data allocation out of sd_event_add_time() [3] [Dropped-1]sd-event: mention that two debug logged events are ignored [2] (2)sd-event: split out enable and disable codepaths from sd_event_source_set_enabled() [1] (1)sd-event: split out helper 
functions for reshuffling prioqs I backported 10 of them (from (1) to (10)) to fix this issue and dropped the other 6 (from [Dropped-1] to [Dropped-6]) for the following reasons: [Dropped-1] Only changes the error log. [Dropped-2] Fixes a bug introduced by a commit which doesn't exist in this version. [Dropped-3] Only renames variables; there is no functional change. [Dropped-4] Merging it would require additional commits, and it does not appear to help with adding the rate-limiting ability. [Dropped-5] Only changes the coding style of a function which isn't really used by anyone. [Dropped-6] Only adds test cases. Closes-Bug: #1924686 Signed-off-by: Li Zhou <li.zhou@windriver.com> Change-Id: Ia4c8f162cb1a47b40d1b26cf4d604976b97e92d6
217 lines
8.0 KiB
Diff
217 lines
8.0 KiB
Diff
From ea762f1c0206c99d2ba4d3cba41cadf70311a3cc Mon Sep 17 00:00:00 2001
|
|
From: =?UTF-8?q?Michal=20Sekleta=CC=81r?= <msekleta@redhat.com>
|
|
Date: Fri, 23 Oct 2020 18:29:27 +0200
|
|
Subject: [PATCH 03/20] sd-event: split out helper functions for reshuffling
|
|
prioqs
|
|
|
|
We typically don't just reshuffle a single prioq at once, but always
|
|
two. Let's add two helper functions that do this, and reuse them
|
|
everywhere.
|
|
|
|
(Note that this drops one minor optimization:
|
|
sd_event_source_set_time_accuracy() previously only reshuffled the
|
|
"latest" prioq, since changing the accuracy has no effect on the
|
|
earliest time of an event source, just the latest time an event source
|
|
can run. This optimization is removed to simplify things, given that
|
|
it's not really worth the effort as prioq_reshuffle() on properly
|
|
ordered prioqs has practically zero cost O(1)).
|
|
|
|
(Slightly generalized, commented and split out of #17284 by Lennart)
|
|
|
|
(cherry picked from commit e1951c16a8fbe5b0b9ecc08f4f835a806059d28f)
|
|
|
|
Related: #1819868
|
|
|
|
[commit 4ce10f8e41a85a56ad9b805442eb1149ece7c82a from
|
|
https://github.com/systemd-rhel/rhel-8/]
|
|
|
|
Signed-off-by: Li Zhou <li.zhou@windriver.com>
|
|
---
|
|
src/libsystemd/sd-event/sd-event.c | 96 ++++++++++++------------------
|
|
1 file changed, 38 insertions(+), 58 deletions(-)
|
|
|
|
diff --git a/src/libsystemd/sd-event/sd-event.c b/src/libsystemd/sd-event/sd-event.c
|
|
index 26ef3ea..eb3182f 100644
|
|
--- a/src/libsystemd/sd-event/sd-event.c
|
|
+++ b/src/libsystemd/sd-event/sd-event.c
|
|
@@ -784,6 +784,33 @@ static void event_gc_signal_data(sd_event *e, const int64_t *priority, int sig)
|
|
event_unmask_signal_data(e, d, sig);
|
|
}
|
|
|
|
+static void event_source_pp_prioq_reshuffle(sd_event_source *s) {
|
|
+ assert(s);
|
|
+
|
|
+ /* Reshuffles the pending + prepare prioqs. Called whenever the dispatch order changes, i.e. when
|
|
+ * they are enabled/disabled or marked pending and such. */
|
|
+
|
|
+ if (s->pending)
|
|
+ prioq_reshuffle(s->event->pending, s, &s->pending_index);
|
|
+
|
|
+ if (s->prepare)
|
|
+ prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
|
|
+}
|
|
+
|
|
+static void event_source_time_prioq_reshuffle(sd_event_source *s) {
|
|
+ struct clock_data *d;
|
|
+
|
|
+ assert(s);
|
|
+ assert(EVENT_SOURCE_IS_TIME(s->type));
|
|
+
|
|
+ /* Called whenever the event source's timer ordering properties changed, i.e. time, accuracy,
|
|
+ * pending, enable state. Makes sure the two prioq's are ordered properly again. */
|
|
+ assert_se(d = event_get_clock_data(s->event, s->type));
|
|
+ prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
|
|
+ prioq_reshuffle(d->latest, s, &s->time.latest_index);
|
|
+ d->needs_rearm = true;
|
|
+}
|
|
+
|
|
static void source_disconnect(sd_event_source *s) {
|
|
sd_event *event;
|
|
|
|
@@ -905,16 +932,8 @@ static int source_set_pending(sd_event_source *s, bool b) {
|
|
} else
|
|
assert_se(prioq_remove(s->event->pending, s, &s->pending_index));
|
|
|
|
- if (EVENT_SOURCE_IS_TIME(s->type)) {
|
|
- struct clock_data *d;
|
|
-
|
|
- d = event_get_clock_data(s->event, s->type);
|
|
- assert(d);
|
|
-
|
|
- prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
|
|
- prioq_reshuffle(d->latest, s, &s->time.latest_index);
|
|
- d->needs_rearm = true;
|
|
- }
|
|
+ if (EVENT_SOURCE_IS_TIME(s->type))
|
|
+ event_source_time_prioq_reshuffle(s);
|
|
|
|
if (s->type == SOURCE_SIGNAL && !b) {
|
|
struct signal_data *d;
|
|
@@ -1570,11 +1589,7 @@ _public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority)
|
|
} else
|
|
s->priority = priority;
|
|
|
|
- if (s->pending)
|
|
- prioq_reshuffle(s->event->pending, s, &s->pending_index);
|
|
-
|
|
- if (s->prepare)
|
|
- prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
|
|
+ event_source_pp_prioq_reshuffle(s);
|
|
|
|
if (s->type == SOURCE_EXIT)
|
|
prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
|
|
@@ -1622,18 +1637,10 @@ _public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
|
|
case SOURCE_TIME_BOOTTIME:
|
|
case SOURCE_TIME_MONOTONIC:
|
|
case SOURCE_TIME_REALTIME_ALARM:
|
|
- case SOURCE_TIME_BOOTTIME_ALARM: {
|
|
- struct clock_data *d;
|
|
-
|
|
+ case SOURCE_TIME_BOOTTIME_ALARM:
|
|
s->enabled = m;
|
|
- d = event_get_clock_data(s->event, s->type);
|
|
- assert(d);
|
|
-
|
|
- prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
|
|
- prioq_reshuffle(d->latest, s, &s->time.latest_index);
|
|
- d->needs_rearm = true;
|
|
+ event_source_time_prioq_reshuffle(s);
|
|
break;
|
|
- }
|
|
|
|
case SOURCE_SIGNAL:
|
|
s->enabled = m;
|
|
@@ -1679,18 +1686,10 @@ _public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
|
|
case SOURCE_TIME_BOOTTIME:
|
|
case SOURCE_TIME_MONOTONIC:
|
|
case SOURCE_TIME_REALTIME_ALARM:
|
|
- case SOURCE_TIME_BOOTTIME_ALARM: {
|
|
- struct clock_data *d;
|
|
-
|
|
+ case SOURCE_TIME_BOOTTIME_ALARM:
|
|
s->enabled = m;
|
|
- d = event_get_clock_data(s->event, s->type);
|
|
- assert(d);
|
|
-
|
|
- prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
|
|
- prioq_reshuffle(d->latest, s, &s->time.latest_index);
|
|
- d->needs_rearm = true;
|
|
+ event_source_time_prioq_reshuffle(s);
|
|
break;
|
|
- }
|
|
|
|
case SOURCE_SIGNAL:
|
|
|
|
@@ -1737,11 +1736,7 @@ _public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
|
|
}
|
|
}
|
|
|
|
- if (s->pending)
|
|
- prioq_reshuffle(s->event->pending, s, &s->pending_index);
|
|
-
|
|
- if (s->prepare)
|
|
- prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
|
|
+ event_source_pp_prioq_reshuffle(s);
|
|
|
|
return 0;
|
|
}
|
|
@@ -1757,7 +1752,6 @@ _public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
|
|
}
|
|
|
|
_public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
|
|
- struct clock_data *d;
|
|
|
|
assert_return(s, -EINVAL);
|
|
assert_return(usec != (uint64_t) -1, -EINVAL);
|
|
@@ -1769,13 +1763,7 @@ _public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
|
|
|
|
source_set_pending(s, false);
|
|
|
|
- d = event_get_clock_data(s->event, s->type);
|
|
- assert(d);
|
|
-
|
|
- prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
|
|
- prioq_reshuffle(d->latest, s, &s->time.latest_index);
|
|
- d->needs_rearm = true;
|
|
-
|
|
+ event_source_time_prioq_reshuffle(s);
|
|
return 0;
|
|
}
|
|
|
|
@@ -1790,7 +1778,6 @@ _public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *use
|
|
}
|
|
|
|
_public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
|
|
- struct clock_data *d;
|
|
|
|
assert_return(s, -EINVAL);
|
|
assert_return(usec != (uint64_t) -1, -EINVAL);
|
|
@@ -1805,12 +1792,7 @@ _public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec
|
|
|
|
source_set_pending(s, false);
|
|
|
|
- d = event_get_clock_data(s->event, s->type);
|
|
- assert(d);
|
|
-
|
|
- prioq_reshuffle(d->latest, s, &s->time.latest_index);
|
|
- d->needs_rearm = true;
|
|
-
|
|
+ event_source_time_prioq_reshuffle(s);
|
|
return 0;
|
|
}
|
|
|
|
@@ -2088,9 +2070,7 @@ static int process_timer(
|
|
if (r < 0)
|
|
return r;
|
|
|
|
- prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
|
|
- prioq_reshuffle(d->latest, s, &s->time.latest_index);
|
|
- d->needs_rearm = true;
|
|
+ event_source_time_prioq_reshuffle(s);
|
|
}
|
|
|
|
return 0;
|
|
--
|
|
2.17.1
|
|
|