integ/base/systemd/centos/patches/903-sd-event-split-out-helper-functions-for-reshuffling-.patch
Li Zhou ccfeeef59d systemd: Prevent excessive /proc/1/mountinfo reparsing
Backport the patches for this issue:
https://bugzilla.redhat.com/show_bug.cgi?id=1819868

We met such an issue:
When testing a large number of pods (> 230), occasionally observed a
number of issues related to systemd process:
    systemd ran continually 90-100% cpu usage
    systemd memory usage started increasing rapidly (20GB/hour)
    systemctl commands would always timeout (Failed to get properties:
        Connection timed out)
    sm services failed and can't recover: open-ldap,
        registry-token-server, docker-distribution, etcd
    new pods can't start, and got stuck in state ContainerCreating

Those patches work to prevent excessive /proc/1/mountinfo reparsing.
It has been verified that those patches can improve this performance
greatly.

16 commits are listed in sequence (from [1] to [16]) at below link
for the issue:
https://github.com/systemd-rhel/rhel-8/pull/154/commits

[16](10)core: prevent excessive /proc/self/mountinfo parsing
[15][Dropped-6]test: add ratelimiting test
[14](9)sd-event: add ability to ratelimit event sources
[13](8)sd-event: increase n_enabled_child_sources just once
[12](7)sd-event: update state at the end in event_source_enable
[11](6)sd-event: remove earliest_index/latest_index into common part of
event source objects
[10][Dropped-5]sd-event: follow coding style with naming return
parameter
[9] [Dropped-4]sd-event: ref event loop while in sd_event_prepare() ot
sd_event_run()
[8] (5)sd-event: refuse running default event loops in any other thread
than the one they are default for
[7] [Dropped-3]sd-event: let's suffix last_run/last_log with "_usec"
[6] [Dropped-2]sd-event: fix delays assert brain-o (#17790)
[5] (4)sd-event: split out code to add/remove timer event sources to
earliest/latest prioq
[4] (3)sd-event: split clock data allocation out of sd_event_add_time()
[3] [Dropped-1]sd-event: mention that two debug logged events are
ignored
[2] (2)sd-event: split out enable and disable codepaths from
sd_event_source_set_enabled()
[1] (1)sd-event: split out helper functions for reshuffling prioqs

I ported 10 of them back (from (1) to (10)) to fix this issue
and dropped the other 6 (from [Dropped-1] to [Dropped-6]) for those
reasons:
[Dropped-1]Only changes error log.
[Dropped-2]Fixes a bug introduced in a commit which doesn't exist in
this version.
[Dropped-3]Only changes vars' names and there is no functional change.
[Dropped-4]More commits are needed for merging it, while I don't see
any help on adding the rate-limiting ability.
[Dropped-5]Change coding style for a function which isn't really used
by anyone.
[Dropped-6]Add test cases.

Closes-Bug: #1924686
Signed-off-by: Li Zhou <li.zhou@windriver.com>
Change-Id: Ia4c8f162cb1a47b40d1b26cf4d604976b97e92d6
2021-04-22 22:09:33 -04:00

217 lines
8.0 KiB
Diff

From ea762f1c0206c99d2ba4d3cba41cadf70311a3cc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Michal=20Sekleta=CC=81r?= <msekleta@redhat.com>
Date: Fri, 23 Oct 2020 18:29:27 +0200
Subject: [PATCH 03/20] sd-event: split out helper functions for reshuffling
prioqs
We typically don't just reshuffle a single prioq at once, but always
two. Let's add two helper functions that do this, and reuse them
everywhere.
(Note that this drops one minor optimization:
sd_event_source_set_time_accuracy() previously only reshuffled the
"latest" prioq, since changing the accuracy has no effect on the
earliest time of an event source, just the latest time an event source
can run. This optimization is removed to simplify things, given that
it's not really worth the effort as prioq_reshuffle() on properly
ordered prioqs has practically zero cost O(1)).
(Slightly generalized, commented and split out of #17284 by Lennart)
(cherry picked from commit e1951c16a8fbe5b0b9ecc08f4f835a806059d28f)
Related: #1819868
[commit 4ce10f8e41a85a56ad9b805442eb1149ece7c82a from
https://github.com/systemd-rhel/rhel-8/]
Signed-off-by: Li Zhou <li.zhou@windriver.com>
---
src/libsystemd/sd-event/sd-event.c | 96 ++++++++++++------------------
1 file changed, 38 insertions(+), 58 deletions(-)
diff --git a/src/libsystemd/sd-event/sd-event.c b/src/libsystemd/sd-event/sd-event.c
index 26ef3ea..eb3182f 100644
--- a/src/libsystemd/sd-event/sd-event.c
+++ b/src/libsystemd/sd-event/sd-event.c
@@ -784,6 +784,33 @@ static void event_gc_signal_data(sd_event *e, const int64_t *priority, int sig)
event_unmask_signal_data(e, d, sig);
}
+static void event_source_pp_prioq_reshuffle(sd_event_source *s) {
+ assert(s);
+
+ /* Reshuffles the pending + prepare prioqs. Called whenever the dispatch order changes, i.e. when
+ * they are enabled/disabled or marked pending and such. */
+
+ if (s->pending)
+ prioq_reshuffle(s->event->pending, s, &s->pending_index);
+
+ if (s->prepare)
+ prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
+}
+
+static void event_source_time_prioq_reshuffle(sd_event_source *s) {
+ struct clock_data *d;
+
+ assert(s);
+ assert(EVENT_SOURCE_IS_TIME(s->type));
+
+ /* Called whenever the event source's timer ordering properties changed, i.e. time, accuracy,
+ * pending, enable state. Makes sure the two prioq's are ordered properly again. */
+ assert_se(d = event_get_clock_data(s->event, s->type));
+ prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
+ prioq_reshuffle(d->latest, s, &s->time.latest_index);
+ d->needs_rearm = true;
+}
+
static void source_disconnect(sd_event_source *s) {
sd_event *event;
@@ -905,16 +932,8 @@ static int source_set_pending(sd_event_source *s, bool b) {
} else
assert_se(prioq_remove(s->event->pending, s, &s->pending_index));
- if (EVENT_SOURCE_IS_TIME(s->type)) {
- struct clock_data *d;
-
- d = event_get_clock_data(s->event, s->type);
- assert(d);
-
- prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
- prioq_reshuffle(d->latest, s, &s->time.latest_index);
- d->needs_rearm = true;
- }
+ if (EVENT_SOURCE_IS_TIME(s->type))
+ event_source_time_prioq_reshuffle(s);
if (s->type == SOURCE_SIGNAL && !b) {
struct signal_data *d;
@@ -1570,11 +1589,7 @@ _public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority)
} else
s->priority = priority;
- if (s->pending)
- prioq_reshuffle(s->event->pending, s, &s->pending_index);
-
- if (s->prepare)
- prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
+ event_source_pp_prioq_reshuffle(s);
if (s->type == SOURCE_EXIT)
prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
@@ -1622,18 +1637,10 @@ _public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
case SOURCE_TIME_BOOTTIME:
case SOURCE_TIME_MONOTONIC:
case SOURCE_TIME_REALTIME_ALARM:
- case SOURCE_TIME_BOOTTIME_ALARM: {
- struct clock_data *d;
-
+ case SOURCE_TIME_BOOTTIME_ALARM:
s->enabled = m;
- d = event_get_clock_data(s->event, s->type);
- assert(d);
-
- prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
- prioq_reshuffle(d->latest, s, &s->time.latest_index);
- d->needs_rearm = true;
+ event_source_time_prioq_reshuffle(s);
break;
- }
case SOURCE_SIGNAL:
s->enabled = m;
@@ -1679,18 +1686,10 @@ _public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
case SOURCE_TIME_BOOTTIME:
case SOURCE_TIME_MONOTONIC:
case SOURCE_TIME_REALTIME_ALARM:
- case SOURCE_TIME_BOOTTIME_ALARM: {
- struct clock_data *d;
-
+ case SOURCE_TIME_BOOTTIME_ALARM:
s->enabled = m;
- d = event_get_clock_data(s->event, s->type);
- assert(d);
-
- prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
- prioq_reshuffle(d->latest, s, &s->time.latest_index);
- d->needs_rearm = true;
+ event_source_time_prioq_reshuffle(s);
break;
- }
case SOURCE_SIGNAL:
@@ -1737,11 +1736,7 @@ _public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
}
}
- if (s->pending)
- prioq_reshuffle(s->event->pending, s, &s->pending_index);
-
- if (s->prepare)
- prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
+ event_source_pp_prioq_reshuffle(s);
return 0;
}
@@ -1757,7 +1752,6 @@ _public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
}
_public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
- struct clock_data *d;
assert_return(s, -EINVAL);
assert_return(usec != (uint64_t) -1, -EINVAL);
@@ -1769,13 +1763,7 @@ _public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
source_set_pending(s, false);
- d = event_get_clock_data(s->event, s->type);
- assert(d);
-
- prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
- prioq_reshuffle(d->latest, s, &s->time.latest_index);
- d->needs_rearm = true;
-
+ event_source_time_prioq_reshuffle(s);
return 0;
}
@@ -1790,7 +1778,6 @@ _public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *use
}
_public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
- struct clock_data *d;
assert_return(s, -EINVAL);
assert_return(usec != (uint64_t) -1, -EINVAL);
@@ -1805,12 +1792,7 @@ _public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec
source_set_pending(s, false);
- d = event_get_clock_data(s->event, s->type);
- assert(d);
-
- prioq_reshuffle(d->latest, s, &s->time.latest_index);
- d->needs_rearm = true;
-
+ event_source_time_prioq_reshuffle(s);
return 0;
}
@@ -2088,9 +2070,7 @@ static int process_timer(
if (r < 0)
return r;
- prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
- prioq_reshuffle(d->latest, s, &s->time.latest_index);
- d->needs_rearm = true;
+ event_source_time_prioq_reshuffle(s);
}
return 0;
--
2.17.1