Rework external ID cache loader

The current system uses an ExternalId cache that is serialized and
written to disk (if configured). The cache holds an entire generation
of all external IDs, keyed by the SHA1 of `refs/meta/external-ids`.
This is roughly `Cache<ObjectId, List<ExternalID>>`.

Prior to this commit, on servers where an update does not originate
(on other masters or slaves), the cache loader would re-read all
external IDs from Git when it was called. On googlesource.com, these
regenerations can take up to 60 seconds. Within this time, the Gerrit
instance comes to a grinding halt as lots of code paths depend on a
value of this cache being present. Authentication is one of them.

This commit rewrites the loader and implements a differential
computation approach to compute the new state from a previously cached
state by applying the modifications using a Git diff.

Given the SHA1 (tip in refs/meta/external-ids) that is requested, the
logic first tries to find a state that we have cached by walking Git
history. This is best-effort and we allow at most 10 commits to be
walked.

Once a prior state is found, we use that state's SHA1 to do a tree diff
between that and the requested state. The new state is then generated by
applying the same mutations.
JGit's DiffFormatter is smart in that it only traverses trees that have
changed and doesn't load any file content which ensures that we only
perform the minimal number of Git operations necessary. This is
necessary because NotesMap (the storage format of external ids on disk)
shards pretty aggressively and we don't want to load all trees when
applying only deltas.

Once the (tree) diff is computed, we read the newly added external IDs
using an ObjectReader.

There is currently a broader discussion going on about if the primary
storage format of external IDs should be changed (I87119506ec04).
This commit doesn't answer or interfere with that discussion. However,
if that redesign is required will - apart from other things - depend on
the impact of this commit on the problems that I87119506ec04 outlines.

We hope that this commit already mitigates a large chunk of the slow
loading issues. We will use micro benchmarking and look closer at how
the collections are handled if there is a need after this commit.

Change-Id: I0e67d3538e2ad17812598a1523e78fd71a7bd88a
This commit is contained in:
Patrick Hiesel
2019-07-22 09:32:37 +02:00
parent bb669aac65
commit 42b47b11c4
14 changed files with 860 additions and 168 deletions

View File

@@ -8,6 +8,11 @@ GERRIT_GLOBAL_MODULE_SRC = [
"config/GerritGlobalModule.java",
]
TESTING_SRC = [
"account/externalids/testing/ExternalIdInserter.java",
"account/externalids/testing/ExternalIdTestUtil.java",
]
java_library(
name = "constants",
srcs = CONSTANTS_SRC,
@@ -24,7 +29,7 @@ java_library(
name = "server",
srcs = glob(
["**/*.java"],
exclude = CONSTANTS_SRC + GERRIT_GLOBAL_MODULE_SRC,
exclude = CONSTANTS_SRC + GERRIT_GLOBAL_MODULE_SRC + TESTING_SRC,
),
resource_strip_prefix = "resources",
resources = ["//resources/com/google/gerrit/server"],

View File

@@ -14,17 +14,12 @@
package com.google.gerrit.server.account.externalids;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.ImmutableSetMultimap;
import com.google.common.collect.MultimapBuilder;
import com.google.common.collect.SetMultimap;
import com.google.common.flogger.FluentLogger;
import com.google.gerrit.reviewdb.client.Account;
import com.google.gerrit.server.logging.Metadata;
import com.google.gerrit.server.logging.TraceContext;
import com.google.gerrit.server.logging.TraceContext.TraceTimer;
import com.google.inject.Inject;
import com.google.inject.Singleton;
import com.google.inject.name.Named;
@@ -147,24 +142,4 @@ class ExternalIdCacheImpl implements ExternalIdCache {
lock.unlock();
}
}
static class Loader extends CacheLoader<ObjectId, AllExternalIds> {
private final ExternalIdReader externalIdReader;
@Inject
Loader(ExternalIdReader externalIdReader) {
this.externalIdReader = externalIdReader;
}
@Override
public AllExternalIds load(ObjectId notesRev) throws Exception {
try (TraceTimer timer =
TraceContext.newTimer(
"Loading external IDs", Metadata.builder().revision(notesRev.name()).build())) {
ImmutableSet<ExternalId> externalIds = externalIdReader.all(notesRev);
externalIds.forEach(ExternalId::checkThatBlobIdIsSet);
return AllExternalIds.create(externalIds);
}
}
}
}

View File

@@ -0,0 +1,274 @@
// Copyright (C) 2019 The Android Open Source Project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.gerrit.server.account.externalids;
import com.google.common.base.CharMatcher;
import com.google.common.cache.Cache;
import com.google.common.cache.CacheLoader;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.ImmutableSetMultimap;
import com.google.common.flogger.FluentLogger;
import com.google.gerrit.metrics.Counter1;
import com.google.gerrit.metrics.Description;
import com.google.gerrit.metrics.Description.Units;
import com.google.gerrit.metrics.Field;
import com.google.gerrit.metrics.MetricMaker;
import com.google.gerrit.metrics.Timer0;
import com.google.gerrit.reviewdb.client.Account;
import com.google.gerrit.reviewdb.client.RefNames;
import com.google.gerrit.server.config.AllUsersName;
import com.google.gerrit.server.config.GerritServerConfig;
import com.google.gerrit.server.git.GitRepositoryManager;
import com.google.gerrit.server.logging.Metadata;
import com.google.gerrit.server.logging.TraceContext;
import com.google.gerrit.server.logging.TraceContext.TraceTimer;
import com.google.inject.Inject;
import com.google.inject.Provider;
import com.google.inject.Singleton;
import com.google.inject.name.Named;
import java.io.IOException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import org.eclipse.jgit.errors.ConfigInvalidException;
import org.eclipse.jgit.lib.Config;
import org.eclipse.jgit.lib.ObjectId;
import org.eclipse.jgit.lib.ObjectReader;
import org.eclipse.jgit.lib.Ref;
import org.eclipse.jgit.lib.Repository;
import org.eclipse.jgit.revwalk.RevCommit;
import org.eclipse.jgit.revwalk.RevWalk;
import org.eclipse.jgit.treewalk.TreeWalk;
import org.eclipse.jgit.treewalk.filter.TreeFilter;
/** Loads cache values for the external ID cache using either a full or a partial reload. */
@Singleton
public class ExternalIdCacheLoader extends CacheLoader<ObjectId, AllExternalIds> {
private static final FluentLogger logger = FluentLogger.forEnclosingClass();
// Maximum number of prior states we inspect to find a base for differential. If no cached state
// is found within this number of parents, we fall back to reading everything from scratch.
private static final int MAX_HISTORY_LOOKBACK = 10;
// Maximum number of changes we perform using the differential approach. If more updates need to
// be applied, we fall back to reading everything from scratch.
private static final int MAX_DIFF_UPDATES = 50;
private final ExternalIdReader externalIdReader;
private final Provider<Cache<ObjectId, AllExternalIds>> externalIdCache;
private final GitRepositoryManager gitRepositoryManager;
private final AllUsersName allUsersName;
private final Counter1<Boolean> reloadCounter;
private final Timer0 reloadDifferential;
private final boolean enablePartialReloads;
@Inject
ExternalIdCacheLoader(
GitRepositoryManager gitRepositoryManager,
AllUsersName allUsersName,
ExternalIdReader externalIdReader,
@Named(ExternalIdCacheImpl.CACHE_NAME)
Provider<Cache<ObjectId, AllExternalIds>> externalIdCache,
MetricMaker metricMaker,
@GerritServerConfig Config config) {
this.externalIdReader = externalIdReader;
this.externalIdCache = externalIdCache;
this.gitRepositoryManager = gitRepositoryManager;
this.allUsersName = allUsersName;
this.reloadCounter =
metricMaker.newCounter(
"notedb/external_id_cache_load_count",
new Description("Total number of external ID cache reloads from Git.")
.setRate()
.setUnit("updates"),
Field.ofBoolean("partial", Metadata.Builder::partial).build());
this.reloadDifferential =
metricMaker.newTimer(
"notedb/external_id_partial_read_latency",
new Description(
"Latency for generating a new external ID cache state from a prior state.")
.setCumulative()
.setUnit(Units.MILLISECONDS));
this.enablePartialReloads =
config.getBoolean("cache", ExternalIdCacheImpl.CACHE_NAME, "enablePartialReloads", false);
}
@Override
public AllExternalIds load(ObjectId notesRev) throws IOException, ConfigInvalidException {
if (!enablePartialReloads) {
logger.atInfo().log(
"Partial reloads of "
+ ExternalIdCacheImpl.CACHE_NAME
+ " disabled. Falling back to full reload.");
return reloadAllExternalIds(notesRev);
}
// The requested value was not in the cache (hence, this loader was invoked). Therefore, try to
// create this entry from a past value using the minimal amount of Git operations possible to
// reduce latency.
//
// First, try to find the most recent state we have in the cache. Most of the time, this will be
// the state before the last update happened, but it can also date further back. We try a best
// effort approach and check the last 10 states. If nothing is found, we default to loading the
// value from scratch.
//
// If a prior state was found, we use Git to diff the trees and find modifications. This is
// faster than just loading the complete current tree and working off of that because of how the
// data is structured: NotesMaps use nested trees, so, for example, a NotesMap with 200k entries
// has two layers of nesting: 12/34/1234..99. TreeWalk is smart in skipping the traversal of
// identical subtrees.
//
// Once we know what files changed, we apply additions and removals to the previously cached
// state.
try (Repository repo = gitRepositoryManager.openRepository(allUsersName);
RevWalk rw = new RevWalk(repo)) {
long start = System.nanoTime();
Ref extIdRef = repo.exactRef(RefNames.REFS_EXTERNAL_IDS);
if (extIdRef == null) {
logger.atInfo().log(
RefNames.REFS_EXTERNAL_IDS + " not initialized, falling back to full reload.");
return reloadAllExternalIds(notesRev);
}
RevCommit currentCommit = rw.parseCommit(extIdRef.getObjectId());
rw.markStart(currentCommit);
RevCommit parentWithCacheValue;
AllExternalIds oldExternalIds = null;
int i = 0;
while ((parentWithCacheValue = rw.next()) != null
&& i++ < MAX_HISTORY_LOOKBACK
&& parentWithCacheValue.getParentCount() < 2) {
oldExternalIds = externalIdCache.get().getIfPresent(parentWithCacheValue.getId());
if (oldExternalIds != null) {
// We found a previously cached state.
break;
}
}
if (oldExternalIds == null) {
logger.atWarning().log(
"Unable to find an old ExternalId cache state, falling back to full reload");
return reloadAllExternalIds(notesRev);
}
// Diff trees to recognize modifications
Set<ObjectId> removals = new HashSet<>(); // Set<Blob-Object-Id>
Map<ObjectId, ObjectId> additions = new HashMap<>(); // Map<Name-ObjectId, Blob-Object-Id>
try (TreeWalk treeWalk = new TreeWalk(repo)) {
treeWalk.setFilter(TreeFilter.ANY_DIFF);
treeWalk.setRecursive(true);
treeWalk.reset(parentWithCacheValue.getTree(), currentCommit.getTree());
while (treeWalk.next()) {
String path = treeWalk.getPathString();
ObjectId oldBlob = treeWalk.getObjectId(0);
ObjectId newBlob = treeWalk.getObjectId(1);
if (ObjectId.zeroId().equals(newBlob)) {
// Deletion
removals.add(oldBlob);
} else if (ObjectId.zeroId().equals(oldBlob)) {
// Addition
additions.put(fileNameToObjectId(path), newBlob);
} else {
// Modification
removals.add(oldBlob);
additions.put(fileNameToObjectId(path), newBlob);
}
}
}
AllExternalIds allExternalIds =
buildAllExternalIds(repo, oldExternalIds, additions, removals);
reloadCounter.increment(true);
reloadDifferential.record(System.nanoTime() - start, TimeUnit.NANOSECONDS);
return allExternalIds;
}
}
private static ObjectId fileNameToObjectId(String path) {
return ObjectId.fromString(CharMatcher.is('/').removeFrom(path));
}
/**
* Build a new {@link AllExternalIds} from an old state by applying additions and removals that
* were performed since then.
*
* <p>Removals are applied before additions.
*
* @param repo open repository
* @param oldExternalIds prior state that is used as base
* @param additions map of name to blob ID for each external ID that should be added
* @param removals set of name {@link ObjectId}s that should be removed
*/
private static AllExternalIds buildAllExternalIds(
Repository repo,
AllExternalIds oldExternalIds,
Map<ObjectId, ObjectId> additions,
Set<ObjectId> removals)
throws IOException {
ImmutableSetMultimap.Builder<Account.Id, ExternalId> byAccount = ImmutableSetMultimap.builder();
ImmutableSetMultimap.Builder<String, ExternalId> byEmail = ImmutableSetMultimap.builder();
// Copy over old ExternalIds but exclude deleted ones
for (ExternalId externalId : oldExternalIds.byAccount().values()) {
if (removals.contains(externalId.blobId())) {
continue;
}
byAccount.put(externalId.accountId(), externalId);
if (externalId.email() != null) {
byEmail.put(externalId.email(), externalId);
}
}
// Add newly discovered ExternalIds
try (ObjectReader reader = repo.newObjectReader()) {
for (Map.Entry<ObjectId, ObjectId> nameToBlob : additions.entrySet()) {
ExternalId parsedExternalId;
try {
parsedExternalId =
ExternalId.parse(
nameToBlob.getKey().name(),
reader.open(nameToBlob.getValue()).getCachedBytes(),
nameToBlob.getValue());
} catch (ConfigInvalidException | RuntimeException e) {
logger.atSevere().withCause(e).log(
"Ignoring invalid external ID note %s", nameToBlob.getKey().name());
continue;
}
byAccount.put(parsedExternalId.accountId(), parsedExternalId);
if (parsedExternalId.email() != null) {
byEmail.put(parsedExternalId.email(), parsedExternalId);
}
}
}
return new AutoValue_AllExternalIds(byAccount.build(), byEmail.build());
}
private AllExternalIds reloadAllExternalIds(ObjectId notesRev)
throws IOException, ConfigInvalidException {
try (TraceTimer ignored =
TraceContext.newTimer(
"Loading external IDs from scratch",
Metadata.builder().revision(notesRev.name()).build())) {
ImmutableSet<ExternalId> externalIds = externalIdReader.all(notesRev);
externalIds.forEach(ExternalId::checkThatBlobIdIsSet);
AllExternalIds allExternalIds = AllExternalIds.create(externalIds);
reloadCounter.increment(false);
return allExternalIds;
}
}
}

View File

@@ -14,7 +14,6 @@
package com.google.gerrit.server.account.externalids;
import com.google.gerrit.server.account.externalids.ExternalIdCacheImpl.Loader;
import com.google.gerrit.server.cache.CacheModule;
import com.google.gerrit.server.cache.serialize.ObjectIdCacheSerializer;
import com.google.inject.TypeLiteral;
@@ -31,9 +30,12 @@ public class ExternalIdModule extends CacheModule {
// from the cache. Extend the cache size by 1 to cover this case, but expire the extra
// object after a short period of time, since it may be a potentially large amount of
// memory.
// When loading a new value because the primary data advanced, we want to leverage the old
// cache state to recompute only what changed. This doesn't affect cache size though as
// Guava calls the loader first and evicts later on.
.maximumWeight(2)
.expireFromMemoryAfterAccess(Duration.ofMinutes(1))
.loader(Loader.class)
.loader(ExternalIdCacheLoader.class)
.diskLimit(-1)
.version(1)
.keySerializer(ObjectIdCacheSerializer.INSTANCE)

View File

@@ -0,0 +1,11 @@
java_library(
name = "testing",
testonly = 1,
srcs = glob(["**/*.java"]),
visibility = ["//visibility:public"],
deps = [
"//java/com/google/gerrit/reviewdb:server",
"//java/com/google/gerrit/server",
"//lib/jgit/org.eclipse.jgit:jgit",
],
)

View File

@@ -0,0 +1,25 @@
// Copyright (C) 2019 The Android Open Source Project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.gerrit.server.account.externalids.testing;
import java.io.IOException;
import org.eclipse.jgit.lib.ObjectId;
import org.eclipse.jgit.lib.ObjectInserter;
import org.eclipse.jgit.notes.NoteMap;
@FunctionalInterface
public interface ExternalIdInserter {
public ObjectId addNote(ObjectInserter ins, NoteMap noteMap) throws IOException;
}

View File

@@ -0,0 +1,163 @@
// Copyright (C) 2019 The Android Open Source Project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.gerrit.server.account.externalids.testing;
import static java.nio.charset.StandardCharsets.UTF_8;
import static org.eclipse.jgit.lib.Constants.OBJ_BLOB;
import static org.eclipse.jgit.lib.Constants.OBJ_TREE;
import com.google.gerrit.reviewdb.client.Account;
import com.google.gerrit.reviewdb.client.RefNames;
import com.google.gerrit.server.account.externalids.ExternalId;
import com.google.gerrit.server.account.externalids.ExternalIdReader;
import java.io.IOException;
import org.eclipse.jgit.lib.CommitBuilder;
import org.eclipse.jgit.lib.Config;
import org.eclipse.jgit.lib.ObjectId;
import org.eclipse.jgit.lib.ObjectInserter;
import org.eclipse.jgit.lib.PersonIdent;
import org.eclipse.jgit.lib.RefUpdate;
import org.eclipse.jgit.lib.Repository;
import org.eclipse.jgit.notes.NoteMap;
import org.eclipse.jgit.revwalk.RevCommit;
import org.eclipse.jgit.revwalk.RevWalk;
/** Common methods for dealing with external IDs in tests. */
public class ExternalIdTestUtil {
public static String insertExternalIdWithoutAccountId(
Repository repo, RevWalk rw, PersonIdent ident, Account.Id accountId, String externalId)
throws IOException {
return insertExternalId(
repo,
rw,
ident,
(ins, noteMap) -> {
ExternalId extId = ExternalId.create(ExternalId.Key.parse(externalId), accountId);
ObjectId noteId = extId.key().sha1();
Config c = new Config();
extId.writeToConfig(c);
c.unset("externalId", extId.key().get(), "accountId");
byte[] raw = c.toText().getBytes(UTF_8);
ObjectId dataBlob = ins.insert(OBJ_BLOB, raw);
noteMap.set(noteId, dataBlob);
return noteId;
});
}
public static String insertExternalIdWithKeyThatDoesntMatchNoteId(
Repository repo, RevWalk rw, PersonIdent ident, Account.Id accountId, String externalId)
throws IOException {
return insertExternalId(
repo,
rw,
ident,
(ins, noteMap) -> {
ExternalId extId = ExternalId.create(ExternalId.Key.parse(externalId), accountId);
ObjectId noteId = ExternalId.Key.parse(externalId + "x").sha1();
Config c = new Config();
extId.writeToConfig(c);
byte[] raw = c.toText().getBytes(UTF_8);
ObjectId dataBlob = ins.insert(OBJ_BLOB, raw);
noteMap.set(noteId, dataBlob);
return noteId;
});
}
public static String insertExternalIdWithInvalidConfig(
Repository repo, RevWalk rw, PersonIdent ident, String externalId) throws IOException {
return insertExternalId(
repo,
rw,
ident,
(ins, noteMap) -> {
ObjectId noteId = ExternalId.Key.parse(externalId).sha1();
byte[] raw = "bad-config".getBytes(UTF_8);
ObjectId dataBlob = ins.insert(OBJ_BLOB, raw);
noteMap.set(noteId, dataBlob);
return noteId;
});
}
public static String insertExternalIdWithEmptyNote(
Repository repo, RevWalk rw, PersonIdent ident, String externalId) throws IOException {
return insertExternalId(
repo,
rw,
ident,
(ins, noteMap) -> {
ObjectId noteId = ExternalId.Key.parse(externalId).sha1();
byte[] raw = "".getBytes(UTF_8);
ObjectId dataBlob = ins.insert(OBJ_BLOB, raw);
noteMap.set(noteId, dataBlob);
return noteId;
});
}
private static String insertExternalId(
Repository repo, RevWalk rw, PersonIdent ident, ExternalIdInserter extIdInserter)
throws IOException {
ObjectId rev = ExternalIdReader.readRevision(repo);
NoteMap noteMap = ExternalIdReader.readNoteMap(rw, rev);
try (ObjectInserter ins = repo.newObjectInserter()) {
ObjectId noteId = extIdInserter.addNote(ins, noteMap);
CommitBuilder cb = new CommitBuilder();
cb.setMessage("Update external IDs");
cb.setTreeId(noteMap.writeTree(ins));
cb.setAuthor(ident);
cb.setCommitter(ident);
if (!rev.equals(ObjectId.zeroId())) {
cb.setParentId(rev);
} else {
cb.setParentIds(); // Ref is currently nonexistent, commit has no parents.
}
if (cb.getTreeId() == null) {
if (rev.equals(ObjectId.zeroId())) {
cb.setTreeId(ins.insert(OBJ_TREE, new byte[] {})); // No parent, assume empty tree.
} else {
RevCommit p = rw.parseCommit(rev);
cb.setTreeId(p.getTree()); // Copy tree from parent.
}
}
ObjectId commitId = ins.insert(cb);
ins.flush();
RefUpdate u = repo.updateRef(RefNames.REFS_EXTERNAL_IDS);
u.setExpectedOldObjectId(rev);
u.setNewObjectId(commitId);
RefUpdate.Result res = u.update();
switch (res) {
case NEW:
case FAST_FORWARD:
case NO_CHANGE:
case RENAMED:
case FORCED:
break;
case LOCK_FAILURE:
case IO_FAILURE:
case NOT_ATTEMPTED:
case REJECTED:
case REJECTED_CURRENT_BRANCH:
case REJECTED_MISSING_OBJECT:
case REJECTED_OTHER_REASON:
default:
throw new IOException("Updating external IDs failed with " + res);
}
return noteId.getName();
}
}
}

View File

@@ -86,9 +86,12 @@ public abstract class Metadata {
// The name of the implementation method.
public abstract Optional<String> methodName();
// Boolean: one or more
// One or more resources
public abstract Optional<Boolean> multiple();
// Partial or full computation
public abstract Optional<Boolean> partial();
// Path of a metadata file in NoteDb.
public abstract Optional<String> noteDbFilePath();
@@ -182,6 +185,8 @@ public abstract class Metadata {
public abstract Builder multiple(boolean multiple);
public abstract Builder partial(boolean partial);
public abstract Builder noteDbFilePath(@Nullable String noteDbFilePath);
public abstract Builder noteDbRefName(@Nullable String noteDbRefName);