Reduce chance of deadlock in account cache

This deadlock is not the typical deadlock where 2 threads locked a
resource and each one is waiting to lock a second resource already
locked by the other thread. The thread owning the account cache lock is
parked, which tell us that the locked was not released. I could not
determine the exact sequence of events leading to this deadlock making
it really hard to report/fix the problem.

While investigating, I realized that there quite a few reported issues
in Guava that could be major for Gerrit:

  Our deadlock happening in account cache
  https://bugs.chromium.org/p/gerrit/issues/detail?id=7645

  Other deadlock
  https://github.com/google/guava/issues/2976
  https://github.com/google/guava/issues/2863

  Performance
  https://github.com/google/guava/issues/2063

  Race condition
  https://github.com/google/guava/issues/2971

Because I could not reproduce the deadlock in a dev environment or in
a unit test making it almost impossible to fix, I considered other
options such as replacing Guava by something else.

The maintainer of Caffeine[1] cache claims that Caffeine is a high
performance[2], near optimal caching library designed/implemented base
on the experience of designing Guava's cache and ConcurrentLinkedHashMap.
I also did some benchmarks about spawning a lot of threads reading/writing
values from the caches. I ran those benchmarks on both Guava and Caffeine
and Guava was always taking at least double the time than Caffeine to
complete all operations.

Migrating to Caffeine is almost a drop-in replacement. Caffeine
interface are very similar to Guava cache and there is an adapter to
migrate to Caffeine and keep using Guava's interfaces. After migrating
to Caffeine, we saw that deadlock occurrence was reduced from once a day
to once every 2 weeks in our production server.

The maintainer of Caffeine, Ben Manes pointed us to the possible
cause[3] of this issue, a bug[4] in the kernel and its fix[5]. Our
kernel version is supposed to have the fix but we will try another OS
and kernel version.

Replacing Guava caches by Caffeine brings 2 things, it reduces the
chances of having the deadlock most likely caused by a kernel bug and
improve the cache performance.

[1]https://github.com/ben-manes/caffeine
[2]https://github.com/ben-manes/caffeine/wiki/Benchmarks
[3]https://groups.google.com/forum/#!topic/mechanical-sympathy/QbmpZxp6C64
[4]b0c29f79ec
[5]76835b0ebf

Bug: Issue 7645
Change-Id: I8d2b17a94d0e9daf9fa9cdda563316c5768b29ae
This commit is contained in:
Hugo Arès 2017-11-27 13:59:19 -05:00 committed by Hugo Arès
parent 0f2faafdda
commit 00fc15ac00
6 changed files with 91 additions and 33 deletions

View File

@ -212,6 +212,18 @@ maven_jar(
sha1 = GUAVA_BIN_SHA1,
)
maven_jar(
name = "caffeine",
artifact = "com.github.ben-manes.caffeine:caffeine:2.6.1",
sha1 = "fc7a29feda0a3c5aaf1ff55e0df5417025e6d5f4",
)
maven_jar(
name = "caffeine_guava",
artifact = "com.github.ben-manes.caffeine:guava:2.6.1",
sha1 = "e1fbe0d8c06639d6fee74404f687f00da25671eb",
)
maven_jar(
name = "velocity",
artifact = "org.apache.velocity:velocity:1.7",

View File

@ -8,6 +8,8 @@ java_library(
"//gerrit-common:server",
"//gerrit-extension-api:api",
"//gerrit-server:server",
"//lib:caffeine",
"//lib:caffeine_guava",
"//lib:guava",
"//lib:h2",
"//lib/guice",
@ -22,6 +24,8 @@ junit_tests(
deps = [
":cache-h2",
"//gerrit-server:server",
"//lib:caffeine",
"//lib:caffeine_guava",
"//lib:guava",
"//lib:h2",
"//lib:junit",

View File

@ -14,12 +14,16 @@
package com.google.gerrit.server.cache.h2;
import com.github.benmanes.caffeine.cache.Caffeine;
import com.github.benmanes.caffeine.cache.RemovalCause;
import com.github.benmanes.caffeine.cache.RemovalListener;
import com.github.benmanes.caffeine.cache.Weigher;
import com.github.benmanes.caffeine.guava.CaffeinatedGuava;
import com.google.common.base.Strings;
import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;
import com.google.common.cache.Weigher;
import com.google.common.cache.RemovalNotification;
import com.google.gerrit.lifecycle.LifecycleModule;
import com.google.gerrit.server.cache.CacheBinding;
import com.google.gerrit.server.cache.ForwardingRemovalListener;
@ -57,35 +61,48 @@ public class DefaultCacheFactory implements MemoryCacheFactory {
@Override
public <K, V> Cache<K, V> build(CacheBinding<K, V> def) {
return create(def, false).build();
return CaffeinatedGuava.build(create(def, false));
}
@Override
public <K, V> LoadingCache<K, V> build(CacheBinding<K, V> def, CacheLoader<K, V> loader) {
return create(def, false).build(loader);
return CaffeinatedGuava.build(create(def, false), loader);
}
@SuppressWarnings("unchecked")
<K, V> CacheBuilder<K, V> create(CacheBinding<K, V> def, boolean unwrapValueHolder) {
CacheBuilder<K, V> builder = newCacheBuilder();
<K, V> Caffeine<K, V> create(CacheBinding<K, V> def, boolean unwrapValueHolder) {
Caffeine<K, V> builder = newCacheBuilder();
builder.recordStats();
builder.maximumWeight(cfg.getLong("cache", def.name(), "memoryLimit", def.maximumWeight()));
builder = builder.removalListener(forwardingRemovalListenerFactory.create(def.name()));
builder =
builder.removalListener(
new CaffeineToGuavaRemovalListener<K, V>(
forwardingRemovalListenerFactory.create(def.name())));
Weigher<K, V> weigher = def.weigher();
if (weigher != null && unwrapValueHolder) {
final Weigher<K, V> impl = weigher;
weigher =
(Weigher<K, V>)
new Weigher<K, ValueHolder<V>>() {
@Override
public int weigh(K key, ValueHolder<V> value) {
return impl.weigh(key, value.value);
}
};
} else if (weigher == null) {
weigher = unitWeight();
Weigher<K, V> weigher = null;
com.google.common.cache.Weigher<K, V> guavaWeigher = def.weigher();
if (guavaWeigher != null) {
if (unwrapValueHolder) {
weigher =
(Weigher<K, V>)
new Weigher<K, ValueHolder<V>>() {
@Override
public int weigh(K key, ValueHolder<V> value) {
return guavaWeigher.weigh(key, value.value);
}
};
} else {
weigher =
new Weigher<K, V>() {
@Override
public int weigh(K key, V value) {
return guavaWeigher.weigh(key, value);
}
};
}
} else {
weigher = Weigher.singletonWeigher();
}
builder.weigher(weigher);
@ -107,16 +124,24 @@ public class DefaultCacheFactory implements MemoryCacheFactory {
}
@SuppressWarnings("unchecked")
private static <K, V> CacheBuilder<K, V> newCacheBuilder() {
return (CacheBuilder<K, V>) CacheBuilder.newBuilder();
private static <K, V> Caffeine<K, V> newCacheBuilder() {
return (Caffeine<K, V>) Caffeine.newBuilder();
}
private static <K, V> Weigher<K, V> unitWeight() {
return new Weigher<K, V>() {
@Override
public int weigh(K key, V value) {
return 1;
}
};
private static class CaffeineToGuavaRemovalListener<K, V> implements RemovalListener<K, V> {
private com.google.common.cache.RemovalListener<K, V> guavalistener;
private CaffeineToGuavaRemovalListener(
com.google.common.cache.RemovalListener<K, V> guavalistener) {
this.guavalistener = guavalistener;
}
@Override
public void onRemoval(K key, V value, RemovalCause cause) {
guavalistener.onRemoval(
RemovalNotification.create(
key, value, com.google.common.cache.RemovalCause.valueOf(cause.name())));
}
}
}

View File

@ -14,6 +14,7 @@
package com.google.gerrit.server.cache.h2;
import com.github.benmanes.caffeine.guava.CaffeinatedGuava;
import com.google.common.cache.Cache;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;
@ -168,7 +169,7 @@ class H2CacheFactory implements PersistentCacheFactory, LifecycleListener {
executor,
store,
def.keyType(),
(Cache<K, ValueHolder<V>>) defaultFactory.create(def, true).build());
(Cache<K, ValueHolder<V>>) CaffeinatedGuava.build(defaultFactory.create(def, true)));
synchronized (caches) {
caches.add(cache);
}
@ -188,9 +189,9 @@ class H2CacheFactory implements PersistentCacheFactory, LifecycleListener {
newSqlStore(def.name(), def.keyType(), limit, def.expireAfterWrite(TimeUnit.SECONDS));
Cache<K, ValueHolder<V>> mem =
(Cache<K, ValueHolder<V>>)
defaultFactory
.create(def, true)
.build((CacheLoader<K, V>) new H2CacheImpl.Loader<>(executor, store, loader));
CaffeinatedGuava.build(
defaultFactory.create(def, true),
(CacheLoader<K, V>) new H2CacheImpl.Loader<>(executor, store, loader));
H2CacheImpl<K, V> cache = new H2CacheImpl<>(executor, store, def.keyType(), mem);
caches.add(cache);
return cache;

View File

@ -20,6 +20,8 @@ EXPORTS = [
"//gerrit-gwtexpui:server",
"//gerrit-reviewdb:server",
"//gerrit-server/src/main/prolog:common",
"//lib:caffeine",
"//lib:caffeine_guava",
"//lib/commons:dbcp",
"//lib/commons:lang",
"//lib/commons:lang3",

View File

@ -82,6 +82,20 @@ java_library(
exports = ["@guava//jar"],
)
java_library(
name = "caffeine",
data = ["//lib:LICENSE-Apache2.0"],
visibility = ["//visibility:public"],
exports = ["@caffeine//jar"],
)
java_library(
name = "caffeine_guava",
data = ["//lib:LICENSE-Apache2.0"],
visibility = ["//visibility:public"],
exports = ["@caffeine_guava//jar"],
)
java_library(
name = "velocity",
data = ["//lib:LICENSE-Apache2.0"],