From 2e1cb2b8495761a7e9abc09ebca24ca6bbf2afbe Mon Sep 17 00:00:00 2001 From: "Shawn O. Pearce" Date: Thu, 24 May 2012 14:28:40 -0700 Subject: [PATCH] Back in-memory caches with Guava, disk caches with H2 Instead of using Ehcache for in-memory caches, use Guava. The Guava cache code has been more completely tested by Google in high load production environments, and it tends to have fewer bugs. It enables caches to be built at any time, rather than only at server startup. By creating a Guava cache as soon as it is declared, rather than during the LifecycleListener.start() for the CachePool, we can promise any downstream consumer of the cache that the cache is ready to execute requests the moment it is supplied by Guice. This fixes a startup ordering problem in the GroupCache and the ProjectCache, where code wants to use one of these caches during startup to resolve a group or project by name. Tracking the Guava backend caches with a DynamicMap makes it possible for plugins to define their own in-memory caches using CacheModule's cache() function to declare the cache. It allows the core server to make the cache available to administrators over SSH with the gerrit show-caches and gerrit flush-caches commands. Persistent caches store in a private H2 database per cache, with a simple one-table schema that stores each entry in a table row as a pair of serialized objects (key and value). Database reads are gated by a BloomFilter, to reduce the number of calls made to H2 during cache misses. In theory less than 3% of cache misses will reach H2 and find nothing. Stores happen on a background thread quickly after the put is made to the cache, reducing the risk that a diff or web_session record is lost during an ungraceful shutdown. Cache databases are capped around 128M worth of stored data by running a prune cycle each day at 1 AM local server time. 
Records are removed from the database by ordering on the last access time, where last accessed is the last time the record was moved from disk to memory. Change-Id: Ia82d056796b5af9bcb1f219fe06d905c9c0fbc84 --- Documentation/config-gerrit.txt | 92 +-- Documentation/licenses.txt | 2 +- ReleaseNotes/ReleaseNotes-2.5.txt | 39 + .../.gitignore | 4 +- .../org.eclipse.core.resources.prefs | 3 +- .../.settings/org.eclipse.core.runtime.prefs | 0 .../.settings/org.eclipse.jdt.core.prefs | 2 +- .../.settings/org.eclipse.jdt.ui.prefs | 0 {gerrit-ehcache => gerrit-cache-h2}/pom.xml | 23 +- .../server/cache/h2/DefaultCacheFactory.java | 120 +++ .../server/cache/h2/H2CacheFactory.java | 198 +++++ .../gerrit/server/cache/h2/H2CacheImpl.java | 709 ++++++++++++++++++ .../gerrit/ehcache/EhcachePoolImpl.java | 272 ------- .../gerrit/ehcache/PopulatingCache.java | 114 --- .../google/gerrit/ehcache/SimpleCache.java | 81 -- .../gerrit/httpd/CacheBasedWebSession.java | 13 +- .../gerrit/httpd/GitOverHttpServlet.java | 18 +- .../gerrit/httpd/WebSessionManager.java | 30 +- .../httpd/plugins/HttpPluginModule.java | 12 + .../httpd/plugins/HttpPluginServlet.java | 125 +-- .../google/gerrit/httpd/plugins/Resource.java | 40 + .../gerrit/httpd/plugins/ResourceKey.java | 45 ++ .../gerrit/httpd/plugins/ResourceWeigher.java | 14 +- .../gerrit/httpd/plugins/SmallResource.java | 66 ++ .../changedetail/PatchSetDetailFactory.java | 23 +- .../httpd/rpc/patch/PatchScriptFactory.java | 14 +- .../java/com/google/gerrit/pgm/Daemon.java | 4 +- .../google/gerrit/pgm/ExportReviewNotes.java | 5 +- gerrit-plugin-api/pom.xml | 2 - gerrit-server/pom.xml | 5 + .../com/google/gerrit/rules/StoredValues.java | 7 +- .../account/AccountByEmailCacheImpl.java | 63 +- .../server/account/AccountCacheImpl.java | 99 ++- .../gerrit/server/account/GroupCacheImpl.java | 280 ++++--- .../server/account/GroupIncludeCacheImpl.java | 55 +- .../gerrit/server/auth/ldap/LdapModule.java | 19 +- .../gerrit/server/auth/ldap/LdapRealm.java | 
71 +- .../com/google/gerrit/server/cache/Cache.java | 35 - .../gerrit/server/cache/CacheBinding.java | 46 ++ .../gerrit/server/cache/CacheModule.java | 181 +++-- .../gerrit/server/cache/CacheProvider.java | 220 +++--- .../server/cache/ConcurrentHashMapCache.java | 48 -- .../gerrit/server/cache/EntryCreator.java | 40 - ...ionPolicy.java => MemoryCacheFactory.java} | 17 +- .../server/cache/NamedCacheBinding.java | 35 - ...ePool.java => PersistentCacheFactory.java} | 14 +- .../gerrit/server/cache/ProxyCache.java | 40 - .../server/config/GerritGlobalModule.java | 5 +- .../gerrit/server/events/EventFactory.java | 22 +- .../google/gerrit/server/git/BanCommit.java | 30 +- .../google/gerrit/server/git/TagCache.java | 53 +- .../com/google/gerrit/server/git/TagSet.java | 9 + .../gerrit/server/mail/ChangeEmail.java | 6 +- .../gerrit/server/mail/CommentSender.java | 10 +- .../gerrit/server/patch/IntraLineDiffKey.java | 3 + .../gerrit/server/patch/IntraLineLoader.java | 11 +- .../gerrit/server/patch/IntraLineWeigher.java | 28 + .../gerrit/server/patch/PatchListCache.java | 5 +- .../server/patch/PatchListCacheImpl.java | 62 +- .../gerrit/server/patch/PatchListEntry.java | 16 + .../gerrit/server/patch/PatchListLoader.java | 12 +- .../PatchListNotAvailableException.java} | 12 +- .../gerrit/server/patch/PatchListWeigher.java | 30 + .../server/project/ProjectCacheImpl.java | 106 +-- .../server/project/SectionSortCache.java | 9 +- .../server/query/change/ChangeData.java | 10 +- .../gerrit/server/project/RefControlTest.java | 10 +- gerrit-sshd/pom.xml | 2 +- .../google/gerrit/sshd/SshKeyCacheImpl.java | 39 +- .../sshd/commands/BanCommitCommand.java | 3 - .../gerrit/sshd/commands/CacheCommand.java | 32 +- .../gerrit/sshd/commands/FlushCaches.java | 22 +- .../gerrit/sshd/commands/ShowCaches.java | 192 +++-- .../gerrit/httpd/WebAppInitializer.java | 4 +- .../src/main/resources/log4j.properties | 4 - pom.xml | 14 +- 76 files changed, 2432 insertions(+), 1674 deletions(-) rename 
{gerrit-ehcache => gerrit-cache-h2}/.gitignore (83%) rename {gerrit-ehcache => gerrit-cache-h2}/.settings/org.eclipse.core.resources.prefs (58%) rename {gerrit-ehcache => gerrit-cache-h2}/.settings/org.eclipse.core.runtime.prefs (100%) rename {gerrit-ehcache => gerrit-cache-h2}/.settings/org.eclipse.jdt.core.prefs (99%) rename {gerrit-ehcache => gerrit-cache-h2}/.settings/org.eclipse.jdt.ui.prefs (100%) rename {gerrit-ehcache => gerrit-cache-h2}/pom.xml (76%) create mode 100644 gerrit-cache-h2/src/main/java/com/google/gerrit/server/cache/h2/DefaultCacheFactory.java create mode 100644 gerrit-cache-h2/src/main/java/com/google/gerrit/server/cache/h2/H2CacheFactory.java create mode 100644 gerrit-cache-h2/src/main/java/com/google/gerrit/server/cache/h2/H2CacheImpl.java delete mode 100644 gerrit-ehcache/src/main/java/com/google/gerrit/ehcache/EhcachePoolImpl.java delete mode 100644 gerrit-ehcache/src/main/java/com/google/gerrit/ehcache/PopulatingCache.java delete mode 100644 gerrit-ehcache/src/main/java/com/google/gerrit/ehcache/SimpleCache.java create mode 100644 gerrit-httpd/src/main/java/com/google/gerrit/httpd/plugins/Resource.java create mode 100644 gerrit-httpd/src/main/java/com/google/gerrit/httpd/plugins/ResourceKey.java rename gerrit-server/src/main/java/com/google/gerrit/server/cache/UnnamedCacheBinding.java => gerrit-httpd/src/main/java/com/google/gerrit/httpd/plugins/ResourceWeigher.java (64%) create mode 100644 gerrit-httpd/src/main/java/com/google/gerrit/httpd/plugins/SmallResource.java delete mode 100644 gerrit-server/src/main/java/com/google/gerrit/server/cache/Cache.java create mode 100644 gerrit-server/src/main/java/com/google/gerrit/server/cache/CacheBinding.java delete mode 100644 gerrit-server/src/main/java/com/google/gerrit/server/cache/ConcurrentHashMapCache.java delete mode 100644 gerrit-server/src/main/java/com/google/gerrit/server/cache/EntryCreator.java rename gerrit-server/src/main/java/com/google/gerrit/server/cache/{EvictionPolicy.java => 
MemoryCacheFactory.java} (61%) delete mode 100644 gerrit-server/src/main/java/com/google/gerrit/server/cache/NamedCacheBinding.java rename gerrit-server/src/main/java/com/google/gerrit/server/cache/{CachePool.java => PersistentCacheFactory.java} (61%) delete mode 100644 gerrit-server/src/main/java/com/google/gerrit/server/cache/ProxyCache.java create mode 100644 gerrit-server/src/main/java/com/google/gerrit/server/patch/IntraLineWeigher.java rename gerrit-server/src/main/java/com/google/gerrit/server/{git/IncompleteUserInfoException.java => patch/PatchListNotAvailableException.java} (71%) create mode 100644 gerrit-server/src/main/java/com/google/gerrit/server/patch/PatchListWeigher.java diff --git a/Documentation/config-gerrit.txt b/Documentation/config-gerrit.txt index 2c050d4c29..6e50ef4cf1 100644 --- a/Documentation/config-gerrit.txt +++ b/Documentation/config-gerrit.txt @@ -354,8 +354,8 @@ Default is unset, no disk cache. [[cache.name.maxAge]]cache..maxAge:: + -Maximum age to keep an entry in the cache. If an entry has not -been accessed in this period of time, it is removed from the cache. +Maximum age to keep an entry in the cache. Entries are removed from +the cache and refreshed from source data every maxAge interval. Values should use common unit suffixes to express their setting: + * s, sec, second, seconds @@ -371,7 +371,7 @@ If a unit suffix is not specified, `minutes` is assumed. If 0 is supplied, the maximum age is infinite and items are never purged except when the cache is full. + -Default is `90 days` for most caches, except: +Default is `0`, meaning store forever with no expiration, except: + * `"adv_bases"`: default is `10 minutes` * `"ldap_groups"`: default is `1 hour` @@ -379,33 +379,42 @@ Default is `90 days` for most caches, except: [[cache.name.memoryLimit]]cache..memoryLimit:: + -Maximum number of cache items to retain in memory. Keep in mind -this is total number of items, not bytes of heap used. +The total cost of entries to retain in memory. 
The cost computation +varies by the cache. For most caches where the in-memory size of each +entry is roughly the same, memoryLimit is currently defined to be +the number of entries held by the cache (each entry costs 1). ++ +For caches where the size of an entry can vary significantly between +individual entries (notably `"diff"`, `"diff_intraline"`), memoryLimit +is an approximation of the total number of bytes stored by the cache. +Larger entries that represent bigger patch sets or longer source files +will consume a bigger portion of the memoryLimit. For these caches the +memoryLimit should be set to roughly the amount of RAM (in bytes) the +administrator can dedicate to the cache. + Default is 1024 for most caches, except: + * `"adv_bases"`: default is `4096` -* `"diff"`: default is `128` -* `"diff_intraline"`: default is `128` +* `"diff"`: default is `10m` (10 MiB of memory) +* `"diff_intraline"`: default is `10m` (10 MiB of memory) +* `"plugin_resources"`: default is `2m` (2 MiB of memory) + ++ +If set to 0 the cache is disabled. Entries are removed immediately +after being stored by the cache. This is primarily useful for testing. [[cache.name.diskLimit]]cache..diskLimit:: + -Maximum number of cache items to retain on disk, if this cache -supports storing its items to disk. Like memoryLimit, this is -total number of items, not bytes of disk used. If 0, disk storage -for this cache is disabled. +Total size in bytes of the keys and values stored on disk. Caches that +have grown bigger than this size are scanned daily at 1 AM local +server time to trim the cache. Entries are removed in least recently +accessed order until the cache fits within this limit. Caches may +grow larger than this during the day, as the size check is only +performed once every 24 hours. + -Default is 16384. - -[[cache.name.diskBuffer]]cache..diskBuffer:: +Default is 128 MiB per cache. 
+ -Number of bytes to buffer in memory before writing less frequently -accessed cache items to disk, if this cache supports storing its -items to disk. -+ -Default is 5 MiB. -+ -Common unit suffixes of 'k', 'm', or 'g' are supported. +If 0, disk storage for the cache is disabled. [[cache_names]]Standard Caches ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -447,14 +456,10 @@ Each item caches the differences between two commits, at both the directory and file levels. Gerrit uses this cache to accelerate the display of affected file names, as well as file contents. + -Entries in this cache are relatively large, so the memory limit -should not be set incredibly high. Administrators should try to -target cache.diff.memoryLimit to be roughly the number of changes -which their users will process in a 1 or 2 day span. -+ -Keeping entries for 90 days gives sufficient time for most changes -to be submitted or abandoned before their relevant difference items -expire out. +Entries in this cache are relatively large, so memoryLimit is an +estimate in bytes of memory used. Administrators should try to target +cache.diff.memoryLimit to fit all changes users will view in a 1 or 2 +day span. cache `"diff_intraline"`:: + @@ -462,14 +467,10 @@ Each item caches the intraline difference of one file, when compared between two commits. Gerrit uses this cache to accelerate display of intraline differences when viewing a file. + -Entries in this cache are relatively large, so the memory limit -should not be set incredibly high. Administrators should try to -target cache.diff.memoryLimit to be roughly the number of changes -which their users will process in a 1 or 2 day span. -+ -Keeping entries for 90 days gives sufficient time for most changes -to be submitted or abandoned before their relevant difference items -expire out. +Entries in this cache are relatively large, so memoryLimit is an +estimate in bytes of memory used. 
Administrators should try to target +cache.diff_intraline.memoryLimit to fit all files users will view in a 1 or 2 +day span. cache `"git_tags"`:: + @@ -517,6 +518,12 @@ reference. Sorting the sections can be expensive when regular expressions are used, so this cache remembers the ordering for each branch. +cache `"plugin_resources"`:: ++ +Caches formatted plugin resources, such as plugin documentation that +has been converted from Markdown to HTML. The memoryLimit refers to +the bytes of memory dedicated to storing the documentation. + cache `"projects"`:: + Caches the project description records, from the `projects` table @@ -550,8 +557,8 @@ and need to sign-in again after the restart, as the cache was unable to persist the session information. Enabling a disk cache is strongly recommended. + -Session storage is relatively inexpensive, the average entry in -this cache is approximately 248 bytes, depending on the JVM. +Session storage is relatively inexpensive. The average entry in +this cache is approximately 346 bytes. See also link:cmd-flush-caches.html[gerrit flush-caches]. @@ -598,13 +605,6 @@ configuration. + Default is true, enabled. -cache.plugin_resources.memoryLimit:: -+ -Number of bytes of memory to use to cache formatted plugin resources, -such as plugin documentation that has been converted from Markdown to -HTML. Default is 2 MiB. Common unit suffixes of 'k', 'm', or 'g' are -supported. - cache.projects.checkFrequency:: + How often project configuration should be checked for update from Git. 
diff --git a/Documentation/licenses.txt b/Documentation/licenses.txt index e50979a4e6..4186026114 100644 --- a/Documentation/licenses.txt +++ b/Documentation/licenses.txt @@ -18,6 +18,7 @@ Included Components |Google Gson | <> |Google Web Toolkit | <> |Guice | <> +|Guava Libraries | <> |Apache Commons Codec | <> |Apache Commons DBCP | <> |Apache Commons Http Client | <> @@ -33,7 +34,6 @@ Included Components |Apache Xerces | <> |OpenID4Java | <> |Neko HTML | <> -|Ehcache | <> |mime-util | <> |Jetty | <>, or link:http://www.eclipse.org/legal/epl-v10.html[EPL] |Prolog Cafe | <> diff --git a/ReleaseNotes/ReleaseNotes-2.5.txt b/ReleaseNotes/ReleaseNotes-2.5.txt index 34af3dd5a7..60c4f08d99 100644 --- a/ReleaseNotes/ReleaseNotes-2.5.txt +++ b/ReleaseNotes/ReleaseNotes-2.5.txt @@ -14,3 +14,42 @@ Replication Gerrit 2.5 no longer includes replication support out of the box. Servers that reply upon `replication.config` to copy Git repository data to other locations must also install the replication plugin. + +Cache Configuration +~~~~~~~~~~~~~~~~~~~ + +Disk caches are now backed by individual H2 databases, rather than +Ehcache's own private format. Administrators are encouraged to clear +the `'$site_path'/cache` directory before starting the new server. + +The `cache.NAME.diskLimit` configuration variable is now expressed in +bytes of disk used. This is a change from previous versions of Gerrit, +which expressed the limit as the number of entries rather than bytes. +Bytes of disk is a more accurate way to size what is held. Admins that +set this variable must update their configurations, as the old values +are too small. For example a setting of `diskLimit = 65535` will only +store 64 KiB worth of data on disk and can no longer hold 65,000 patch +sets. It is recommended to delete the diskLimit variable (if set) and +rely on the built-in default of `128m`. 
+ +The `cache.diff.memoryLimit` and `cache.diff_intraline.memoryLimit` +configuration variables are now expressed in bytes of memory used, +rather than number of entries in the cache. This is a change from +previous versions of Gerrit and gives administrators more control over +how memory is partitioned within a server. Admins that set these variables +must update their configurations, as the old values are too small. +For example a setting of `memoryLimit = 1024` now means only 1 KiB of +data (which may not even hold 1 patch set), not 1024 patch sets. It +is recommended to set these to `10m` for 10 MiB of memory, and +increase as necessary. + +The `cache.NAME.maxAge` variable now means the maximum amount of time +that can elapse between reads of the source data into the cache, no +matter how often it is being accessed. In prior versions it meant how +long an item could be held without being requested by a client before +it was discarded. The new meaning of elapsed time before consulting +the source data is more useful, as it enables a strict bound on how +stale the cached data can be. This is especially useful for slave +servers' account and permission data, or the `ldap_groups` cache, where +updates are often made to the source without telling Gerrit to reload +the cache. 
diff --git a/gerrit-ehcache/.gitignore b/gerrit-cache-h2/.gitignore similarity index 83% rename from gerrit-ehcache/.gitignore rename to gerrit-cache-h2/.gitignore index fe190c9619..cb430b8d67 100644 --- a/gerrit-ehcache/.gitignore +++ b/gerrit-cache-h2/.gitignore @@ -1,6 +1,6 @@ /target /.classpath /.project -/.settings/org.eclipse.m2e.core.prefs /.settings/org.maven.ide.eclipse.prefs -/gerrit-ehcache.iml \ No newline at end of file +/.settings/org.eclipse.m2e.core.prefs +/gerrit-cache-h2.iml diff --git a/gerrit-ehcache/.settings/org.eclipse.core.resources.prefs b/gerrit-cache-h2/.settings/org.eclipse.core.resources.prefs similarity index 58% rename from gerrit-ehcache/.settings/org.eclipse.core.resources.prefs rename to gerrit-cache-h2/.settings/org.eclipse.core.resources.prefs index 97e731bce8..fc11c3fe6f 100644 --- a/gerrit-ehcache/.settings/org.eclipse.core.resources.prefs +++ b/gerrit-cache-h2/.settings/org.eclipse.core.resources.prefs @@ -1,4 +1,5 @@ -#Tue May 15 09:21:09 PDT 2012 +#Thu Jul 28 11:02:36 PDT 2011 eclipse.preferences.version=1 encoding//src/main/java=UTF-8 +encoding//src/test/java=UTF-8 encoding/=UTF-8 diff --git a/gerrit-ehcache/.settings/org.eclipse.core.runtime.prefs b/gerrit-cache-h2/.settings/org.eclipse.core.runtime.prefs similarity index 100% rename from gerrit-ehcache/.settings/org.eclipse.core.runtime.prefs rename to gerrit-cache-h2/.settings/org.eclipse.core.runtime.prefs diff --git a/gerrit-ehcache/.settings/org.eclipse.jdt.core.prefs b/gerrit-cache-h2/.settings/org.eclipse.jdt.core.prefs similarity index 99% rename from gerrit-ehcache/.settings/org.eclipse.jdt.core.prefs rename to gerrit-cache-h2/.settings/org.eclipse.jdt.core.prefs index e89c0486b2..470942d4f6 100644 --- a/gerrit-ehcache/.settings/org.eclipse.jdt.core.prefs +++ b/gerrit-cache-h2/.settings/org.eclipse.jdt.core.prefs @@ -1,4 +1,4 @@ -#Thu Jan 19 12:55:44 PST 2012 +#Thu Jul 28 11:02:36 PDT 2011 eclipse.preferences.version=1 
org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.6 diff --git a/gerrit-ehcache/.settings/org.eclipse.jdt.ui.prefs b/gerrit-cache-h2/.settings/org.eclipse.jdt.ui.prefs similarity index 100% rename from gerrit-ehcache/.settings/org.eclipse.jdt.ui.prefs rename to gerrit-cache-h2/.settings/org.eclipse.jdt.ui.prefs diff --git a/gerrit-ehcache/pom.xml b/gerrit-cache-h2/pom.xml similarity index 76% rename from gerrit-ehcache/pom.xml rename to gerrit-cache-h2/pom.xml index f9117b9e94..4d4303c2e0 100644 --- a/gerrit-ehcache/pom.xml +++ b/gerrit-cache-h2/pom.xml @@ -1,6 +1,6 @@