Reindex: Don't load changes from db to get change number and projects

Reindex does 2 steps:

1. Load all changes from the database to know the number of changes
   and the list of projects.
2. Scan each project for change refs and reindex each change.

Step 1 now gets the list of projects from the project cache and, to
know the number of changes, scans the change refs of each project, just
as step 2 does.

For now we are fine with scanning the change refs twice; we don't want
to cache all change IDs in memory just to avoid the second scan.

Change-Id: Ibca563d2978476a89dcf1fe4629562ebc391cd08
Signed-off-by: Edwin Kempin <ekempin@google.com>
This commit is contained in:
Edwin Kempin 2016-02-12 16:44:54 +01:00
parent a048124c8e
commit 7c69516b8d
2 changed files with 28 additions and 21 deletions

View File

@ -24,10 +24,9 @@ import com.google.gerrit.lucene.LuceneIndexModule;
import com.google.gerrit.pgm.util.BatchProgramModule;
import com.google.gerrit.pgm.util.SiteProgram;
import com.google.gerrit.pgm.util.ThreadLimiter;
import com.google.gerrit.reviewdb.client.Change;
import com.google.gerrit.reviewdb.client.Project;
import com.google.gerrit.reviewdb.server.ReviewDb;
import com.google.gerrit.server.config.GerritServerConfig;
import com.google.gerrit.server.git.GitRepositoryManager;
import com.google.gerrit.server.git.ScanningChangeCacheImpl;
import com.google.gerrit.server.index.ChangeIndex;
import com.google.gerrit.server.index.ChangeSchemas;
@ -35,12 +34,15 @@ import com.google.gerrit.server.index.IndexCollection;
import com.google.gerrit.server.index.IndexModule;
import com.google.gerrit.server.index.IndexModule.IndexType;
import com.google.gerrit.server.index.SiteIndexer;
import com.google.gerrit.server.notedb.ChangeNotes;
import com.google.gerrit.server.project.ProjectCache;
import com.google.inject.Injector;
import com.google.inject.Key;
import com.google.inject.Module;
import org.eclipse.jgit.lib.Config;
import org.eclipse.jgit.lib.ProgressMonitor;
import org.eclipse.jgit.lib.Repository;
import org.eclipse.jgit.lib.TextProgressMonitor;
import org.eclipse.jgit.util.io.NullOutputStream;
import org.kohsuke.args4j.Option;
@ -70,6 +72,8 @@ public class Reindex extends SiteProgram {
private Injector sysInjector;
private Config globalConfig;
private ChangeIndex index;
private ProjectCache projectCache;
private GitRepositoryManager repoManager;
@Override
public int run() throws Exception {
@ -93,6 +97,9 @@ public class Reindex extends SiteProgram {
sysManager.add(sysInjector);
sysManager.start();
projectCache = sysInjector.getInstance(ProjectCache.class);
repoManager = sysInjector.getInstance(GitRepositoryManager.class);
index = sysInjector.getInstance(IndexCollection.class).getSearchIndex();
int result = 0;
try {
@ -150,14 +157,13 @@ public class Reindex extends SiteProgram {
pm.beginTask("Collecting projects", ProgressMonitor.UNKNOWN);
Set<Project.NameKey> projects = Sets.newTreeSet();
int changeCount = 0;
try (ReviewDb db = sysInjector.getInstance(ReviewDb.class)) {
for (Change change : db.changes().all()) {
changeCount++;
if (projects.add(change.getProject())) {
for (Project.NameKey project : projectCache.all()) {
try (Repository repo = repoManager.openRepository(project)) {
changeCount += ChangeNotes.Factory.scan(repo).size();
}
projects.add(project);
pm.update(1);
}
}
}
pm.endTask();
SiteIndexer batchIndexer =

View File

@ -74,7 +74,7 @@ import java.io.IOException;
import java.sql.Timestamp;
import java.util.ArrayList;
import java.util.Collection;
import java.util.LinkedHashSet;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
@ -312,15 +312,7 @@ public class ChangeNotes extends AbstractChangeNotes<ChangeNotes> {
private List<ChangeNotes> scanDb(Repository repo, ReviewDb db)
throws OrmException, IOException {
Map<String, Ref> refs =
repo.getRefDatabase().getRefs(RefNames.REFS_CHANGES);
Set<Change.Id> ids = new LinkedHashSet<>();
for (Ref r : refs.values()) {
Change.Id id = Change.Id.fromRef(r.getName());
if (id != null) {
ids.add(id);
}
}
Set<Change.Id> ids = scan(repo);
List<ChangeNotes> notes = new ArrayList<>(ids.size());
// A batch size of N may overload get(Iterable), so use something smaller,
// but still >1.
@ -334,16 +326,25 @@ public class ChangeNotes extends AbstractChangeNotes<ChangeNotes> {
/**
 * Creates the notes for every change ID that {@code scan(repo)} reports.
 *
 * @param repo repository handed to {@code scan} to discover change IDs.
 * @param db passed through to {@code create} for each change.
 * @param project passed through to {@code create} for each change.
 * @return one {@code ChangeNotes} per discovered ID, in the iteration order
 *     of the ID set.
 */
private List<ChangeNotes> scanNotedb(Repository repo, ReviewDb db,
    Project.NameKey project) throws OrmException, IOException {
  Set<Change.Id> changeIds = scan(repo);
  // Pre-size the result: exactly one entry is added per ID.
  List<ChangeNotes> result = new ArrayList<>(changeIds.size());
  for (Change.Id changeId : changeIds) {
    ChangeNotes notes = create(db, project, changeId);
    result.add(notes);
  }
  return result;
}
/**
 * Collects the change IDs that can be parsed from the names of the refs
 * under {@code RefNames.REFS_CHANGES} in the given repository.
 *
 * <p>Refs for which {@code Change.Id.fromRef} returns {@code null} are
 * skipped. IDs are gathered in a {@code HashSet}, so each ID appears once.
 *
 * <p>NOTE(review): this listing appears to interleave removed and added
 * diff lines without +/- markers; the statements that touch
 * {@code changeNotes}, {@code db} and {@code project} look like leftovers of
 * the pre-change {@code scanNotedb} body -- confirm against the real file.
 *
 * @param repo repository whose ref database is read.
 * @return the set of change IDs found (see review note above).
 * @throws IOException declared by this method; presumably raised while
 *     reading the ref database -- TODO confirm.
 */
public static Set<Change.Id> scan(Repository repo) throws IOException {
Map<String, Ref> refs =
repo.getRefDatabase().getRefs(RefNames.REFS_CHANGES);
List<ChangeNotes> changeNotes = new ArrayList<>(refs.size());
// Sized from the ref count, the upper bound on the number of distinct IDs.
Set<Change.Id> ids = new HashSet<>(refs.size());
for (Ref r : refs.values()) {
Change.Id id = Change.Id.fromRef(r.getName());
// A null id means the ref name did not parse as a change ref; skip it.
if (id != null) {
changeNotes.add(create(db, project, id));
ids.add(id);
}
}
return changeNotes;
return ids;
}
}