Reindex: Don't load changes from db to get the change count and projects

Until now, Reindex worked in 2 steps:

1. Load all changes from the database to know the number of changes
   and the list of projects.
2. Scan each project for change refs and reindex each change.

For step 1 we now get the list of projects from the project cache and
determine the number of changes by scanning the change refs of each
project, rather than loading all changes from the database.

For now we accept scanning the change refs twice (once to count the
changes, once to reindex them); we don't want to cache all change IDs
in memory just to avoid the second scan.

Change-Id: Ibca563d2978476a89dcf1fe4629562ebc391cd08
Signed-off-by: Edwin Kempin <ekempin@google.com>
This commit is contained in:
Edwin Kempin 2016-02-12 16:44:54 +01:00
parent a048124c8e
commit 7c69516b8d
2 changed files with 28 additions and 21 deletions

View File

@ -24,10 +24,9 @@ import com.google.gerrit.lucene.LuceneIndexModule;
import com.google.gerrit.pgm.util.BatchProgramModule; import com.google.gerrit.pgm.util.BatchProgramModule;
import com.google.gerrit.pgm.util.SiteProgram; import com.google.gerrit.pgm.util.SiteProgram;
import com.google.gerrit.pgm.util.ThreadLimiter; import com.google.gerrit.pgm.util.ThreadLimiter;
import com.google.gerrit.reviewdb.client.Change;
import com.google.gerrit.reviewdb.client.Project; import com.google.gerrit.reviewdb.client.Project;
import com.google.gerrit.reviewdb.server.ReviewDb;
import com.google.gerrit.server.config.GerritServerConfig; import com.google.gerrit.server.config.GerritServerConfig;
import com.google.gerrit.server.git.GitRepositoryManager;
import com.google.gerrit.server.git.ScanningChangeCacheImpl; import com.google.gerrit.server.git.ScanningChangeCacheImpl;
import com.google.gerrit.server.index.ChangeIndex; import com.google.gerrit.server.index.ChangeIndex;
import com.google.gerrit.server.index.ChangeSchemas; import com.google.gerrit.server.index.ChangeSchemas;
@ -35,12 +34,15 @@ import com.google.gerrit.server.index.IndexCollection;
import com.google.gerrit.server.index.IndexModule; import com.google.gerrit.server.index.IndexModule;
import com.google.gerrit.server.index.IndexModule.IndexType; import com.google.gerrit.server.index.IndexModule.IndexType;
import com.google.gerrit.server.index.SiteIndexer; import com.google.gerrit.server.index.SiteIndexer;
import com.google.gerrit.server.notedb.ChangeNotes;
import com.google.gerrit.server.project.ProjectCache;
import com.google.inject.Injector; import com.google.inject.Injector;
import com.google.inject.Key; import com.google.inject.Key;
import com.google.inject.Module; import com.google.inject.Module;
import org.eclipse.jgit.lib.Config; import org.eclipse.jgit.lib.Config;
import org.eclipse.jgit.lib.ProgressMonitor; import org.eclipse.jgit.lib.ProgressMonitor;
import org.eclipse.jgit.lib.Repository;
import org.eclipse.jgit.lib.TextProgressMonitor; import org.eclipse.jgit.lib.TextProgressMonitor;
import org.eclipse.jgit.util.io.NullOutputStream; import org.eclipse.jgit.util.io.NullOutputStream;
import org.kohsuke.args4j.Option; import org.kohsuke.args4j.Option;
@ -70,6 +72,8 @@ public class Reindex extends SiteProgram {
private Injector sysInjector; private Injector sysInjector;
private Config globalConfig; private Config globalConfig;
private ChangeIndex index; private ChangeIndex index;
private ProjectCache projectCache;
private GitRepositoryManager repoManager;
@Override @Override
public int run() throws Exception { public int run() throws Exception {
@ -93,6 +97,9 @@ public class Reindex extends SiteProgram {
sysManager.add(sysInjector); sysManager.add(sysInjector);
sysManager.start(); sysManager.start();
projectCache = sysInjector.getInstance(ProjectCache.class);
repoManager = sysInjector.getInstance(GitRepositoryManager.class);
index = sysInjector.getInstance(IndexCollection.class).getSearchIndex(); index = sysInjector.getInstance(IndexCollection.class).getSearchIndex();
int result = 0; int result = 0;
try { try {
@ -150,13 +157,12 @@ public class Reindex extends SiteProgram {
pm.beginTask("Collecting projects", ProgressMonitor.UNKNOWN); pm.beginTask("Collecting projects", ProgressMonitor.UNKNOWN);
Set<Project.NameKey> projects = Sets.newTreeSet(); Set<Project.NameKey> projects = Sets.newTreeSet();
int changeCount = 0; int changeCount = 0;
try (ReviewDb db = sysInjector.getInstance(ReviewDb.class)) { for (Project.NameKey project : projectCache.all()) {
for (Change change : db.changes().all()) { try (Repository repo = repoManager.openRepository(project)) {
changeCount++; changeCount += ChangeNotes.Factory.scan(repo).size();
if (projects.add(change.getProject())) {
pm.update(1);
}
} }
projects.add(project);
pm.update(1);
} }
pm.endTask(); pm.endTask();

View File

@ -74,7 +74,7 @@ import java.io.IOException;
import java.sql.Timestamp; import java.sql.Timestamp;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collection; import java.util.Collection;
import java.util.LinkedHashSet; import java.util.HashSet;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Set; import java.util.Set;
@ -312,15 +312,7 @@ public class ChangeNotes extends AbstractChangeNotes<ChangeNotes> {
private List<ChangeNotes> scanDb(Repository repo, ReviewDb db) private List<ChangeNotes> scanDb(Repository repo, ReviewDb db)
throws OrmException, IOException { throws OrmException, IOException {
Map<String, Ref> refs = Set<Change.Id> ids = scan(repo);
repo.getRefDatabase().getRefs(RefNames.REFS_CHANGES);
Set<Change.Id> ids = new LinkedHashSet<>();
for (Ref r : refs.values()) {
Change.Id id = Change.Id.fromRef(r.getName());
if (id != null) {
ids.add(id);
}
}
List<ChangeNotes> notes = new ArrayList<>(ids.size()); List<ChangeNotes> notes = new ArrayList<>(ids.size());
// A batch size of N may overload get(Iterable), so use something smaller, // A batch size of N may overload get(Iterable), so use something smaller,
// but still >1. // but still >1.
@ -334,16 +326,25 @@ public class ChangeNotes extends AbstractChangeNotes<ChangeNotes> {
private List<ChangeNotes> scanNotedb(Repository repo, ReviewDb db, private List<ChangeNotes> scanNotedb(Repository repo, ReviewDb db,
Project.NameKey project) throws OrmException, IOException { Project.NameKey project) throws OrmException, IOException {
Set<Change.Id> ids = scan(repo);
List<ChangeNotes> changeNotes = new ArrayList<>(ids.size());
for (Change.Id id : ids) {
changeNotes.add(create(db, project, id));
}
return changeNotes;
}
public static Set<Change.Id> scan(Repository repo) throws IOException {
Map<String, Ref> refs = Map<String, Ref> refs =
repo.getRefDatabase().getRefs(RefNames.REFS_CHANGES); repo.getRefDatabase().getRefs(RefNames.REFS_CHANGES);
List<ChangeNotes> changeNotes = new ArrayList<>(refs.size()); Set<Change.Id> ids = new HashSet<>(refs.size());
for (Ref r : refs.values()) { for (Ref r : refs.values()) {
Change.Id id = Change.Id.fromRef(r.getName()); Change.Id id = Change.Id.fromRef(r.getName());
if (id != null) { if (id != null) {
changeNotes.add(create(db, project, id)); ids.add(id);
} }
} }
return changeNotes; return ids;
} }
} }