Support background sync of missing refs

A recent change added support for synchronously fetching missing
refs for a change. However, if something happens to the git repo
(e.g., it is garbage collected or completely removed), gertty
won't be able to work offline (and will also be very slow).

Add db columns to store the fetch locations and a sync job that
will verify that all of the commits gertty needs (for open changes
at least) are on disk, and if not, will fetch them.

Change-Id: If608302b6efacceb9ac3630b7ac402cf65ae6d68
This commit is contained in:
James E. Blair 2014-05-31 15:43:54 -07:00
parent 00d3f6dbfd
commit 95568e32fe
3 changed files with 94 additions and 3 deletions

View File

@ -0,0 +1,41 @@
"""Add fetch ref column
Revision ID: 725816dc500
Revises: 38104b4c1b84
Create Date: 2014-05-31 14:51:08.078616
"""
# revision identifiers, used by Alembic.
revision = '725816dc500'
down_revision = '38104b4c1b84'
import warnings
from alembic import op
import sqlalchemy as sa
from gertty.dbsupport import sqlite_alter_columns
def upgrade():
    """Add ``fetch_auth`` and ``fetch_ref`` to ``revision`` and backfill them.

    The columns are first added as nullable, every existing revision row is
    given a value, and only then are the columns switched to NOT NULL.
    Existing rows get ``fetch_auth=False`` and a ``fetch_ref`` computed from
    the change number and revision number.
    """
    # Warnings raised while adding the columns are deliberately ignored.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        op.add_column('revision', sa.Column('fetch_auth', sa.Boolean()))
        op.add_column('revision', sa.Column('fetch_ref', sa.String(length=255)))

    conn = op.get_bind()
    res = conn.execute("select r.key, r.number, c.number from revision r, change c where r.change_key=c.key")
    # The update statement is the same for every row, so build it once.
    q = sa.text("update revision set fetch_auth=:auth, fetch_ref=:ref where key=:key")
    for (rkey, rnumber, cnumber) in res.fetchall():
        # Gerrit shards change refs by the last two digits of the change
        # number, zero-padded: change 5 lives under refs/changes/05/5/...,
        # not refs/changes/5/5/...
        shard = str(cnumber)[-2:].zfill(2)
        ref = 'refs/changes/%s/%s/%s' % (shard, cnumber, rnumber)
        conn.execute(q, key=rkey, ref=ref, auth=False)

    # Every row now has a value, so the columns can be made NOT NULL.
    # NOTE(review): presumably this helper exists because SQLite cannot
    # alter a column in place -- confirm in gertty.dbsupport.
    sqlite_alter_columns('revision', [
        sa.Column('fetch_auth', sa.Boolean(), nullable=False),
        sa.Column('fetch_ref', sa.String(length=255), nullable=False)
        ])
def downgrade():
    """Drop the ``fetch_auth`` and ``fetch_ref`` columns from ``revision``."""
    for column_name in ('fetch_auth', 'fetch_ref'):
        op.drop_column('revision', column_name)

View File

@ -59,6 +59,8 @@ revision_table = Table(
Column('message', Text, nullable=False),
Column('commit', String(255), nullable=False),
Column('parent', String(255), nullable=False),
Column('fetch_auth', Boolean, nullable=False),
Column('fetch_ref', String(255), nullable=False),
)
message_table = Table(
'message', metadata,
@ -212,12 +214,14 @@ class Change(object):
return l
class Revision(object):
def __init__(self, change, number, message, commit, parent):
def __init__(self, change, number, message, commit, parent, fetch_auth, fetch_ref):
self.change_key = change.key
self.number = number
self.message = message
self.commit = commit
self.parent = parent
self.fetch_auth = fetch_auth
self.fetch_ref = fetch_ref
def createMessage(self, *args, **kw):
session = Session.object_session(self)

View File

@ -246,7 +246,9 @@ class SyncChangeTask(Task):
url = sync.app.config.url + change.project.name
if 'anonymous http' in remote_revision['fetch']:
ref = remote_revision['fetch']['anonymous http']['ref']
auth = False
else:
auth = True
ref = remote_revision['fetch']['http']['ref']
url = list(urlparse.urlsplit(url))
url[1] = '%s:%s@%s' % (sync.app.config.username,
@ -257,7 +259,8 @@ class SyncChangeTask(Task):
if not revision:
revision = change.createRevision(remote_revision['_number'],
remote_revision['commit']['message'], remote_commit,
remote_revision['commit']['parents'][0]['commit'])
remote_revision['commit']['parents'][0]['commit'],
auth, ref)
new_revision = True
# TODO: handle multiple parents
parent_revision = session.getRevisionByCommit(revision.parent)
@ -382,6 +385,48 @@ class SyncChangeTask(Task):
self.log.debug("git fetch %s %s" % (url, ref))
repo.fetch(url, ref)
class CheckRevisionsTask(Task):
    """Find revisions of open changes whose commits are missing locally.

    For each such revision that has a ``fetch_ref`` recorded, a
    ``FetchRefTask`` is submitted at this task's own priority.
    """

    def __repr__(self):
        return '<CheckRevisionsTask>'

    def run(self, sync):
        """Scan every project with open changes and queue the needed fetches."""
        app = sync.app
        missing = []
        with app.db.getSession() as session:
            for project in session.getProjects():
                if not project.open_changes:
                    continue
                repo = app.getRepo(project.name)
                for change in project.open_changes:
                    for revision in change.revisions:
                        # Nothing to do when both the parent and the
                        # revision's own commit are already in the repo.
                        if (repo.hasCommit(revision.parent) and
                                repo.hasCommit(revision.commit)):
                            continue
                        # Without a recorded ref there is nothing to fetch.
                        if not revision.fetch_ref:
                            continue
                        missing.append((project.name, revision.fetch_ref, revision.fetch_auth))
        # Plain tuples are collected above and the tasks are only submitted
        # once the scan is finished.
        # NOTE(review): nesting reconstructed from a flattened listing --
        # confirm this loop sits outside the session block.
        for project_name, fetch_ref, fetch_auth in missing:
            task = FetchRefTask(project_name, fetch_ref, fetch_auth, priority=self.priority)
            sync.submitTask(task)
class FetchRefTask(Task):
    """Fetch a single ref for one project into its local git repo.

    :param project_name: project name; appended to the configured base URL
        and used to look up the local repo.
    :param ref: the ref to fetch.
    :param auth: when true, the configured username and password are
        embedded in the fetch URL.
    :param priority: task priority, passed to the ``Task`` base class.
    """

    def __init__(self, project_name, ref, auth, priority=NORMAL_PRIORITY):
        super(FetchRefTask, self).__init__(priority)
        self.project_name = project_name
        self.ref = ref
        self.auth = auth

    def __repr__(self):
        return '<FetchRefTask %s %s>' % (self.project_name, self.ref)

    def run(self, sync):
        """Build the fetch URL (with credentials if required) and fetch the ref."""
        base_url = sync.app.config.url + self.project_name
        url = base_url
        if self.auth:
            # Splice "user:password@" in front of the network location.
            parts = list(urlparse.urlsplit(base_url))
            parts[1] = '%s:%s@%s' % (sync.app.config.username,
                                     sync.app.config.password, parts[1])
            url = urlparse.urlunsplit(parts)
        # Log the credential-free URL so the password never reaches the
        # debug log; the authenticated URL is used only for the fetch itself.
        self.log.debug("git fetch %s %s" % (base_url, self.ref))
        repo = sync.app.getRepo(self.project_name)
        repo.fetch(url, self.ref)
class UploadReviewsTask(Task):
def __repr__(self):
@ -442,9 +487,10 @@ class Sync(object):
self.app = app
self.log = logging.getLogger('gertty.sync')
self.queue = MultiQueue([HIGH_PRIORITY, NORMAL_PRIORITY, LOW_PRIORITY])
self.submitTask(UploadReviewsTask(HIGH_PRIORITY))
self.submitTask(SyncProjectListTask(HIGH_PRIORITY))
self.submitTask(SyncSubscribedProjectsTask(HIGH_PRIORITY))
self.submitTask(UploadReviewsTask(HIGH_PRIORITY))
self.submitTask(CheckRevisionsTask(LOW_PRIORITY))
self.periodic_thread = threading.Thread(target=self.periodicSync)
self.periodic_thread.daemon = True
self.periodic_thread.start()