Add project updated column

This should greatly reduce the number of unnecessary sync calls by
storing the last time gertty queried for changes to a project.
Previously, we used the updated time of the latest change in a
project, however, subsequent queries with that value would typically
return the same change even though it needn't be synced.  Adjusting
that value by a small amount is unlikely to work reliably because
the query is for a relative time and it takes some time to process.
Adjusting for a larger amount (eg, a few seconds) might miss data.
Clock skew is also a concern in this system because we are
subtracting the server time from the client's time.

By storing the last sync time locally, we can continue to update
it past the highest value that gerrit has, so that we eventually
get queries which return no results.  Clock skew is not an issue
because the delta arithmetic only involves client generated times.
We can also increase the window slightly to account for query
processing time without continuously syncing already-synced changes.

Change-Id: I8cd0af9bd4d3669f436f169059e4b602d4d3036c
This commit is contained in:
James E. Blair 2014-05-31 07:30:24 -07:00
parent 77ffdfb550
commit 00d3f6dbfd
3 changed files with 54 additions and 11 deletions

View File

@ -0,0 +1,33 @@
"""Added project updated column
Revision ID: 38104b4c1b84
Revises: 56e48a4a064a
Create Date: 2014-05-31 06:52:12.452205
"""
# revision identifiers, used by Alembic.
# `revision` is this migration's own ID; `down_revision` is the ID of the
# migration it revises (the "Revises" entry in the module docstring).
revision = '38104b4c1b84'
down_revision = '56e48a4a064a'
from alembic import op
import sqlalchemy as sa
def upgrade():
    """Add the ``project.updated`` column, backfill it, and index it.

    Every existing project row is seeded with the newest ``updated``
    value found among its rows in ``change`` (NULL when it has none),
    so the new column starts out matching the value that was previously
    derived from the change table.
    """
    op.add_column('project', sa.Column('updated', sa.DateTime))

    conn = op.get_bind()
    # Build the parameterized statements once; only the bound values
    # differ from project to project.
    newest_q = sa.text("select max(updated) from change where project_key=:key")
    update_q = sa.text("update project set updated=:updated where key=:key")

    # Fetch all keys before issuing further statements on the connection.
    project_keys = [row[0] for row in
                    conn.execute("select key from project").fetchall()]
    for key in project_keys:
        # An aggregate without GROUP BY always yields exactly one row,
        # so the single scalar is the complete result.
        newest = conn.execute(newest_q, key=key).scalar()
        conn.execute(update_q, key=key, updated=newest)

    op.create_index(op.f('ix_project_updated'), 'project', ['updated'], unique=False)
def downgrade():
    """Undo the upgrade: drop the index first, then the ``updated`` column."""
    index_name = op.f('ix_project_updated')
    op.drop_index(index_name, table_name='project')
    op.drop_column('project', 'updated')

View File

@ -32,6 +32,7 @@ project_table = Table(
Column('name', String(255), index=True, unique=True, nullable=False),
Column('subscribed', Boolean, index=True, default=False),
Column('description', Text, nullable=False, default=''),
Column('updated', DateTime, index=True),
)
change_table = Table(
'change', metadata,
@ -296,10 +297,6 @@ mapper(Project, project_table, properties=dict(
change_table.c.status!='ABANDONED'),
order_by=change_table.c.number,
),
updated = column_property(
select([func.max(change_table.c.updated)]).where(
change_table.c.project_key==project_table.c.key)
),
))
mapper(Change, change_table, properties=dict(
revisions=relationship(Revision, backref='change',

View File

@ -130,26 +130,39 @@ class SyncProjectTask(Task):
def run(self, sync):
# Builds a 'project:<name>' query (narrowed by a '-age:' term when the
# project has a stored `updated` time), fetches the matching changes via
# sync.get(), submits a SyncChangeTask for each change that is already in
# the db or not in a closed status, and finally submits a
# SetProjectUpdatedTask carrying `now`.
# NOTE(review): this span looks like a rendered diff in which removed and
# added lines are interleaved without +/- markers and indentation has been
# lost -- confirm against the repository before treating it as runnable.
app = sync.app
# Captured once up front; used both for the '-age:' window below and as
# the value handed to SetProjectUpdatedTask at the end.
now = datetime.datetime.utcnow()
with app.db.getSession() as session:
project = session.getProject(self.project_key)
query = 'project:%s' % project.name
if project.updated:
# NOTE(review): presumably the pre-change form of the next '-age:' line
# (it calls utcnow() again and pads by 0) -- confirm it was removed.
query += ' -age:%ss' % (int(math.ceil((datetime.datetime.utcnow()-project.updated).total_seconds())) + 0,)
# Allow 4 seconds for request time, etc.
query += ' -age:%ss' % (int(math.ceil((now-project.updated).total_seconds())) + 4,)
changes = sync.get('changes/?q=%s' % query)
self.log.debug('Query: %s ' % (query,))
with app.db.getSession() as session:
# NOTE(review): the reversed() loop header and its comment below are
# presumably the removed side of the diff; the plain `for c in changes`
# header further down would be their replacement -- confirm.
for c in reversed(changes):
# The list we get is newest to oldest; if we are
# interrupted, we will have already synced the newest
# change and a subsequent sync will not catch up the
# old ones. So reverse the list before we process it
# so that the updated time is accurate.
for c in changes:
# For now, just sync open changes or changes already
# in the db optionally we could sync all changes ever
change = session.getChangeByID(c['id'])
if change or (c['status'] not in self._closed_statuses):
sync.submitTask(SyncChangeTask(c['id'], priority=self.priority))
self.log.debug("Change %s update %s" % (c['id'], c['updated']))
# Submitted as a task with this task's priority rather than written to the
# project directly here.
sync.submitTask(SetProjectUpdatedTask(self.project_key, now, priority=self.priority))
class SetProjectUpdatedTask(Task):
    """Task that stores a new ``updated`` value on a project.

    project_key -- key handed to ``session.getProject`` to find the project
    updated     -- value assigned to the project's ``updated`` attribute
    priority    -- forwarded unchanged to the ``Task`` base class
    """

    def __init__(self, project_key, updated, priority=NORMAL_PRIORITY):
        super(SetProjectUpdatedTask, self).__init__(priority)
        self.updated = updated
        self.project_key = project_key

    def __repr__(self):
        details = (self.project_key, self.updated)
        return '<SetProjectUpdatedTask %s %s>' % details

    def run(self, sync):
        """Assign ``self.updated`` to the project inside a db session."""
        with sync.app.db.getSession() as db_session:
            target = db_session.getProject(self.project_key)
            target.updated = self.updated
class SyncChangeByCommitTask(Task):
def __init__(self, commit, priority=NORMAL_PRIORITY):