Browse Source

Add files table

In order to support searching for changes by file modified, keep
a local table of files modified in each revision.  In order to
populate the table without needing to re-sync every change, run
a diffstat on every revision during the migration.

Once established, the table will be populated during the normal
change sync.

Add support for the "file:" operator in searching.

Change-Id: Idbd44944405bdbc93af8bc3c513498e4425ba75b
changes/91/391991/1
James E. Blair 7 years ago
parent
commit
a61c5fdf67
  1. 93
      gertty/alembic/versions/50344aecd1c2_add_files_table.py
  2. 65
      gertty/db.py
  3. 5
      gertty/search/__init__.py
  4. 10
      gertty/search/parser.py
  5. 2
      gertty/search/tokenizer.py
  6. 14
      gertty/sync.py
  7. 15
      gertty/view/change.py

93
gertty/alembic/versions/50344aecd1c2_add_files_table.py

@ -0,0 +1,93 @@
"""add files table
Revision ID: 50344aecd1c2
Revises: 1bb187bcd401
Create Date: 2015-04-13 08:08:08.682803
"""
# revision identifiers, used by Alembic.
revision = '50344aecd1c2'
down_revision = '1bb187bcd401'
import re
import sys
from alembic import op, context
import sqlalchemy as sa
import git.exc
import gertty.db
def upgrade():
    """Create the ``file`` table and back-fill it for every known revision.

    For each revision already in the database a ``/COMMIT_MSG`` row is
    inserted, then the local git repository is asked for a diffstat
    between the revision's parent and its commit, and one row is
    inserted per entry.  Revisions whose diff cannot be computed are
    skipped (with a warning unless the change is merged or abandoned).
    """
    op.create_table('file',
        sa.Column('key', sa.Integer(), nullable=False),
        sa.Column('revision_key', sa.Integer(), nullable=False, index=True),
        sa.Column('path', sa.Text(), nullable=False, index=True),
        sa.Column('old_path', sa.Text(), index=True),
        sa.Column('status', sa.String(length=1)),
        sa.Column('inserted', sa.Integer()),
        sa.Column('deleted', sa.Integer()),
        sa.PrimaryKeyConstraint('key')
    )

    # Recognizes rename notation in a diffstat path: either a plain
    # "old => new" or "pre{old => new}post" with a shared prefix/suffix.
    # A raw string keeps "\{" and "\}" from being treated as (invalid)
    # string escapes.
    pathre = re.compile(r'((.*?)\{|^)(.*?) => (.*?)(\}(.*)|$)')
    insert = sa.text('insert into file (key, revision_key, path, old_path, status, inserted, deleted) '
                     ' values (NULL, :revision_key, :path, :old_path, :status, :inserted, :deleted)')

    conn = op.get_bind()

    countres = conn.execute('select count(*) from revision')
    revisions = countres.fetchone()[0]
    if revisions > 50:
        print('')
        print('Adding support for searching for changes by file modified. '
              'This may take a while.')

    qres = conn.execute('select p.name, c.number, c.status, r.key, r.number, r."commit", r.parent from project p, change c, revision r '
                        'where r.change_key=c.key and c.project_key=p.key order by p.name')

    count = 0
    for (pname, cnumber, cstatus, rkey, rnumber, commit, parent) in qres.fetchall():
        count += 1
        # "\r" rewrites the same terminal line to act as a progress meter.
        sys.stdout.write('Diffstat revision %s / %s\r' % (count, revisions))
        sys.stdout.flush()
        # Every revision gets a commit-message pseudo-file row.
        conn.execute(insert, revision_key=rkey, path='/COMMIT_MSG', old_path=None,
                     status=None, inserted=None, deleted=None)
        repo = context.config.gertty_app.getRepo(pname)
        try:
            stats = repo.diffstat(parent, commit)
        except git.exc.GitCommandError:
            # Probably a missing commit
            if cstatus not in ['MERGED', 'ABANDONED']:
                print("Unable to examine diff for %s %s change %s,%s" % (cstatus, pname, cnumber, rnumber))
            continue
        for stat in stats:
            try:
                (added, removed, path) = stat
            except ValueError:
                if cstatus not in ['MERGED', 'ABANDONED']:
                    print("Empty diffstat for %s %s change %s,%s" % (cstatus, pname, cnumber, rnumber))
                # Nothing usable in this entry; without skipping it,
                # ``path`` would be unbound (or left over from the
                # previous entry) below.
                continue
            m = pathre.match(path)
            status = gertty.db.File.STATUS_MODIFIED
            old_path = None
            if m:
                # Expand the rename notation into full old and new paths.
                status = gertty.db.File.STATUS_RENAMED
                pre = m.group(2) or ''
                post = m.group(6) or ''
                old_path = pre+m.group(3)+post
                path = pre+m.group(4)+post
            # Non-numeric counts are stored as NULL (presumably the
            # diffstat reports these for binary files -- TODO confirm).
            try:
                added = int(added)
            except ValueError:
                added = None
            try:
                removed = int(removed)
            except ValueError:
                removed = None
            conn.execute(insert, revision_key=rkey, path=path, old_path=old_path,
                         status=status, inserted=added, deleted=removed)
    # Move past the progress line.
    print('')
def downgrade():
    """Do nothing: this migration defines no downgrade step.

    The ``file`` table created by ``upgrade`` is left in place.
    """
    return None

65
gertty/db.py

@ -153,6 +153,17 @@ sync_query_table = Table(
Column('name', String(255), index=True, unique=True, nullable=False),
Column('updated', DateTime, index=True),
)
# One row per file touched by a revision; this is what the "file:" search
# operator queries.
file_table = Table(
    'file', metadata,
    Column('key', Integer, primary_key=True),
    # The revision this file entry belongs to.
    Column('revision_key', Integer, ForeignKey("revision.key"), index=True),
    # Path of the file in this revision.
    Column('path', Text, nullable=False, index=True),
    # Previous path when one is known (e.g. for a rename); otherwise NULL.
    Column('old_path', Text, index=True),
    # Line counts; the sync code stores NULL for files flagged as binary.
    Column('inserted', Integer),
    Column('deleted', Integer),
    # Single-letter status code; see the File.STATUS_* constants.
    # NOTE(review): the 50344aecd1c2 migration creates this column as
    # nullable and inserts '/COMMIT_MSG' rows with a NULL status, which
    # disagrees with nullable=False here -- confirm which is intended.
    Column('status', String(1), nullable=False),
)
class Account(object):
def __init__(self, id, name=None, username=None, email=None):
@ -354,6 +365,15 @@ class Revision(object):
session.flush()
return c
def createFile(self, *args, **kw):
    """Create a File attached to this revision, persist it and return it.

    Positional and keyword arguments are forwarded to ``File`` after
    this revision, which is always passed as its first argument.
    """
    db_session = Session.object_session(self)
    new_file = File(self, *args, **kw)
    self.files.append(new_file)
    db_session.add(new_file)
    # Flush so the new row is written out before the caller uses it.
    db_session.flush()
    return new_file
def getPendingMessage(self):
for m in self.messages:
if m.pending:
@ -433,6 +453,44 @@ class SyncQuery(object):
def __init__(self, name):
self.name = name
class File(object):
    """A file touched by a revision, with optional rename information.

    Attributes set by the constructor:
      revision_key -- key of the owning revision
      path         -- path of the file in this revision
      status       -- one of the single-letter STATUS_* codes below
      old_path     -- previous path, or None when there is none
      inserted     -- number of lines added, or None if unknown
      deleted      -- number of lines removed, or None if unknown
    """

    # Single-letter status codes stored in the ``status`` column.
    STATUS_ADDED = 'A'
    STATUS_DELETED = 'D'
    STATUS_RENAMED = 'R'
    STATUS_COPIED = 'C'
    STATUS_REWRITTEN = 'W'
    STATUS_MODIFIED = 'M'

    def __init__(self, revision, path, status, old_path=None,
                 inserted=None, deleted=None):
        """Record the file's details; only ``revision.key`` is read from *revision*."""
        self.revision_key = revision.key
        self.path = path
        self.status = status
        self.old_path = old_path
        self.inserted = inserted
        self.deleted = deleted

    @property
    def display_path(self):
        """Return the path for display, abbreviating renames.

        Without an ``old_path`` this is simply ``path``.  Otherwise the
        characters shared at the start and at the end of both paths are
        factored out and the differing middle is shown as
        ``pre{old => new}post``, e.g. ``gertty/{old => new}/db.py``.
        """
        if not self.old_path:
            return self.path
        old = self.old_path
        new = self.path
        # Never look past the end of the shorter path.
        limit = min(len(old), len(new))

        # Length of the common leading part.
        start = 0
        while start < limit and old[start] == new[start]:
            start += 1

        # Length of the common trailing part; it may not overlap the
        # characters already claimed by the common prefix.
        tail = 0
        while tail < limit - start and old[-1 - tail] == new[-1 - tail]:
            tail += 1

        pre = old[:start]
        # Slice with explicit end offsets: a negative-index slice such
        # as old[start:-tail] would be empty when tail is 0.
        post = old[len(old) - tail:]
        mid = '{%s => %s}' % (old[start:len(old) - tail],
                              new[start:len(new) - tail])
        return pre + mid + post
mapper(Account, account_table)
mapper(Project, project_table, properties=dict(
branches=relationship(Branch, backref='project',
@ -485,10 +543,12 @@ mapper(Revision, revision_table, properties=dict(
comment_table.c.draft==True),
order_by=(comment_table.c.line,
comment_table.c.created)),
files=relationship(File, backref='revision'),
pending_cherry_picks=relationship(PendingCherryPick, backref='revision'),
))
mapper(Message, message_table, properties=dict(
author=relationship(Account)))
mapper(File, file_table)
mapper(Comment, comment_table, properties=dict(
author=relationship(Account)))
mapper(Label, label_table)
@ -513,7 +573,7 @@ class Database(object):
self.app = app
self.engine = create_engine(self.app.config.dburi)
#metadata.create_all(self.engine)
self.migrate()
self.migrate(app)
# If we want the objects returned from query() to be usable
# outside of the session, we need to expunge them from the session,
# and since the DatabaseSession always calls commit() on the session
@ -528,7 +588,7 @@ class Database(object):
def getSession(self):
return DatabaseSession(self)
def migrate(self):
def migrate(self, app):
conn = self.engine.connect()
context = alembic.migration.MigrationContext.configure(conn)
current_rev = context.get_current_revision()
@ -539,6 +599,7 @@ class Database(object):
config = alembic.config.Config()
config.set_main_option("script_location", "gertty:alembic")
config.set_main_option("sqlalchemy.url", self.app.config.dburi)
config.gertty_app = app
if current_rev is None and has_table:
self.log.debug('Stamping database as initial revision')

5
gertty/search/__init__.py

@ -55,6 +55,11 @@ class SearchCompiler(object):
result = and_(gertty.db.change_table.c.account_key == gertty.db.account_table.c.key,
result)
tables.remove(gertty.db.account_table)
if gertty.db.file_table in tables:
result = and_(gertty.db.file_table.c.revision_key == gertty.db.revision_table.c.key,
gertty.db.revision_table.c.change_key == gertty.db.change_table.c.key,
result)
tables.remove(gertty.db.file_table)
if tables:
raise Exception("Unknown table in search: %s" % tables)
return result

10
gertty/search/parser.py

@ -76,6 +76,7 @@ def SearchParser():
| has_term
| is_term
| status_term
| file_term
| limit_term
| op_term'''
p[0] = p[1]
@ -294,6 +295,15 @@ def SearchParser():
else:
raise gertty.search.SearchSyntaxError('Syntax error: is:%s is not supported' % p[2])
def p_file_term(p):
    '''file_term : OP_FILE string'''
    # The string above is the grammar rule for this production, not
    # free-form documentation, so it must stay exactly as written.
    #
    # Build a clause matching the search string against either the
    # current or the previous path of a file.  A string starting with
    # '^' goes through the SQL "matches" function; anything else must
    # equal the path exactly.
    pattern = p[2]
    columns = gertty.db.file_table.c
    if not pattern.startswith('^'):
        p[0] = or_(columns.path == pattern,
                   columns.old_path == pattern)
        return
    p[0] = or_(func.matches(pattern, columns.path),
               func.matches(pattern, columns.old_path))
def p_status_term(p):
'''status_term : OP_STATUS string'''
if p[2] == 'open':

2
gertty/search/tokenizer.py

@ -32,7 +32,7 @@ operators = {
'label': 'OP_LABEL',
'message': 'OP_MESSAGE',
'comment': 'OP_COMMENT',
#'file': 'OP_FILE', # needs local file list
'file': 'OP_FILE',
'has': 'OP_HAS',
'is': 'OP_IS',
'status': 'OP_STATUS',

14
gertty/sync.py

@ -525,7 +525,7 @@ class SyncChangeTask(Task):
def run(self, sync):
start_time = time.time()
app = sync.app
remote_change = sync.get('changes/%s?o=DETAILED_LABELS&o=ALL_REVISIONS&o=ALL_COMMITS&o=MESSAGES&o=DETAILED_ACCOUNTS&o=CURRENT_ACTIONS' % self.change_id)
remote_change = sync.get('changes/%s?o=DETAILED_LABELS&o=ALL_REVISIONS&o=ALL_COMMITS&o=MESSAGES&o=DETAILED_ACCOUNTS&o=CURRENT_ACTIONS&o=ALL_FILES' % self.change_id)
# Perform subqueries this task will need outside of the db session
for remote_commit, remote_revision in remote_change.get('revisions', {}).items():
remote_comments_data = sync.get('changes/%s/revisions/%s/comments' % (self.change_id, remote_commit))
@ -617,6 +617,18 @@ class SyncChangeTask(Task):
(change.id, remote_revision['_number'], revision.parent))
parent_commits.add(revision.parent)
result.updateRelatedChanges(session, change)
filemap = {}
for remote_path, remote_file in remote_revision['files'].items():
if remote_file.get('binary'):
inserted = deleted = None
else:
inserted = remote_file.get('lines_inserted', 0)
deleted = remote_file.get('lines_deleted', 0)
f = revision.createFile(remote_path, remote_file.get('status', 'M'),
remote_file.get('old_path'), inserted, deleted)
filemap[remote_path] = f
remote_comments_data = remote_revision['_gertty_remote_comments_data']
for remote_file, remote_comments in remote_comments_data.items():
for remote_comment in remote_comments:

15
gertty/view/change.py

@ -233,22 +233,15 @@ class RevisionRow(urwid.WidgetWrap):
self.commit_sha = revision.commit
self.can_submit = revision.can_submit
self.title = mywid.TextButton(u'', on_press = self.expandContract)
stats = repo.diffstat(revision.parent, revision.commit)
table = mywid.Table(columns=3)
total_added = 0
total_removed = 0
for added, removed, filename in stats:
try:
added = int(added)
except ValueError:
added = 0
try:
removed = int(removed)
except ValueError:
removed = 0
for rfile in revision.files:
added = rfile.inserted or 0
removed = rfile.deleted or 0
total_added += added
total_removed += removed
table.addRow([urwid.Text(('filename', filename), wrap='clip'),
table.addRow([urwid.Text(('filename', rfile.display_path), wrap='clip'),
urwid.Text([('lines-added', '+%i' % (added,)), ', '],
align=urwid.RIGHT),
urwid.Text(('lines-removed', '-%i' % (removed,)))])

Loading…
Cancel
Save