From a61c5fdf673712104e0271bf3135fa8710c1f4b1 Mon Sep 17 00:00:00 2001 From: "James E. Blair" Date: Mon, 13 Apr 2015 15:35:57 -0400 Subject: [PATCH] Add files table In order to support searching for changes by file modified, keep a local table of files modified in each revision. In order to populate the table without needing to re-sync every change, run a diffstat on every revision during the migration. Once established, the table will be populated during the normal change sync. Add support for the "file:" operator in searching. Change-Id: Idbd44944405bdbc93af8bc3c513498e4425ba75b --- .../versions/50344aecd1c2_add_files_table.py | 93 +++++++++++++++++++ gertty/db.py | 65 ++++++++++++- gertty/search/__init__.py | 5 + gertty/search/parser.py | 10 ++ gertty/search/tokenizer.py | 2 +- gertty/sync.py | 14 ++- gertty/view/change.py | 15 +-- 7 files changed, 189 insertions(+), 15 deletions(-) create mode 100644 gertty/alembic/versions/50344aecd1c2_add_files_table.py diff --git a/gertty/alembic/versions/50344aecd1c2_add_files_table.py b/gertty/alembic/versions/50344aecd1c2_add_files_table.py new file mode 100644 index 0000000..d04f1e4 --- /dev/null +++ b/gertty/alembic/versions/50344aecd1c2_add_files_table.py @@ -0,0 +1,93 @@ +"""add files table + +Revision ID: 50344aecd1c2 +Revises: 1bb187bcd401 +Create Date: 2015-04-13 08:08:08.682803 + +""" + +# revision identifiers, used by Alembic. 
def _int_or_none(value):
    """Return ``value`` converted to int, or None when it is not numeric.

    Diffstat counts that cannot be parsed (presumably the placeholder
    reported for binary files -- confirm against ``repo.diffstat``) are
    stored as NULL rather than as a made-up number.
    """
    try:
        return int(value)
    except ValueError:
        return None


def upgrade():
    """Create the ``file`` table and backfill it from the local git repos.

    Every existing revision gets a ``/COMMIT_MSG`` row, followed by one row
    per entry returned by ``repo.diffstat(parent, commit)``.  Entries whose
    path has the ``pre{old => new}post`` (or plain ``old => new``) form are
    recorded as renames with both paths expanded; everything else is
    recorded as modified.

    Revisions whose diff cannot be computed are skipped; a message is
    printed unless the change is already MERGED or ABANDONED.
    """
    op.create_table('file',
    sa.Column('key', sa.Integer(), nullable=False),
    sa.Column('revision_key', sa.Integer(), nullable=False, index=True),
    sa.Column('path', sa.Text(), nullable=False, index=True),
    sa.Column('old_path', sa.Text(), index=True),
    sa.Column('status', sa.String(length=1)),
    sa.Column('inserted', sa.Integer()),
    sa.Column('deleted', sa.Integer()),
    sa.PrimaryKeyConstraint('key')
    )

    # Raw string so that "\{" and "\}" reach the regex engine untouched
    # instead of relying on Python passing unknown escapes through.
    # Groups: 2 = shared prefix, 3 = old middle, 4 = new middle,
    # 6 = shared suffix.
    pathre = re.compile(r'((.*?)\{|^)(.*?) => (.*?)(\}(.*)|$)')
    insert = sa.text('insert into file (key, revision_key, path, old_path, status, inserted, deleted) '
                     ' values (NULL, :revision_key, :path, :old_path, :status, :inserted, :deleted)')

    conn = op.get_bind()

    countres = conn.execute('select count(*) from revision')
    revisions = countres.fetchone()[0]
    if revisions > 50:
        print('')
        print('Adding support for searching for changes by file modified. '
              'This may take a while.')

    qres = conn.execute('select p.name, c.number, c.status, r.key, r.number, r."commit", r.parent from project p, change c, revision r '
                        'where r.change_key=c.key and c.project_key=p.key order by p.name')

    for count, row in enumerate(qres.fetchall(), 1):
        (pname, cnumber, cstatus, rkey, rnumber, commit, parent) = row
        # Closed changes are expected to have missing commits; stay quiet
        # about them.
        closed = cstatus in ['MERGED', 'ABANDONED']
        sys.stdout.write('Diffstat revision %s / %s\r' % (count, revisions))
        sys.stdout.flush()
        # The commit message pseudo-file is recorded for every revision,
        # even when the diff below cannot be computed.
        conn.execute(insert, revision_key=rkey, path='/COMMIT_MSG', old_path=None,
                     status=None, inserted=None, deleted=None)
        repo = context.config.gertty_app.getRepo(pname)
        try:
            stats = repo.diffstat(parent, commit)
        except git.exc.GitCommandError:
            # Probably a missing commit
            if not closed:
                print("Unable to examine diff for %s %s change %s,%s" % (cstatus, pname, cnumber, rnumber))
            continue
        for stat in stats:
            try:
                (added, removed, path) = stat
            except ValueError:
                if not closed:
                    print("Empty diffstat for %s %s change %s,%s" % (cstatus, pname, cnumber, rnumber))
                # Nothing usable in this entry.  Without skipping it the
                # code below would reuse the previous entry's values (or
                # fail with NameError on the first entry).
                continue
            m = pathre.match(path)
            status = gertty.db.File.STATUS_MODIFIED
            old_path = None
            if m:
                status = gertty.db.File.STATUS_RENAMED
                pre = m.group(2) or ''
                post = m.group(6) or ''
                old_path = pre+m.group(3)+post
                path = pre+m.group(4)+post
            conn.execute(insert, revision_key=rkey, path=path, old_path=old_path,
                         status=status, inserted=_int_or_none(added),
                         deleted=_int_or_none(removed))
    print('')
class File(object):
    """A file touched by a revision (one row of the ``file`` table).

    ``status`` is a single-character code; the ``STATUS_*`` constants
    below name the known values.
    """
    STATUS_ADDED = 'A'
    STATUS_DELETED = 'D'
    STATUS_RENAMED = 'R'
    STATUS_COPIED = 'C'
    STATUS_REWRITTEN = 'W'
    STATUS_MODIFIED = 'M'

    def __init__(self, revision, path, status, old_path=None,
                 inserted=None, deleted=None):
        """Describe one file of ``revision``.

        :param revision: owning revision; only its ``key`` is stored.
        :param path: current path of the file.
        :param status: one of the ``STATUS_*`` codes.
        :param old_path: previous path when the file was moved, else None.
        :param inserted: number of lines added, or None when unknown.
        :param deleted: number of lines removed, or None when unknown.
        """
        self.revision_key = revision.key
        self.path = path
        self.status = status
        self.old_path = old_path
        self.inserted = inserted
        self.deleted = deleted

    @staticmethod
    def _common_prefix_length(first, second):
        """Return how many leading characters ``first`` and ``second`` share."""
        length = 0
        # zip() stops at the shorter string, so neither can be over-indexed.
        for a, b in zip(first, second):
            if a != b:
                break
            length += 1
        return length

    @property
    def display_path(self):
        """Path to show to the user.

        Without an ``old_path`` this is simply ``path``.  Otherwise the
        characters common to the start and end of both paths are factored
        out, giving ``pre{old => new}post``; when the paths share nothing
        at either end the result is ``old => new``.
        """
        if not self.old_path:
            return self.path
        old, new = self.old_path, self.path
        pre_len = self._common_prefix_length(old, new)
        # Search for the shared suffix only in what follows the prefix so
        # the two can never overlap (e.g. "a/a/b" -> "a/b").
        post_len = self._common_prefix_length(old[pre_len:][::-1],
                                              new[pre_len:][::-1])
        if not pre_len and not post_len:
            return '%s => %s' % (old, new)
        # Explicit end indexes: a negative slice bound of -0 would select
        # an empty string when there is no shared suffix.
        old_end = len(old) - post_len
        new_end = len(new) - post_len
        return '%s{%s => %s}%s' % (old[:pre_len], old[pre_len:old_end],
                                   new[pre_len:new_end], old[old_end:])
order_by=(comment_table.c.line, comment_table.c.created)), + files=relationship(File, backref='revision'), pending_cherry_picks=relationship(PendingCherryPick, backref='revision'), )) mapper(Message, message_table, properties=dict( author=relationship(Account))) +mapper(File, file_table) mapper(Comment, comment_table, properties=dict( author=relationship(Account))) mapper(Label, label_table) @@ -513,7 +573,7 @@ class Database(object): self.app = app self.engine = create_engine(self.app.config.dburi) #metadata.create_all(self.engine) - self.migrate() + self.migrate(app) # If we want the objects returned from query() to be usable # outside of the session, we need to expunge them from the session, # and since the DatabaseSession always calls commit() on the session @@ -528,7 +588,7 @@ class Database(object): def getSession(self): return DatabaseSession(self) - def migrate(self): + def migrate(self, app): conn = self.engine.connect() context = alembic.migration.MigrationContext.configure(conn) current_rev = context.get_current_revision() @@ -539,6 +599,7 @@ class Database(object): config = alembic.config.Config() config.set_main_option("script_location", "gertty:alembic") config.set_main_option("sqlalchemy.url", self.app.config.dburi) + config.gertty_app = app if current_rev is None and has_table: self.log.debug('Stamping database as initial revision') diff --git a/gertty/search/__init__.py b/gertty/search/__init__.py index 0aa58bb..956662f 100644 --- a/gertty/search/__init__.py +++ b/gertty/search/__init__.py @@ -55,6 +55,11 @@ class SearchCompiler(object): result = and_(gertty.db.change_table.c.account_key == gertty.db.account_table.c.key, result) tables.remove(gertty.db.account_table) + if gertty.db.file_table in tables: + result = and_(gertty.db.file_table.c.revision_key == gertty.db.revision_table.c.key, + gertty.db.revision_table.c.change_key == gertty.db.change_table.c.key, + result) + tables.remove(gertty.db.file_table) if tables: raise Exception("Unknown table 
def p_file_term(p):
    '''file_term : OP_FILE string'''
    # NOTE: the docstring above is the grammar rule for this production
    # (presumably consumed by the parser generator -- confirm); it must
    # not be reworded.
    #
    # Build a clause that is true when either the current or the previous
    # path of a file matches the search value.
    value = p[2]
    columns = (gertty.db.file_table.c.path,
               gertty.db.file_table.c.old_path)
    if value.startswith('^'):
        # Values anchored with "^" go through the SQL ``matches`` function.
        clauses = [func.matches(value, column) for column in columns]
    else:
        # Anything else has to equal the stored path exactly.
        clauses = [column == value for column in columns]
    p[0] = or_(*clauses)
(self.change_id, remote_commit)) @@ -617,6 +617,18 @@ class SyncChangeTask(Task): (change.id, remote_revision['_number'], revision.parent)) parent_commits.add(revision.parent) result.updateRelatedChanges(session, change) + + filemap = {} + for remote_path, remote_file in remote_revision['files'].items(): + if remote_file.get('binary'): + inserted = deleted = None + else: + inserted = remote_file.get('lines_inserted', 0) + deleted = remote_file.get('lines_deleted', 0) + f = revision.createFile(remote_path, remote_file.get('status', 'M'), + remote_file.get('old_path'), inserted, deleted) + filemap[remote_path] = f + remote_comments_data = remote_revision['_gertty_remote_comments_data'] for remote_file, remote_comments in remote_comments_data.items(): for remote_comment in remote_comments: diff --git a/gertty/view/change.py b/gertty/view/change.py index ec43040..31fe316 100644 --- a/gertty/view/change.py +++ b/gertty/view/change.py @@ -233,22 +233,15 @@ class RevisionRow(urwid.WidgetWrap): self.commit_sha = revision.commit self.can_submit = revision.can_submit self.title = mywid.TextButton(u'', on_press = self.expandContract) - stats = repo.diffstat(revision.parent, revision.commit) table = mywid.Table(columns=3) total_added = 0 total_removed = 0 - for added, removed, filename in stats: - try: - added = int(added) - except ValueError: - added = 0 - try: - removed = int(removed) - except ValueError: - removed = 0 + for rfile in revision.files: + added = rfile.inserted or 0 + removed = rfile.deleted or 0 total_added += added total_removed += removed - table.addRow([urwid.Text(('filename', filename), wrap='clip'), + table.addRow([urwid.Text(('filename', rfile.display_path), wrap='clip'), urwid.Text([('lines-added', '+%i' % (added,)), ', '], align=urwid.RIGHT), urwid.Text(('lines-removed', '-%i' % (removed,)))])