Support (most of) gerrit search syntax

Adds a parser that compiles gerrit search queries into sqlalchemy filters, so that the gerrit search syntax is supported both in arbitrary searches (ctrl-o) and in dashboards. Change-Id: Ibc8e5c180b0ca16727938725cfe6380ba7315205
2014-07-23 15:52:50 -07:00 · 2014-07-23 15:52:50 -07:00 · 6181e4f9bf
commit 6181e4f9bf
parent 42419904a8
8 changed files with 512 additions and 28 deletions
--- a/gertty/app.py
+++ b/gertty/app.py
@ -27,6 +27,7 @@ from gertty import config
 from gertty import gitrepo
 from gertty import mywid
 from gertty import sync
+from gertty import search
 from gertty.view import change_list as view_change_list
 from gertty.view import project_list as view_project_list
 from gertty.view import change as view_change
@ -106,6 +107,7 @@ class App(object):
                            level=level)
        self.log = logging.getLogger('gertty.App')
        self.log.debug("Starting")
+        self.search = search.SearchCompiler(self)
        self.db = db.Database(self)
        self.sync = sync.Sync(self)

@ -209,14 +211,12 @@ class App(object):

    def _syncOneChangeFromQuery(self, query):
        number = changeid = None
-        if query.startswith("number:"):
+        if query.startswith("change:"):
            number = query.split(':')[1].strip()
            try:
                number = int(number)
-            except Exception:
-                pass
-        if query.startswith("changeid:"):
-            changeid = query.split(':')[1].strip()
+            except ValueError:
+                changeid = query.split(':')[1].strip()
        if not (number or changeid):
            return
        with self.db.getSession() as session:
@ -243,14 +243,17 @@ class App(object):
        if change_key is None:
            raise Exception('Change is not in local database.')

-    def search(self, query):
+    def doSearch(self, query):
        self.log.debug("Search query: %s" % query)
        try:
            self._syncOneChangeFromQuery(query)
        except Exception as e:
            return self.error(e.message)
        with self.db.getSession() as session:
-            changes = session.getChanges(query)
+            try:
+                changes = session.getChanges(query)
+            except gertty.search.SearchSyntaxError as e:
+                return self.error(e.message)
            change_key = None
            if len(changes) == 1:
                change_key = changes[0].key
@ -261,7 +264,7 @@ class App(object):
                view = view_change_list.ChangeListView(self, query)
            self.changeScreen(view)
        except gertty.view.DisplayError as e:
-            self.error(e.message)
+            return self.error(e.message)

    def searchDialog(self):
        dialog = SearchDialog()
@ -275,10 +278,10 @@ class App(object):
        self.backScreen()
        query = dialog.entry.edit_text
        try:
-            query = 'number:%s' % int(query)
-        except Exception:
+            query = 'change:%s' % int(query)
+        except ValueError:
            pass
-        self.search(query)
+        self.doSearch(query)

    def error(self, message):
        dialog = mywid.MessageDialog('Error', message)
--- a/gertty/db.py
+++ b/gertty/db.py
@ -374,6 +374,7 @@ class DatabaseSession(object):
    def __init__(self, database):
        self.database = database
        self.session = database.session
+        self.search = database.app.search

    def __enter__(self):
        self.database.lock.acquire()
@ -443,25 +444,13 @@ class DatabaseSession(object):
            return None

    def getChanges(self, query, unreviewed=False):
-        #TODO(jeblair): use a real parser that supports the full gerrit query syntax
-        q = self.session().query(Change)
-        for term in query.split():
-            key, data = term.split(':')
-            if key == 'number':
-                q = q.filter(change_table.c.number==data)
-            elif key == 'changeid':
-                q = q.filter(change_table.c.change_id==data)
-            elif key == 'project_key':
-                q = q.filter(change_table.c.project_key==data)
-            elif key == 'status':
-                if data == 'open':
-                    q = q.filter(change_table.c.status.notin_(['MERGED', 'ABANDONED']))
-                else:
-                    q = q.filter(change_table.c.status==data)
+        self.database.log.debug("Search query: %s" % query)
+        search_filter = self.search.parse(query)
+        q = self.session().query(Change).filter(search_filter).order_by(change_table.c.number)
        if unreviewed:
            q = q.filter(change_table.c.hidden==False, change_table.c.reviewed==False)
        try:
-            return q.order_by(change_table.c.number).all()
+            return q.all()
        except sqlalchemy.orm.exc.NoResultFound:
            return []

--- a/gertty/search/__init__.py
+++ b/gertty/search/__init__.py
@ -0,0 +1,26 @@
+# Copyright 2014 Hewlett-Packard Development Company, L.P.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+from gertty.search import tokenizer, parser
+
+class SearchSyntaxError(Exception):
+    """Raised by the search parser when a query cannot be compiled."""
+    pass
+
+class SearchCompiler(object):
+    """Pairs the search tokenizer with the search parser."""
+
+    def __init__(self, app):
+        # app is accepted but not used by this constructor.
+        self.lexer, self.parser = (tokenizer.SearchTokenizer(),
+                                   parser.SearchParser())
+
+    def parse(self, data):
+        """Return what the parser produces for data, using our lexer."""
+        compiled = self.parser.parse(data, lexer=self.lexer)
+        return compiled
--- a/gertty/search/parser.py
+++ b/gertty/search/parser.py
@ -0,0 +1,236 @@
+# Copyright 2014 Hewlett-Packard Development Company, L.P.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import datetime
+import re
+
+import ply.yacc as yacc
+from sqlalchemy.sql.expression import and_, or_
+
+import gertty.db
+import gertty.search
+
+from tokenizer import tokens
+
+def SearchParser():
+    """Build the ply.yacc parser that compiles a search query.
+
+    The p_* functions below are grammar productions: ply reads each
+    one's docstring as the grammar rule, so those docstrings are code
+    and must not be reworded.  Each production stores a SQLAlchemy
+    filter expression (or an intermediate value) in p[0].  Unsupported
+    or malformed input is reported by raising
+    gertty.search.SearchSyntaxError.
+    """
+    # Local import: the module-level import only brings in and_/or_.
+    from sqlalchemy.sql.expression import not_
+
+    def p_terms(p):
+        '''expression : list_expr
+                      | paren_expr
+                      | boolean_expr
+                      | negative_expr
+                      | term'''
+        p[0] = p[1]
+
+    def p_list_expr(p):
+        '''list_expr : expression expression'''
+        # Adjacent expressions are implicitly ANDed together.
+        p[0] = and_(p[1], p[2])
+
+    def p_paren_expr(p):
+        '''paren_expr : LPAREN expression RPAREN'''
+        p[0] = p[2]
+
+    def p_boolean_expr(p):
+        '''boolean_expr : expression AND expression
+                        | expression OR expression'''
+        # The keyword may be written in either case and its text is
+        # passed through unchanged, so normalize before comparing.
+        operator = p[2].lower()
+        if operator == 'and':
+            p[0] = and_(p[1], p[3])
+        elif operator == 'or':
+            p[0] = or_(p[1], p[3])
+        else:
+            raise gertty.search.SearchSyntaxError(
+                'Syntax error: unknown boolean operator "%s"' % p[2])
+
+    def p_negative_expr(p):
+        '''negative_expr : NOT expression
+                         | NEG expression'''
+        # p[1] is the NOT/NEG token; the expression to negate is p[2].
+        p[0] = not_(p[2])
+
+    def p_term(p):
+        '''term : age_term
+                | change_term
+                | owner_term
+                | reviewer_term
+                | commit_term
+                | project_term
+                | project_key_term
+                | branch_term
+                | topic_term
+                | ref_term
+                | label_term
+                | message_term
+                | comment_term
+                | has_term
+                | is_term
+                | status_term
+                | op_term'''
+        p[0] = p[1]
+
+    def p_string(p):
+        '''string : SSTRING
+                  | DSTRING
+                  | USTRING'''
+        p[0] = p[1]
+
+    def p_age_unit(p):
+        '''age_unit : SECONDS
+                    | MINUTES
+                    | HOURS
+                    | DAYS
+                    | WEEKS
+                    | MONTHS
+                    | YEARS'''
+        p[0] = p[1]
+
+    # Number of seconds in each canonical age unit.
+    seconds_per_unit = {
+        'seconds': 1,
+        'minutes': 60,
+        'hours': 60 * 60,
+        'days': 60 * 60 * 24,
+        'weeks': 60 * 60 * 24 * 7,
+        'months': 60 * 60 * 24 * 30,
+        'years': 60 * 60 * 24 * 365,
+        }
+
+    # Every abbreviated spelling of a unit, mapped to its canonical name.
+    unit_aliases = {
+        's': 'seconds', 'sec': 'seconds', 'second': 'seconds',
+        'm': 'minutes', 'min': 'minutes', 'minute': 'minutes',
+        'h': 'hours', 'hr': 'hours', 'hour': 'hours',
+        'd': 'days', 'day': 'days',
+        'w': 'weeks', 'week': 'weeks',
+        'mon': 'months', 'month': 'months',
+        'y': 'years', 'year': 'years',
+        }
+
+    def p_age_term(p):
+        '''age_term : OP_AGE NUMBER age_unit
+                    | OP_AGE NUMBER string'''
+        # The unit may arrive as an age_unit token or as a plain string
+        # (the catch-all string rule can match the unit text first), so
+        # both forms are accepted and normalized here.
+        # p[1] is the operator token, p[2] the count and p[3] the unit.
+        unit = unit_aliases.get(p[3], p[3])
+        try:
+            seconds = seconds_per_unit[unit]
+        except KeyError:
+            raise gertty.search.SearchSyntaxError(
+                'Syntax error: age unit "%s" is not supported' % p[3])
+        now = datetime.datetime.utcnow()
+        # A datetime can only be offset by a timedelta, not a bare int.
+        delta = datetime.timedelta(seconds=p[2] * seconds)
+        p[0] = gertty.db.change_table.c.updated < (now-delta)
+
+    def p_change_term(p):
+        '''change_term : OP_CHANGE CHANGE_ID
+                       | OP_CHANGE NUMBER'''
+        # NUMBER tokens carry an int value; CHANGE_ID tokens carry text.
+        if isinstance(p[2], int):
+            p[0] = gertty.db.change_table.c.number == p[2]
+        else:
+            p[0] = gertty.db.change_table.c.change_id == p[2]
+
+    def p_owner_term(p):
+        '''owner_term : OP_OWNER string'''
+        p[0] = gertty.db.change_table.c.owner == p[2]
+
+    def p_reviewer_term(p):
+        '''reviewer_term : OP_REVIEWER string'''
+        p[0] = gertty.db.approval_table.c.name == p[2]
+
+    def p_commit_term(p):
+        '''commit_term : OP_COMMIT string'''
+        p[0] = gertty.db.revision_table.c.commit == p[2]
+
+    def p_project_term(p):
+        '''project_term : OP_PROJECT string'''
+        #TODO: support regex
+        p[0] = gertty.db.project_table.c.name == p[2]
+
+    def p_project_key_term(p):
+        '''project_key_term : OP_PROJECT_KEY NUMBER'''
+        p[0] = gertty.db.change_table.c.project_key == p[2]
+
+    def p_branch_term(p):
+        '''branch_term : OP_BRANCH string'''
+        #TODO: support regex
+        p[0] = gertty.db.change_table.c.branch == p[2]
+
+    def p_topic_term(p):
+        '''topic_term : OP_TOPIC string'''
+        #TODO: support regex
+        p[0] = gertty.db.change_table.c.topic == p[2]
+
+    def p_ref_term(p):
+        '''ref_term : OP_REF string'''
+        #TODO: support regex
+        # Only strip the prefix when it is actually present; slicing
+        # unconditionally would mangle a ref given without it.
+        ref = p[2]
+        prefix = 'refs/heads/'
+        if ref.startswith(prefix):
+            ref = ref[len(prefix):]
+        p[0] = gertty.db.change_table.c.branch == ref
+
+    label_re = re.compile(r'(?P<label>[a-zA-Z0-9_-]+([a-zA-Z]|((?<![-+])[0-9])))'
+                          r'(?P<operator>[<>]?=?)(?P<value>[-+]?[0-9]+)'
+                          r'($|,user=(?P<user>\S+))')
+
+    def p_label_term(p):
+        '''label_term : OP_LABEL string'''
+        args = label_re.match(p[2])
+        if args is None:
+            # Report a malformed label as a search error rather than
+            # failing with AttributeError on the missing match object.
+            raise gertty.search.SearchSyntaxError(
+                'Syntax error: label:%s is not a valid label expression' % p[2])
+        label = args.group('label')
+        # An omitted operator (e.g. "Code-Review+1") means equality.
+        op = args.group('operator') or '='
+        value = int(args.group('value'))
+        user = args.group('user')
+
+        value_column = gertty.db.approval_table.c.value
+        filters = []
+        filters.append(gertty.db.approval_table.c.category == label)
+        # The regex accepts '', '=', '<', '>', '<=' and '>='.
+        if op == '=':
+            filters.append(value_column == value)
+        elif op == '>=':
+            filters.append(value_column >= value)
+        elif op == '<=':
+            filters.append(value_column <= value)
+        elif op == '>':
+            filters.append(value_column > value)
+        elif op == '<':
+            filters.append(value_column < value)
+        if user is not None:
+            filters.append(gertty.db.approval_table.c.name == user)
+        p[0] = and_(*filters)
+
+    def p_message_term(p):
+        '''message_term : OP_MESSAGE string'''
+        # p[1] is the operator token; the search text is p[2].
+        p[0] = gertty.db.revision_table.c.message.like(p[2])
+
+    def p_comment_term(p):
+        '''comment_term : OP_COMMENT string'''
+        # p[1] is the operator token; the search text is p[2].
+        # NOTE(review): and_ requires the text to match in both tables;
+        # or_ may have been intended -- confirm before changing.
+        p[0] = and_(gertty.db.message_table.c.message.like(p[2]),
+                    gertty.db.comment_table.c.message.like(p[2]))
+
+    def p_has_term(p):
+        '''has_term : OP_HAS string'''
+        #TODO: implement star
+        if p[2] == 'draft':
+            p[0] = gertty.db.message_table.c.pending == True
+        else:
+            raise gertty.search.SearchSyntaxError('Syntax error: has:%s is not supported' % p[2])
+
+    def p_is_term(p):
+        '''is_term : OP_IS string'''
+        #TODO: implement starred, watched, owner, reviewer, draft
+        if p[2] == 'reviewed':
+            p[0] = gertty.db.approval_table.c.value != 0
+        elif p[2] == 'open':
+            p[0] = gertty.db.change_table.c.status.notin_(['MERGED', 'ABANDONED'])
+        elif p[2] == 'closed':
+            p[0] = gertty.db.change_table.c.status.in_(['MERGED', 'ABANDONED'])
+        elif p[2] == 'submitted':
+            p[0] = gertty.db.change_table.c.status == 'SUBMITTED'
+        elif p[2] == 'merged':
+            p[0] = gertty.db.change_table.c.status == 'MERGED'
+        elif p[2] == 'abandoned':
+            p[0] = gertty.db.change_table.c.status == 'ABANDONED'
+        else:
+            # The message names the operator the user actually typed.
+            raise gertty.search.SearchSyntaxError('Syntax error: is:%s is not supported' % p[2])
+
+    def p_status_term(p):
+        '''status_term : OP_STATUS string'''
+        if p[2] == 'open':
+            p[0] = gertty.db.change_table.c.status.notin_(['MERGED', 'ABANDONED'])
+        elif p[2] == 'closed':
+            p[0] = gertty.db.change_table.c.status.in_(['MERGED', 'ABANDONED'])
+        else:
+            p[0] = gertty.db.change_table.c.status == p[2].upper()
+
+    def p_op_term(p):
+        'op_term : OP'
+        # OP is the token type given to operators that are not in the
+        # tokenizer's operator table; name the operator in the error.
+        raise gertty.search.SearchSyntaxError(
+            'Syntax error: operator "%s" is not supported' % p[1])
+
+    def p_error(p):
+        # p is the offending token, or None at end of input.
+        if p:
+            raise gertty.search.SearchSyntaxError('Syntax error at "%s" in search string "%s" (col %s)' % (
+                    p.lexer.lexdata[p.lexpos:], p.lexer.lexdata, p.lexpos))
+        else:
+            raise gertty.search.SearchSyntaxError('Syntax error: EOF in search string')
+
+    return yacc.yacc(debug=0, write_tables=0)
--- a/gertty/search/test.py
+++ b/gertty/search/test.py
@ -0,0 +1,73 @@
+import gertty.search
+import re
+import sys
+
+# Ad-hoc exercise script: prints results for manual inspection and
+# asserts nothing.
+
+# Same pattern as label_re in parser.py, repeated here so it can be
+# tried on its own.
+label_re = re.compile(r'(?P<label>[a-zA-Z0-9_-]+([a-zA-Z]|((?<![-+])[0-9])))'
+                      r'(?P<operator>[<>]?=?)(?P<value>[-+]?[0-9]+)'
+                      r'($|,user=(?P<user>\S+))')
+
+# Match each label expression, with and without a ",user=" suffix, and
+# print the regex groups (or None when there is no match).
+for a in [
+    'Code-Review=1',
+    'Code-Review=+1',
+    'Code-Review=-1',
+    'Code-Review>=+1',
+    'Code-Review<=-1',
+    'Code-Review+1',
+    'Code-Review-1',
+    ]:
+    for b in [
+        '',
+        ',user=corvus',
+        ]:
+        data = a+b
+        print
+        print data
+        m = label_re.match(data)
+        print 'res', m and m.groups()
+
+#sys.exit(0)
+# SearchCompiler is given None for its app argument.
+parser = gertty.search.SearchCompiler(None)
+
+import tokenizer
+lexer = tokenizer.SearchTokenizer()
+lexer.input("project:foo/bar")
+
+# Tokenize
+while True:
+    tok = lexer.token()
+    if not tok: break      # No more input
+    print tok
+
+#TODO: unit test
+# Compile the same label expressions as full "label:" queries and print
+# the result of each parse.
+for a in [
+    'label:Code-Review=1',
+    'label:Code-Review=+1',
+    'label:Code-Review=-1',
+    'label:Code-Review>=+1',
+    'label:Code-Review<=-1',
+    'label:Code-Review+1',
+    'label:Code-Review-1',
+    ]:
+    for b in [
+        '',
+        ',user=corvus',
+        ]:
+        data = a+b
+        print
+        print data
+        result = parser.parse(data)
+        print 'res', str(result)
+
+# Compile queries that combine terms implicitly, with and/or, and with
+# parentheses, printing each result.
+for data in [
+    '_project_key:18 status:open',
+    'project:foo/bar status:open',
+    'project:foo and status:open',
+    'project:foo or status:open',
+    'project:foo and (status:merged or status:new)',
+    'project:foo or project:bar or project:baz',
+    'project:foo project:bar project:baz',
+    ]:
+    print
+    print data
+    result = parser.parse(data)
+    print 'res', str(result)
--- a/gertty/search/tokenizer.py
+++ b/gertty/search/tokenizer.py
@ -0,0 +1,156 @@
+# Copyright 2014 Hewlett-Packard Development Company, L.P.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import ply.lex as lex
+
+# Maps an operator keyword (the text before the ':') to its token type.
+# t_OP looks keywords up here and falls back to the generic 'OP' type.
+operators = {
+    'age': 'OP_AGE',
+    'change': 'OP_CHANGE',
+    'owner': 'OP_OWNER',
+    #'OP_OWNERIN', # needs local group membership
+    'reviewer': 'OP_REVIEWER',
+    #'OP_REVIEWERIN', # needs local group membership
+    'commit': 'OP_COMMIT',
+    'project': 'OP_PROJECT',
+    '_project_key': 'OP_PROJECT_KEY',  # internal gertty use only
+    'branch': 'OP_BRANCH',
+    'topic': 'OP_TOPIC',
+    'ref': 'OP_REF',
+    #'tr': 'OP_TR', # needs trackingids
+    #'bug': 'OP_BUG', # needs trackingids
+    'label': 'OP_LABEL',
+    'message': 'OP_MESSAGE',
+    'comment': 'OP_COMMENT',
+    #'file': 'OP_FILE', # needs local file list
+    'has': 'OP_HAS',
+    'is': 'OP_IS',
+    'status': 'OP_STATUS',
+    }
+
+# NOTE(review): nothing in this module appears to read this table; the
+# OR/NOT keywords are matched by t_OR and t_NOT instead -- confirm
+# before removing.
+reserved = {
+    'or|OR': 'OR',
+    'not|NOT': 'NOT',
+    }
+
+# Token type names; the parser module imports this list.  The list
+# concatenation relies on dict.values() returning a list.
+# NOTE(review): the t_INTEGER rule in SearchTokenizer produces a token
+# type that is not listed here -- confirm whether it should be.
+tokens = [
+    'OP',
+    'AND',
+    'OR',
+    'NOT',
+    'NEG',
+    'LPAREN',
+    'RPAREN',
+    'SECONDS',
+    'MINUTES',
+    'HOURS',
+    'DAYS',
+    'WEEKS',
+    'MONTHS',
+    'YEARS',
+    'NUMBER',
+    'CHANGE_ID',
+    'SSTRING',
+    'DSTRING',
+    'USTRING',
+    #'REGEX',
+    #'SHA',
+    ] + operators.values()
+
+def SearchTokenizer():
+    """Build the ply.lex lexer for search queries.
+
+    The t_* names below are lexer rules: for a function rule the
+    docstring is the regular expression, so those docstrings are code.
+    ply tries function rules in definition order and only then the
+    string rules, so the order of the functions below is significant.
+    """
+    t_LPAREN     = r'\('
+    t_RPAREN     = r'\)'
+    t_NEG        = r'!'
+
+    def t_OP(t):
+        r'[a-zA-Z_][a-zA-Z_]*:'
+        # Known operators get their own token type; anything else
+        # ending in ':' is reported as a generic OP.
+        t.type = operators.get(t.value[:-1], 'OP')
+        return t
+
+    # Written as a function and placed ahead of t_USTRING: as a string
+    # rule it was tried after every function rule, so the catch-all
+    # t_USTRING always matched a Change-Id first.
+    def t_CHANGE_ID(t):
+        r'I[a-fA-F0-9]{7,40}'
+        return t
+
+    def t_SSTRING(t):
+        r"'([^\\']+|\\'|\\\\)*'"
+        # Drop the surrounding quotes and resolve backslash escapes.
+        t.value=t.value[1:-1].decode("string-escape")
+        return t
+
+    def t_DSTRING(t):
+        r'"([^\\"]+|\\"|\\\\)*"'
+        # Drop the surrounding quotes and resolve backslash escapes.
+        t.value=t.value[1:-1].decode("string-escape")
+        return t
+
+    def t_AND(t):
+        r'and|AND'
+        return t
+
+    def t_OR(t):
+        r'or|OR'
+        return t
+
+    def t_NOT(t):
+        r'not|NOT'
+        return t
+
+    # NOTE(review): INTEGER is not in the module's tokens list --
+    # confirm whether signed numbers are meant to reach the parser.
+    def t_INTEGER(t):
+        r'[+-]\d+'
+        t.value = int(t.value)
+        return t
+
+    def t_NUMBER(t):
+        r'\d+'
+        t.value = int(t.value)
+        return t
+
+    def t_USTRING(t):
+        r'([^\s\(\)!]+)'
+        # Catch-all for unquoted text up to whitespace, parens or '!'.
+        t.value=t.value.decode("string-escape")
+        return t
+
+    # Age units.  Each rule returns its token with a canonical value; a
+    # rule that returns nothing makes ply discard the match.
+    # NOTE(review): these are defined after t_USTRING, which matches the
+    # same text first, so they look unreachable as ordered -- confirm
+    # the intended precedence before moving them.
+    def t_SECONDS(t):
+        r's|sec|second|seconds'
+        t.value = 'seconds'
+        return t
+
+    def t_MINUTES(t):
+        r'm|min|minute|minutes'
+        t.value = 'minutes'
+        return t
+
+    def t_HOURS(t):
+        r'h|hr|hour|hours'
+        t.value = 'hours'
+        return t
+
+    def t_DAYS(t):
+        r'd|day|days'
+        t.value = 'days'
+        return t
+
+    def t_WEEKS(t):
+        r'w|week|weeks'
+        t.value = 'weeks'
+        return t
+
+    def t_MONTHS(t):
+        r'mon|month|months'
+        t.value = 'months'
+        return t
+
+    def t_YEARS(t):
+        r'y|year|years'
+        t.value = 'years'
+        return t
+
+    def t_newline(t):
+        r'\n+'
+        # Track line numbers only; no token is produced for newlines.
+        t.lexer.lineno += len(t.value)
+
+    t_ignore  = ' \t'
+
+    def t_error(t):
+        # Report the unrecognized character and continue after it.
+        print "Illegal character '%s'" % t.value[0]
+        t.lexer.skip(1)
+
+    return lex.lex()
--- a/gertty/view/project_list.py
+++ b/gertty/view/project_list.py
@ -129,7 +129,7 @@ class ProjectListView(urwid.WidgetWrap):
        project_key, project_name = data
        self.app.changeScreen(view_change_list.ChangeListView(
                self.app,
-                "project_key:%s status:open" % project_key,
+                "_project_key:%s status:open" % project_key,
                project_name, unreviewed=True))

    def keypress(self, size, key):
--- a/requirements.txt
+++ b/requirements.txt
@ -9,3 +9,4 @@ ordereddict
 alembic>=0.4.1
 PyYAML>=3.1.0
 voluptuous>=0.7
+ply>=3.4