Map file comment line numbers

After a build finishes, if it returned file comments, the executor
will use the repo in the workspace (if it exists) to map the
supplied line numbers to the original lines in the change (in case
an intervening change has altered the files).

A new facility for reporting warning messages is added, and if the
executor is unable to perform the mapping, or the file comment syntax
is incorrect, a warning is reported.

Change-Id: Iad48168d41df034f575b66976744dbe94ec289bc
This commit is contained in:
James E. Blair
2018-08-09 10:23:13 -07:00
parent 5ec14aa8cb
commit 4e70bebafb
18 changed files with 337 additions and 25 deletions

View File

@@ -298,7 +298,7 @@ class ExecutorClient(object):
if job.name == 'noop':
self.sched.onBuildStarted(build)
self.sched.onBuildCompleted(build, 'SUCCESS', {})
self.sched.onBuildCompleted(build, 'SUCCESS', {}, [])
return build
gearman_job = gear.TextJob('executor:execute', json_dumps(params),
@@ -391,14 +391,15 @@ class ExecutorClient(object):
# Always retry if the executor just went away
build.retry = True
result_data = data.get('data', {})
self.log.info("Build %s complete, result %s" %
(job, result))
warnings = data.get('warnings', [])
self.log.info("Build %s complete, result %s, warnings %s" %
(job, result, warnings))
# If the build should be retried, don't supply the result
# so that elsewhere we don't have to deal with keeping
# track of which results are non-final.
if build.retry:
result = None
self.sched.onBuildCompleted(build, result, result_data)
self.sched.onBuildCompleted(build, result, result_data, warnings)
# The test suite expects the build to be removed from the
# internal dict after it's added to the report queue.
del self.builds[job.unique]
@@ -453,7 +454,7 @@ class ExecutorClient(object):
# Since this isn't otherwise going to get a build complete
# event, send one to the scheduler so that it can unlock
# the nodes.
self.sched.onBuildCompleted(build, 'CANCELED', {})
self.sched.onBuildCompleted(build, 'CANCELED', {}, [])
return True
return False

View File

@@ -26,10 +26,12 @@ import tempfile
import threading
import time
import traceback
import git
from zuul.lib.yamlutil import yaml
from zuul.lib.config import get_default
from zuul.lib.statsd import get_statsd
from zuul.lib import filecomments
try:
import ara.plugins.callbacks as ara_callbacks
@@ -793,10 +795,15 @@ class AnsibleJob(object):
project['name'])
repos[project['canonical_name']] = repo
# The commit ID of the original item (before merging). Used
# later for line mapping.
item_commit = None
merge_items = [i for i in args['items'] if i.get('number')]
if merge_items:
if not self.doMergeChanges(merger, merge_items,
args['repo_state']):
item_commit = self.doMergeChanges(merger, merge_items,
args['repo_state'])
if item_commit is None:
# There was a merge conflict and we have already sent
# a work complete result, don't run any jobs
return
@@ -879,7 +886,10 @@ class AnsibleJob(object):
if self.aborted_reason == self.RESULT_DISK_FULL:
result = 'DISK_FULL'
data = self.getResultData()
warnings = []
self.mapLines(merger, args, data, item_commit, warnings)
result_data = json.dumps(dict(result=result,
warnings=warnings,
data=data))
self.log.debug("Sending result: %s" % (result_data,))
self.job.sendWorkComplete(result_data)
@@ -895,6 +905,75 @@ class AnsibleJob(object):
self.log.exception("Unable to load result data:")
return data
def mapLines(self, merger, args, data, commit, warnings):
    """Map file comment line numbers back to the original change.

    The data and warnings arguments are mutated by this method: line
    numbers inside ``data['zuul']['file_comments']`` are rewritten to
    refer to the original (pre-merge) commit, and any problems
    encountered are appended to ``warnings`` as strings.

    :param merger: the Merger holding the job's workspace repos.
    :param args: the executor job arguments.
    :param data: result data returned by the job (mutated in place).
    :param commit: commit ID of the original item (before merging),
        as captured before the job ran; may be None.
    :param warnings: list collecting human-readable warning strings.
    """
    # If we received file comments, map the line numbers before
    # we send the result.
    fc = data.get('zuul', {}).get('file_comments')
    if not fc:
        return
    # Jobs may explicitly opt out of line mapping.
    disable = data.get('zuul', {}).get('disable_file_comment_line_mapping')
    if disable:
        return
    try:
        filecomments.validate(fc)
    except Exception as e:
        warnings.append("Job %s: validation error in file comments: %s" %
                        (args['zuul']['job'], str(e)))
        # Drop the malformed comments rather than reporting them.
        del data['zuul']['file_comments']
        return
    repo = None
    for project in args['projects']:
        if (project['canonical_name'] !=
                args['zuul']['project']['canonical_name']):
            continue
        repo = merger.getRepo(project['connection'],
                              project['name'])
        # Bug fix: stop at the matching project so that 'project'
        # below refers to it.  Without the break, 'project' would be
        # whatever entry happened to be last in args['projects'], and
        # the wrong project's checkout could be selected below.
        break
    # If the repo doesn't exist, abort
    if not repo:
        return
    # Check out the selected ref again in case the job altered the
    # repo state.
    p = args['zuul']['projects'][project['canonical_name']]
    selected_ref = p['checkout']
    self.log.info("Checking out %s %s for line mapping",
                  project['canonical_name'], selected_ref)
    try:
        repo.checkout(selected_ref)
    except Exception:
        # If checkout fails, abort
        self.log.exception("Error checking out repo for line mapping")
        warnings.append("Job %s: unable to check out repo "
                        "for file comments" % (args['zuul']['job']))
        return
    lines = filecomments.extractLines(fc)
    new_lines = {}
    for (filename, lineno) in lines:
        try:
            new_lineno = repo.mapLine(commit, filename, lineno)
        except Exception as e:
            # Log at debug level since it's likely a job issue
            self.log.debug("Error mapping line:", exc_info=True)
            if isinstance(e, git.GitCommandError):
                msg = e.stderr
            else:
                msg = str(e)
            warnings.append("Job %s: unable to map line "
                            "for file comments: %s" %
                            (args['zuul']['job'], msg))
            new_lineno = None
        if new_lineno is not None:
            new_lines[(filename, lineno)] = new_lineno
    filecomments.updateLines(fc, new_lines)
def doMergeChanges(self, merger, items, repo_state):
try:
ret = merger.mergeChanges(items, repo_state=repo_state)
@@ -905,24 +984,25 @@ class AnsibleJob(object):
self.log.exception("Could not fetch refs to merge from remote")
result = dict(result='ABORTED')
self.job.sendWorkComplete(json.dumps(result))
return False
return None
if not ret: # merge conflict
result = dict(result='MERGER_FAILURE')
if self.executor_server.statsd:
base_key = "zuul.executor.{hostname}.merger"
self.executor_server.statsd.incr(base_key + ".FAILURE")
self.job.sendWorkComplete(json.dumps(result))
return False
return None
if self.executor_server.statsd:
base_key = "zuul.executor.{hostname}.merger"
self.executor_server.statsd.incr(base_key + ".SUCCESS")
recent = ret[3]
orig_commit = ret[4]
for key, commit in recent.items():
(connection, project, branch) = key
repo = merger.getRepo(connection, project)
repo.setRef('refs/heads/' + branch, commit)
return True
return orig_commit
def resolveBranch(self, project_canonical_name, ref, zuul_branch,
job_override_branch, job_override_checkout,
@@ -2403,5 +2483,5 @@ class ExecutorServer(object):
result['commit'] = result['files'] = result['repo_state'] = None
else:
(result['commit'], result['files'], result['repo_state'],
recent) = ret
recent, orig_commit) = ret
job.sendWorkComplete(json.dumps(result))

64
zuul/lib/filecomments.py Normal file
View File

@@ -0,0 +1,64 @@
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import voluptuous as vs

# Schema for a single inline file comment returned by a job.
# 'line' annotates one line; 'range' gives a character-level span.
# All keys are optional under voluptuous dict-schema semantics.
FILE_COMMENT = {
    'line': int,
    'message': str,
    'range': {
        'start_line': int,
        'start_character': int,
        'end_line': int,
        'end_character': int,
    }
}

# Map of file path -> list of comments on that file.
FILE_COMMENTS = {str: [FILE_COMMENT]}

# Compiled voluptuous schema used by validate() below.
FILE_COMMENTS_SCHEMA = vs.Schema(FILE_COMMENTS)
def validate(file_comments):
    """Validate file_comments against FILE_COMMENTS_SCHEMA.

    Raises a voluptuous validation error if the structure does not
    conform; returns None on success.
    """
    FILE_COMMENTS_SCHEMA(file_comments)
def extractLines(file_comments):
    """Extract file/line tuples from file comments for mapping.

    :param file_comments: dict of path -> list of comment dicts, each
        optionally containing 'line' and/or 'range' keys.
    :returns: a list of unique (path, lineno) tuples covering every
        line referenced by a comment.
    """
    lines = set()
    for path, comments in file_comments.items():
        for comment in comments:
            if 'line' in comment:
                lines.add((path, int(comment['line'])))
            if 'range' in comment:
                # Bug fix: the span dict is stored under 'range';
                # reading comment['rng'] raised KeyError for every
                # comment that supplied a range.
                rng = comment['range']
                for key in ['start_line', 'end_line']:
                    if key in rng:
                        lines.add((path, int(rng[key])))
    return list(lines)
def updateLines(file_comments, lines):
    """Update the line numbers in file_comments with the supplied mapping.

    :param file_comments: dict of path -> list of comment dicts; mutated
        in place.
    :param lines: dict mapping (path, old_lineno) -> new_lineno.  Lines
        absent from the mapping are left unchanged.
    """
    for path, comments in file_comments.items():
        for comment in comments:
            if 'line' in comment:
                comment['line'] = lines.get((path, comment['line']),
                                            comment['line'])
            if 'range' in comment:
                # Bug fix: the span dict is stored under 'range';
                # reading comment['rng'] raised KeyError for every
                # comment that supplied a range.
                rng = comment['range']
                for key in ['start_line', 'end_line']:
                    if key in rng:
                        rng[key] = lines.get((path, rng[key]), rng[key])

View File

@@ -67,6 +67,9 @@ def timeout_handler(path):
class Repo(object):
commit_re = re.compile('^commit ([0-9a-f]{40})$')
diff_re = re.compile('^@@ -\d+,\d \+(\d+),\d @@$')
def __init__(self, remote, local, email, username, speed_limit, speed_time,
sshkey=None, cache_path=None, logger=None, git_timeout=300,
retry_attempts=3, retry_interval=30):
@@ -348,6 +351,10 @@ class Repo(object):
# So try again if an AssertionError is caught.
self._git_fetch(repo, 'origin', ref)
def revParse(self, ref):
    """Resolve *ref* to an object name via ``git rev-parse``."""
    return self.createRepoObject().git.rev_parse(ref)
def fetchFrom(self, repository, ref):
    """Fetch *ref* from an arbitrary *repository* URL/path into this repo."""
    self._git_fetch(self.createRepoObject(), repository, ref)
@@ -415,6 +422,24 @@ class Repo(object):
self.remote_url = url
self._git_set_remote_url(self.createRepoObject(), self.remote_url)
def mapLine(self, commit, filename, lineno):
    """Map a line number in the current checkout back to *commit*.

    Runs ``git log -L lineno,lineno:filename`` to trace the history of
    the single line, then scans the output: once the log entry for
    *commit* is found, the first diff hunk header after it yields the
    line's number on the new side of that commit's diff.

    :returns: the mapped line number (int), or None if the line could
        not be traced back to *commit*.
    """
    repo = self.createRepoObject()
    # Trace the specified line back to the specified commit and
    # return the line number in that commit.
    cur_commit = None
    # GitPython turns the L= keyword argument into git log's -L option.
    out = repo.git.log(L='%s,%s:%s' % (lineno, lineno, filename))
    for l in out.split('\n'):
        if cur_commit is None:
            # Still scanning for the log entry belonging to *commit*;
            # skip everything until its "commit <sha>" header appears.
            m = self.commit_re.match(l)
            if m:
                if m.group(1) == commit:
                    cur_commit = commit
            continue
        # Inside the entry for *commit*: the first hunk header's
        # "+<start>" group is the line number in that commit.
        # NOTE(review): diff_re appears to accept only single-digit
        # line counts in the hunk header -- confirm against real
        # ``git log -L`` output.
        m = self.diff_re.match(l)
        if m:
            return int(m.group(1))
    return None
class Merger(object):
def __init__(self, working_root, connections, email, username,
@@ -537,7 +562,7 @@ class Merger(object):
repo.checkout(ref)
except Exception:
self.log.exception("Unable to checkout %s" % ref)
return None
return None, None
try:
mode = item['merge_mode']
@@ -553,12 +578,13 @@ class Merger(object):
# Log git exceptions at debug level because they are
# usually benign merge conflicts
self.log.debug("Unable to merge %s" % item, exc_info=True)
return None
return None, None
except Exception:
self.log.exception("Exception while merging a change:")
return None
return None, None
return commit
orig_commit = repo.revParse('FETCH_HEAD')
return orig_commit, commit
def _mergeItem(self, item, recent, repo_state):
self.log.debug("Processing ref %s for project %s/%s / %s uuid %s" %
@@ -579,7 +605,7 @@ class Merger(object):
repo.reset()
except Exception:
self.log.exception("Unable to reset repo %s" % repo)
return None
return None, None
self._restoreRepoState(item['connection'], item['project'], repo,
repo_state)
@@ -598,12 +624,12 @@ class Merger(object):
repo.setRemoteRef(item['branch'], base)
# Merge the change
commit = self._mergeChange(item, base)
orig_commit, commit = self._mergeChange(item, base)
if not commit:
return None
return None, None
# Store this commit as the most recent for this project-branch
recent[key] = commit
return commit
return orig_commit, commit
def mergeChanges(self, items, files=None, dirs=None, repo_state=None):
# connection+project+branch -> commit
@@ -616,7 +642,7 @@ class Merger(object):
for item in items:
self.log.debug("Merging for change %s,%s" %
(item["number"], item["patchset"]))
commit = self._mergeItem(item, recent, repo_state)
orig_commit, commit = self._mergeItem(item, recent, repo_state)
if not commit:
return None
if files or dirs:
@@ -630,7 +656,7 @@ class Merger(object):
ret_recent = {}
for k, v in recent.items():
ret_recent[k] = v.hexsha
return commit.hexsha, read_files, repo_state, ret_recent
return commit.hexsha, read_files, repo_state, ret_recent, orig_commit
def setRepoState(self, items, repo_state):
# Sets the repo state for the items

View File

@@ -142,7 +142,7 @@ class MergeServer(object):
result['commit'] = result['files'] = result['repo_state'] = None
else:
(result['commit'], result['files'], result['repo_state'],
recent) = ret
recent, orig_commit) = ret
job.sendWorkComplete(json.dumps(result))
def refstate(self, job):

View File

@@ -1741,6 +1741,7 @@ class BuildSet(object):
self.config_errors = [] # list of ConfigurationErrors
self.failing_reasons = []
self.debug_messages = []
self.warning_messages = []
self.merge_state = self.NEW
self.nodesets = {} # job -> nodeset
self.node_requests = {} # job -> reqs
@@ -1919,6 +1920,9 @@ class QueueItem(object):
indent = ''
self.current_build_set.debug_messages.append(indent + msg)
def warning(self, msg):
    """Append *msg* to the current build set's warning messages.

    Warnings accumulated here are joined into the item's final report
    text by the reporter.
    """
    self.current_build_set.warning_messages.append(msg)
def freezeJobGraph(self):
"""Find or create actual matching jobs for this item's change and
store the resulting job tree."""

View File

@@ -97,6 +97,10 @@ class BaseReporter(object, metaclass=abc.ABCMeta):
a reporter taking free-form text."""
ret = self._getFormatter()(item, with_jobs)
if item.current_build_set.warning_messages:
warning = '\n '.join(item.current_build_set.warning_messages)
ret += '\nWarning:\n ' + warning + '\n'
if item.current_build_set.debug_messages:
debug = '\n '.join(item.current_build_set.debug_messages)
ret += '\nDebug information:\n ' + debug + '\n'

View File

@@ -417,9 +417,10 @@ class Scheduler(threading.Thread):
self.result_event_queue.put(event)
self.wake_event.set()
def onBuildCompleted(self, build, result, result_data):
def onBuildCompleted(self, build, result, result_data, warnings):
build.end_time = time.time()
build.result_data = result_data
build.build_set.warning_messages.extend(warnings)
# Note, as soon as the result is set, other threads may act
# upon this, even though the event hasn't been fully
# processed. Ensure that any other data from the event (eg,