From c2dc7051473c58fa6d96ccd5e0ab19dca24fbc1b Mon Sep 17 00:00:00 2001 From: "James E. Blair" Date: Tue, 19 Mar 2024 16:32:10 -0700 Subject: [PATCH] Record merger operations This records the merger git operations so that later we can provide them to the user for reconstructing the repo state manually (ie, for local testing). Change-Id: Ic1f8fc3cb45d382cd136630e2b25d23718329aca --- tests/unit/test_merger_repo.py | 8 +-- zuul/executor/server.py | 5 ++ zuul/merger/merger.py | 115 ++++++++++++++++++++++++--------- zuul/merger/server.py | 2 +- zuul/model.py | 18 ++++++ 5 files changed, 114 insertions(+), 34 deletions(-) diff --git a/tests/unit/test_merger_repo.py b/tests/unit/test_merger_repo.py index a1059db593..02dba43d2a 100644 --- a/tests/unit/test_merger_repo.py +++ b/tests/unit/test_merger_repo.py @@ -940,7 +940,7 @@ class TestMerger(ZuulTestCase): # Merge A result = merger.mergeChanges([item_a], files=files, dirs=dirs) self.assertIsNotNone(result) - hexsha, read_files, repo_state, ret_recent, orig_commit = result + hexsha, read_files, repo_state, ret_recent, orig_commit, ops = result self.assertEqual(len(read_files), 1) self.assertEqual(read_files[0]['project'], 'org/project') self.assertEqual(read_files[0]['branch'], 'master') @@ -949,7 +949,7 @@ class TestMerger(ZuulTestCase): # Merge A -> B result = merger.mergeChanges([item_a, item_b], files=files, dirs=dirs) self.assertIsNotNone(result) - hexsha, read_files, repo_state, ret_recent, orig_commit = result + hexsha, read_files, repo_state, ret_recent, orig_commit, ops = result self.assertEqual(len(read_files), 1) self.assertEqual(read_files[0]['project'], 'org/project') self.assertEqual(read_files[0]['branch'], 'master') @@ -960,7 +960,7 @@ class TestMerger(ZuulTestCase): result = merger.mergeChanges([item_a, item_b, item_c], files=files, dirs=dirs) self.assertIsNotNone(result) - hexsha, read_files, repo_state, ret_recent, orig_commit = result + hexsha, read_files, repo_state, ret_recent, orig_commit, ops = result self.assertEqual(len(read_files), 1) self.assertEqual(read_files[0]['project'], 'org/project') self.assertEqual(read_files[0]['branch'], 'master') @@ -972,7 +972,7 @@ class TestMerger(ZuulTestCase): result = merger.mergeChanges([item_a, item_b, item_c, item_d], files=files, dirs=dirs) self.assertIsNotNone(result) - hexsha, read_files, repo_state, ret_recent, orig_commit = result + hexsha, read_files, repo_state, ret_recent, orig_commit, ops = result self.assertEqual(len(read_files), 2) self.assertEqual(read_files[0]['project'], 'org/project') diff --git a/zuul/executor/server.py b/zuul/executor/server.py index b96e8d3d6c..1c8b01f3a8 100644 --- a/zuul/executor/server.py +++ b/zuul/executor/server.py @@ -1029,6 +1029,7 @@ class AnsibleJob(object): self._resume_event = threading.Event() self.thread = None self.project_info = {} + self.merge_ops = [] self.private_key_file = get_default(self.executor_server.config, 'executor', 'private_key_file', '~/.ssh/id_rsa') @@ -1536,6 +1537,9 @@ class AnsibleJob(object): p['checkout'] = selected_ref p['checkout_description'] = selected_desc p['commit'] = commit.hexsha + self.merge_ops.append(zuul.model.MergeOp( + cmd=['git', 'checkout', selected_ref], + path=repo.workspace_project_path)) # Set the URL of the origin remote for each repo to a bogus # value. Keeping the remote allows tools to use it to determine @@ -1729,6 +1733,7 @@ class AnsibleJob(object): self.executor_server.statsd.incr(base_key + ".SUCCESS") recent = ret[3] orig_commit = ret[4] + self.merge_ops = ret[5] or [] for key, commit in recent.items(): (connection, project, branch) = key restored_repos.add((connection, project)) diff --git a/zuul/merger/merger.py b/zuul/merger/merger.py index 6cb4e37eae..e4312ddc41 100644 --- a/zuul/merger/merger.py +++ b/zuul/merger/merger.py @@ -75,7 +75,8 @@ class Repo(object): def __init__(self, remote, local, email, username, speed_limit, speed_time, sshkey=None, cache_path=None, logger=None, git_timeout=300, - zuul_event_id=None, retry_timeout=None, skip_refs=False): + zuul_event_id=None, retry_timeout=None, skip_refs=False, + workspace_project_path=None): if logger is None: self.log = logging.getLogger("zuul.Repo") else: @@ -96,6 +97,7 @@ class Repo(object): self.remote_url = remote self.local_path = local + self.workspace_project_path = workspace_project_path self.email = email self.username = username self.cache_path = cache_path @@ -659,55 +661,88 @@ class Repo(object): } return {} - def merge(self, ref, strategy=None, zuul_event_id=None, timestamp=None): + def merge(self, ref, strategy=None, zuul_event_id=None, timestamp=None, + ops=None): log = get_annotated_logger(self.log, zuul_event_id) repo = self.createRepoObject(zuul_event_id) args = [] if strategy: args += ['-s', strategy] args.append('FETCH_HEAD') + msg = f"Merge '{ref}'" self.fetch(ref, zuul_event_id=zuul_event_id) + if ops is not None: + ops.append(zuul.model.MergeOp(cmd=['git', 'fetch', 'origin', ref], + path=self.workspace_project_path)) log.debug("Merging %s with args %s", ref, args) with repo.git.custom_environment(**self._getTimestampEnv(timestamp)): # Use a custom message to avoid introducing # merger/executor path details - repo.git.merge(message=f"Merge '{ref}'", *args) + repo.git.merge(message=msg, *args) + if ops is not None: + ops.append(zuul.model.MergeOp( + cmd=['git', 'merge', '-m', msg, *args], + path=self.workspace_project_path, + timestamp=timestamp)) return repo.head.commit - def squashMerge(self, item, zuul_event_id=None, timestamp=None): + def squashMerge(self, item, zuul_event_id=None, timestamp=None, ops=None): log = get_annotated_logger(self.log, zuul_event_id) repo = self.createRepoObject(zuul_event_id) args = ['--squash', 'FETCH_HEAD'] ref = item['ref'] + msg = f"Merge '{ref}'" self.fetch(ref, zuul_event_id=zuul_event_id) + if ops is not None: + ops.append(zuul.model.MergeOp(cmd=['git', 'fetch', 'origin', ref], + path=self.workspace_project_path)) log.debug("Squash-Merging %s with args %s", ref, args) with repo.git.custom_environment(**self._getTimestampEnv(timestamp)): repo.git.merge(*args) # Use a custom message to avoid introducing # merger/executor path details - repo.git.commit( - message='Merge change %s,%s' % ( - item['number'], item['patchset']), - allow_empty=True) + repo.git.commit(message=msg, allow_empty=True) + if ops is not None: + ops.append(zuul.model.MergeOp( + cmd=['git', 'merge', *args], + path=self.workspace_project_path, + timestamp=timestamp)) + ops.append(zuul.model.MergeOp( + cmd=['git', 'commit', '-m', msg], + path=self.workspace_project_path, + timestamp=timestamp)) return repo.head.commit - def rebaseMerge(self, item, base, zuul_event_id=None, timestamp=None): + def rebaseMerge(self, item, base, zuul_event_id=None, timestamp=None, + ops=None): log = get_annotated_logger(self.log, zuul_event_id) repo = self.createRepoObject(zuul_event_id) - args = [base] + args = [str(base)] ref = item['ref'] self.fetch(ref, zuul_event_id=zuul_event_id) + if ops is not None: + ops.append(zuul.model.MergeOp(cmd=['git', 'fetch', 'origin', ref], + path=self.workspace_project_path)) log.debug("Rebasing %s with args %s", ref, args) repo.git.checkout('FETCH_HEAD') + if ops is not None: + ops.append(zuul.model.MergeOp( + cmd=['git', 'checkout', 'FETCH_HEAD'], + path=self.workspace_project_path)) with repo.git.custom_environment(**self._getTimestampEnv(timestamp)): try: repo.git.rebase(*args) except Exception: repo.git.rebase(abort=True) raise + if ops is not None: + ops.append(zuul.model.MergeOp( + cmd=['git', 'rebase', *args], + path=self.workspace_project_path, + timestamp=timestamp)) return repo.head.commit - def cherryPick(self, ref, zuul_event_id=None, timestamp=None): + def cherryPick(self, ref, zuul_event_id=None, timestamp=None, ops=None): log = get_annotated_logger(self.log, zuul_event_id) repo = self.createRepoObject(zuul_event_id) self.fetch(ref, zuul_event_id=zuul_event_id) @@ -716,11 +751,16 @@ class Repo(object): args = ["-s", "resolve", "FETCH_HEAD"] log.debug("Merging %s with args %s instead of cherry-picking", ref, args) + msg = f"Merge '{ref}'" with repo.git.custom_environment( **self._getTimestampEnv(timestamp)): # Use a custom message to avoid introducing # merger/executor path details - repo.git.merge(message=f"Merge '{ref}'", *args) + repo.git.merge(message=msg, *args) + op = zuul.model.MergeOp( + cmd=['git', 'merge', '-m', msg, *args], + path=self.workspace_project_path, + timestamp=timestamp) else: log.debug("Cherry-picking %s", ref) # Git doesn't have an option to ignore commits that are already @@ -730,6 +770,11 @@ class Repo(object): with repo.git.custom_environment( **self._getTimestampEnv(timestamp)): repo.git.cherry_pick("FETCH_HEAD", keep_redundant_commits=True) + op = zuul.model.MergeOp( + cmd=['git', 'cherry-pick', 'FETCH_HEAD', + '--keep-redundant-commits'], + path=self.workspace_project_path, + timestamp=timestamp) # If the newly applied commit is empty, it means either: # 1) The commit being cherry-picked was empty, in which the empty @@ -742,7 +787,13 @@ class Repo(object): any(fetch_head.diff(fetch_head.parents[0])): log.debug("%s was already applied. Removing it", ref) self._checkout(repo, parent) - + op = zuul.model.MergeOp(comment=f"Already applied {ref}") + if ops is not None: + if op.cmd: + ops.append(zuul.model.MergeOp( + cmd=['git', 'fetch', 'origin', ref], + path=self.workspace_project_path)) + ops.append(op) return repo.head.commit def fetch(self, ref, zuul_event_id=None): @@ -1023,9 +1074,9 @@ class Merger(object): repo = None key = '/'.join([hostname, project_name]) try: - path = os.path.join(self.working_root, - strings.workspace_project_path( - hostname, project_name, self.scheme)) + workspace_project_path = strings.workspace_project_path( + hostname, project_name, self.scheme) + path = os.path.join(self.working_root, workspace_project_path) self.repo_roots.add(path) if self.cache_root: cache_path = os.path.join( @@ -1039,7 +1090,8 @@ class Merger(object): self.speed_time, sshkey=sshkey, cache_path=cache_path, logger=self.logger, git_timeout=self.git_timeout, zuul_event_id=zuul_event_id, retry_timeout=retry_timeout, - skip_refs=self.execution_context) + skip_refs=self.execution_context, + workspace_project_path=workspace_project_path) self.repos[key] = repo except Exception: @@ -1182,7 +1234,7 @@ class Merger(object): for message in messages: ref_log.debug(message) - def _mergeChange(self, item, base, zuul_event_id): + def _mergeChange(self, item, base, zuul_event_id, ops): log = get_annotated_logger(self.log, zuul_event_id) repo = self.getRepo(item['connection'], item['project'], zuul_event_id=zuul_event_id) @@ -1191,37 +1243,40 @@ class Merger(object): except Exception: log.exception("Unable to checkout %s", base) return None, None + ops.append(zuul.model.MergeOp( + cmd=['git', 'checkout', item['branch']], + path=repo.workspace_project_path)) timestamp = item.get('configured_time') try: mode = item['merge_mode'] if mode == zuul.model.MERGER_MERGE: commit = repo.merge(item['ref'], zuul_event_id=zuul_event_id, - timestamp=timestamp) + timestamp=timestamp, ops=ops) elif mode == zuul.model.MERGER_MERGE_RESOLVE: commit = repo.merge(item['ref'], 'resolve', zuul_event_id=zuul_event_id, - timestamp=timestamp) + timestamp=timestamp, ops=ops) elif mode == zuul.model.MERGER_MERGE_RECURSIVE: commit = repo.merge(item['ref'], 'recursive', zuul_event_id=zuul_event_id, - timestamp=timestamp) + timestamp=timestamp, ops=ops) elif mode == zuul.model.MERGER_MERGE_ORT: commit = repo.merge(item['ref'], 'ort', zuul_event_id=zuul_event_id, - timestamp=timestamp) + timestamp=timestamp, ops=ops) elif mode == zuul.model.MERGER_CHERRY_PICK: commit = repo.cherryPick(item['ref'], zuul_event_id=zuul_event_id, - timestamp=timestamp) + timestamp=timestamp, ops=ops) elif mode == zuul.model.MERGER_SQUASH_MERGE: commit = repo.squashMerge( item, zuul_event_id=zuul_event_id, - timestamp=timestamp) + timestamp=timestamp, ops=ops) elif mode == zuul.model.MERGER_REBASE: commit = repo.rebaseMerge( item, base, zuul_event_id=zuul_event_id, - timestamp=timestamp) + timestamp=timestamp, ops=ops) else: raise Exception("Unsupported merge mode: %s" % mode) except git.GitCommandError: @@ -1237,7 +1292,7 @@ class Merger(object): return orig_commit, commit def _mergeItem(self, item, recent, repo_state, zuul_event_id, - branches=None, process_worker=None): + ops, branches=None, process_worker=None): log = get_annotated_logger(self.log, zuul_event_id) log.debug("Processing ref %s for project %s/%s / %s uuid %s" % (item['ref'], item['connection'], @@ -1281,7 +1336,7 @@ class Merger(object): zuul_event_id=zuul_event_id) # Merge the change - orig_commit, commit = self._mergeChange(item, base, zuul_event_id) + orig_commit, commit = self._mergeChange(item, base, zuul_event_id, ops) if not commit: return None, None # Store this commit as the most recent for this project-branch @@ -1316,6 +1371,8 @@ class Merger(object): # connection -> project -> ref -> commit if repo_state is None: repo_state = {} + # A log of git operations + ops = [] for item in items: # If we're in the executor context we have the repo_locks object # and perform per repo locking. @@ -1333,7 +1390,7 @@ class Merger(object): (item["number"], item["patchset"])) try: orig_commit, commit = self._mergeItem( - item, recent, repo_state, zuul_event_id, + item, recent, repo_state, zuul_event_id, ops, branches=branches, process_worker=process_worker) except BrokenProcessPool: @@ -1357,7 +1414,7 @@ class Merger(object): files=repo_files) return ( commit.hexsha, list(read_files.values()), repo_state, recent, - orig_commit + orig_commit, ops ) def setRepoState(self, connection_name, project_name, repo_state, diff --git a/zuul/merger/server.py b/zuul/merger/server.py index 2ca7fb1c2e..b078b8c143 100644 --- a/zuul/merger/server.py +++ b/zuul/merger/server.py @@ -315,7 +315,7 @@ class BaseMergeServer(metaclass=ABCMeta): if ret is not None: result['merged'] = True (result['commit'], result['files'], result['repo_state'], - recent, orig_commit) = ret + recent, orig_commit, ops) = ret return result def refstate(self, merge_request, args): diff --git a/zuul/model.py b/zuul/model.py index 4a1c9a68eb..87796e21fc 100644 --- a/zuul/model.py +++ b/zuul/model.py @@ -169,6 +169,24 @@ class QueryCache: self.clear(ltime) +class MergeOp: + def __init__(self, cmd=None, timestamp=None, comment=None, path=None): + """A class representing a merge operation, returned by the merger to + tell the user what was done.""" + self.cmd = cmd + self.timestamp = timestamp + self.comment = comment + self.path = path + + def toDict(self): + ret = {} + for k in ['cmd', 'timestamp', 'comment', 'path']: + v = getattr(self, k) + if v is not None: + ret[k] = v + return ret + + class ZuulMark: # The yaml mark class differs between the C and python versions. # The C version does not provide a snippet, and also appears to