Fix bug in getting changed files

The fix including 2 parts:
1. For Gtihub, we use the base_sha instead of target branch to
   be passed as "tosha" parameter to get precise changed files
2. In method getFilesChanges(), use diff() result to filter out
   those files that changed and reverted between commits.

The reason we do not direcly use diff() is that for those
drivers other than github, the "base_sha" is not available yet,
using diff() may include unexpected files when target branch
has diverged from the feature branch.

This solution works for  99.9% of the caseses, it may still get
incorrect list of changed files in following corner case:
1. In non-github connection, whose base_sha is not implented, and
2. Files changed and reverted between commits in the change, and
3. The same file has also diverged in target branch.

The above corner case can be fixed by making base_sha available in
other drivers.

Change-Id: Ifae7018a8078c16f2caf759ae675648d8b33c538
This commit is contained in:
Dong Zhang
2022-03-23 10:47:45 +01:00
committed by James E. Blair
parent 58b195b583
commit 79b6252370
9 changed files with 122 additions and 11 deletions

View File

@@ -2301,13 +2301,14 @@ class FakeGithubPullRequest(object):
self.merge_message = None
self.state = 'open'
self.url = 'https://%s/%s/pull/%s' % (github.server, project, number)
self._createPRRef(base_sha=base_sha)
self.base_sha = base_sha
self.pr_ref = self._createPRRef(base_sha=base_sha)
self._addCommitToRepo(files=files)
self._updateTimeStamp()
def addCommit(self, files=None):
def addCommit(self, files=None, delete_files=None):
"""Adds a commit on top of the actual PR head."""
self._addCommitToRepo(files=files)
self._addCommitToRepo(files=files, delete_files=delete_files)
self._updateTimeStamp()
def forcePush(self, files=None):
@@ -2473,10 +2474,10 @@ class FakeGithubPullRequest(object):
def _createPRRef(self, base_sha=None):
base_sha = base_sha or 'refs/tags/init'
repo = self._getRepo()
GithubChangeReference.create(
return GithubChangeReference.create(
repo, self.getPRReference(), base_sha)
def _addCommitToRepo(self, files=None, reset=False):
def _addCommitToRepo(self, files=None, delete_files=None, reset=False):
repo = self._getRepo()
ref = repo.references[self.getPRReference()]
if reset:
@@ -2496,11 +2497,11 @@ class FakeGithubPullRequest(object):
normalized_files[fn] = content
else:
normalized_files[tests.fakegithub.FakeFile(fn)] = content
self.files = normalized_files
self.files.update(normalized_files)
else:
fn = '%s-%s' % (self.branch.replace('/', '_'), self.number)
content = f"test {self.branch} {self.number}\n"
self.files = {tests.fakegithub.FakeFile(fn): content}
self.files.update({tests.fakegithub.FakeFile(fn): content})
msg = self.subject + '-' + str(self.number_of_commits)
for fake_file, content in self.files.items():
@@ -2509,8 +2510,14 @@ class FakeGithubPullRequest(object):
f.write(content)
repo.index.add([fn])
if delete_files:
for fn in delete_files:
fn = os.path.join(repo.working_dir, fn)
repo.index.remove([fn])
self.head_sha = repo.index.commit(msg).hexsha
repo.create_head(self.getPRReference(), self.head_sha, force=True)
self.pr_ref.set_commit(self.head_sha)
# Create an empty set of statuses for the given sha,
# each sha on a PR may have a status set on it
self.statuses[self.head_sha] = []
@@ -5131,8 +5138,9 @@ class ZuulTestCase(BaseTestCase):
repo.head.reset(working_tree=True)
repo.delete_head(repo.heads[branch], force=True)
def create_commit(self, project, files=None, head='master',
message='Creating a fake commit', **kwargs):
def create_commit(self, project, files=None, delete_files=None,
head='master', message='Creating a fake commit',
**kwargs):
path = os.path.join(self.upstream_root, project)
repo = git.Repo(path)
repo.head.reference = repo.heads[head]
@@ -5144,6 +5152,12 @@ class ZuulTestCase(BaseTestCase):
with open(file_name, 'a') as f:
f.write(content)
repo.index.add([file_name])
delete_files = delete_files or []
for name in delete_files:
file_name = os.path.join(path, name)
repo.index.remove([file_name])
commit = repo.index.commit(message, **kwargs)
return commit.hexsha

View File

@@ -541,6 +541,7 @@ class FakePull(object):
'full_name': pr.project
},
'ref': pr.branch,
'sha': pr.base_sha,
},
'user': {
'login': 'octocat'

View File

@@ -17,8 +17,15 @@
- .*-requires
run: playbooks/project-test1.yaml
- job:
name: project-test2
files:
- .*-removed
run: playbooks/project-test1.yaml
- project:
name: org/project
check:
jobs:
- project-test1
- project-test2

View File

@@ -171,6 +171,40 @@ class TestGithubDriver(ZuulTestCase):
changes="%s,%s" % (A.number, A.head_sha)),
])
@simple_layout('layouts/files-github.yaml', driver='github')
def test_changed_file_match_filter(self):
path = os.path.join(self.upstream_root, 'org/project')
base_sha = git.Repo(path).head.object.hexsha
files = {'{:03d}.txt'.format(n): 'test' for n in range(300)}
files["foobar-requires"] = "test"
files["to-be-removed"] = "test"
A = self.fake_github.openFakePullRequest(
'org/project', 'master', 'A', files=files, base_sha=base_sha)
self.fake_github.emitEvent(A.getPullRequestOpenedEvent())
self.waitUntilSettled()
# project-test1 and project-test2 should be run
self.assertEqual(2, len(self.history))
@simple_layout('layouts/files-github.yaml', driver='github')
def test_changed_and_reverted_file_not_match_filter(self):
path = os.path.join(self.upstream_root, 'org/project')
base_sha = git.Repo(path).head.object.hexsha
files = {'{:03d}.txt'.format(n): 'test' for n in range(300)}
files["foobar-requires"] = "test"
files["to-be-removed"] = "test"
A = self.fake_github.openFakePullRequest(
'org/project', 'master', 'A', files=files, base_sha=base_sha)
A.addCommit(delete_files=['to-be-removed'])
self.fake_github.emitEvent(A.getPullRequestOpenedEvent())
self.waitUntilSettled()
# Only project-test1 should be run, because the file to-be-removed
# is reverted and not in changed files to trigger project-test2
self.assertEqual(1, len(self.history))
@simple_layout('layouts/files-github.yaml', driver='github')
def test_pull_file_rename(self):
A = self.fake_github.openFakePullRequest(

View File

@@ -363,6 +363,39 @@ class TestMergerRepo(ZuulTestCase):
self.assertEqual(sorted(['README', 'feature.txt']),
sorted(changed_files))
def test_files_changes_add_and_remove_files(self):
"""
If the changed files in previous commits are reverted in later commits,
they should not be considered as changed in the PR.
"""
parent_path = os.path.join(self.upstream_root, 'org/project1')
self.create_branch('org/project1', 'feature1')
base_sha = git.Repo(parent_path).commit('master').hexsha
# Let the file that is also changed in the feature branch diverge
# in master. This change should NOT be considered in the changed
# files list.
files = {'to-be-deleted.txt': 'FAIL'}
self.create_commit('org/project1', files=files, head='master',
message='Add master file')
work_repo = Repo(parent_path, self.workspace_root,
'none@example.org', 'User Name', '0', '0')
# Add a file in first commit
files = {'to-be-deleted.txt': 'test'}
self.create_commit('org/project1', files=files, head='feature1',
message='Add file')
changed_files = work_repo.getFilesChanges('feature1', base_sha)
self.assertEqual(sorted(['README', 'to-be-deleted.txt']),
sorted(changed_files))
# Delete the file in second commit
delete_files = ['to-be-deleted.txt']
self.create_commit('org/project1', files={},
delete_files=delete_files, head='feature1',
message='Delete file')
changed_files = work_repo.getFilesChanges('feature1', base_sha)
self.assertEqual(['README'], changed_files)
def test_files_changes_master_fork_merges(self):
"""Regression test for getFilesChanges()

View File

@@ -1565,6 +1565,7 @@ class GithubConnection(ZKChangeCacheMixin, ZKBranchCacheMixin, BaseConnection):
change.patchset)
change.ref = "refs/pull/%s/head" % change.number
change.branch = change.pr.get('base').get('ref')
change.base_sha = change.pr.get('base').get('sha')
change.commit_id = change.pr.get('head').get('sha')
change.owner = change.pr.get('user').get('login')
# Don't overwrite the files list. The change object is bound to a

View File

@@ -1223,7 +1223,10 @@ class PipelineManager(metaclass=ABCMeta):
log.debug("Scheduling fileschanged for item %s", item)
build_set = item.current_build_set
to_sha = getattr(item.change, "branch", None)
# if base_sha is not available, fallback to branch
to_sha = getattr(item.change, "base_sha",
getattr(item.change, "branch", None))
self.sched.merger.getFilesChanges(
item.change.project.connection_name, item.change.project.name,
item.change.ref, to_sha, build_set=build_set,

View File

@@ -690,9 +690,22 @@ class Repo(object):
files = set()
if tosha:
# When "tosha" is the target branch, the result of diff() correctly
# excluds the files whose changes are reverted between the commits.
# But it may also include the files that are not changed in the
# referenced commit(s). This can happen, e.g. if the target branch
# has diverged from the feature branch.
# The idea is to use this result to filter out the files whose
# changes are reverted between the commits.
diff_files = set()
head_commit = repo.commit(head.hexsha)
diff_index = head_commit.diff(tosha)
diff_files.update((item.a_path for item in diff_index))
commit_diff = "{}..{}".format(tosha, head.hexsha)
for cmt in repo.iter_commits(commit_diff, no_merges=True):
files.update(cmt.stats.files.keys())
files.update(f for f in cmt.stats.files.keys()
if f in diff_files)
else:
files.update(head.stats.files.keys())
return list(files)

View File

@@ -5449,6 +5449,9 @@ class Change(Branch):
# This can be the commit id of the patchset enqueued or
# in the case of a PR the id of HEAD of the branch.
self.commit_id = None
# The sha of the base commit, e.g. the sha of the target branch
# from which the feature branch is created.
self.base_sha = None
def deserialize(self, data):
super().deserialize(data)
@@ -5472,6 +5475,7 @@ class Change(Branch):
self.owner = data.get("owner")
self.message = data.get("message")
self.commit_id = data.get("commit_id")
self.base_sha = data.get("base_sha")
def serialize(self):
d = super().serialize()
@@ -5492,6 +5496,7 @@ class Change(Branch):
"owner": self.owner,
"message": self.message,
"commit_id": self.commit_id,
"base_sha": self.base_sha,
})
return d