From ffc03cfcc16965cdc81c3f7b88f650788a8fe5de Mon Sep 17 00:00:00 2001 From: Alex Hornung Date: Thu, 19 Jan 2023 14:21:35 +0000 Subject: [PATCH] gerrit driver: fix bug around unicode branch names If a branch name contains unicode characters that are more than 1-byte wide, the size in bytes of the pack record won't match the size in characters, and the pack parsing will be incorrect. Instead, treat everything as an encoded byte string until parsing is done - and only decode when handling a single, parsed, record. Change-Id: I7f1a0cc96a36129fbc04c7a8687da3f66c1eef99 --- tests/unit/test_gerrit.py | 45 ++++++++++++++++++++++++++ zuul/driver/gerrit/gerritconnection.py | 9 ++++-- 2 files changed, 52 insertions(+), 2 deletions(-) diff --git a/tests/unit/test_gerrit.py b/tests/unit/test_gerrit.py index 2e3057af64..2a63d5ef83 100644 --- a/tests/unit/test_gerrit.py +++ b/tests/unit/test_gerrit.py @@ -957,3 +957,48 @@ class TestGerritConnection(ZuulTestCase): self.assertEqual(B.queried, 2) self.assertEqual(A.data['status'], 'MERGED') self.assertEqual(B.data['status'], 'MERGED') + + +class TestGerritUnicodeRefs(ZuulTestCase): + config_file = 'zuul-gerrit-web.conf' + tenant_config_file = 'config/single-tenant/main.yaml' + + upload_pack_data = (b'014452944ee370db5c87691e62e0f9079b6281319b4e HEAD' + b'\x00multi_ack thin-pack side-band side-band-64k ' + b'ofs-delta shallow deepen-since deepen-not ' + b'deepen-relative no-progress include-tag ' + b'multi_ack_detailed allow-tip-sha1-in-want ' + b'allow-reachable-sha1-in-want ' + b'symref=HEAD:refs/heads/faster filter ' + b'object-format=sha1 agent=git/2.37.1.gl1\n' + b'003d5f42665d737b3fd4ec22ca0209e6191859f09fd6 ' + b'refs/for/faster\n' + b'004952944ee370db5c87691e62e0f9079b6281319b4e ' + b'refs/heads/foo/\xf0\x9f\x94\xa5\xf0\x9f\x94\xa5' + b'\xf0\x9f\x94\xa5\n' + b'003f52944ee370db5c87691e62e0f9079b6281319b4e ' + b'refs/heads/faster\n0000').decode("utf-8") + + def test_mb_unicode_refs(self): + gerrit_config = { + 'user': 'gerrit', + 'server': 'localhost', + } + driver = GerritDriver() + gerrit = GerritConnection(driver, 'review_gerrit', gerrit_config) + + def _uploadPack(project): + return self.upload_pack_data + + self.patch(gerrit, '_uploadPack', _uploadPack) + + project = gerrit.source.getProject('org/project') + refs = gerrit.getInfoRefs(project) + + self.assertEqual(refs, + {'refs/for/faster': + '5f42665d737b3fd4ec22ca0209e6191859f09fd6', + 'refs/heads/foo/🔥🔥🔥': + '52944ee370db5c87691e62e0f9079b6281319b4e', + 'refs/heads/faster': + '52944ee370db5c87691e62e0f9079b6281319b4e'}) diff --git a/zuul/driver/gerrit/gerritconnection.py b/zuul/driver/gerrit/gerritconnection.py index 0a1f0ee614..276365e1d1 100644 --- a/zuul/driver/gerrit/gerritconnection.py +++ b/zuul/driver/gerrit/gerritconnection.py @@ -1643,7 +1643,10 @@ class GerritConnection(ZKChangeCacheMixin, ZKBranchCacheMixin, BaseConnection): def getInfoRefs(self, project: Project) -> Dict[str, str]: try: - data = self._uploadPack(project) + # Encode the UTF-8 data back to a byte array, as the size of + # each record in the pack is in bytes, and so the slicing must + # also be done on a byte-basis. + data = self._uploadPack(project).encode("utf-8") except Exception: self.log.error("Cannot get references from %s" % project) raise # keeps error information @@ -1662,7 +1665,9 @@ class GerritConnection(ZKChangeCacheMixin, ZKBranchCacheMixin, BaseConnection): plen -= 4 if len(data) - i < plen: raise Exception("Invalid data in info/refs") - line = data[i:i + plen] + # Once the pack data is sliced, we can safely decode it back + # into a (UTF-8) string. + line = data[i:i + plen].decode("utf-8") i += plen if not read_advertisement: read_advertisement = True