gerrit driver: fix bug around unicode branch names

If a branch name contains Unicode characters that are more than one byte
wide when UTF-8 encoded, the size in bytes of the pack record won't match
its size in characters, and the pack parsing will be incorrect.

Instead, treat everything as an encoded byte string until parsing is
done, and only decode when handling a single parsed record.

Change-Id: I7f1a0cc96a36129fbc04c7a8687da3f66c1eef99
This commit is contained in:
Alex Hornung 2023-01-19 14:21:35 +00:00
parent 944b9852c9
commit ffc03cfcc1
2 changed files with 52 additions and 2 deletions

View File

@ -957,3 +957,48 @@ class TestGerritConnection(ZuulTestCase):
self.assertEqual(B.queried, 2)
self.assertEqual(A.data['status'], 'MERGED')
self.assertEqual(B.data['status'], 'MERGED')
class TestGerritUnicodeRefs(ZuulTestCase):
    """Exercise info/refs parsing with a multi-byte UTF-8 branch name."""

    config_file = 'zuul-gerrit-web.conf'
    tenant_config_file = 'config/single-tenant/main.yaml'

    # Canned ref advertisement handed back by the patched ``_uploadPack``.
    # Each record starts with a four-hex-digit length that counts *bytes*;
    # the ``refs/heads/foo/...`` record ends in three 4-byte UTF-8
    # characters, so its byte length differs from its character length
    # once the whole blob has been decoded to ``str``.
    upload_pack_data = (
        b'014452944ee370db5c87691e62e0f9079b6281319b4e HEAD'
        b'\x00multi_ack thin-pack side-band side-band-64k '
        b'ofs-delta shallow deepen-since deepen-not '
        b'deepen-relative no-progress include-tag '
        b'multi_ack_detailed allow-tip-sha1-in-want '
        b'allow-reachable-sha1-in-want '
        b'symref=HEAD:refs/heads/faster filter '
        b'object-format=sha1 agent=git/2.37.1.gl1\n'
        b'003d5f42665d737b3fd4ec22ca0209e6191859f09fd6 '
        b'refs/for/faster\n'
        b'004952944ee370db5c87691e62e0f9079b6281319b4e '
        b'refs/heads/foo/\xf0\x9f\x94\xa5\xf0\x9f\x94\xa5'
        b'\xf0\x9f\x94\xa5\n'
        b'003f52944ee370db5c87691e62e0f9079b6281319b4e '
        b'refs/heads/faster\n0000'
    ).decode("utf-8")

    def test_mb_unicode_refs(self):
        """getInfoRefs must return every advertised ref, emoji included."""
        connection = GerritConnection(
            GerritDriver(),
            'review_gerrit',
            {
                'user': 'gerrit',
                'server': 'localhost',
            },
        )
        # Replace the upload-pack call with the canned advertisement.
        self.patch(connection, '_uploadPack',
                   lambda project: self.upload_pack_data)

        # Two of the three advertised refs point at the same commit.
        shared_sha = '52944ee370db5c87691e62e0f9079b6281319b4e'
        expected_refs = {
            'refs/for/faster': '5f42665d737b3fd4ec22ca0209e6191859f09fd6',
            'refs/heads/foo/🔥🔥🔥': shared_sha,
            'refs/heads/faster': shared_sha,
        }

        parsed_refs = connection.getInfoRefs(
            connection.source.getProject('org/project'))
        self.assertEqual(parsed_refs, expected_refs)

View File

@ -1643,7 +1643,10 @@ class GerritConnection(ZKChangeCacheMixin, ZKBranchCacheMixin, BaseConnection):
def getInfoRefs(self, project: Project) -> Dict[str, str]:
try:
data = self._uploadPack(project)
# Encode the UTF-8 data back to a byte array, as the size of
# each record in the pack is in bytes, and so the slicing must
# also be done on a byte-basis.
data = self._uploadPack(project).encode("utf-8")
except Exception:
self.log.error("Cannot get references from %s" % project)
raise # keeps error information
@ -1662,7 +1665,9 @@ class GerritConnection(ZKChangeCacheMixin, ZKBranchCacheMixin, BaseConnection):
plen -= 4
if len(data) - i < plen:
raise Exception("Invalid data in info/refs")
line = data[i:i + plen]
# Once the pack data is sliced, we can safely decode it back
# into a (UTF-8) string.
line = data[i:i + plen].decode("utf-8")
i += plen
if not read_advertisement:
read_advertisement = True