Report NODE_FAILURES caused by node request failures to SQL database
Currently, NODE_FAILURE results are not reported via SQL in case the node request failed. This is because those results are directly evaluated in the pipeline manager before the build is even started. Thus, there are no build result events sent by the executor and the "normal" build result event handling is skipped for those builds. As those build results are not stored in the database they are also not visible in the UI. Thus, there could be cases where a buildset failed because of a NODE_FAILURE, but all builds that are shown were successful. To fix this, we could directly call the SQL reporter when the NODE_FAILURE is evaluated in the pipeline manager. Also adapt the reportBuildEnd() method in the sql reporter so that the build entry is created in case it's not present. This could be the case if the build started event was not processed or did not happen at all (e.g. for the NODE_FAILURE results or any result that is created via a "fake build" directly in the pipeline manager). Change-Id: I2603a7ccf26a41e6747c9276cb37c9b0fd668f75
This commit is contained in:
@@ -130,10 +130,11 @@ class TestSQLConnectionMysql(ZuulTestCase):
|
||||
sa.sql.select([reporter.connection.zuul_buildset_table]))
|
||||
|
||||
buildsets = result.fetchall()
|
||||
self.assertEqual(3, len(buildsets))
|
||||
self.assertEqual(4, len(buildsets))
|
||||
buildset0 = buildsets[0]
|
||||
buildset1 = buildsets[1]
|
||||
buildset2 = buildsets[2]
|
||||
buildset3 = buildsets[3]
|
||||
|
||||
self.assertEqual('check', buildset0['pipeline'])
|
||||
self.assertEqual('org/project', buildset0['project'])
|
||||
@@ -201,6 +202,25 @@ class TestSQLConnectionMysql(ZuulTestCase):
|
||||
buildset2_builds[0]['job_name'])
|
||||
self.assertEqual("SUCCESS", buildset2_builds[0]['result'])
|
||||
|
||||
buildset3_builds = conn.execute(
|
||||
sa.sql.select([
|
||||
reporter.connection.zuul_build_table
|
||||
]).where(
|
||||
reporter.connection.zuul_build_table.c.buildset_id ==
|
||||
buildset3['id']
|
||||
)
|
||||
).fetchall()
|
||||
|
||||
self.assertEqual(
|
||||
'project-test1', buildset3_builds[1]['job_name'])
|
||||
self.assertEqual('NODE_FAILURE', buildset3_builds[1]['result'])
|
||||
self.assertEqual(None, buildset3_builds[1]['log_url'])
|
||||
self.assertIsNotNone(buildset3_builds[1]['start_time'])
|
||||
self.assertIsNotNone(buildset3_builds[1]['end_time'])
|
||||
self.assertGreaterEqual(
|
||||
buildset3_builds[1]['end_time'],
|
||||
buildset3_builds[1]['start_time'])
|
||||
|
||||
self.executor_server.hold_jobs_in_build = True
|
||||
|
||||
# Add a success result
|
||||
@@ -229,6 +249,21 @@ class TestSQLConnectionMysql(ZuulTestCase):
|
||||
self.orderedRelease()
|
||||
self.waitUntilSettled()
|
||||
|
||||
# Add a node_failure result
|
||||
self.fake_nodepool.pause()
|
||||
C = self.fake_gerrit.addFakeChange('org/project', 'master', 'C')
|
||||
C.addApproval('Code-Review', 2)
|
||||
self.fake_gerrit.addEvent(C.addApproval('Approved', 1))
|
||||
self.waitUntilSettled()
|
||||
self.orderedRelease()
|
||||
self.waitUntilSettled()
|
||||
req = self.fake_nodepool.getNodeRequests()[0]
|
||||
self.fake_nodepool.addFailRequest(req)
|
||||
self.fake_nodepool.unpause()
|
||||
self.waitUntilSettled()
|
||||
self.orderedRelease()
|
||||
self.waitUntilSettled()
|
||||
|
||||
check_results()
|
||||
|
||||
def test_sql_results_retry_builds(self):
|
||||
|
||||
@@ -91,31 +91,19 @@ class SQLReporter(BaseReporter):
|
||||
f"{buildset.uuid} in DB")
|
||||
|
||||
def reportBuildStart(self, build):
|
||||
buildset = build.build_set
|
||||
start_time = build.start_time or time.time()
|
||||
start = datetime.datetime.fromtimestamp(start_time,
|
||||
tz=datetime.timezone.utc)
|
||||
with self.connection.getSession() as db:
|
||||
db_buildset = db.getBuildset(
|
||||
tenant=buildset.item.pipeline.tenant.name, uuid=buildset.uuid)
|
||||
|
||||
db_build = db_buildset.createBuild(
|
||||
uuid=build.uuid,
|
||||
job_name=build.job.name,
|
||||
start_time=start,
|
||||
voting=build.job.voting,
|
||||
nodeset=build.job.nodeset.name,
|
||||
)
|
||||
db_build = self._createBuild(db, build)
|
||||
return db_build
|
||||
|
||||
def reportBuildEnd(self, build, tenant, final):
|
||||
end_time = build.end_time or time.time()
|
||||
end = datetime.datetime.fromtimestamp(end_time,
|
||||
tz=datetime.timezone.utc)
|
||||
with self.connection.getSession() as db:
|
||||
db_build = db.getBuild(tenant=tenant, uuid=build.uuid)
|
||||
if not db_build:
|
||||
return None
|
||||
db_build = self._createBuild(db, build)
|
||||
|
||||
end_time = build.end_time or time.time()
|
||||
end = datetime.datetime.fromtimestamp(
|
||||
end_time, tz=datetime.timezone.utc)
|
||||
|
||||
db_build.result = build.result
|
||||
db_build.end_time = end
|
||||
@@ -136,6 +124,23 @@ class SQLReporter(BaseReporter):
|
||||
|
||||
return db_build
|
||||
|
||||
def _createBuild(self, db, build):
|
||||
start_time = build.start_time or time.time()
|
||||
start = datetime.datetime.fromtimestamp(start_time,
|
||||
tz=datetime.timezone.utc)
|
||||
buildset = build.build_set
|
||||
db_buildset = db.getBuildset(
|
||||
tenant=buildset.item.pipeline.tenant.name, uuid=buildset.uuid)
|
||||
|
||||
db_build = db_buildset.createBuild(
|
||||
uuid=build.uuid,
|
||||
job_name=build.job.name,
|
||||
start_time=start,
|
||||
voting=build.job.voting,
|
||||
nodeset=build.job.nodeset.name,
|
||||
)
|
||||
return db_build
|
||||
|
||||
def getBuilds(self, *args, **kw):
|
||||
"""Return a list of Build objects"""
|
||||
return self.connection.getBuilds(*args, **kw)
|
||||
|
||||
@@ -1693,7 +1693,13 @@ class PipelineManager(metaclass=ABCMeta):
|
||||
log.info("Node request %s: failure for %s",
|
||||
request, request.job_name)
|
||||
job = build_set.item.getJob(request.job_name)
|
||||
build_set.item.setNodeRequestFailure(job)
|
||||
fakebuild = build_set.item.setNodeRequestFailure(job)
|
||||
try:
|
||||
self.sql.reportBuildEnd(
|
||||
fakebuild, tenant=build_set.item.pipeline.tenant.name,
|
||||
final=True)
|
||||
except Exception:
|
||||
log.exception("Error reporting build completion to DB:")
|
||||
self._resumeBuilds(build_set)
|
||||
tenant = build_set.item.pipeline.tenant
|
||||
tenant.semaphore_handler.release(build_set.item, job)
|
||||
|
||||
@@ -4772,6 +4772,7 @@ class QueueItem(zkobject.ZKObject):
|
||||
)
|
||||
self.addBuild(fakebuild)
|
||||
self.setResult(fakebuild)
|
||||
return fakebuild
|
||||
|
||||
def setDequeuedNeedingChange(self):
|
||||
self.updateAttributes(
|
||||
|
||||
Reference in New Issue
Block a user