Cleanup leaked build request parameters
Now that build request parameters are sharded outside of a transaction, we might write them but fail to write the actual build request. To address that, add a periodic checke for leaked parameters which is run after the search for lost build requests. Change-Id: I0787d94513572f90ae66616a6619d488c9592a5d
This commit is contained in:
parent
965f25b8d9
commit
a99b2dace3
|
@ -3173,28 +3173,15 @@ class TestingExecutorApi(HoldableExecutorApi):
|
|||
# As this method is used for assertions in the tests, it
|
||||
# should look up the build requests directly from ZooKeeper
|
||||
# and not from a cache layer.
|
||||
zones = []
|
||||
if self.zone_filter:
|
||||
zones = self.zone_filter
|
||||
else:
|
||||
try:
|
||||
# Get all available zones from ZooKeeper
|
||||
zones = self.kazoo_client.get_children(
|
||||
'/'.join([self.BUILD_REQUEST_ROOT, 'zones']))
|
||||
zones.append(None)
|
||||
except kazoo.exceptions.NoNodeError:
|
||||
zones = [None]
|
||||
zones = self._getAllZones()
|
||||
|
||||
all_builds = []
|
||||
for zone in zones:
|
||||
try:
|
||||
zone_path = self._getZoneRoot(zone)
|
||||
builds = self.kazoo_client.get_children(zone_path)
|
||||
except kazoo.exceptions.NoNodeError:
|
||||
# Skip this zone as it doesn't have any builds
|
||||
continue
|
||||
|
||||
for build_uuid in builds:
|
||||
zone_path = self._getZoneRoot(zone)
|
||||
for build_uuid in self._getAllBuildIds([zone]):
|
||||
build = self.get("/".join([zone_path, build_uuid]))
|
||||
if build and (not states or build.state in states):
|
||||
all_builds.append(build)
|
||||
|
|
|
@ -629,6 +629,29 @@ class TestExecutorApi(ZooKeeperBaseTestCase):
|
|||
self.assertEqual(2, len(lost_build_requests))
|
||||
self.assertEqual(b.path, lost_build_requests[0].path)
|
||||
|
||||
def test_lost_build_request_params(self):
|
||||
# Test cleaning up orphaned request parameters
|
||||
executor_api = ExecutorApi(self.zk_client)
|
||||
|
||||
path_a = executor_api.submit(
|
||||
"A", "tenant", "pipeline", {}, "zone", '1')
|
||||
|
||||
params_root = executor_api.BUILD_PARAMS_ROOT
|
||||
self.assertEqual(len(executor_api._getAllBuildIds()), 1)
|
||||
self.assertEqual(len(
|
||||
self.zk_client.client.get_children(params_root)), 1)
|
||||
|
||||
# Delete the request but not the params
|
||||
self.zk_client.client.delete(path_a)
|
||||
self.assertEqual(len(executor_api._getAllBuildIds()), 0)
|
||||
self.assertEqual(len(
|
||||
self.zk_client.client.get_children(params_root)), 1)
|
||||
|
||||
# Clean up leaked params
|
||||
executor_api.cleanup(0)
|
||||
self.assertEqual(len(
|
||||
self.zk_client.client.get_children(params_root)), 0)
|
||||
|
||||
def test_existing_build_request(self):
|
||||
# Test that an executor sees an existing build request when
|
||||
# coming online
|
||||
|
|
|
@ -229,6 +229,7 @@ class ExecutorClient(object):
|
|||
self.cleanupLostBuildRequest(build_request)
|
||||
except Exception:
|
||||
self.log.exception("Exception cleaning up lost build request:")
|
||||
self.executor_api.cleanup()
|
||||
|
||||
def cleanupLostBuildRequest(self, build_request):
|
||||
result = {"result": "ABORTED"}
|
||||
|
|
|
@ -12,6 +12,7 @@
|
|||
# License for the specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
import time
|
||||
import json
|
||||
import logging
|
||||
from contextlib import suppress
|
||||
|
@ -424,6 +425,76 @@ class ExecutorApi(ZooKeeperSimpleBase):
|
|||
self.inState(BuildRequest.RUNNING, BuildRequest.PAUSED),
|
||||
)
|
||||
|
||||
def _getAllZones(self):
|
||||
# Get a list of all zones without using the cache.
|
||||
try:
|
||||
# Get all available zones from ZooKeeper
|
||||
zones = self.kazoo_client.get_children(
|
||||
'/'.join([self.BUILD_REQUEST_ROOT, 'zones']))
|
||||
zones.append(None)
|
||||
except NoNodeError:
|
||||
zones = [None]
|
||||
return zones
|
||||
|
||||
def _getAllBuildIds(self, zones=None):
|
||||
# Get a list of all build uuids without using the cache.
|
||||
if zones is None:
|
||||
zones = self._getAllZones()
|
||||
|
||||
all_builds = set()
|
||||
for zone in zones:
|
||||
try:
|
||||
zone_path = self._getZoneRoot(zone)
|
||||
all_builds.update(self.kazoo_client.get_children(zone_path))
|
||||
except NoNodeError:
|
||||
# Skip this zone as it doesn't have any builds
|
||||
continue
|
||||
return all_builds
|
||||
|
||||
def _findLostParams(self, age):
|
||||
# Get data nodes which are older than the specified age (we
|
||||
# don't want to delete nodes which are just being written
|
||||
# slowly).
|
||||
# Convert to MS
|
||||
now = int(time.time() * 1000)
|
||||
age = age * 1000
|
||||
data_nodes = dict()
|
||||
for data_id in self.kazoo_client.get_children(self.BUILD_PARAMS_ROOT):
|
||||
data_path = self._getParamsPath(data_id)
|
||||
data_zstat = self.kazoo_client.exists(data_path)
|
||||
if now - data_zstat.mtime > age:
|
||||
data_nodes[data_id] = data_path
|
||||
|
||||
# If there are no candidate data nodes, we don't need to
|
||||
# filter them by known requests.
|
||||
if not data_nodes:
|
||||
return data_nodes.values()
|
||||
|
||||
# Remove current request uuids
|
||||
for request_id in self._getAllBuildIds():
|
||||
if request_id in data_nodes:
|
||||
del data_nodes[request_id]
|
||||
|
||||
# Return the paths
|
||||
return data_nodes.values()
|
||||
|
||||
def cleanup(self, age=300):
|
||||
# Delete build request params which are not associated with
|
||||
# any current build requests. Note, this does not clean up
|
||||
# lost build requests themselves; the executor client takes
|
||||
# care of that.
|
||||
try:
|
||||
for path in self._findLostParams(age):
|
||||
try:
|
||||
self.log.error("Removing build request params: %s", path)
|
||||
self.kazoo_client.delete(path, recursive=True)
|
||||
except Exception:
|
||||
self.log.execption(
|
||||
"Unable to delete build request params %s", path)
|
||||
except Exception:
|
||||
self.log.exception(
|
||||
"Error cleaning up build request queue %s", self)
|
||||
|
||||
@staticmethod
|
||||
def _bytesToDict(data):
|
||||
return json.loads(data.decode("utf-8"))
|
||||
|
|
Loading…
Reference in New Issue