Limit number of K8s API requests

As the K8s client logs every response to every request, we're
starting to hit the log size limits of tox or Zuul. This commit
attempts to limit the number of requests by making sure that, for
status checks, we iterate over a single list of pods instead of calling
the API for every pod. Moreover, many `time.sleep()` occurrences are
modified to sleep longer, reducing the number of requests made.

Change-Id: Ifc2dfce2405429bbcae8c01f13f06d4e9fae9c8a
This commit is contained in:
Michał Dulko 2022-02-22 12:39:46 +01:00
parent 85807e55a9
commit 86423cc26c
5 changed files with 39 additions and 36 deletions

View File

@ -228,11 +228,11 @@ class BaseKuryrScenarioTest(manager.NetworkScenarioTest):
@classmethod @classmethod
def wait_for_pod_status(cls, pod_name, namespace="default", def wait_for_pod_status(cls, pod_name, namespace="default",
pod_status=None, retries=30): pod_status=None, retries=6):
while pod_status != cls.get_pod_status( while pod_status != cls.get_pod_status(
pod_name, pod_name,
namespace=namespace): namespace=namespace):
time.sleep(1) time.sleep(5)
retries -= 1 retries -= 1
if retries == 0: if retries == 0:
raise lib_exc.TimeoutException() raise lib_exc.TimeoutException()
@ -273,31 +273,33 @@ class BaseKuryrScenarioTest(manager.NetworkScenarioTest):
except kubernetes.client.rest.ApiException: except kubernetes.client.rest.ApiException:
return False return False
return cls.get_readiness_state_from_pod(pod,
container_name=container_name)
@classmethod
def get_readiness_state_from_pod(cls, pod, container_name=None):
if container_name: if container_name:
for container in pod.status.container_statuses: for container in pod.status.container_statuses:
if container.name == container_name: if container.name == container_name:
return container.ready return container.ready
else: elif pod.status.conditions:
for condition in pod.status.conditions: for condition in pod.status.conditions:
if condition.type == 'Ready': if condition.type == 'Ready':
return condition.status == 'True' return condition.status == 'True'
return False return False
@classmethod @classmethod
def check_pods_ready_num(cls, namespace="default", def check_pods_ready_num(cls, namespace="default", label="", num_pods=1):
label="", num_pods=1): pods = cls.get_pod_list(namespace=namespace, label_selector=label)
pods = cls.get_pod_name_list(namespace=namespace, ready_pods = sum([cls.get_readiness_state_from_pod(p) for p in pods])
label_selector=label) return num_pods == ready_pods
ready_pods = sum([cls.get_readiness_state(p) for p in pods])
return (num_pods == ready_pods)
@classmethod @classmethod
def check_pods_status_num(cls, namespace="default", label="", num_pods=1, def check_pods_status_num(cls, namespace="default", label="", num_pods=1,
status="Running"): status="Running"):
pods = cls.get_pod_name_list(namespace=namespace, pods = cls.get_pod_list(namespace=namespace, label_selector=label)
label_selector=label) status_pods = sum([p.status.phase == status for p in pods])
status_pods = sum([cls.get_pod_status(p) == status for p in pods]) return num_pods == status_pods
return (num_pods == status_pods)
@classmethod @classmethod
def get_pod_readiness(cls, pod_name, namespace="default"): def get_pod_readiness(cls, pod_name, namespace="default"):
@ -921,7 +923,7 @@ class BaseKuryrScenarioTest(manager.NetworkScenarioTest):
kuryr_crd_annotation = K8S_ANNOTATION_PREFIX + "-net-crd" kuryr_crd_annotation = K8S_ANNOTATION_PREFIX + "-net-crd"
# wait until namespace gets created # wait until namespace gets created
while True: while True:
time.sleep(1) time.sleep(10)
ns = cls.k8s_client.CoreV1Api().read_namespace_status(name) ns = cls.k8s_client.CoreV1Api().read_namespace_status(name)
if (ns.metadata.annotations and if (ns.metadata.annotations and
ns.metadata.annotations.get(kuryr_crd_annotation)): ns.metadata.annotations.get(kuryr_crd_annotation)):

View File

@ -50,7 +50,7 @@ class TestNetworkPolicyScenario(base.BaseKuryrScenarioTest,
ready = False ready = False
while time.time() - start < TIMEOUT_PERIOD: while time.time() - start < TIMEOUT_PERIOD:
try: try:
time.sleep(1) time.sleep(consts.NP_CHECK_SLEEP_TIME)
sg_id, _, ready = self.get_np_crd_info( sg_id, _, ready = self.get_np_crd_info(
name=network_policy_name, namespace=namespace) name=network_policy_name, namespace=namespace)
if sg_id and ready: if sg_id and ready:
@ -95,7 +95,7 @@ class TestNetworkPolicyScenario(base.BaseKuryrScenarioTest,
egress_cidrs_found))): egress_cidrs_found))):
rules_match = True rules_match = True
time.sleep(1) time.sleep(consts.NP_CHECK_SLEEP_TIME)
if not rules_match: if not rules_match:
msg = 'Timed out waiting sg rules for np %s to match' % np msg = 'Timed out waiting sg rules for np %s to match' % np
@ -124,7 +124,7 @@ class TestNetworkPolicyScenario(base.BaseKuryrScenarioTest,
start = time.time() start = time.time()
while time.time() - start < TIMEOUT_PERIOD: while time.time() - start < TIMEOUT_PERIOD:
try: try:
time.sleep(1) time.sleep(consts.NP_CHECK_SLEEP_TIME)
sg_id, _, ready = self.get_np_crd_info( sg_id, _, ready = self.get_np_crd_info(
network_policy_name, namespace=namespace_name) network_policy_name, namespace=namespace_name)
if sg_id and ready: if sg_id and ready:
@ -206,7 +206,7 @@ class TestNetworkPolicyScenario(base.BaseKuryrScenarioTest,
start = time.time() start = time.time()
while time.time() - start < TIMEOUT_PERIOD: while time.time() - start < TIMEOUT_PERIOD:
try: try:
time.sleep(1) time.sleep(consts.NP_CHECK_SLEEP_TIME)
sg_id, _, _ = self.get_np_crd_info(network_policy_name, sg_id, _, _ = self.get_np_crd_info(network_policy_name,
namespace=namespace_name) namespace=namespace_name)
if sg_id: if sg_id:
@ -247,7 +247,7 @@ class TestNetworkPolicyScenario(base.BaseKuryrScenarioTest,
start = time.time() start = time.time()
while time.time() - start < TIMEOUT_PERIOD: while time.time() - start < TIMEOUT_PERIOD:
try: try:
time.sleep(1) time.sleep(consts.NP_CHECK_SLEEP_TIME)
self.get_np_crd_info(network_policy_name, self.get_np_crd_info(network_policy_name,
namespace=namespace_name) namespace=namespace_name)
except kubernetes.client.rest.ApiException as e: except kubernetes.client.rest.ApiException as e:
@ -277,7 +277,7 @@ class TestNetworkPolicyScenario(base.BaseKuryrScenarioTest,
start = time.time() start = time.time()
while time.time() - start < TIMEOUT_PERIOD: while time.time() - start < TIMEOUT_PERIOD:
try: try:
time.sleep(1) time.sleep(consts.NP_CHECK_SLEEP_TIME)
sg_id, _, _ = self.get_np_crd_info(network_policy_name) sg_id, _, _ = self.get_np_crd_info(network_policy_name)
if sg_id: if sg_id:
break break
@ -291,7 +291,7 @@ class TestNetworkPolicyScenario(base.BaseKuryrScenarioTest,
self.delete_network_policy(network_policy_name) self.delete_network_policy(network_policy_name)
start = time.time() start = time.time()
while time.time() - start < TIMEOUT_PERIOD: while time.time() - start < TIMEOUT_PERIOD:
time.sleep(1) time.sleep(consts.NP_CHECK_SLEEP_TIME)
if network_policy_name in self.list_network_policies(): if network_policy_name in self.list_network_policies():
continue continue
try: try:
@ -320,7 +320,7 @@ class TestNetworkPolicyScenario(base.BaseKuryrScenarioTest,
start = time.time() start = time.time()
while time.time() - start < TIMEOUT_PERIOD: while time.time() - start < TIMEOUT_PERIOD:
try: try:
time.sleep(1) time.sleep(consts.NP_CHECK_SLEEP_TIME)
_, crd_pod_selector, _ = self.get_np_crd_info( _, crd_pod_selector, _ = self.get_np_crd_info(
network_policy_name) network_policy_name)
if crd_pod_selector: if crd_pod_selector:
@ -341,7 +341,7 @@ class TestNetworkPolicyScenario(base.BaseKuryrScenarioTest,
start = time.time() start = time.time()
while time.time() - start < TIMEOUT_PERIOD: while time.time() - start < TIMEOUT_PERIOD:
try: try:
time.sleep(1) time.sleep(consts.NP_CHECK_SLEEP_TIME)
_, crd_pod_selector, _ = self.get_np_crd_info( _, crd_pod_selector, _ = self.get_np_crd_info(
network_policy_name) network_policy_name)
labels = crd_pod_selector.get('matchLabels') labels = crd_pod_selector.get('matchLabels')
@ -368,7 +368,7 @@ class TestNetworkPolicyScenario(base.BaseKuryrScenarioTest,
start = time.time() start = time.time()
while time.time() - start < TIMEOUT_PERIOD: while time.time() - start < TIMEOUT_PERIOD:
try: try:
time.sleep(1) time.sleep(consts.NP_CHECK_SLEEP_TIME)
sg_id, _, _ = self.get_np_crd_info(network_policy_name, sg_id, _, _ = self.get_np_crd_info(network_policy_name,
namespace=ns_name) namespace=ns_name)
if sg_id: if sg_id:
@ -384,7 +384,7 @@ class TestNetworkPolicyScenario(base.BaseKuryrScenarioTest,
self.delete_namespace(ns_name) self.delete_namespace(ns_name)
start = time.time() start = time.time()
while time.time() - start < TIMEOUT_PERIOD: while time.time() - start < TIMEOUT_PERIOD:
time.sleep(1) time.sleep(consts.NP_CHECK_SLEEP_TIME)
if network_policy_name in self.list_network_policies( if network_policy_name in self.list_network_policies(
namespace=ns_name): namespace=ns_name):
continue continue
@ -398,7 +398,7 @@ class TestNetworkPolicyScenario(base.BaseKuryrScenarioTest,
sg_ids_after = [sg['id'] for sg in sgs_after] sg_ids_after = [sg['id'] for sg in sgs_after]
if sg_id not in sg_ids_after: if sg_id not in sg_ids_after:
break break
time.sleep(1) time.sleep(consts.NP_CHECK_SLEEP_TIME)
if time.time() - start >= TIMEOUT_PERIOD: if time.time() - start >= TIMEOUT_PERIOD:
raise lib_exc.TimeoutException('Sec group ID still exists') raise lib_exc.TimeoutException('Sec group ID still exists')
@ -416,7 +416,7 @@ class TestNetworkPolicyScenario(base.BaseKuryrScenarioTest,
start = time.time() start = time.time()
while time.time() - start < TIMEOUT_PERIOD: while time.time() - start < TIMEOUT_PERIOD:
try: try:
time.sleep(1) time.sleep(consts.NP_CHECK_SLEEP_TIME)
sg_id, _, _ = self.get_np_crd_info(network_policy_name, sg_id, _, _ = self.get_np_crd_info(network_policy_name,
namespace=ns_name) namespace=ns_name)
if sg_id: if sg_id:
@ -430,7 +430,7 @@ class TestNetworkPolicyScenario(base.BaseKuryrScenarioTest,
self.delete_network_policy(network_policy_name, namespace=ns_name) self.delete_network_policy(network_policy_name, namespace=ns_name)
start = time.time() start = time.time()
while time.time() - start < TIMEOUT_PERIOD: while time.time() - start < TIMEOUT_PERIOD:
time.sleep(1) time.sleep(consts.NP_CHECK_SLEEP_TIME)
if network_policy_name in self.list_network_policies( if network_policy_name in self.list_network_policies(
namespace=ns_name): namespace=ns_name):
continue continue
@ -444,7 +444,7 @@ class TestNetworkPolicyScenario(base.BaseKuryrScenarioTest,
sg_ids_after = [sg['id'] for sg in sgs_after] sg_ids_after = [sg['id'] for sg in sgs_after]
if sg_id not in sg_ids_after: if sg_id not in sg_ids_after:
break break
time.sleep(1) time.sleep(consts.NP_CHECK_SLEEP_TIME)
if time.time() - start >= TIMEOUT_PERIOD: if time.time() - start >= TIMEOUT_PERIOD:
raise lib_exc.TimeoutException('Sec group ID still exists') raise lib_exc.TimeoutException('Sec group ID still exists')

View File

@ -21,5 +21,6 @@ TIME_TO_APPLY_SGS = 30
POD_STATUS_RETRIES = 240 POD_STATUS_RETRIES = 240
POD_CHECK_TIMEOUT = 240 POD_CHECK_TIMEOUT = 240
POD_CHECK_SLEEP_TIME = 5 POD_CHECK_SLEEP_TIME = 5
NP_CHECK_SLEEP_TIME = 10
NS_TIMEOUT = 600 NS_TIMEOUT = 600
REPETITIONS_PER_BACKEND = 10 REPETITIONS_PER_BACKEND = 10

View File

@ -51,11 +51,11 @@ class TestKuryrRestartScenario(base.BaseKuryrScenarioTest):
namespace=CONF.kuryr_kubernetes.kube_system_namespace) namespace=CONF.kuryr_kubernetes.kube_system_namespace)
# make sure the kuryr pod was deleted # make sure the kuryr pod was deleted
pod_delete_retries = 30 pod_delete_retries = 6
while self.get_pod_status( while self.get_pod_status(
kuryr_pod_name, kuryr_pod_name,
namespace=CONF.kuryr_kubernetes.kube_system_namespace): namespace=CONF.kuryr_kubernetes.kube_system_namespace):
time.sleep(1) time.sleep(5)
pod_delete_retries -= 1 pod_delete_retries -= 1
if pod_delete_retries == 0: if pod_delete_retries == 0:
raise lib_exc.TimeoutException() raise lib_exc.TimeoutException()

View File

@ -275,7 +275,7 @@ class TestNamespaceScenario(base.BaseKuryrScenarioTest):
start = time.time() start = time.time()
while time.time() - start < TIMEOUT_PERIOD: while time.time() - start < TIMEOUT_PERIOD:
time.sleep(1) time.sleep(10)
try: try:
if CONF.kuryr_kubernetes.kuryrnetworks: if CONF.kuryr_kubernetes.kuryrnetworks:
self.get_kuryr_network_crds(namespace) self.get_kuryr_network_crds(namespace)
@ -286,7 +286,7 @@ class TestNamespaceScenario(base.BaseKuryrScenarioTest):
# Also wait for the namespace removal # Also wait for the namespace removal
while time.time() - start < TIMEOUT_PERIOD: while time.time() - start < TIMEOUT_PERIOD:
time.sleep(1) time.sleep(10)
try: try:
self.get_namespace(namespace) self.get_namespace(namespace)
except kubernetes.client.rest.ApiException: except kubernetes.client.rest.ApiException:
@ -390,11 +390,11 @@ class TestNamespaceScenario(base.BaseKuryrScenarioTest):
self.delete_namespace(ns_name) self.delete_namespace(ns_name)
# wait for namespace to be deleted # wait for namespace to be deleted
# FIXME(itzikb) Set retries to 120 when BZ#1997120 is fixed # FIXME(itzikb) Set retries to 24 when BZ#1997120 is fixed
retries = 600 retries = 120
while True: while True:
try: try:
time.sleep(1) time.sleep(5)
self.k8s_client.CoreV1Api().read_namespace(ns_name) self.k8s_client.CoreV1Api().read_namespace(ns_name)
retries -= 1 retries -= 1
self.assertNotEqual(0, retries, self.assertNotEqual(0, retries,