Merge "Cleanup KuryrPort when Pod is missing"

This commit is contained in:
Zuul 2022-09-22 10:46:45 +00:00 committed by Gerrit Code Review
commit 624a106fe2
11 changed files with 139 additions and 140 deletions

View File

@ -358,7 +358,7 @@ class PodVIFDriver(DriverBase, metaclass=abc.ABCMeta):
raise NotImplementedError()
@abc.abstractmethod
def release_vif(self, pod, vif, project_id=None, security_groups=None):
def release_vif(self, pod, vif, project_id=None):
"""Unlinks Neutron port corresponding to VIF object from pod.
Implementing drivers must ensure the port is either deleted or made
@ -367,13 +367,10 @@ class PodVIFDriver(DriverBase, metaclass=abc.ABCMeta):
:param pod: dict containing Kubernetes Pod object
:param vif: VIF object as returned by `PodVIFDriver.request_vif`
:param project_id: OpenStack project ID
:param security_groups: list containing security groups'
IDs as returned by
`PodSecurityGroupsDriver.get_security_groups`
"""
raise NotImplementedError()
def release_vifs(self, pods, vifs, project_id=None, security_groups=None):
def release_vifs(self, pods, vifs, project_id=None):
"""Unlinks Neutron ports corresponding to VIF objects.
It follows the same pattern as release_vif but releasing num_ports
@ -383,9 +380,6 @@ class PodVIFDriver(DriverBase, metaclass=abc.ABCMeta):
:param vifs: list of VIF objects as returned by
`PodVIFDriver.request_vif`
:param project_id: (optional) OpenStack project ID
:param security_groups: (optional) list containing security groups'
IDs as returned by
`PodSecurityGroupsDriver.get_security_groups`
"""
raise NotImplementedError()

View File

@ -51,7 +51,7 @@ class NestedDpdkPodVIFDriver(nested_vif.NestedPodVIFDriver):
# TODO(garyloug): provide an implementation
raise NotImplementedError()
def release_vif(self, pod, vif, project_id=None, security_groups=None):
def release_vif(self, pod, vif, project_id=None):
compute = clients.get_compute_client()
vm_id = self._get_parent_port(pod).device_id

View File

@ -65,7 +65,7 @@ class NestedMacvlanPodVIFDriver(nested_vif.NestedPodVIFDriver):
# TODO(mchiappe): provide an implementation
raise NotImplementedError()
def release_vif(self, pod, vif, project_id=None, security_groups=None):
def release_vif(self, pod, vif, project_id=None):
os_net = clients.get_network_client()
attempts = kuryr_config.CONF.pod_vif_nested.rev_update_attempts

View File

@ -157,7 +157,7 @@ class NestedVlanPodVIFDriver(nested_vif.NestedPodVIFDriver):
"rechecked when event will be retried.", vif.id)
raise
def release_vif(self, pod, vif, project_id=None, security_groups=None):
def release_vif(self, pod, vif, project_id=None):
os_net = clients.get_network_client()
parent_port = self._get_parent_port(pod)
trunk_id = self._get_trunk_id(parent_port)

View File

@ -93,7 +93,7 @@ class NeutronPodVIFDriver(base.PodVIFDriver):
vifs.append(vif)
return vifs
def release_vif(self, pod, vif, project_id=None, security_groups=None):
def release_vif(self, pod, vif, project_id=None):
clients.get_network_client().delete_port(vif.id)
def activate_vif(self, vif, **kwargs):

View File

@ -315,8 +315,7 @@ class BaseVIFPool(base.VIFPoolDriver, metaclass=abc.ABCMeta):
return False
return True
def release_vif(self, pod, vif, project_id, security_groups,
host_addr=None):
def release_vif(self, pod, vif, project_id, host_addr=None):
if not self._recovered_pools:
LOG.debug("Kuryr-controller not yet ready to remove pods.")
raise exceptions.ResourceNotReady(pod)
@ -836,23 +835,7 @@ class NestedVIFPool(BaseVIFPool):
def set_vif_driver(self, driver):
self._drv_vif = driver
def _get_parent_port_id(self, vif):
os_net = clients.get_network_client()
tags = []
if config.CONF.neutron_defaults.resource_tags:
tags = config.CONF.neutron_defaults.resource_tags
trunks = os_net.trunks(tags=tags)
for trunk in trunks:
for sp in trunk.sub_ports:
if sp['port_id'] == vif.id:
return trunk.port_id
return None
def release_vif(self, pod, vif, project_id, security_groups):
def release_vif(self, pod, vif, project_id):
if not self._recovered_pools:
LOG.debug("Kuryr-controller not yet ready to remove pods.")
raise exceptions.ResourceNotReady(pod)
@ -865,17 +848,17 @@ class NestedVIFPool(BaseVIFPool):
"determine the IP by calling Neutron.",
name)
parent_id = self._get_parent_port_id(vif)
parent_id = utils.get_parent_port_id(vif)
if not parent_id:
LOG.warning("Port %s not found, ignoring its release request.",
vif.id)
return
host_addr = self._get_parent_port_ip(parent_id)
host_addr = utils.get_parent_port_ip(parent_id)
LOG.debug("Determined hostIP for pod %s is %s", name, host_addr)
super(NestedVIFPool, self).release_vif(
pod, vif, project_id, security_groups, host_addr=host_addr)
pod, vif, project_id, host_addr=host_addr)
def _set_port_debug(self, port_id, pod):
os_net = clients.get_network_client()
@ -967,11 +950,6 @@ class NestedVIFPool(BaseVIFPool):
self._known_trunk_ids[pool_key] = trunk_id
return trunk_id
def _get_parent_port_ip(self, port_id):
os_net = clients.get_network_client()
parent_port = os_net.get_port(port_id)
return parent_port.fixed_ips[0]['ip_address']
def sync_pools(self):
super(NestedVIFPool, self).sync_pools()
# NOTE(ltomasbo): Ensure previously created ports are recovered into

View File

@ -142,24 +142,52 @@ class KuryrPortHandler(k8s_base.ResourceEventHandler):
self._update_services(services, crd_pod_selectors,
project_id)
def _mock_cleanup_pod(self, kuryrport_crd):
"""Mock Pod that doesn't exist anymore for cleanup purposes"""
pod = {
'apiVersion': 'v1',
'kind': 'Pod',
'metadata': kuryrport_crd['metadata'].copy(),
}
# No need to try to delete the finalizer from the pod later, as
# pod's gone.
del pod['metadata']['finalizers']
main_vif = objects.base.VersionedObject.obj_from_primitive(
kuryrport_crd['status']['vifs'][constants.DEFAULT_IFNAME]
['vif'])
port_id = utils.get_parent_port_id(main_vif)
host_ip = utils.get_parent_port_ip(port_id)
pod['status'] = {'hostIP': host_ip}
return pod
def on_finalize(self, kuryrport_crd, *args, **kwargs):
name = kuryrport_crd['metadata']['name']
namespace = kuryrport_crd['metadata']['namespace']
cleanup = False # If we're doing a cleanup, raise no error.
try:
pod = self.k8s.get(f"{constants.K8S_API_NAMESPACES}"
f"/{namespace}/pods/{name}")
if pod['metadata']['uid'] != kuryrport_crd['spec']['podUid']:
# Seems like this is KuryrPort created for an old Pod instance,
# with the same name. Cleaning it up instead of regular delete.
raise k_exc.K8sResourceNotFound(
'Pod %s' % pod['metadata']['uid'])
except k_exc.K8sResourceNotFound:
LOG.error("Pod %s/%s doesn't exists, deleting orphaned KuryrPort",
namespace, name)
# TODO(gryf): Free resources
try:
LOG.warning('Pod for KuryrPort %s was forcibly deleted. '
'Attempting a cleanup before releasing KuryrPort.',
utils.get_res_unique_name(kuryrport_crd))
self.k8s.add_event(kuryrport_crd, 'MissingPod',
'Pod does not exist anymore, attempting to '
'cleanup orphaned KuryrPort', 'Warning')
if kuryrport_crd['status']['vifs']:
pod = self._mock_cleanup_pod(kuryrport_crd)
cleanup = True # Make sure we don't raise on release_vif()
else:
# Remove the finalizer, most likely ports never got created.
self.k8s.remove_finalizer(kuryrport_crd,
constants.KURYRPORT_FINALIZER)
except k_exc.K8sClientException as ex:
LOG.exception("Failed to remove finalizer from KuryrPort %s",
ex)
raise
return
return
if ('deletionTimestamp' not in pod['metadata'] and
not utils.is_pod_completed(pod)):
@ -193,21 +221,16 @@ class KuryrPortHandler(k8s_base.ResourceEventHandler):
self.k8s.add_event(pod, 'SkipingSGDeletion', 'Skipping SG rules '
'deletion')
crd_pod_selectors = []
try:
security_groups = self._drv_sg.get_security_groups(pod, project_id)
except k_exc.ResourceNotReady:
# NOTE(ltomasbo): If the namespace object gets deleted first the
# namespace security group driver will raise a ResourceNotReady
# exception as it cannot access anymore the kuryrnetwork CRD
# annotated on the namespace object. In such case we set security
# groups to empty list so that if pools are enabled they will be
# properly released.
security_groups = []
for data in kuryrport_crd['status']['vifs'].values():
vif = objects.base.VersionedObject.obj_from_primitive(data['vif'])
self._drv_vif_pool.release_vif(pod, vif, project_id,
security_groups)
try:
self._drv_vif_pool.release_vif(pod, vif, project_id)
except Exception:
if not cleanup:
raise
LOG.warning('Error when cleaning up VIF %s, ignoring.',
utils.get_res_unique_name(kuryrport_crd))
if (driver_utils.is_network_policy_enabled() and crd_pod_selectors and
oslo_cfg.CONF.octavia_defaults.enforce_sg_rules):
services = driver_utils.get_services()
@ -225,12 +248,27 @@ class KuryrPortHandler(k8s_base.ResourceEventHandler):
f"/{kuryrport_crd['metadata']['namespace']}"
f"/pods"
f"/{kuryrport_crd['metadata']['name']}")
except k_exc.K8sResourceNotFound as ex:
LOG.exception("Failed to get pod: %s", ex)
# TODO(gryf): Release resources
self.k8s.remove_finalizer(kuryrport_crd,
constants.KURYRPORT_FINALIZER)
raise
if pod['metadata']['uid'] != kuryrport_crd['spec']['podUid']:
# Seems like this is KuryrPort created for an old Pod, deleting
# it anyway.
raise k_exc.K8sResourceNotFound(
'Pod %s' % pod['metadata']['uid'])
except k_exc.K8sResourceNotFound:
LOG.warning('Pod for KuryrPort %s was forcibly deleted. Deleting'
'the KuryrPort too to attempt resource cleanup.',
utils.get_res_unique_name(kuryrport_crd))
self.k8s.add_event(kuryrport_crd, 'MissingPod',
'Pod does not exist anymore, attempting to '
'delete orphaned KuryrPort', 'Warning')
try:
self.k8s.delete(utils.get_res_link(kuryrport_crd))
except k_exc.K8sResourceNotFound:
pass
except k_exc.K8sClientException:
LOG.exception('Error when trying to delete KuryrPort %s. Will '
'retry later.',
utils.get_res_unique_name(kuryrport_crd))
return False
project_id = self._drv_project.get_project(pod)
security_groups = self._drv_sg.get_security_groups(pod, project_id)

View File

@ -1322,21 +1322,6 @@ class NestedVIFPool(test_base.TestCase):
port_id)
os_net.delete_port.assert_not_called()
def test__get_parent_port_ip(self):
cls = vif_pool.NestedVIFPool
m_driver = mock.MagicMock(spec=cls)
os_net = self.useFixture(k_fix.MockNetworkClient()).client
port_id = str(uuid.uuid4())
ip_address = mock.sentinel.ip_address
port_obj = fake.get_port_obj(ip_address=ip_address)
os_net.get_port.return_value = port_obj
self.assertEqual(ip_address, cls._get_parent_port_ip(m_driver,
port_id))
@mock.patch('kuryr_kubernetes.utils.get_subnet')
def test__get_trunk_info(self, m_get_subnet):
cls = vif_pool.NestedVIFPool

View File

@ -45,13 +45,17 @@ class TestKuryrPortHandler(test_base.TestCase):
self._kp_namespace = mock.sentinel.namespace
self._kp_uid = mock.sentinel.kp_uid
self._kp_name = 'pod1'
self._pod_uid = 'deadbeef'
self._pod = {'apiVersion': 'v1',
'kind': 'Pod',
'metadata': {'resourceVersion': self._pod_version,
'name': self._kp_name,
'deletionTimestamp': mock.sentinel.date,
'namespace': self._kp_namespace},
'metadata': {
'resourceVersion': self._pod_version,
'name': self._kp_name,
'deletionTimestamp': mock.sentinel.date,
'namespace': self._kp_namespace,
'uid': self._pod_uid,
},
'spec': {'nodeName': self._host}}
self._kp = {
@ -63,10 +67,11 @@ class TestKuryrPortHandler(test_base.TestCase):
'namespace': self._kp_namespace,
'labels': {
constants.KURYRPORT_LABEL: self._host
}
},
'finalizers': [],
},
'spec': {
'podUid': 'deadbeef',
'podUid': self._pod_uid,
'podNodeName': self._host
},
'status': {'vifs': {}}
@ -91,6 +96,7 @@ class TestKuryrPortHandler(test_base.TestCase):
self._pod_uri = (f"{constants.K8S_API_NAMESPACES}"
f"/{self._kp['metadata']['namespace']}/pods/"
f"{self._kp['metadata']['name']}")
self._kp_uri = utils.get_res_link(self._kp)
self.useFixture(k_fix.MockNetworkClient())
self._driver = multi_vif.NoopMultiVIFDriver()
@ -312,12 +318,18 @@ class TestKuryrPortHandler(test_base.TestCase):
update_crd.assert_called_once_with(self._kp, self._vifs)
create_sgr.assert_called_once_with(self._pod)
@mock.patch('kuryr_kubernetes.controller.drivers.default_project.'
'DefaultPodProjectDriver.get_project')
@mock.patch('kuryr_kubernetes.utils.get_parent_port_id')
@mock.patch('kuryr_kubernetes.utils.get_parent_port_ip')
@mock.patch('kuryr_kubernetes.clients.get_kubernetes_client')
@mock.patch('kuryr_kubernetes.controller.drivers.base.MultiVIFDriver.'
'get_enabled_drivers')
def test_on_finalize_exception_on_pod(self, ged, k8s):
def test_on_finalize_exception_on_pod(self, ged, k8s, gppip, gppid,
project_driver):
ged.return_value = [self._driver]
kp = kuryrport.KuryrPortHandler()
self._kp['metadata']['deletionTimestamp'] = 'foobar'
self._kp['status']['vifs'] = self._vifs_primitive
with mock.patch.object(kp, 'k8s') as k8s:
@ -325,46 +337,10 @@ class TestKuryrPortHandler(test_base.TestCase):
self.assertIsNone(kp.on_finalize(self._kp))
k8s.get.assert_called_once_with(self._pod_uri)
k8s.remove_finalizer.assert_called_once_with(
self._kp, constants.KURYRPORT_FINALIZER)
@mock.patch('kuryr_kubernetes.controller.drivers.vif_pool.MultiVIFPool.'
'release_vif')
@mock.patch('kuryr_kubernetes.controller.drivers.default_security_groups.'
'DefaultPodSecurityGroupsDriver.get_security_groups')
@mock.patch('kuryr_kubernetes.controller.drivers.default_security_groups.'
'DefaultPodSecurityGroupsDriver.delete_sg_rules')
@mock.patch('kuryr_kubernetes.controller.drivers.default_project.'
'DefaultPodProjectDriver.get_project')
@mock.patch('kuryr_kubernetes.clients.get_kubernetes_client')
@mock.patch('kuryr_kubernetes.controller.drivers.base.MultiVIFDriver.'
'get_enabled_drivers')
def test_on_finalize_crd_sg_exceptions(self, ged, k8s, get_project,
delete_sg_rules, get_sg,
release_vif):
ged.return_value = [self._driver]
kp = kuryrport.KuryrPortHandler()
self._kp['status']['vifs'] = self._vifs_primitive
get_project.return_value = self._project_id
delete_sg_rules.side_effect = k_exc.ResourceNotReady(self._pod)
get_sg.side_effect = k_exc.ResourceNotReady(self._pod)
with mock.patch.object(kp, 'k8s') as k8s:
k8s.get.return_value = self._pod
kp.on_finalize(self._kp)
k8s.get.assert_called_once_with(self._pod_uri)
k8s.remove_finalizer.assert_has_calls(
[mock.call(self._pod, constants.POD_FINALIZER),
mock.call(self._kp, constants.KURYRPORT_FINALIZER)])
delete_sg_rules.assert_called_once_with(self._pod)
get_sg.assert_called_once_with(self._pod, self._project_id)
release_vif.assert_has_calls([mock.call(self._pod, self._vif1,
self._project_id, []),
mock.call(self._pod, self._vif2,
self._project_id, [])])
(mock.call(mock.ANY, constants.POD_FINALIZER),
mock.call(self._kp, constants.KURYRPORT_FINALIZER)))
@mock.patch('kuryr_kubernetes.controller.handlers.kuryrport.'
'KuryrPortHandler._update_services')
@ -377,8 +353,6 @@ class TestKuryrPortHandler(test_base.TestCase):
'is_network_policy_enabled')
@mock.patch('kuryr_kubernetes.controller.drivers.vif_pool.MultiVIFPool.'
'release_vif')
@mock.patch('kuryr_kubernetes.controller.drivers.default_security_groups.'
'DefaultPodSecurityGroupsDriver.get_security_groups')
@mock.patch('kuryr_kubernetes.controller.drivers.default_security_groups.'
'DefaultPodSecurityGroupsDriver.delete_sg_rules')
@mock.patch('kuryr_kubernetes.controller.drivers.default_project.'
@ -387,9 +361,8 @@ class TestKuryrPortHandler(test_base.TestCase):
@mock.patch('kuryr_kubernetes.controller.drivers.base.MultiVIFDriver.'
'get_enabled_drivers')
def test_on_finalize_np(self, ged, k8s, get_project, delete_sg_rules,
get_sg, release_vif, is_np_enabled,
get_lb_instance, get_sg_instance, get_services,
update_services):
release_vif, is_np_enabled, get_lb_instance,
get_sg_instance, get_services, update_services):
ged.return_value = [self._driver]
CONF.set_override('enforce_sg_rules', True, group='octavia_defaults')
self.addCleanup(CONF.clear_override, 'enforce_sg_rules',
@ -399,7 +372,6 @@ class TestKuryrPortHandler(test_base.TestCase):
get_project.return_value = self._project_id
selector = mock.sentinel.selector
delete_sg_rules.return_value = selector
get_sg.return_value = self._security_groups
get_services.return_value = mock.sentinel.services
with mock.patch.object(kp, 'k8s') as k8s:
@ -413,13 +385,10 @@ class TestKuryrPortHandler(test_base.TestCase):
mock.call(self._kp, constants.KURYRPORT_FINALIZER)])
delete_sg_rules.assert_called_once_with(self._pod)
get_sg.assert_called_once_with(self._pod, self._project_id)
release_vif.assert_has_calls([mock.call(self._pod, self._vif1,
self._project_id,
self._security_groups),
self._project_id),
mock.call(self._pod, self._vif2,
self._project_id,
self._security_groups)])
self._project_id)])
get_services.assert_called_once()
update_services.assert_called_once_with(mock.sentinel.services,
@ -486,11 +455,10 @@ class TestKuryrPortHandler(test_base.TestCase):
kp = kuryrport.KuryrPortHandler()
kp.k8s.get.side_effect = k_exc.K8sResourceNotFound(self._pod)
self.assertRaises(k_exc.K8sResourceNotFound, kp.get_vifs, self._kp)
self.assertFalse(kp.get_vifs(self._kp))
kp.k8s.get.assert_called_once_with(self._pod_uri)
kp.k8s.remove_finalizer.assert_called_once_with(
self._kp, constants.KURYRPORT_FINALIZER)
kp.k8s.delete.assert_called_once_with(self._kp_uri)
@mock.patch('kuryr_kubernetes.controller.drivers.default_subnet.'
'DefaultPodSubnetDriver.get_subnets')

View File

@ -12,6 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from unittest import mock
import uuid
import munch
from openstack import exceptions as os_exc
@ -23,6 +24,7 @@ from kuryr_kubernetes import constants as k_const
from kuryr_kubernetes import exceptions as k_exc
from kuryr_kubernetes.objects import vif
from kuryr_kubernetes.tests import base as test_base
from kuryr_kubernetes.tests import fake
from kuryr_kubernetes.tests.unit import kuryr_fixtures as k_fix
from kuryr_kubernetes import utils
@ -554,3 +556,14 @@ class TestUtils(test_base.TestCase):
m_get_net.assert_called_once()
m_net.delete_port.assert_not_called()
def test__get_parent_port_ip(self):
os_net = self.useFixture(k_fix.MockNetworkClient()).client
port_id = str(uuid.uuid4())
ip_address = mock.sentinel.ip_address
port_obj = fake.get_port_obj(ip_address=ip_address)
os_net.get_port.return_value = port_obj
self.assertEqual(ip_address, utils.get_parent_port_ip(port_id))

View File

@ -821,3 +821,26 @@ def cleanup_dead_networks():
except os_exc.SDKException as ex:
LOG.warning('There was an issue with network "%s" '
'removal: %s', net, ex)
def get_parent_port_id(vif_obj):
os_net = clients.get_network_client()
tags = []
if CONF.neutron_defaults.resource_tags:
tags = CONF.neutron_defaults.resource_tags
trunks = os_net.trunks(tags=tags)
for trunk in trunks:
for sp in trunk.sub_ports:
if sp['port_id'] == vif_obj.id:
return trunk.port_id
return None
def get_parent_port_ip(port_id):
os_net = clients.get_network_client()
parent_port = os_net.get_port(port_id)
return parent_port.fixed_ips[0]['ip_address']