vmware-nsx/vmware_nsx/plugins/nsx_v/housekeeper/error_dhcp_edge.py

273 lines
12 KiB
Python

# Copyright 2017 VMware, Inc.
# All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
from neutron_lib import constants
from oslo_log import log
from oslo_utils import uuidutils
from vmware_nsx.common import locking
from vmware_nsx.db import nsxv_db
from vmware_nsx.extensions import projectpluginmap
from vmware_nsx.plugins.common.housekeeper import base_job
from vmware_nsx.plugins.nsx_v.vshield.common import constants as vcns_const
LOG = log.getLogger(__name__)
class ErrorDhcpEdgeJob(base_job.BaseJob):
    """Housekeeping job that revalidates DHCP edges found in ERROR state.

    For every DHCP edge which has router bindings in ERROR state, the job
    reconciles three views of the edge: the nsxv router bindings, the nsxv
    edge vNic bindings, and the interfaces reported by the backend
    appliance. Inconsistencies are always logged; they are only repaired
    when ``self.readonly`` is false.

    NOTE(review): ``self.plugin`` and ``self.readonly`` are not assigned in
    this class - presumably they are provided by ``base_job.BaseJob``.
    """

    def get_project_plugin(self, plugin):
        """Return the NSX-V plugin obtained from ``plugin``."""
        return plugin.get_plugin_by_type(projectpluginmap.NsxPlugins.NSX_V)

    def get_name(self):
        """Return the name of this job."""
        return 'error_dhcp_edge'

    def get_description(self):
        """Return a short description of this job."""
        return 'revalidate DHCP Edge appliances in ERROR state'

    def run(self, context):
        """Validate every DHCP edge which has bindings in ERROR state.

        A failure while handling one edge is logged and does not prevent
        the remaining edges from being processed.

        :param context: request context; its ``session`` is used for all
                        DB access
        """
        super(ErrorDhcpEdgeJob, self).run(context)

        # Gather ERROR state DHCP edges into dict
        filters = {'status': [constants.ERROR]}
        error_edge_bindings = nsxv_db.get_nsxv_router_bindings(
            context.session, filters=filters)

        if not error_edge_bindings:
            LOG.debug('Housekeeping: no DHCP edges in ERROR state detected')
            return

        with locking.LockManager.get_lock('nsx-dhcp-edge-pool'):
            # Group the ERROR bindings of DHCP edges by their edge id
            edge_dict = {}
            for binding in error_edge_bindings:
                if binding['router_id'].startswith(
                        vcns_const.DHCP_EDGE_PREFIX):
                    edge_dict.setdefault(
                        binding['edge_id'], []).append(binding)

        # Get valid neutron networks and create a prefix dict.
        # The key is the leading part of the network id which is left once
        # DHCP_EDGE_PREFIX is accounted for within 36 characters; the same
        # part is extracted from the router id in _validate_dhcp_edge.
        networks = [net['id'] for net in
                    self.plugin.get_networks(context, fields=['id'])]
        pfx_dict = {net[:36 - len(vcns_const.DHCP_EDGE_PREFIX)]: net
                    for net in networks}

        for edge_id in edge_dict:
            try:
                self._validate_dhcp_edge(
                    context, edge_dict, pfx_dict, networks, edge_id)
            except Exception as e:
                # Best effort: keep going with the other edges
                LOG.error('Failed to recover DHCP Edge %s (%s)',
                          edge_id, e)

    def _validate_dhcp_edge(
            self, context, edge_dict, pfx_dict, networks, edge_id):
        """Reconcile the DB and backend state of a single DHCP edge.

        :param context: request context
        :param edge_dict: dict of edge id -> list of ERROR router bindings
        :param pfx_dict: dict of network id prefix -> network id
        :param networks: iterable of the valid neutron network ids
        :param edge_id: id of the edge to validate
        """
        # Also metadata network should be a valid network for the edge
        az_name = self.plugin.get_availability_zone_name_by_edge(context,
                                                                 edge_id)
        # Sets keep the membership tests below cheap
        valid_networks = set(networks)

        with locking.LockManager.get_lock(edge_id):
            vnic_binds = nsxv_db.get_edge_vnic_bindings_by_edge(
                context.session, edge_id)
            edge_networks = {bind['network_id'] for bind in vnic_binds}

            # Step (A)
            # Find router bindings which are mapped to dead networks, or
            # do not have interfaces registered in nsxv tables
            for binding in edge_dict[edge_id]:
                router_id = binding['router_id']
                net_pfx = router_id[len(vcns_const.DHCP_EDGE_PREFIX):]
                net_id = pfx_dict.get(net_pfx)

                if net_id is None:
                    # Delete router binding as we do not have such network
                    # in Neutron
                    LOG.warning('Housekeeping: router binding %s for edge '
                                '%s has no matching neutron network',
                                router_id, edge_id)
                    if not self.readonly:
                        nsxv_db.delete_nsxv_router_binding(
                            context.session, binding['router_id'])
                elif net_id not in edge_networks:
                    # Create vNic bind here
                    LOG.warning('Housekeeping: edge %s vnic binding '
                                'missing for network %s', edge_id,
                                net_id)
                    if not self.readonly:
                        nsxv_db.allocate_edge_vnic_with_tunnel_index(
                            context.session, edge_id, net_id, az_name)

            # Step (B)
            # Find vNic bindings which reference invalid networks or aren't
            # bound to any router binding

            # Reread vNic binds as we might have created or deleted some in
            # step (A)
            vnic_binds = nsxv_db.get_edge_vnic_bindings_by_edge(
                context.session, edge_id)

            for bind in vnic_binds:
                if bind['network_id'] not in valid_networks:
                    LOG.warning('Housekeeping: edge vnic binding for edge '
                                '%s is for invalid network id %s',
                                edge_id, bind['network_id'])
                    if not self.readonly:
                        nsxv_db.free_edge_vnic_by_network(
                            context.session, edge_id, bind['network_id'])

            # Step (C)
            # Verify that backend is in sync with Neutron

            # Reread vNic binds as we might have deleted some in step (B)
            vnic_binds = nsxv_db.get_edge_vnic_bindings_by_edge(
                context.session, edge_id)

            # Transform to network-keyed dict
            vnic_dict = {vnic['network_id']: {
                'vnic_index': vnic['vnic_index'],
                'tunnel_index': vnic['tunnel_index']
            } for vnic in vnic_binds}

            backend_vnics = self.plugin.nsx_v.vcns.get_interfaces(
                edge_id)[1].get('vnics', [])
            # vnic index -> True when the backend vnic needs to be updated
            if_changed = {}
            self._validate_edge_subinterfaces(
                context, edge_id, backend_vnics, vnic_dict, if_changed)
            self._add_missing_subinterfaces(
                context, edge_id, vnic_binds, backend_vnics, if_changed)

            if not self.readonly:
                # Push only the vnics which were modified above
                for vnic in backend_vnics:
                    if if_changed[vnic['index']]:
                        self.plugin.nsx_v.vcns.update_interface(edge_id,
                                                                vnic)

                self._update_router_bindings(context, edge_id)

    def _validate_edge_subinterfaces(self, context, edge_id, backend_vnics,
                                     vnic_dict, if_changed):
        """Drop backend subinterfaces which have no matching vNic binding.

        Every vnic in ``backend_vnics`` gets an entry in ``if_changed``
        (initially False). Subinterfaces of connected trunk vnics which do
        not match an entry of ``vnic_dict`` are removed from the in-memory
        vnic and the vnic is flagged as changed.

        :param context: request context (unused here)
        :param edge_id: id of the edge, used for logging
        :param backend_vnics: list of vnic dicts as reported by the backend;
                              modified in place
        :param vnic_dict: dict of network id -> {'vnic_index',
                          'tunnel_index'} built from the DB bindings
        :param if_changed: dict of vnic index -> bool; updated in place
        """
        # Validate that all the interfaces on the Edge
        # appliance are registered in nsxv_edge_vnic_bindings
        for vnic in backend_vnics:
            if_changed[vnic['index']] = False
            if not (vnic['isConnected'] and vnic['type'] == 'trunk' and
                    vnic['subInterfaces']):
                continue

            sub_ifs = vnic['subInterfaces']['subInterfaces']
            # Iterate over a snapshot: removing from the list which is
            # being iterated would skip the element following each removal
            for sub_if in list(sub_ifs):
                # Subinterface name field contains the net id
                vnic_bind = vnic_dict.get(sub_if['logicalSwitchName'])
                if (vnic_bind and
                        vnic_bind['vnic_index'] == vnic['index'] and
                        vnic_bind['tunnel_index'] == sub_if['tunnelId']):
                    continue

                LOG.warning('Housekeeping: subinterface %s for vnic '
                            '%s on edge %s is not defined in '
                            'nsxv_edge_vnic_bindings',
                            sub_if['tunnelId'],
                            vnic['index'], edge_id)
                if_changed[vnic['index']] = True
                sub_ifs.remove(sub_if)

    def _add_missing_subinterfaces(self, context, edge_id, vnic_binds,
                                   backend_vnics, if_changed):
        """Make sure every vNic binding has a matching backend subinterface.

        Bindings of the inter-edge internal networks are skipped. For any
        other binding, a subinterface which is missing, or which carries
        the expected tunnel id but a different network, is logged and the
        vnic is flagged as changed; unless the job is read-only the
        subinterface is also recreated on the in-memory vnic.

        :param context: request context
        :param edge_id: id of the edge
        :param vnic_binds: vNic bindings of the edge, as read from the DB
        :param backend_vnics: list of vnic dicts as reported by the backend;
                              modified in place
        :param if_changed: dict of vnic index -> bool; updated in place
        """
        # Verify that all the entries in
        # nsxv_edge_vnic_bindings are attached on the Edge

        # Arrange the vnic binds in a list of lists - vnics and subinterfaces

        metadata_nets = {
            net['network_id'] for net in
            nsxv_db.get_nsxv_internal_networks(
                context.session,
                vcns_const.InternalEdgePurposes.INTER_EDGE_PURPOSE)}

        for vnic_bind in vnic_binds:
            network_id = vnic_bind['network_id']
            if network_id in metadata_nets:
                continue

            tunnel_index = vnic_bind['tunnel_index']
            for vnic in backend_vnics:
                if vnic['index'] != vnic_bind['vnic_index']:
                    continue

                found = False
                # 'subInterfaces' may be present but empty/None, so a
                # default passed to get() would not be enough
                sub_ifs = (vnic.get('subInterfaces') or {}).get(
                    'subInterfaces') or []
                # Iterate over a snapshot: _recreate_vnic_subinterface
                # appends to the vnic's subinterface list, and the new
                # entries must not be revisited by this loop
                for sub_if in list(sub_ifs):
                    if sub_if['tunnelId'] != tunnel_index:
                        continue
                    found = True
                    if sub_if.get('logicalSwitchName') != network_id:
                        LOG.warning('Housekeeping: subinterface %s on '
                                    'vnic %s on edge %s should be '
                                    'connected to network %s',
                                    tunnel_index, vnic['index'],
                                    edge_id, network_id)
                        if_changed[vnic['index']] = True
                        if not self.readonly:
                            self._recreate_vnic_subinterface(
                                context, network_id, edge_id, vnic,
                                tunnel_index)
                            sub_if['name'] = network_id

                if not found:
                    LOG.warning('Housekeeping: subinterface %s on vnic '
                                '%s on edge %s should be connected to '
                                'network %s but is missing', tunnel_index,
                                vnic['index'], edge_id, network_id)
                    if_changed[vnic['index']] = True
                    if not self.readonly:
                        self._recreate_vnic_subinterface(
                            context, network_id, edge_id, vnic,
                            tunnel_index)

    def _recreate_vnic_subinterface(
            self, context, network_id, edge_id, vnic, tunnel_index):
        """Create a subinterface for a network and attach it to ``vnic``.

        Only the in-memory ``vnic`` dict is updated here; the caller is
        expected to push it to the backend.

        :param context: request context
        :param network_id: id of the network the subinterface serves
        :param edge_id: id of the edge, used to build the network name
        :param vnic: backend vnic dict; modified in place
        :param tunnel_index: tunnel index of the subinterface
        """
        vnic_index = vnic['index']
        # Name is "<edge>-<vnic>-<tunnel>" padded with a uuid, cut to 36
        network_name_item = [edge_id, str(vnic_index), str(tunnel_index)]
        network_name = ('-'.join(network_name_item) +
                        uuidutils.generate_uuid())[:36]
        port_group_id = vnic.get('portgroupId')

        address_groups = self.plugin._create_network_dhcp_address_group(
            context, network_id)
        port_group_id, iface = self.plugin.edge_manager._create_sub_interface(
            context, network_id, network_name, tunnel_index,
            address_groups, port_group_id)

        if not vnic.get('subInterfaces'):
            vnic['subInterfaces'] = {'subInterfaces': []}

        vnic['subInterfaces']['subInterfaces'].append(iface)

        if vnic['type'] != 'trunk':
            # reinitialize the interface as it is missing config
            vnic['name'] = (vcns_const.INTERNAL_VNIC_NAME +
                            str(vnic['index']))
            vnic['type'] = 'trunk'
            vnic['portgroupId'] = port_group_id
            vnic['mtu'] = 1500
            vnic['enableProxyArp'] = False
            vnic['enableSendRedirects'] = True
            vnic['isConnected'] = True

    def _update_router_bindings(self, context, edge_id):
        """Set the status of all the edge's router bindings to ACTIVE.

        :param context: request context
        :param edge_id: id of the edge whose bindings are updated
        """
        edge_router_binds = nsxv_db.get_nsxv_router_bindings_by_edge(
            context.session, edge_id)

        for b in edge_router_binds:
            nsxv_db.update_nsxv_router_binding(
                context.session, b['router_id'], status='ACTIVE')