app/ofctl: fix possible deadlock

When datapath reconnects/disconnects, waiting requests on old
datapath should be canceled.  Otherwise, threads that sent
requests may fall into deadlock because replies possibly never
come.

This patch is backport of the Ryu patch [1].

Related-Bug: #1944201

[1] 6f906e72c9

Change-Id: I0bde9df2cd651b88e60631b53d69ad50f8d86d02
This commit is contained in:
Slawek Kaplonski 2021-10-04 11:30:01 +02:00
parent f4e5e04aac
commit 2e8cf5bed2
1 changed files with 27 additions and 13 deletions

View File

@ -64,6 +64,22 @@ class OfctlService(app_manager.OSKenApp):
self.unobserve_event(ev_cls)
self.logger.debug('ofctl: stop observing %s', ev_cls)
def _cancel(self, info, barrier_xid, exception):
xid = info.barriers.pop(barrier_xid)
req = info.xids.pop(xid)
msg = req.msg
datapath = msg.datapath
parser = datapath.ofproto_parser
is_barrier = isinstance(msg, parser.OFPBarrierRequest)
info.results.pop(xid)
if not is_barrier and req.reply_cls is not None:
self._unobserve_msg(req.reply_cls)
self.logger.error('failed to send message <%s>', req.msg)
self.reply_to_request(req, event.Reply(exception=exception))
@staticmethod
def _is_error(msg):
return (ofp_event.ofp_msg_to_ev_cls(type(msg)) ==
@ -81,6 +97,9 @@ class OfctlService(app_manager.OSKenApp):
self._switches[id] = new_info
if old_info:
old_info.datapath.close()
for xid in list(old_info.barriers):
self._cancel(
old_info, xid, exception.InvalidDatapath(result=id))
@set_ev_cls(ofp_event.EventOFPStateChange, DEAD_DISPATCHER)
def _handle_dead(self, ev):
@ -96,6 +115,8 @@ class OfctlService(app_manager.OSKenApp):
if info.datapath is datapath:
self.logger.debug('forget info %s', info)
self._switches.pop(id)
for xid in list(info.barriers):
self._cancel(info, xid, exception.InvalidDatapath(result=id))
@set_ev_cls(event.GetDatapathRequest, MAIN_DISPATCHER)
def _handle_get_datapath(self, req):
@ -131,15 +152,6 @@ class OfctlService(app_manager.OSKenApp):
si.xids[xid] = req
si.barriers[barrier_xid] = xid
def _cancel(barrier_xid, exc):
xid = si.barriers.pop(barrier_xid)
si.results.pop(xid)
si.xids.pop(xid)
if not is_barrier and req.reply_cls is not None:
self._unobserve_msg(req.reply_cls)
self.logger.error('failed to send message <%s>', msg)
self.reply_to_request(req, event.Reply(exception=exc))
if is_barrier:
barrier = msg
datapath.set_xid(barrier)
@ -152,12 +164,14 @@ class OfctlService(app_manager.OSKenApp):
datapath.set_xid(barrier)
_store_xid(msg.xid, barrier.xid)
if not datapath.send_msg(msg):
return _cancel(barrier.xid,
exception.InvalidDatapath(result=datapath.id))
return self._cancel(
si, barrier.xid,
exception.InvalidDatapath(result=datapath.id))
if not datapath.send_msg(barrier):
return _cancel(barrier.xid,
exception.InvalidDatapath(result=datapath.id))
return self._cancel(
si, barrier.xid,
exception.InvalidDatapath(result=datapath.id))
@set_ev_cls(ofp_event.EventOFPBarrierReply, MAIN_DISPATCHER)
def _handle_barrier(self, ev):