From 3a4d901fa67533c3e1bcdb4f6dc07736ac28167b Mon Sep 17 00:00:00 2001 From: Tabitha Date: Wed, 28 Oct 2020 23:22:11 +0100 Subject: [PATCH] Returns CNI errors in specified form CNI spec defines the way to return errors from the CNI plugin, along with well-defined error codes that can be used by the CNI to decide how to treat the error. This change updates service.py and main.go to return CNI errors in the specified form. Change-Id: Ib76debb56aeb746b92c8260be00f3445cca5948f Closes-Bug: #1899489 --- kuryr_cni/main.go | 87 ++++++++++++++++++++------ kuryr_kubernetes/cni/daemon/service.py | 71 ++++++++++++++++++--- 2 files changed, 130 insertions(+), 28 deletions(-) diff --git a/kuryr_cni/main.go b/kuryr_cni/main.go index f28be054e..a866aaee1 100644 --- a/kuryr_cni/main.go +++ b/kuryr_cni/main.go @@ -14,7 +14,6 @@ import ( cni "github.com/containernetworking/cni/pkg/types" "github.com/containernetworking/cni/pkg/types/current" "github.com/containernetworking/cni/pkg/version" - "github.com/pkg/errors" ) const ( @@ -24,6 +23,9 @@ const ( urlBase = "http://localhost:5036/" addPath = "addNetwork" delPath = "delNetwork" + + ErrVif uint = 899 + ErrParsing uint = 799 ) type KuryrDaemonData struct { @@ -40,7 +42,12 @@ func transformData(args *skel.CmdArgs, command string) (KuryrDaemonData, error) var conf interface{} err := json.Unmarshal(args.StdinData, &conf) if err != nil { - return KuryrDaemonData{}, err + newErr := types.Error{ + Code: types.ErrDecodingFailure, + Msg: fmt.Sprintf("Error when reading configuration: %v", err), + Details: "", + } + return KuryrDaemonData{}, &newErr } return KuryrDaemonData{ @@ -59,7 +66,11 @@ func makeDaemonRequest(data KuryrDaemonData, expectedCode int) ([]byte, error) { b, err := json.Marshal(data) if err != nil { - return []byte{}, errors.Wrapf(err, "Error when preparing payload for kuryr-daemon") + return []byte{}, &types.Error{ + Code: types.ErrInvalidNetworkConfig, + Msg: fmt.Sprintf("Error when preparing payload for kuryr-daemon: %v", err), + Details: "", + } } url := "" @@ -69,27 +80,43 @@ func makeDaemonRequest(data KuryrDaemonData, expectedCode int) ([]byte, error) { case "DEL": url = urlBase + delPath default: - return []byte{}, errors.Errorf("Cannot handle command %s", data.Command) + return []byte{}, &types.Error{ + Code: types.ErrInvalidEnvironmentVariables, + Msg: fmt.Sprintf("Cannot handle command %s", data.Command), + Details: "", + } } resp, err := http.Post(url, "application/json", bytes.NewBuffer(b)) if err != nil { - return []byte{}, errors.Wrapf(err, "Looks like %s cannot be reached. Is kuryr-daemon running?", url) + return []byte{}, &types.Error{ + Code: types.ErrTryAgainLater, + Msg: fmt.Sprintf("Looks like %s cannot be reached. Is kuryr-daemon running?", url), + Details: fmt.Sprintf("%v", err), + } } defer resp.Body.Close() body, _ := ioutil.ReadAll(resp.Body) if resp.StatusCode != expectedCode { - return []byte{}, errors.Errorf("CNI Daemon returned error %d %s", resp.StatusCode, body) + if len(body) > 1 { + var err types.Error + json.Unmarshal(body, &err) + return []byte{}, &err + } + return []byte{}, &types.Error{ + Code: uint(resp.StatusCode), + Msg: fmt.Sprintf("CNI Daemon returned error %d %s", resp.StatusCode, body), + Details: "", + } } - return body, nil } func cmdAdd(args *skel.CmdArgs) error { data, err := transformData(args, "ADD") if err != nil { - return errors.Wrap(err, "Error when reading configuration") + return err } body, err := makeDaemonRequest(data, 202) @@ -98,9 +125,13 @@ func cmdAdd(args *skel.CmdArgs) error { } vif := VIF{} - err = json.Unmarshal(body, &vif) - if err != nil { - return errors.Wrapf(err, "Error when reading response from kuryr-daemon: %s", string(body)) + er := json.Unmarshal(body, &vif) + if er != nil { + return &types.Error{ + Code: ErrVif, + Msg: fmt.Sprintf("Error when reading response from kuryr-daemon: %s", string(body)), + Details: fmt.Sprintf("%v", er), + } } iface := current.Interface{} @@ -115,13 +146,19 @@ func cmdAdd(args *skel.CmdArgs) error { addrStr := subnet.Ips[0].Address addr := net.ParseIP(addrStr) if addr == nil { - return errors.Errorf("Error when parsing IP address %s received from kuryr-daemon", addrStr) - + return &types.Error{ + Code: ErrParsing, + Msg: fmt.Sprintf("Error when parsing IP address %s received from kuryr-daemon", addrStr), + Details: "", + } } _, cidr, err := net.ParseCIDR(subnet.Cidr) if err != nil { - return errors.Wrapf(err, "Error when parsing CIDR %s received from kuryr-daemon", - subnet.Cidr) + return &types.Error{ + Code: ErrParsing, + Msg: fmt.Sprintf("Error when parsing CIDR %s received from kuryr-daemon", subnet.Cidr), + Details: fmt.Sprintf("%v", err), + } } ver := "4" @@ -133,7 +170,11 @@ func cmdAdd(args *skel.CmdArgs) error { ifaceCIDR := fmt.Sprintf("%s/%d", addr.String(), prefixSize) ipAddress, err := cni.ParseCIDR(ifaceCIDR) if err != nil { - return errors.Wrapf(err, "Error when parsing CIDR %s received from kuryr-daemon", ifaceCIDR) + return &types.Error{ + Code: ErrParsing, + Msg: fmt.Sprintf("Error when parsing CIDR %s received from kuryr-daemon", ifaceCIDR), + Details: fmt.Sprintf("%v", err), + } } ifaceNum := 0 @@ -147,14 +188,20 @@ func cmdAdd(args *skel.CmdArgs) error { for _, route := range subnet.Routes { _, dst, err := net.ParseCIDR(route.Cidr) if err != nil { - return errors.Wrapf(err, "Error when parsing CIDR %s received from kuryr-daemon", - route.Cidr) + return &types.Error{ + Code: ErrParsing, + Msg: fmt.Sprintf("Error when parsing CIDR %s received from kuryr-daemon", route.Cidr), + Details: fmt.Sprintf("%v", err), + } } gw := net.ParseIP(route.Gateway) if gw == nil { - return errors.Errorf("Error when parsing IP address %s received from kuryr-daemon", - route.Gateway) + return &types.Error{ + Code: ErrParsing, + Msg: fmt.Sprintf("Error when parsing IP address %s received from kuryr-daemon", route.Gateway), + Details: "", + } } routes = append(routes, &types.Route{Dst: *dst, GW: gw}) diff --git a/kuryr_kubernetes/cni/daemon/service.py b/kuryr_kubernetes/cni/daemon/service.py index ac2ce4f33..e188d0722 100644 --- a/kuryr_kubernetes/cni/daemon/service.py +++ b/kuryr_kubernetes/cni/daemon/service.py @@ -13,6 +13,7 @@ # limitations under the License. from ctypes import c_bool +import errno from http import client as httplib import multiprocessing import os @@ -25,6 +26,7 @@ import urllib3 import cotyledon import flask +import pyroute2 from pyroute2.ipdb import transactional import os_vif @@ -48,6 +50,9 @@ from kuryr_kubernetes import watcher as k_watcher LOG = logging.getLogger(__name__) CONF = cfg.CONF HEALTH_CHECKER_DELAY = 5 +ErrInvalidEnvironmentVariables = 4 +ErrTryAgainLater = 11 +ErrInternal = 999 class DaemonServer(object): @@ -70,26 +75,64 @@ class DaemonServer(object): params.CNI_COMMAND, params) return params + def _error(self, error_code, message, details=""): + template = { + "code": error_code, + "msg": message, + "details": details + } + data = jsonutils.dumps(template) + return data + def add(self): try: params = self._prepare_request() except Exception: self._check_failure() LOG.exception('Exception when reading CNI params.') - return '', httplib.BAD_REQUEST, self.headers + error = self._error(ErrInvalidEnvironmentVariables, + "Required CNI params missing.") + return error, httplib.BAD_REQUEST, self.headers try: vif = self.plugin.add(params) data = jsonutils.dumps(vif.obj_to_primitive()) - except exceptions.ResourceNotReady: + except exceptions.ResourceNotReady as e: self._check_failure() LOG.error('Error when processing addNetwork request') - return '', httplib.GATEWAY_TIMEOUT, self.headers + error = self._error(ErrTryAgainLater, + f"{e}. Try Again Later.") + return error, httplib.GATEWAY_TIMEOUT, self.headers + except pyroute2.NetlinkError as e: + if e.code == errno.EEXIST: + self._check_failure() + args = {'kind': 'vlan', 'vlan_id': vif.vlan_id} + LOG.warning( + f'Creation of pod interface failed due to VLAN ID ' + f'(vlan_info={args}) conflict. Probably the CRI had not ' + f'cleaned up the network namespace of deleted pods. ' + f'Attempting to retry.') + error = self._error(ErrTryAgainLater, + "Creation of pod interface failed due to" + " vlan_id. Try Again Later", + f"vlan_id:{vif.vlan_id}") + return error, httplib.GATEWAY_TIMEOUT, self.headers + raise except Exception: - self._check_failure() + if not self.healthy.value: + error = self._error(ErrInternal, + "Maximum CNI ADD Failures Reached.", + "Error when processing addNetwork request." + " CNI Params: {}".format(params)) + else: + self._check_failure() + error = self._error(ErrInternal, + "Error processing request", + "Failure processing addNetwork request. " + "CNI Params: {}".format(params)) LOG.exception('Error when processing addNetwork request. CNI ' 'Params: %s', params) - return '', httplib.INTERNAL_SERVER_ERROR, self.headers + return error, httplib.INTERNAL_SERVER_ERROR, self.headers return data, httplib.ACCEPTED, self.headers @@ -98,7 +141,9 @@ class DaemonServer(object): params = self._prepare_request() except Exception: LOG.exception('Exception when reading CNI params.') - return '', httplib.BAD_REQUEST, self.headers + error = self._error(ErrInvalidEnvironmentVariables, + "Required CNI params missing.") + return error, httplib.BAD_REQUEST, self.headers try: self.plugin.delete(params) @@ -112,10 +157,20 @@ class DaemonServer(object): 'Ignoring this error, pod is most likely gone') return '', httplib.NO_CONTENT, self.headers except Exception: - self._check_failure() + if not self.healthy.value: + error = self._error(ErrInternal, + "Maximum CNI DEL Failures Reached.", + "Error processing delNetwork request. " + "CNI Params: {}".format(params)) + else: + self._check_failure() + error = self._error(ErrInternal, + "Error processing request", + "Failure processing delNetwork request. " + "CNI Params: {}".format(params)) LOG.exception('Error when processing delNetwork request. CNI ' 'Params: %s.', params) - return '', httplib.INTERNAL_SERVER_ERROR, self.headers + return error, httplib.INTERNAL_SERVER_ERROR, self.headers return '', httplib.NO_CONTENT, self.headers def run(self):