From a3fcc07c7a52d72cee497f1abc8e4b8bb661557a Mon Sep 17 00:00:00 2001 From: Victor Chembaev Date: Fri, 24 May 2024 11:15:40 +0300 Subject: [PATCH] Fix octavia-interface timeout Added Restart=on-failure policy to octavia-interface systemd unit Added octavia_interface_wait_timeout variable to control TimeoutStartSec in octavia-interface systemd unit Change-Id: I9de6c27131ce78e85aac56ea5d91d9740fd58354 Closes-Bug: 2067036 --- .../templates/octavia-interface.service.j2 | 4 ++++ doc/source/reference/networking/octavia.rst | 18 ++++++++++++++++++ ...via-interface-timeout-5e87ea2501d5ab3c.yaml | 10 ++++++++++ 3 files changed, 32 insertions(+) create mode 100644 releasenotes/notes/fix-octavia-interface-timeout-5e87ea2501d5ab3c.yaml diff --git a/ansible/roles/octavia/templates/octavia-interface.service.j2 b/ansible/roles/octavia/templates/octavia-interface.service.j2 index 7f04d9fb42..532cdc72e5 100644 --- a/ansible/roles/octavia/templates/octavia-interface.service.j2 +++ b/ansible/roles/octavia/templates/octavia-interface.service.j2 @@ -7,6 +7,10 @@ After=docker.service Type=oneshot User=root Group=root +Restart=on-failure +{% if octavia_interface_wait_timeout is defined %} +TimeoutStartSec={{ octavia_interface_wait_timeout }} +{% endif %} RemainAfterExit=true ExecStartPre=/sbin/ip link set dev {{ octavia_network_interface }} address {{ port_info.port.mac_address }} ExecStart=/sbin/dhclient -v {{ octavia_network_interface }} -cf /etc/dhcp/octavia-dhclient.conf diff --git a/doc/source/reference/networking/octavia.rst b/doc/source/reference/networking/octavia.rst index 72d2a04432..53f266065f 100644 --- a/doc/source/reference/networking/octavia.rst +++ b/doc/source/reference/networking/octavia.rst @@ -437,6 +437,24 @@ Add ``octavia_network_type`` to ``globals.yml`` and set the value to ``tenant`` Next,follow the deployment instructions as normal. 
+Failure handling +---------------- + +On large deployments, where the neutron-openvswitch-agent sync can take +more than 5 minutes, the octavia-interface.service systemd unit may fail +with a start timeout, because it cannot wait long enough for the o-hm0 +interface to be attached to br-int or for the Octavia management VxLAN to +be configured on that host. In this case, add ``octavia_interface_wait_timeout`` +to ``globals.yml`` and set its value to the new timeout in seconds: + +.. code-block:: yaml + + octavia_interface_wait_timeout: 1800 + +On deployments with up to 2500 network ports per network node, the sync +process can take up to 30 minutes. Choose this value according +to your deployment size. + OVN provider ============ diff --git a/releasenotes/notes/fix-octavia-interface-timeout-5e87ea2501d5ab3c.yaml b/releasenotes/notes/fix-octavia-interface-timeout-5e87ea2501d5ab3c.yaml new file mode 100644 index 0000000000..1046e1ede5 --- /dev/null +++ b/releasenotes/notes/fix-octavia-interface-timeout-5e87ea2501d5ab3c.yaml @@ -0,0 +1,10 @@ +--- +fixes: + - | + Fixes bug 2067036. + Added ``octavia_interface_wait_timeout`` to control the + octavia-interface.service start timeout, so that the unit can wait until + the openvswitch agent sync has finished and + octavia-lb-net is reachable from the host. + Also set the restart policy for this unit to ``on-failure``. + `LP#2067036 <https://launchpad.net/bugs/2067036>`__