Fix racy mdev init workaround

The original implementation is prone to racing with
the nvidia driver, with the result that mdevs may not
yet be registered by the time the script runs.

Change-Id: I0dd8002a91d4026a71b8176610efac28ea50c33b
Signed-off-by: Edward Hope-Morley <edward.hope-morley@canonical.com>
Author: Edward Hope-Morley
Date: 2025-09-26 22:14:18 +01:00
parent c87c8bf7b8
commit 1b992b26e6
3 changed files with 31 additions and 14 deletions


@@ -1,6 +1,16 @@
 #!/bin/bash -e
 # Ensure all SRIOV devices have been setup
-/usr/lib/nvidia/sriov-manage -e srvio-manage -e ALL
-sleep 20
+/usr/lib/nvidia/sriov-manage -e ALL
+# Ensure mdev devices are registered before continuing
+max=10
+while true; do
+    if ! $(/usr/lib/nvidia/sriov-manage -e $(nvidia-smi -q | grep ^GPU| cut -d ' ' -f2-)| grep -q "already has VFs enabled."); then
+        echo "Waiting for GPU nvidia mdev registration"
+        sleep 1
+        ((max--)) && continue
+    fi
+    break
+done
+
 # Now go through all domains and initialise any used mdevs
 /opt/remediate-nova-mdevs
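The readiness check above keys off the "already has VFs enabled" output of sriov-manage. An alternative signal, shown here only as a sketch, is to wait for the kernel mdev framework to publish supported types in sysfs; the /sys/class/mdev_bus path is the standard mdev layout, and the 60 second timeout is an illustrative value, not part of this change:

#!/usr/bin/env python3
# Sketch only: wait until at least one mediated device type is registered.
import glob
import sys
import time

TIMEOUT = 60  # seconds; illustrative value


def mdev_types_registered():
    # Each mdev-capable device exposes its types under
    # /sys/class/mdev_bus/<pci-address>/mdev_supported_types/<type>/
    return bool(glob.glob('/sys/class/mdev_bus/*/mdev_supported_types/*'))


def main():
    deadline = time.monotonic() + TIMEOUT
    while time.monotonic() < deadline:
        if mdev_types_registered():
            return 0
        print("Waiting for GPU nvidia mdev registration")
        time.sleep(1)
    return 1


if __name__ == '__main__':
    sys.exit(main())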


@@ -45,7 +45,6 @@ class PlacementHelper():
"""
Helper for Placement operations.
"""
DRIVER_TRAIT_MAPPING = {'nvidia-610': 'CUSTOM_VGPU_PLACEMENT'}
def __init__(self):
self.fqdn = socket.getfqdn()
@@ -53,6 +52,10 @@ class PlacementHelper():
         if self.client is None:
             raise PlacementError("failed to get placement client")
 
+    @property
+    def driver_trait_mapping(self):
+        return {mtype: 'CUSTOM_VGPU_PLACEMENT' for mtype in MDEV_TYPES}
+
     @staticmethod
     def _get_sdk_adapter_helper(service_type):
         count = 1
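Replacing the single-entry DRIVER_TRAIT_MAPPING class attribute with a property derived from MDEV_TYPES means every supported mdev type resolves to the custom trait. A minimal sketch of the resulting behaviour, using illustrative type names since MDEV_TYPES itself is defined elsewhere in the script and is not shown in this diff:

# Illustrative only: the real MDEV_TYPES list lives elsewhere in the script.
MDEV_TYPES = ['nvidia-610', 'nvidia-611']

mapping = {mtype: 'CUSTOM_VGPU_PLACEMENT' for mtype in MDEV_TYPES}
assert mapping == {
    'nvidia-610': 'CUSTOM_VGPU_PLACEMENT',
    'nvidia-611': 'CUSTOM_VGPU_PLACEMENT',
}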
@@ -99,7 +102,7 @@ class PlacementHelper():
         if not _traits:
             raise PlacementError("no traits identified from the placement api")
 
-        for trait in self.DRIVER_TRAIT_MAPPING.values():
+        for trait in self.driver_trait_mapping.values():
             if trait not in _traits['traits']:
                 raise PlacementError(f"trait {trait} not found in placement "
                                      "traits")
@@ -170,7 +173,14 @@ class PlacementHelper():
         pci_id_parts = addr.split('_')
         return get_pci_address(*pci_id_parts)
 
-    def update_gpu_traits(self, rpname, rpuuid, dry_run=False):
+    def update_gpu_traits(self, dry_run=False):
+        if not self.local_compute_rps:
+            return
+
+        for rp in self.local_compute_rps:
+            self.update_gpu_trait(rp['name'], rp['uuid'], dry_run)
+
+    def update_gpu_trait(self, rpname, rpuuid, dry_run=False):
         LOG.info("updating gpu traits for resource provider %s", rpuuid)
         traits = self.get_traits_for_rp(rpuuid)
         if traits is None:
@@ -187,12 +197,12 @@ class PlacementHelper():
             return
 
-        if driver not in self.DRIVER_TRAIT_MAPPING:
+        if driver not in self.driver_trait_mapping:
             LOG.error("failed to map driver '%s' to a trait for PCI "
                       "address %s", driver, pci_address)
             return
 
-        expected_traits = [self.DRIVER_TRAIT_MAPPING[driver]]
+        expected_traits = [self.driver_trait_mapping[driver]]
         if expected_traits != traits['traits']:
             if dry_run:
                 LOG.warning("rp %s for %s is mapped to driver %s but "
@@ -334,12 +344,7 @@ def main(dry_run=False):
         LOG.error(exc)
         failed = True
 
-    if not pm.local_compute_rps:
-        return
-
-    for rp in pm.local_compute_rps:
-        pm.update_gpu_traits(rp['name'], rp['uuid'], dry_run)
+    pm.update_gpu_traits(dry_run)
 
     if failed:
         raise PlacementError("failed to update one or more placement traits")
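When expected_traits differs from the provider's current traits, update_gpu_trait ultimately has to replace the traits on that resource provider; the body of that method is not shown in this diff. The following is only a sketch of the underlying placement API call, a generation-guarded PUT, reusing the illustrative client adapter from the sketch above:

# Sketch only (not the script's code): replace the traits on one resource
# provider; placement rejects the PUT if the generation is stale.
def set_rp_traits(client, rp_uuid, generation, traits):
    body = {'traits': traits,  # e.g. ['CUSTOM_VGPU_PLACEMENT']
            'resource_provider_generation': generation}
    resp = client.put(f'/resource_providers/{rp_uuid}/traits', json=body,
                      headers={'OpenStack-API-Version': 'placement 1.6'})
    return resp.json()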


@@ -1,9 +1,11 @@
 [Unit]
 Description=GPU MDev Initialisation Workaround for OpenStack Nova
 Before=nova-compute.service
-After=syslog.target network.target libvirtd.service nvidia-vgpu-mgr.service
+After=syslog.target network.target
+After=nvidia-vgpu-mgr.service nvidia-vgpud.service libvirtd.service
 
 [Service]
 User=root
 Environment="MDEV_INIT_DRY_RUN=False"
 Type=oneshot
 ExecStart=/bin/bash /opt/initialise_nova_mdevs.sh