From 9cbab364cbf5f3e3870b9fff18dc96e096e50855 Mon Sep 17 00:00:00 2001 From: Rudi Schlatte Date: Mon, 29 Jan 2024 14:41:48 +0100 Subject: [PATCH] Refine VM requirements - Add IaaS machine type to controller VM requirements - Require Ubuntu everywhere. - Collect hardware requirements from more places in KubeVela. Change-Id: I4c20e69440f5a5e27bb5c0f5441e36c18dd8dcb5 --- .../optimiser/controller/NebulousApp.java | 26 ++++--- .../controller/NebulousAppDeployer.java | 78 +++++++++++++++---- 2 files changed, 79 insertions(+), 25 deletions(-) diff --git a/optimiser-controller/src/main/java/eu/nebulouscloud/optimiser/controller/NebulousApp.java b/optimiser-controller/src/main/java/eu/nebulouscloud/optimiser/controller/NebulousApp.java index 65e19cd..11eff8d 100644 --- a/optimiser-controller/src/main/java/eu/nebulouscloud/optimiser/controller/NebulousApp.java +++ b/optimiser-controller/src/main/java/eu/nebulouscloud/optimiser/controller/NebulousApp.java @@ -12,7 +12,6 @@ import lombok.Getter; import lombok.Setter; import lombok.extern.slf4j.Slf4j; -import java.util.ArrayList; import java.util.HashMap; import java.util.Iterator; import java.util.List; @@ -24,15 +23,11 @@ import java.util.stream.Collectors; import java.util.stream.StreamSupport; import org.ow2.proactive.sal.model.AttributeRequirement; -import org.ow2.proactive.sal.model.CommandsInstallation; -import org.ow2.proactive.sal.model.Communication; -import org.ow2.proactive.sal.model.IaasDefinition; -import org.ow2.proactive.sal.model.JobDefinition; -import org.ow2.proactive.sal.model.JobInformation; -import org.ow2.proactive.sal.model.NodeCandidate; +import org.ow2.proactive.sal.model.NodeType; +import org.ow2.proactive.sal.model.NodeTypeRequirement; +import org.ow2.proactive.sal.model.OperatingSystemFamily; import org.ow2.proactive.sal.model.Requirement; import org.ow2.proactive.sal.model.RequirementOperator; -import org.ow2.proactive.sal.model.TaskDefinition; /** * Internal representation of a NebulOus app. 
@@ -76,12 +71,19 @@ public class NebulousApp { * The requirements of the node running the NebulOuS controller. This * machine runs the Kubernetes cluster and KubeVela. */ - @Getter - private static final List controllerRequirements - = List.of( + public static List getControllerRequirements(String jobID) { + return List.of( + new NodeTypeRequirement(List.of(NodeType.IAAS), jobID, jobID), + // TODO: untested; we rely on the fact that SAL has an abstraction + // over operating systems. See + // https://github.com/ow2-proactive/scheduling-abstraction-layer/blob/master/sal-common/src/main/java/org/ow2/proactive/sal/model/OperatingSystemFamily.java#L39 + // and + // https://github.com/ow2-proactive/scheduling-abstraction-layer/blob/master/sal-service/src/main/java/org/ow2/proactive/sal/service/nc/NodeCandidateUtils.java#L159 + new AttributeRequirement("image", "operatingSystem.family", + RequirementOperator.IN, OperatingSystemFamily.UBUNTU.toString()), new AttributeRequirement("hardware", "memory", RequirementOperator.GEQ, "2048"), new AttributeRequirement("hardware", "cpu", RequirementOperator.GEQ, "2")); - + } /** * The UUID of the app. This is the UUID that identifies a specific * application's ActiveMQ messages. 
diff --git a/optimiser-controller/src/main/java/eu/nebulouscloud/optimiser/controller/NebulousAppDeployer.java b/optimiser-controller/src/main/java/eu/nebulouscloud/optimiser/controller/NebulousAppDeployer.java index 5faaf33..f599654 100644 --- a/optimiser-controller/src/main/java/eu/nebulouscloud/optimiser/controller/NebulousAppDeployer.java +++ b/optimiser-controller/src/main/java/eu/nebulouscloud/optimiser/controller/NebulousAppDeployer.java @@ -12,6 +12,7 @@ import org.ow2.proactive.sal.model.IaasDefinition; import org.ow2.proactive.sal.model.JobDefinition; import org.ow2.proactive.sal.model.JobInformation; import org.ow2.proactive.sal.model.NodeCandidate; +import org.ow2.proactive.sal.model.OperatingSystemFamily; import org.ow2.proactive.sal.model.Requirement; import org.ow2.proactive.sal.model.RequirementOperator; import org.ow2.proactive.sal.model.TaskDefinition; @@ -29,10 +30,51 @@ import lombok.extern.slf4j.Slf4j; @Slf4j public class NebulousAppDeployer { + /** + * Given a KubeVela file, extract how many nodes to deploy for each + * component. + * + * We currently detect replica count with the following component trait: + * --- + * traits: + * - type: scaler + * properties: + * replicas: 2 + * + * @param kubevela the parsed KubeVela file. + * @return A map from component name to number of instances to generate. 
+ */ + public static Map getNodeCountFromKubevela (JsonNode kubevela) { + Map result = new HashMap<>(); + ArrayNode components = kubevela.withArray("/spec/components"); + for (final JsonNode c : components) { + result.put(c.get("name").asText(), 1); // default value + for (final JsonNode t : c.withArray("/traits")) { + if (t.at("/type").asText().equals("scaler") + && t.at("/properties/replicas").canConvertToExactIntegral()) + { + result.put(c.get("name").asText(), t.at("/properties/replicas").asInt()); + } + } + } + return result; + } + /** * Given a KubeVela file, extract its VM requirements in a form we can * send to the SAL `findNodeCandidates` endpoint.

* + * We add the requirement that OS family == Ubuntu. + * + * We read the following attributes for each component: + * + * - `properties.cpu`, `properties.resources.requests.cpu`: round up to next integer + * and generate requirement `hardware.cores` + * + * - `properties.memory`, `properties.resources.requests.memory`: Handle "200Mi", + * "2Gi" (whole numbers only) and bare number, convert to MB and generate requirement + * `hardware.memory` + * * Notes:

* * - For the first version, we specify all requirements as "greater or @@ -44,6 +86,10 @@ public class NebulousAppDeployer { * nearest integer and ask for "this or more" cores, since we might end * up with “strange” numbers of cores.

* + * - We should use `traits.*.properties.replicas` if `traits.*.type` == + * "scaler" to create multiple instances -- but that's probably a + * separate method + * * @param kubevela the parsed KubeVela file. * @return a map of component name to (potentially empty) list of * requirements for that component. No requirements mean any node will @@ -54,13 +100,13 @@ public class NebulousAppDeployer { ArrayNode components = kubevela.withArray("/spec/components"); for (final JsonNode c : components) { ArrayList reqs = new ArrayList<>(); - result.put(c.get("name").asText(), reqs); - JsonNode properties = c.path("properties"); - if (properties.has("cpu")) { + reqs.add(new AttributeRequirement("image", "operatingSystem.family", + RequirementOperator.IN, OperatingSystemFamily.UBUNTU.toString())); + JsonNode cpu = c.at("/properties/cpu"); + if (cpu.isMissingNode()) cpu = c.at("/properties/resources/requests/cpu"); + if (!cpu.isMissingNode() && cpu.isNumber()) { // KubeVela has fractional core /cpu requirements - String kubevela_cpu_str = properties.get("cpu").asText(); - // TODO: catch NumberFormatException - double kubevela_cpu = Double.parseDouble(kubevela_cpu_str); + double kubevela_cpu = Double.parseDouble(cpu.asText()); long sal_cores = Math.round(Math.ceil(kubevela_cpu)); if (sal_cores > 0) { reqs.add(new AttributeRequirement("hardware", "cores", @@ -70,18 +116,22 @@ public class NebulousAppDeployer { log.warn("CPU of component {} is 0 or not a number", c.get("name").asText()); } } - if (properties.has("memory")) {; - String sal_memory = properties.get("memory").asText(); + JsonNode memory = c.at("/properties/memory"); + if (memory.isMissingNode()) cpu = c.at("/properties/resources/requests/memory"); + if (!memory.isMissingNode()) {; + String sal_memory = memory.asText(); if (sal_memory.endsWith("Mi")) { sal_memory = sal_memory.substring(0, sal_memory.length() - 2); } else if (sal_memory.endsWith("Gi")) { sal_memory = 
String.valueOf(Integer.parseInt(sal_memory.substring(0, sal_memory.length() - 2)) * 1024); - } else if (!properties.get("memory").isNumber()) { + } else if (!memory.isNumber()) { log.warn("Unsupported memory specification in component {} :{} (wanted 'Mi' or 'Gi') ", - properties.get("name").asText(), - properties.get("memory").asText()); + c.get("name").asText(), + memory.asText()); sal_memory = null; } + // Fall-through: we rewrote the KubeVela file and didn't add + // the "Mi" suffix, but it's a number if (sal_memory != null) { reqs.add(new AttributeRequirement("hardware", "memory", RequirementOperator.GEQ, sal_memory)); @@ -90,6 +140,8 @@ public class NebulousAppDeployer { for (final JsonNode t : c.withArray("traits")) { // Check for node affinity / geoLocation / country } + // Finally, add requirements for this job to the map + result.put(c.get("name").asText(), reqs); } return result; } @@ -160,10 +212,10 @@ public class NebulousAppDeployer { // 3. Create coordinator node log.debug("Creating app coordinator node"); List controller_candidates - = NebulousApp.getSalConnector().findNodeCandidates(NebulousApp.getControllerRequirements()); + = NebulousApp.getSalConnector().findNodeCandidates(NebulousApp.getControllerRequirements(appUUID)); if (controller_candidates.isEmpty()) { log.error("Could not find node candidates for controller node; requirements: {}", - NebulousApp.getControllerRequirements()); + NebulousApp.getControllerRequirements(appUUID)); return; } NodeCandidate controller_candidate = controller_candidates.get(0);