Refine VM requirements

- Add IaaS machine type to controller VM requirements

- Require Ubuntu everywhere.

- Collect hardware requirements from more places in KubeVela.

Change-Id: I4c20e69440f5a5e27bb5c0f5441e36c18dd8dcb5
This commit is contained in:
Rudi Schlatte
2024-01-29 14:41:48 +01:00
parent 2fd8be5497
commit 9cbab364cb
2 changed files with 79 additions and 25 deletions

View File

@@ -12,7 +12,6 @@ import lombok.Getter;
import lombok.Setter; import lombok.Setter;
import lombok.extern.slf4j.Slf4j; import lombok.extern.slf4j.Slf4j;
import java.util.ArrayList;
import java.util.HashMap; import java.util.HashMap;
import java.util.Iterator; import java.util.Iterator;
import java.util.List; import java.util.List;
@@ -24,15 +23,11 @@ import java.util.stream.Collectors;
import java.util.stream.StreamSupport; import java.util.stream.StreamSupport;
import org.ow2.proactive.sal.model.AttributeRequirement; import org.ow2.proactive.sal.model.AttributeRequirement;
import org.ow2.proactive.sal.model.CommandsInstallation; import org.ow2.proactive.sal.model.NodeType;
import org.ow2.proactive.sal.model.Communication; import org.ow2.proactive.sal.model.NodeTypeRequirement;
import org.ow2.proactive.sal.model.IaasDefinition; import org.ow2.proactive.sal.model.OperatingSystemFamily;
import org.ow2.proactive.sal.model.JobDefinition;
import org.ow2.proactive.sal.model.JobInformation;
import org.ow2.proactive.sal.model.NodeCandidate;
import org.ow2.proactive.sal.model.Requirement; import org.ow2.proactive.sal.model.Requirement;
import org.ow2.proactive.sal.model.RequirementOperator; import org.ow2.proactive.sal.model.RequirementOperator;
import org.ow2.proactive.sal.model.TaskDefinition;
/** /**
* Internal representation of a NebulOus app. * Internal representation of a NebulOus app.
@@ -76,12 +71,19 @@ public class NebulousApp {
* The requirements of the node running the NebulOuS controller. This * The requirements of the node running the NebulOuS controller. This
* machine runs the Kubernetes cluster and KubeVela. * machine runs the Kubernetes cluster and KubeVela.
*/ */
@Getter public static List<Requirement> getControllerRequirements(String jobID) {
private static final List<Requirement> controllerRequirements return List.of(
= List.of( new NodeTypeRequirement(List.of(NodeType.IAAS), jobID, jobID),
// TODO: untested; we rely on the fact that SAL has an abstraction
// over operating systems. See
// https://github.com/ow2-proactive/scheduling-abstraction-layer/blob/master/sal-common/src/main/java/org/ow2/proactive/sal/model/OperatingSystemFamily.java#L39
// and
// https://github.com/ow2-proactive/scheduling-abstraction-layer/blob/master/sal-service/src/main/java/org/ow2/proactive/sal/service/nc/NodeCandidateUtils.java#L159
new AttributeRequirement("image", "operatingSystem.family",
RequirementOperator.IN, OperatingSystemFamily.UBUNTU.toString()),
new AttributeRequirement("hardware", "memory", RequirementOperator.GEQ, "2048"), new AttributeRequirement("hardware", "memory", RequirementOperator.GEQ, "2048"),
new AttributeRequirement("hardware", "cpu", RequirementOperator.GEQ, "2")); new AttributeRequirement("hardware", "cpu", RequirementOperator.GEQ, "2"));
}
/** /**
* The UUID of the app. This is the UUID that identifies a specific * The UUID of the app. This is the UUID that identifies a specific
* application's ActiveMQ messages. * application's ActiveMQ messages.

View File

@@ -12,6 +12,7 @@ import org.ow2.proactive.sal.model.IaasDefinition;
import org.ow2.proactive.sal.model.JobDefinition; import org.ow2.proactive.sal.model.JobDefinition;
import org.ow2.proactive.sal.model.JobInformation; import org.ow2.proactive.sal.model.JobInformation;
import org.ow2.proactive.sal.model.NodeCandidate; import org.ow2.proactive.sal.model.NodeCandidate;
import org.ow2.proactive.sal.model.OperatingSystemFamily;
import org.ow2.proactive.sal.model.Requirement; import org.ow2.proactive.sal.model.Requirement;
import org.ow2.proactive.sal.model.RequirementOperator; import org.ow2.proactive.sal.model.RequirementOperator;
import org.ow2.proactive.sal.model.TaskDefinition; import org.ow2.proactive.sal.model.TaskDefinition;
@@ -29,10 +30,51 @@ import lombok.extern.slf4j.Slf4j;
@Slf4j @Slf4j
public class NebulousAppDeployer { public class NebulousAppDeployer {
/**
 * Given a KubeVela file, extract how many nodes to deploy for each
 * component.
 *
 * <p>The replica count is read from a component trait of this shape:
 * <pre>
 * traits:
 *  - type: scaler
 *    properties:
 *      replicas: 2
 * </pre>
 *
 * <p>Every component starts with a count of 1.  A trait of type
 * "scaler" whose `properties.replicas` is an exact integral number
 * overrides that default; if a component carries several such traits,
 * the last one wins.
 *
 * <p>The KubeVela tree is only read here.  Lookups use `at` / `path`,
 * which return a missing node for absent paths, instead of `withArray`,
 * which would insert an empty array into the caller's tree when the
 * path does not exist.
 *
 * @param kubevela the parsed KubeVela file.
 * @return A map from component name to number of instances to generate.
 */
public static Map<String, Integer> getNodeCountFromKubevela (JsonNode kubevela) {
    Map<String, Integer> result = new HashMap<>();
    // Iterating a missing node yields nothing, so a file without
    // components simply produces an empty map.
    for (final JsonNode c : kubevela.at("/spec/components")) {
        final String name = c.get("name").asText();
        result.put(name, 1); // default value
        for (final JsonNode t : c.path("traits")) {
            final JsonNode replicas = t.at("/properties/replicas");
            if ("scaler".equals(t.path("type").asText())
                && replicas.canConvertToExactIntegral())
            {
                result.put(name, replicas.asInt());
            }
        }
    }
    return result;
}
/** /**
* Given a KubeVela file, extract its VM requirements in a form we can * Given a KubeVela file, extract its VM requirements in a form we can
* send to the SAL `findNodeCandidates` endpoint. <p> * send to the SAL `findNodeCandidates` endpoint. <p>
* *
* We add the requirement that OS family == Ubuntu.
*
* We read the following attributes for each component:
*
* - `properties.cpu`, `properties.resources.requests.cpu`: round up to next integer
* and generate requirement `hardware.cores`
*
* - `properties.memory`, `properties.resources.requests.memory`: Handle "200Mi",
* "0.2Gi" and bare number, convert to MB and generate requirement
* `hardware.memory`
*
* Notes:<p> * Notes:<p>
* *
* - For the first version, we specify all requirements as "greater or * - For the first version, we specify all requirements as "greater or
@@ -44,6 +86,10 @@ public class NebulousAppDeployer {
* nearest integer and ask for "this or more" cores, since we might end * nearest integer and ask for "this or more" cores, since we might end
* up with “strange” numbers of cores. <p> * up with “strange” numbers of cores. <p>
* *
* - We should use `traits.*.properties.replicas` if `traits.*.type` ==
* "scaler" to create multiple instances -- but that's probably a
* separate method
*
* @param kubevela the parsed KubeVela file. * @param kubevela the parsed KubeVela file.
* @return a map of component name to (potentially empty) list of * @return a map of component name to (potentially empty) list of
* requirements for that component. No requirements mean any node will * requirements for that component. No requirements mean any node will
@@ -54,13 +100,13 @@ public class NebulousAppDeployer {
ArrayNode components = kubevela.withArray("/spec/components"); ArrayNode components = kubevela.withArray("/spec/components");
for (final JsonNode c : components) { for (final JsonNode c : components) {
ArrayList<Requirement> reqs = new ArrayList<>(); ArrayList<Requirement> reqs = new ArrayList<>();
result.put(c.get("name").asText(), reqs); reqs.add(new AttributeRequirement("image", "operatingSystem.family",
JsonNode properties = c.path("properties"); RequirementOperator.IN, OperatingSystemFamily.UBUNTU.toString()));
if (properties.has("cpu")) { JsonNode cpu = c.at("/properties/cpu");
if (cpu.isMissingNode()) cpu = c.at("/properties/resources/requests/cpu");
if (!cpu.isMissingNode() && cpu.isNumber()) {
// KubeVela has fractional core /cpu requirements // KubeVela has fractional core /cpu requirements
String kubevela_cpu_str = properties.get("cpu").asText(); double kubevela_cpu = Double.parseDouble(cpu.asText());
// TODO: catch NumberFormatException
double kubevela_cpu = Double.parseDouble(kubevela_cpu_str);
long sal_cores = Math.round(Math.ceil(kubevela_cpu)); long sal_cores = Math.round(Math.ceil(kubevela_cpu));
if (sal_cores > 0) { if (sal_cores > 0) {
reqs.add(new AttributeRequirement("hardware", "cores", reqs.add(new AttributeRequirement("hardware", "cores",
@@ -70,18 +116,22 @@ public class NebulousAppDeployer {
log.warn("CPU of component {} is 0 or not a number", c.get("name").asText()); log.warn("CPU of component {} is 0 or not a number", c.get("name").asText());
} }
} }
if (properties.has("memory")) {; JsonNode memory = c.at("/properties/memory");
String sal_memory = properties.get("memory").asText(); if (memory.isMissingNode()) cpu = c.at("/properties/resources/requests/memory");
if (!memory.isMissingNode()) {;
String sal_memory = memory.asText();
if (sal_memory.endsWith("Mi")) { if (sal_memory.endsWith("Mi")) {
sal_memory = sal_memory.substring(0, sal_memory.length() - 2); sal_memory = sal_memory.substring(0, sal_memory.length() - 2);
} else if (sal_memory.endsWith("Gi")) { } else if (sal_memory.endsWith("Gi")) {
sal_memory = String.valueOf(Integer.parseInt(sal_memory.substring(0, sal_memory.length() - 2)) * 1024); sal_memory = String.valueOf(Integer.parseInt(sal_memory.substring(0, sal_memory.length() - 2)) * 1024);
} else if (!properties.get("memory").isNumber()) { } else if (!memory.isNumber()) {
log.warn("Unsupported memory specification in component {} :{} (wanted 'Mi' or 'Gi') ", log.warn("Unsupported memory specification in component {} :{} (wanted 'Mi' or 'Gi') ",
properties.get("name").asText(), c.get("name").asText(),
properties.get("memory").asText()); memory.asText());
sal_memory = null; sal_memory = null;
} }
// Fall-through: we rewrote the KubeVela file and didn't add
// the "Mi" suffix, but it's a number
if (sal_memory != null) { if (sal_memory != null) {
reqs.add(new AttributeRequirement("hardware", "memory", reqs.add(new AttributeRequirement("hardware", "memory",
RequirementOperator.GEQ, sal_memory)); RequirementOperator.GEQ, sal_memory));
@@ -90,6 +140,8 @@ public class NebulousAppDeployer {
for (final JsonNode t : c.withArray("traits")) { for (final JsonNode t : c.withArray("traits")) {
// Check for node affinity / geoLocation / country // Check for node affinity / geoLocation / country
} }
// Finally, add requirements for this job to the map
result.put(c.get("name").asText(), reqs);
} }
return result; return result;
} }
@@ -160,10 +212,10 @@ public class NebulousAppDeployer {
// 3. Create coordinator node // 3. Create coordinator node
log.debug("Creating app coordinator node"); log.debug("Creating app coordinator node");
List<NodeCandidate> controller_candidates List<NodeCandidate> controller_candidates
= NebulousApp.getSalConnector().findNodeCandidates(NebulousApp.getControllerRequirements()); = NebulousApp.getSalConnector().findNodeCandidates(NebulousApp.getControllerRequirements(appUUID));
if (controller_candidates.isEmpty()) { if (controller_candidates.isEmpty()) {
log.error("Could not find node candidates for controller node; requirements: {}", log.error("Could not find node candidates for controller node; requirements: {}",
NebulousApp.getControllerRequirements()); NebulousApp.getControllerRequirements(appUUID));
return; return;
} }
NodeCandidate controller_candidate = controller_candidates.get(0); NodeCandidate controller_candidate = controller_candidates.get(0);