promenade/tools/g2/manifests/resiliency.json
Ahmad Mahmoudi 9f42b502f7 Updated resiliency gate
Updated resiliency gate script to consistently pass all gate stages,
using ubuntu bionic image for node deployment.

- Updated developer-onboarding.rst with information on how to configure
  and run the resiliency gate behind a corporate proxy.
- Updated the gate scripts to use the proxy configuration.
- Updated up.sh to pull the hyperkube image as a cache, to speed up and
  stabilize the initial kubelet deployment of Kubernetes cluster services.
- Updated and added sleeps and retries in some of the gate stages and
  scripts to avoid gate failures due to transient environment issues.
- Updated the Ubuntu base image for node deployments from the xenial to
  the bionic base image.
- Added code in the teardown-nodes stage to manually remove the etcd
  members: kubernetes and calico, since they remain listed as
  etcd members on the genesis node, even after genesis is torn down.

Change-Id: Ia11d66ab30ac7a07626d4f1d02a6da48155f862d
2020-07-21 22:45:23 +00:00

175 lines
3.7 KiB
JSON

{
"configuration": [
"examples/basic",
"promenade/schemas"
],
"stages": [
{
"name": "Report Disk IO",
"script": "report-disk-io.sh"
},
{
"name": "Gate Setup",
"script": "gate-setup.sh"
},
{
"name": "Build Image",
"script": "build-image.sh"
},
{
"name": "Generate Certificates",
"script": "generate-certificates.sh",
"arguments": [
"-x", "PKICatalog-addition.yaml"
]
},
{
"name": "Build Scripts",
"script": "build-scripts.sh"
},
{
"name": "Create VMs",
"script": "create-vms.sh"
},
{
"name": "Genesis",
"script": "genesis.sh",
"on_error": "collect_genesis_info.sh"
},
{
"name": "Join Masters",
"script": "join-nodes.sh",
"arguments": [
"-v", "n0",
"-n", "n1",
"-n", "n2",
"-l", "calico-etcd=enabled",
"-l", "kubernetes-apiserver=enabled",
"-l", "kubernetes-controller-manager=enabled",
"-l", "kubernetes-etcd=enabled",
"-l", "kubernetes-scheduler=enabled",
"-l", "ucp-control-plane=enabled"
]
},
{
"name": "Check initial etcd cluster",
"script": "check-etcd-health.sh",
"arguments": [
"-w", "10",
"-e", "kubernetes n0 n0 n1 n2",
"-e", "calico n0 n0 n1 n2"
]
},
{
"name": "Verify Join Failure",
"script": "fail-join-node.sh",
"arguments": [
"-v", "n0",
"-n", "n3"
]
},
{
"name": "Power off n2",
"script": "power-down-node.sh",
"arguments": [
"-s",
"-n", "n2"
]
},
{
"name": "Update Generated Certs",
"script": "generate-certificates.sh",
"arguments": [
"-u"
]
},
{
"name": "Join Final Master",
"script": "join-nodes.sh",
"arguments": [
"-v", "n0",
"-n", "n3",
"-l", "calico-etcd=enabled",
"-l", "coredns=enabled",
"-l", "kubernetes-apiserver=enabled",
"-l", "kubernetes-controller-manager=enabled",
"-l", "kubernetes-etcd=enabled",
"-l", "kubernetes-scheduler=enabled",
"-l", "ucp-control-plane=enabled"
]
},
{
"name": "Power up n2",
"script": "power-up-node.sh",
"arguments": [
"-v", "n0",
"-n", "n2",
"-w", "120"
]
},
{
"name": "Check full etcd cluster",
"script": "check-etcd-health.sh",
"arguments": [
"-w", "60",
"-e", "kubernetes n0 n0 n1 n2 n3",
"-e", "calico n0 n0 n1 n2 n3"
]
},
{
"name": "Teardown Genesis",
"script": "teardown-nodes.sh",
"arguments": [
"-e", "kubernetes",
"-e", "calico",
"-v", "n1",
"-n", "n0",
"-r"
]
},
{
"name": "Check post-teardown etcd cluster",
"script": "check-etcd-health.sh",
"arguments": [
"-e", "kubernetes n1 n1 n2 n3",
"-e", "calico n1 n1 n2 n3"
]
},
{
"name": "Join n0 as Worker",
"script": "join-nodes.sh",
"arguments": [
"-v", "n1",
"-n", "n0",
"-l", "ucp-control-plane=enabled"
]
},
{
"name": "Check final etcd cluster",
"script": "check-etcd-health.sh",
"arguments": [
"-e", "kubernetes n1 n1 n2 n3",
"-e", "calico n1 n1 n2 n3"
]
},
{
"name": "Hard Reboot Cluster",
"script": "hard-reboot-cluster.sh"
},
{
"name": "Move Master",
"script": "move-master.sh"
}
],
"vm": {
"memory": 4096,
"names": [
"n0",
"n1",
"n2",
"n3"
],
"vcpus": 2
}
}