Merge "Remove Kubernetes checks during optimized restore"

This commit is contained in:
Zuul
2023-08-31 02:45:31 +00:00
committed by Gerrit Code Review


@@ -161,106 +161,6 @@
- name: Restore helm service
  import_tasks: restore-helm.yml

- name: Set Kubernetes components list
  set_fact:
    kube_component_list:
      - k8s-app=calico-node
      - k8s-app=kube-proxy
      - app=multus
      - app=sriov-cni
      - component=kube-apiserver
      - component=kube-controller-manager
      - component=kube-scheduler

- name: Update Kubernetes components list
  set_fact:
    # We skip the calico-node pods on AIO-DX and STANDARD setups because the
    # pods running on hosts other than controller-0 are unreachable at this
    # point, and the calico-node pods would keep trying to connect to them
    # and fail indefinitely.
    kube_component_list: >-
      {{ kube_component_list | reject('search', 'calico-node') | list }}
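
# Illustrative note (not from the original playbook): with the values set
# above, the reject filter leaves kube_component_list as
#   [k8s-app=kube-proxy, app=multus, app=sriov-cni, component=kube-apiserver,
#    component=kube-controller-manager, component=kube-scheduler]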

- name: Get coredns deployment desired replicas
  command: >-
    kubectl --kubeconfig=/etc/kubernetes/admin.conf get deployment
    -n kube-system coredns -o jsonpath={.spec.replicas}
  register: coredns_get_replicas

# We scale these deployments down and back up because, in setups with more
# than 3 nodes, the cluster could be in the PartialDisruption state and the
# pods may not be rescheduled off of a down node. This ensures that the pods
# will be on controller-0 and will become available.
- name: Scale calico-kube-controllers & coredns deployments to 0
  command: >-
    kubectl --kubeconfig=/etc/kubernetes/admin.conf scale deployment
    -n {{ item.namespace }} {{ item.deployment }} --replicas=0
  with_items:
    - { namespace: kube-system, deployment: calico-kube-controllers }
    - { namespace: kube-system, deployment: coredns }
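
# Illustrative note (not from the original playbook): the first loop
# iteration above expands to
#   kubectl --kubeconfig=/etc/kubernetes/admin.conf scale deployment
#     -n kube-system calico-kube-controllers --replicas=0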

- name: Scale calico-kube-controllers deployment back to 1
  command: >-
    kubectl --kubeconfig=/etc/kubernetes/admin.conf scale deployment
    -n {{ item.namespace }} {{ item.deployment }} --replicas=1
  with_items:
    - { namespace: kube-system, deployment: calico-kube-controllers }

- name: Scale coredns deployment back to original size
  command: >-
    kubectl --kubeconfig=/etc/kubernetes/admin.conf scale deployment
    -n kube-system coredns --replicas={{ coredns_get_replicas.stdout }}

- name: Override async parameters
  set_fact:
    async_timeout: 120
    async_retries: 40
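
# Note (added for clarity, not from the original playbook): the two wait
# tasks below run with async and poll: 0, so each kubectl wait is launched
# in the background and Ansible continues immediately. Their results are
# collected later by the async_status loop in "Get wait tasks results",
# which lets all readiness checks run in parallel rather than one at a time.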
- name: Start parallel tasks to wait for Kubernetes component and Networking pods to reach ready state
  # Only check for pods on the current host to avoid waiting for pods on downed nodes.
  # This speeds up "Get wait tasks results" on multi-node systems.
  command: >-
    kubectl --kubeconfig=/etc/kubernetes/admin.conf wait --namespace=kube-system
    --for=condition=Ready pods --selector {{ item }} --field-selector spec.nodeName=controller-0
    --timeout={{ async_timeout }}s
  async: "{{ async_timeout }}"
  poll: 0
  with_items: "{{ kube_component_list }}"
  register: wait_for_kube_system_pods

- name: Start wait for calico-kube-controllers & coredns deployments to reach Available state
  # Check the deployment status rather than the pod status in case some pods are down on other nodes.
  command: >-
    kubectl --kubeconfig=/etc/kubernetes/admin.conf wait --namespace={{ item.namespace }}
    --for=condition=Available deployment {{ item.deployment }} --timeout={{ async_timeout }}s
  async: "{{ async_timeout }}"
  poll: 0
  with_items:
    - { namespace: kube-system, deployment: calico-kube-controllers }
    - { namespace: kube-system, deployment: coredns }
  register: wait_for_deployments

- name: Get wait tasks results
  async_status:
    jid: "{{ item.ansible_job_id }}"
  register: wait_job_result
  until: wait_job_result.finished
  # The total retry window should be 2x the async_timeout,
  # i.e. async_retries = async_timeout * 2 / delay
  retries: "{{ async_retries }}"
  delay: "{{ async_timeout * 2 // async_retries }}"
  failed_when: false
  with_items:
    - "{{ wait_for_kube_system_pods.results }}"
    - "{{ wait_for_deployments.results }}"

- name: Fail if any of the Kubernetes component or Networking pods is not ready by this time
  fail:
    msg: "Pod {{ item.item.item }} is still not ready."
  when: item.stdout is not search(" condition met")
  with_items: "{{ wait_job_result.results }}"

# We need to restart the sysinv conductor here because it seems to be caching
# old data; this prevents it from interacting with a fresh kubernetes cluster.
# For example, if the user changes the OAM network and forces the kubernetes
# cluster to be torn down