Add cert renewal in enrollment init timeout
In some cases, the runtime configuration that is supposed to install the SSL certificate containing the new OAM IP in /etc/ssl/private/server-cert.pem does not take effect during enrollment, when the REST API/GUI certificate is updated. This change adds a remediation for the enrollment failure when this happens, triggering another renewal of the REST API/GUI certificate. It also increases the retries before trying to renew the cert from 30 to 45, and the timeout before the sysinv API is ready after the first reboot from 900s to 1020s. Test plan: PASS: Verify that renewal tasks are triggered when the REST API/GUI cert is not valid (curl command fails). Verify that the failure message is shown when the certificate doesn't exist. PASS: Enroll system as subcloud. Closes-bug: 2091437 Change-Id: Ibd549cc8c2d0f07db4ac6d7889803e806a6bb7a2 Signed-off-by: Marcelo de Castro Loebens <Marcelo.DeCastroLoebens@windriver.com>
This commit is contained in:
parent
9c9f94c6f5
commit
b3530f1844
@ -19,7 +19,7 @@
|
|||||||
# Connection and timing facts for the enroll-init wait logic. Each fact keeps a
# caller-supplied value when one is set and otherwise falls back to the default.
# Old and new sides of the diff are interleaved below; the only value that
# differs between them is the wait_for_timeout default (900 -> 1020).
- set_fact:
|
- set_fact:
|
||||||
sysinv_port: "{{ sysinv_port | default(6385) }}"
|
sysinv_port: "{{ sysinv_port | default(6385) }}"
|
||||||
boot_wait_time: "{{ enroll_boot_wait_time | default(150) }}"
|
boot_wait_time: "{{ enroll_boot_wait_time | default(150) }}"
|
||||||
wait_for_timeout: "{{ enroll_wait_for_timeout | default(900) }}"
|
wait_for_timeout: "{{ enroll_wait_for_timeout | default(1020) }}"
|
||||||
job_retry_delay: "{{ 120 | random }}"
|
job_retry_delay: "{{ 120 | random }}"
|
||||||
protocol: "{{ protocol | default('https') }}"
|
protocol: "{{ protocol | default('https') }}"
|
||||||
operation_string: "enroll-init"
|
operation_string: "enroll-init"
|
||||||
@ -57,17 +57,75 @@
|
|||||||
# Although the endpoints are reconfigured by now, the API and certs may not
|
# Although the endpoints are reconfigured by now, the API and certs may not
|
||||||
# be fully updated. A simple curl request can be used to verify both, specifically
|
# be fully updated. A simple curl request can be used to verify both, specifically
|
||||||
# checking the region_id API, which will be needed shortly after this playbook completes.
|
# checking the region_id API, which will be needed shortly after this playbook completes.
|
||||||
|
# Build the region_id URL on the reconfigured OAM address once, so that every
# curl check that follows references the same endpoint via sysinv_check_endpoint.
- set_fact:
|
||||||
|
sysinv_check_endpoint: >-
|
||||||
|
{{ protocol }}://{{ enroll_reconfigured_oam | ipwrap }}:{{ sysinv_port }}/v1/isystems/region_id
|
||||||
|
|
||||||
# Poll the region_id endpoint from localhost until curl reports HTTP 200,
# retrying every 20 seconds (30 attempts on the old side, 45 on the new side).
# curl is run without -k here, so an invalid certificate also keeps the check
# from returning 200. On the new side, failed_when: false stops retry exhaustion
# from failing the task; the result stays in api_response for later tasks.
- name: Wait for the sysinv API to be ready and for certs to be updated for the reconfigured OAM endpoint
|
- name: Wait for the sysinv API to be ready and for certs to be updated for the reconfigured OAM endpoint
|
||||||
shell: |
|
shell: |
|
||||||
curl -s -o /dev/null -w '%{http_code}' \
|
curl -s -o /dev/null -w '%{http_code}' {{ sysinv_check_endpoint }}
|
||||||
{{ protocol }}://{{ enroll_reconfigured_oam | ipwrap }}:{{ sysinv_port }}/v1/isystems/region_id
|
|
||||||
register: api_response
|
register: api_response
|
||||||
retries: 30
|
retries: 45
|
||||||
delay: 20
|
delay: 20
|
||||||
until: api_response.stdout == "200"
|
until: api_response.stdout == "200"
|
||||||
delegate_to: localhost
|
delegate_to: localhost
|
||||||
|
failed_when: false
|
||||||
args:
|
args:
|
||||||
# Disable warning that suggests using the get_url and uri module:
|
# Disable warning that suggests using the get_url and uri module:
|
||||||
# - get_url is unnecessary as we're not actually downloading.
|
# - get_url is unnecessary as we're not actually downloading.
|
||||||
# - uri module doesn't seem to work for our cert update check
|
# - uri module doesn't seem to work for our cert update check
|
||||||
warn: false
|
warn: false
|
||||||
|
|
||||||
|
# Remediation path: runs only when the verified curl check above did not end
# with HTTP 200 (see the block-level `when` at the bottom). It confirms the API
# is reachable when certificate verification is skipped, makes sure the
# cert-manager Certificate exists, forces a re-issue by deleting its secret,
# and then repeats the verified check.
- name: Check and retry certificate renewal upon timeout
  block:
    # -k skips certificate verification. A 200 here, after the verified check
    # failed, points at the certificate rather than at the API or the network.
    - name: Perform insecure sysinv API check to confirm invalid cert
      shell: |
        curl -k -s -o /dev/null -w '%{http_code}' {{ sysinv_check_endpoint }}
      register: insecure_api_response
      delegate_to: localhost
      # Never fail here; the next task reports the problem with a clear message.
      failed_when: false
      args:
        warn: false

    - name: Abort on failed insecure sysinv endpoint request
      fail:
        msg: >-
          Requests to sysinv API through the OAM network are not succeeding. Check the
          subcloud logs (cloud-init-output.log) and for errors in the network
          reconfiguration and reattempt.
      when: insecure_api_response.rc != 0 or insecure_api_response.stdout != "200"

    - name: Check the existence of the k8s Certificate
      command: kubectl get certificate -n deployment system-restapi-gui-certificate
      environment:
        KUBECONFIG: "/etc/kubernetes/admin.conf"
      register: cert_get_result
      # A non-zero rc must not fail this task: without this, the play aborts
      # with the raw kubectl error and the descriptive failure below is never
      # reached.
      failed_when: false

    - name: Fail if REST/API GUI K8s Certificate doesn't exist
      fail:
        msg: >-
          REST/API GUI certificate is not managed by cert-manager. The procedure to
          update platform certificates (previously known as cert-manager migration)
          should be followed to create the required resources.
      when: cert_get_result.rc != 0

    # Deleting the secret is what triggers the re-issue (per the commit's
    # intent); the wait then blocks until the Certificate reports Ready or
    # 90s elapse.
    - name: Renew Rest API/GUI certificate
      shell: |
        kubectl delete secret -n deployment system-restapi-gui-certificate
        kubectl wait certificate -n deployment system-restapi-gui-certificate \
          --for=condition=Ready --timeout=90s
      environment:
        KUBECONFIG: "/etc/kubernetes/admin.conf"

    # Same verified check as before the block, with a shorter retry budget.
    # No failed_when override: if this still fails, the play fails.
    - name: Retry waiting for sysinv API and REST API/GUI certificate to be updated for the new OAM endpoint
      shell: |
        curl -s -o /dev/null -w '%{http_code}' {{ sysinv_check_endpoint }}
      register: api_response
      retries: 15
      delay: 20
      until: api_response.stdout == "200"
      delegate_to: localhost
      args:
        warn: false
  when:
    - api_response.rc != 0 or api_response.stdout != "200"
|
||||||
|
Loading…
Reference in New Issue
Block a user