From fe2608d00a63f2b874e5607d7a87403d8fae9cfe Mon Sep 17 00:00:00 2001
From: Sergii Golovatiuk
Date: Wed, 27 Jun 2018 19:00:52 +0200
Subject: [PATCH] Poweroff server after 10 tries

Some faulty iPXE FW cannot load kernel or initrd file causing DoS on
http server (especially when environment has hundreds of faulty nodes).
This patch changes the behavior to poweroff the hardware node after 10
unsuccessful retries to get kernel or initrd over http.

Story: #2002928
Task: #22915
Change-Id: Iec4650499c51c4c7dac38c279728d294ed3434b6
Co-Authored-By: Julia Kreger
---
 ironic/drivers/modules/ipxe_config.template   | 22 ++++++++++++++++---
 .../tests/unit/drivers/ipxe_config.template   | 20 +++++++++++++++--
 ...fig_boot_from_volume_extra_volume.template | 20 +++++++++++++++--
 ...boot_from_volume_no_extra_volumes.template | 20 +++++++++++++++--
 .../unit/drivers/ipxe_config_timeout.template | 20 +++++++++++++++--
 ...eroff-after-10-tries-c592506f02c167c0.yaml |  9 ++++++++
 6 files changed, 100 insertions(+), 11 deletions(-)
 create mode 100644 releasenotes/notes/poweroff-after-10-tries-c592506f02c167c0.yaml

diff --git a/ironic/drivers/modules/ipxe_config.template b/ironic/drivers/modules/ipxe_config.template
index 375febdf4f..0f530244ac 100644
--- a/ironic/drivers/modules/ipxe_config.template
+++ b/ironic/drivers/modules/ipxe_config.template
@@ -1,14 +1,30 @@
 #!ipxe
 
+set attempts:int32 10
+set i:int32 0
+
 goto deploy
 
 :deploy
 imgfree
-kernel {% if pxe_options.ipxe_timeout > 0 %}--timeout {{ pxe_options.ipxe_timeout }} {% endif %}{{ pxe_options.deployment_aki_path }} selinux=0 troubleshoot=0 text {{ pxe_options.pxe_append_params|default("", true) }} BOOTIF=${mac} ipa-api-url={{ pxe_options['ipa-api-url'] }} initrd={{ pxe_options.initrd_filename|default("deploy_ramdisk", true) }} coreos.configdrive=0 || goto deploy
+kernel {% if pxe_options.ipxe_timeout > 0 %}--timeout {{ pxe_options.ipxe_timeout }} {% endif %}{{ pxe_options.deployment_aki_path }} selinux=0 troubleshoot=0 text {{ pxe_options.pxe_append_params|default("", true) }} BOOTIF=${mac} ipa-api-url={{ pxe_options['ipa-api-url'] }} initrd={{ pxe_options.initrd_filename|default("deploy_ramdisk", true) }} coreos.configdrive=0 || goto retry
 
-initrd {% if pxe_options.ipxe_timeout > 0 %}--timeout {{ pxe_options.ipxe_timeout }} {% endif %}{{ pxe_options.deployment_ari_path }} || goto deploy
+initrd {% if pxe_options.ipxe_timeout > 0 %}--timeout {{ pxe_options.ipxe_timeout }} {% endif %}{{ pxe_options.deployment_ari_path }} || goto retry
 boot
 
+:retry
+iseq ${i} ${attempts} && goto fail ||
+inc i
+echo No response, retrying in ${i} seconds.
+sleep ${i}
+goto deploy
+
+:fail
+echo Failed to get a response after ${attempts} attempts
+echo Powering off in 30 seconds.
+sleep 30
+poweroff
+
 :boot_partition
 imgfree
 kernel {% if pxe_options.ipxe_timeout > 0 %}--timeout {{ pxe_options.ipxe_timeout }} {% endif %}{{ pxe_options.aki_path }} root={{ ROOT }} ro text {{ pxe_options.pxe_append_params|default("", true) }} initrd=ramdisk || goto boot_partition
@@ -39,4 +55,4 @@ goto boot_iscsi
 {%- endif %}
 
 :boot_whole_disk
-sanboot --no-describe
\ No newline at end of file
+sanboot --no-describe
diff --git a/ironic/tests/unit/drivers/ipxe_config.template b/ironic/tests/unit/drivers/ipxe_config.template
index d02c8dcd47..e0eca334f5 100644
--- a/ironic/tests/unit/drivers/ipxe_config.template
+++ b/ironic/tests/unit/drivers/ipxe_config.template
@@ -1,14 +1,30 @@
 #!ipxe
 
+set attempts:int32 10
+set i:int32 0
+
 goto deploy
 
 :deploy
 imgfree
-kernel http://1.2.3.4:1234/deploy_kernel selinux=0 troubleshoot=0 text test_param BOOTIF=${mac} ipa-api-url=http://192.168.122.184:6385 initrd=deploy_ramdisk coreos.configdrive=0 || goto deploy
+kernel http://1.2.3.4:1234/deploy_kernel selinux=0 troubleshoot=0 text test_param BOOTIF=${mac} ipa-api-url=http://192.168.122.184:6385 initrd=deploy_ramdisk coreos.configdrive=0 || goto retry
 
-initrd http://1.2.3.4:1234/deploy_ramdisk || goto deploy
+initrd http://1.2.3.4:1234/deploy_ramdisk || goto retry
 boot
 
+:retry
+iseq ${i} ${attempts} && goto fail ||
+inc i
+echo No response, retrying in ${i} seconds.
+sleep ${i}
+goto deploy
+
+:fail
+echo Failed to get a response after ${attempts} attempts
+echo Powering off in 30 seconds.
+sleep 30
+poweroff
+
 :boot_partition
 imgfree
 kernel http://1.2.3.4:1234/kernel root={{ ROOT }} ro text test_param initrd=ramdisk || goto boot_partition
diff --git a/ironic/tests/unit/drivers/ipxe_config_boot_from_volume_extra_volume.template b/ironic/tests/unit/drivers/ipxe_config_boot_from_volume_extra_volume.template
index 03e6276505..9b86e04cec 100644
--- a/ironic/tests/unit/drivers/ipxe_config_boot_from_volume_extra_volume.template
+++ b/ironic/tests/unit/drivers/ipxe_config_boot_from_volume_extra_volume.template
@@ -1,14 +1,30 @@
 #!ipxe
 
+set attempts:int32 10
+set i:int32 0
+
 goto deploy
 
 :deploy
 imgfree
-kernel http://1.2.3.4:1234/deploy_kernel selinux=0 troubleshoot=0 text test_param BOOTIF=${mac} ipa-api-url=http://192.168.122.184:6385 initrd=deploy_ramdisk coreos.configdrive=0 || goto deploy
+kernel http://1.2.3.4:1234/deploy_kernel selinux=0 troubleshoot=0 text test_param BOOTIF=${mac} ipa-api-url=http://192.168.122.184:6385 initrd=deploy_ramdisk coreos.configdrive=0 || goto retry
 
-initrd http://1.2.3.4:1234/deploy_ramdisk || goto deploy
+initrd http://1.2.3.4:1234/deploy_ramdisk || goto retry
 boot
 
+:retry
+iseq ${i} ${attempts} && goto fail ||
+inc i
+echo No response, retrying in ${i} seconds.
+sleep ${i}
+goto deploy
+
+:fail
+echo Failed to get a response after ${attempts} attempts
+echo Powering off in 30 seconds.
+sleep 30
+poweroff
+
 :boot_partition
 imgfree
 kernel http://1.2.3.4:1234/kernel root={{ ROOT }} ro text test_param initrd=ramdisk || goto boot_partition
diff --git a/ironic/tests/unit/drivers/ipxe_config_boot_from_volume_no_extra_volumes.template b/ironic/tests/unit/drivers/ipxe_config_boot_from_volume_no_extra_volumes.template
index c9dbab2c98..244eb53c86 100644
--- a/ironic/tests/unit/drivers/ipxe_config_boot_from_volume_no_extra_volumes.template
+++ b/ironic/tests/unit/drivers/ipxe_config_boot_from_volume_no_extra_volumes.template
@@ -1,14 +1,30 @@
 #!ipxe
 
+set attempts:int32 10
+set i:int32 0
+
 goto deploy
 
 :deploy
 imgfree
-kernel http://1.2.3.4:1234/deploy_kernel selinux=0 troubleshoot=0 text test_param BOOTIF=${mac} ipa-api-url=http://192.168.122.184:6385 initrd=deploy_ramdisk coreos.configdrive=0 || goto deploy
+kernel http://1.2.3.4:1234/deploy_kernel selinux=0 troubleshoot=0 text test_param BOOTIF=${mac} ipa-api-url=http://192.168.122.184:6385 initrd=deploy_ramdisk coreos.configdrive=0 || goto retry
 
-initrd http://1.2.3.4:1234/deploy_ramdisk || goto deploy
+initrd http://1.2.3.4:1234/deploy_ramdisk || goto retry
 boot
 
+:retry
+iseq ${i} ${attempts} && goto fail ||
+inc i
+echo No response, retrying in ${i} seconds.
+sleep ${i}
+goto deploy
+
+:fail
+echo Failed to get a response after ${attempts} attempts
+echo Powering off in 30 seconds.
+sleep 30
+poweroff
+
 :boot_partition
 imgfree
 kernel http://1.2.3.4:1234/kernel root={{ ROOT }} ro text test_param initrd=ramdisk || goto boot_partition
diff --git a/ironic/tests/unit/drivers/ipxe_config_timeout.template b/ironic/tests/unit/drivers/ipxe_config_timeout.template
index 8318b480f3..821b66ad3d 100644
--- a/ironic/tests/unit/drivers/ipxe_config_timeout.template
+++ b/ironic/tests/unit/drivers/ipxe_config_timeout.template
@@ -1,14 +1,30 @@
 #!ipxe
 
+set attempts:int32 10
+set i:int32 0
+
 goto deploy
 
 :deploy
 imgfree
-kernel --timeout 120 http://1.2.3.4:1234/deploy_kernel selinux=0 troubleshoot=0 text test_param BOOTIF=${mac} ipa-api-url=http://192.168.122.184:6385 initrd=deploy_ramdisk coreos.configdrive=0 || goto deploy
+kernel --timeout 120 http://1.2.3.4:1234/deploy_kernel selinux=0 troubleshoot=0 text test_param BOOTIF=${mac} ipa-api-url=http://192.168.122.184:6385 initrd=deploy_ramdisk coreos.configdrive=0 || goto retry
 
-initrd --timeout 120 http://1.2.3.4:1234/deploy_ramdisk || goto deploy
+initrd --timeout 120 http://1.2.3.4:1234/deploy_ramdisk || goto retry
 boot
 
+:retry
+iseq ${i} ${attempts} && goto fail ||
+inc i
+echo No response, retrying in ${i} seconds.
+sleep ${i}
+goto deploy
+
+:fail
+echo Failed to get a response after ${attempts} attempts
+echo Powering off in 30 seconds.
+sleep 30
+poweroff
+
 :boot_partition
 imgfree
 kernel --timeout 120 http://1.2.3.4:1234/kernel root={{ ROOT }} ro text test_param initrd=ramdisk || goto boot_partition
diff --git a/releasenotes/notes/poweroff-after-10-tries-c592506f02c167c0.yaml b/releasenotes/notes/poweroff-after-10-tries-c592506f02c167c0.yaml
new file mode 100644
index 0000000000..5512e29eab
--- /dev/null
+++ b/releasenotes/notes/poweroff-after-10-tries-c592506f02c167c0.yaml
@@ -0,0 +1,9 @@
+---
+fixes:
+  - |
+    Changes the iPXE behavior to retry a total of 10 times with an increasing
+    backoff time between each retry in order to not create a Denial of Service
+    situation with the iPXE HTTP server. Should the retries fail, the node will
+    be powered-off after a warning is displayed on the console for 30 seconds.
+    For more information, see
+    `story 2002928 <https://storyboard.openstack.org/#!/story/2002928>`_.