From e09350a1cef1e710b90d1179878c1e92712dea67 Mon Sep 17 00:00:00 2001 From: "James E. Blair" Date: Mon, 15 Mar 2021 16:28:15 -0700 Subject: [PATCH] Azure: implement launch retries Change-Id: Ic1cefe47e141815c89313bd40842ba1c52180426 --- nodepool/driver/azurestate/adapter.py | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/nodepool/driver/azurestate/adapter.py b/nodepool/driver/azurestate/adapter.py index e0563eda1..05931b980 100644 --- a/nodepool/driver/azurestate/adapter.py +++ b/nodepool/driver/azurestate/adapter.py @@ -139,6 +139,7 @@ class AzureCreateStateMachine(statemachine.StateMachine): PIP_CREATING = 'creating pip' NIC_CREATING = 'creating nic' VM_CREATING = 'creating vm' + VM_RETRY = 'retrying vm creation' NIC_QUERY = 'querying nic' PIP_QUERY = 'querying pip' COMPLETE = 'complete' @@ -147,6 +148,7 @@ class AzureCreateStateMachine(statemachine.StateMachine): super().__init__() self.adapter = adapter self.retries = retries + self.attempts = 0 self.metadata = metadata self.tags = label.tags.copy() or {} self.tags.update(metadata) @@ -214,17 +216,24 @@ class AzureCreateStateMachine(statemachine.StateMachine): if self.state == self.VM_CREATING: self.vm = self.adapter._refresh(self.vm) - # if 404: - # increment retries - # state = self.NIC_CREATING - # if error: - # if retries too big: raise error - # delete vm if self.adapter._succeeded(self.vm): self.state = self.NIC_QUERY + elif self.adapter._failed(self.vm): + if self.attempts >= self.retries: + raise Exception("Too many retries") + self.attempts += 1 + self.vm = self.adapter._deleteVirtualMachine( + self.external_id) + self.state = self.VM_RETRY else: return + if self.state == self.VM_RETRY: + self.vm = self.adapter._refresh_delete(self.vm) + if self.vm is None: + self.state = self.NIC_CREATING + return + if self.state == self.NIC_QUERY: self.nic = self.adapter._refresh(self.nic, force=True) all_found = True @@ -348,6 +357,10 @@ class AzureAdapter(statemachine.Adapter): def _succeeded(obj): return obj['properties']['provisioningState'] == 'Succeeded' + @staticmethod + def _failed(obj): + return obj['properties']['provisioningState'] == 'Failed' + def _refresh(self, obj, force=False): if self._succeeded(obj) and not force: return obj