Improve AWS driver performance

The following adjustments improve performance at large scale: * Save the quota object earlier in the state machine. This is still using cached data, but it's not necessary to re-run this each time. * Flatten iterators returned from cached methods. Some of our methods intended to cache heavy list responses were in fact only caching the iterator, and re-iterating would end up re-running the request. This change does two things: it causes the iteration to happen within the rate limit calculator so we have a better idea of how long it actually took, and it causes the actual data to be put in the cache so that we don't re-run the request. * Don't create a second instance object after creating the instance. The create call returns an instance object in the form that we expect already. Avoid creating a second one which incurs another DescribeInstances call. Change-Id: I73bc099b450879917ab923fb7371f8006b113d68
2022-05-04 11:02:08 -07:00 · 2022-05-04 11:02:08 -07:00 · d2ac0de07b
parent 57fba40079
commit d2ac0de07b
2 changed files with 12 additions and 12 deletions
--- a/nodepool/driver/aws/adapter.py
+++ b/nodepool/driver/aws/adapter.py
@ -128,11 +128,11 @@ class AwsCreateStateMachine(statemachine.StateMachine):
            self.instance = self.adapter._createInstance(
                self.label, self.image_external_id,
                self.tags, self.hostname, self.log)
+            self.quota = self.adapter._getQuotaForInstanceType(
+                self.instance.instance_type)
            self.state = self.INSTANCE_CREATING

        if self.state == self.INSTANCE_CREATING:
-            self.quota = self.adapter._getQuotaForInstanceType(
-                self.instance.instance_type)
            self.instance = self.adapter._refresh(self.instance)

            if self.instance.state["Name"].lower() == "running":
@ -448,24 +448,24 @@ class AwsAdapter(statemachine.Adapter):
    def _listInstances(self):
        with self.non_mutating_rate_limiter(
                self.log.debug, "Listed instances"):
-            return self.ec2.instances.all()
+            return list(self.ec2.instances.all())

    @cachetools.func.ttl_cache(maxsize=1, ttl=10)
    def _listVolumes(self):
        with self.non_mutating_rate_limiter:
-            return self.ec2.volumes.all()
+            return list(self.ec2.volumes.all())

    @cachetools.func.ttl_cache(maxsize=1, ttl=10)
    def _listAmis(self):
        # Note: this is overridden in tests due to the filter
        with self.non_mutating_rate_limiter:
-            return self.ec2.images.filter(Owners=['self'])
+            return list(self.ec2.images.filter(Owners=['self']))

    @cachetools.func.ttl_cache(maxsize=1, ttl=10)
    def _listSnapshots(self):
        # Note: this is overridden in tests due to the filter
        with self.non_mutating_rate_limiter:
-            return self.ec2.snapshots.filter(OwnerIds=['self'])
+            return list(self.ec2.snapshots.filter(OwnerIds=['self']))

    @cachetools.func.ttl_cache(maxsize=1, ttl=10)
    def _listObjects(self):
@ -475,7 +475,7 @@ class AwsAdapter(statemachine.Adapter):

        bucket = self.s3.Bucket(bucket_name)
        with self.non_mutating_rate_limiter:
-            return bucket.objects.all()
+            return list(bucket.objects.all())

    def _getLatestImageIdByFilters(self, image_filters):
        # Normally we would decorate this method, but our cache key is
@ -487,9 +487,9 @@ class AwsAdapter(statemachine.Adapter):
            return val

        with self.non_mutating_rate_limiter:
-            res = self.ec2_client.describe_images(
+            res = list(self.ec2_client.describe_images(
                Filters=image_filters
-            ).get("Images")
+            ).get("Images"))

        images = sorted(
            res,
@ -598,7 +598,7 @@ class AwsAdapter(statemachine.Adapter):
            log.debug(f"Creating VM {hostname}")
            instances = self.ec2.create_instances(**args)
            log.debug(f"Created VM {hostname} as instance {instances[0].id}")
-            return self.ec2.Instance(instances[0].id)
+            return instances[0]

    def _deleteInstance(self, external_id, log=None):
        if log is None:
--- a/nodepool/tests/unit/fake_aws.py
+++ b/nodepool/tests/unit/fake_aws.py
@ -128,7 +128,7 @@ class FakeAws:
        raise NotImplementedError()

    def _listAmis(self):
-        return self.ec2.images.filter()
+        return list(self.ec2.images.filter())

    def _listSnapshots(self):
-        return self.ec2.snapshots.filter()
+        return list(self.ec2.snapshots.filter())