Merge "AWS: Use snapshot instead of image import"

Zuul
2022-08-22 17:32:07 +00:00
committed by Gerrit Code Review
5 changed files with 252 additions and 136 deletions

View File

@@ -383,6 +383,19 @@ Selecting the ``aws`` driver adds the following options to the
       long-standing issue with ``ansible_shell_type`` in combination
       with ``become``.
 
+      .. attr:: volume-type
+         :type: str
+         :default: gp2
+
+         The root `EBS volume type`_ for the image.
+
+      .. attr:: volume-size
+         :type: int
+
+         The size of the root EBS volume, in GiB, for the image. If
+         omitted, the volume size reported for the imported snapshot
+         will be used.
+
    .. attr:: pools
       :type: list
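
A sketch of how these two attributes are meant to combine when the image is registered (the helper below is hypothetical; it mirrors the root device mapping the adapter builds later in this change):

    def root_device_mapping(snapshot_id, snapshot_size,
                            volume_size=None, volume_type='gp2'):
        # volume-size falls back to the imported snapshot's size (GiB),
        # and volume-type defaults to gp2, matching the documented
        # defaults above.
        return {
            'DeviceName': '/dev/sda1',
            'Ebs': {
                'DeleteOnTermination': True,
                'SnapshotId': snapshot_id,
                'VolumeSize': volume_size or snapshot_size,
                'VolumeType': volume_type,
            },
        }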

View File

@@ -28,6 +28,7 @@ from nodepool.driver.utils import QuotaInformation, RateLimiter
 from nodepool.driver import statemachine
 
 import boto3
+import botocore.exceptions
 
 
 def tag_dict_to_list(tagdict):
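
For orientation, the two tag helpers used throughout this file convert between plain dicts and the `[{'Key': ..., 'Value': ...}]` list format the EC2 API expects; their definitions sit outside the diff context, but they behave roughly like this sketch:

    def tag_dict_to_list(tagdict):
        # {'a': 'b'} -> [{'Key': 'a', 'Value': 'b'}]
        return [{'Key': k, 'Value': v} for k, v in tagdict.items()]

    def tag_list_to_dict(taglist):
        # [{'Key': 'a', 'Value': 'b'}] -> {'a': 'b'}; resources with no
        # tags report None, so treat that as an empty dict.
        if taglist is None:
            return {}
        return {t['Key']: t['Value'] for t in taglist}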
@@ -277,33 +278,48 @@ class AwsAdapter(statemachine.Adapter):
         return AwsDeleteStateMachine(self, external_id, log)
 
     def listResources(self):
-        self._tagAmis()
         self._tagSnapshots()
+        self._tagAmis()
         for instance in self._listInstances():
-            if instance.state["Name"].lower() == "terminated":
+            try:
+                if instance.state["Name"].lower() == "terminated":
+                    continue
+            except botocore.exceptions.ClientError:
                 continue
             yield AwsResource(tag_list_to_dict(instance.tags),
                               'instance', instance.id)
         for volume in self._listVolumes():
-            if volume.state.lower() == "deleted":
+            try:
+                if volume.state.lower() == "deleted":
+                    continue
+            except botocore.exceptions.ClientError:
                 continue
             yield AwsResource(tag_list_to_dict(volume.tags),
                               'volume', volume.id)
         for ami in self._listAmis():
-            if ami.state.lower() == "deleted":
+            try:
+                if ami.state.lower() == "deleted":
+                    continue
+            except (botocore.exceptions.ClientError, AttributeError):
                 continue
             yield AwsResource(tag_list_to_dict(ami.tags),
                               'ami', ami.id)
         for snap in self._listSnapshots():
-            if snap.state.lower() == "deleted":
+            try:
+                if snap.state.lower() == "deleted":
+                    continue
+            except botocore.exceptions.ClientError:
                 continue
             yield AwsResource(tag_list_to_dict(snap.tags),
                               'snapshot', snap.id)
         if self.provider.object_storage:
             for obj in self._listObjects():
                 with self.non_mutating_rate_limiter:
-                    tags = self.s3_client.get_object_tagging(
-                        Bucket=obj.bucket_name, Key=obj.key)
+                    try:
+                        tags = self.s3_client.get_object_tagging(
+                            Bucket=obj.bucket_name, Key=obj.key)
+                    except botocore.exceptions.ClientError:
+                        continue
                 yield AwsResource(tag_list_to_dict(tags['TagSet']),
                                   'object', obj.key)
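
The new try/except wrappers are there because boto3 resource objects are lazy: attributes such as `state` or `tags` are fetched on first access, and if the underlying resource was deleted between the list call and that access, botocore raises `ClientError` (for AMIs a missing attribute can also surface as `AttributeError`). A minimal illustration of the failure mode, with a hypothetical helper name:

    import boto3
    import botocore.exceptions

    def snapshot_state_or_none(snapshot_id, region='us-west-2'):
        snap = boto3.resource('ec2', region_name=region).Snapshot(snapshot_id)
        try:
            # First attribute access triggers the describe call; a deleted
            # snapshot raises ClientError (InvalidSnapshot.NotFound) here
            # rather than returning a 'deleted' state.
            return snap.state
        except botocore.exceptions.ClientError:
            return None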
@@ -368,36 +384,35 @@ class AwsAdapter(statemachine.Adapter):
             bucket.upload_fileobj(fobj, object_filename,
                                   ExtraArgs=extra_args)
 
-        # Import image as AMI
+        # Import snapshot
         self.log.debug(f"Importing {image_name}")
-        import_image_task = self._import_image(
-            Architecture=provider_image.architecture,
-            DiskContainers=[
-                {
-                    'Format': image_format,
-                    'UserBucket': {
-                        'S3Bucket': bucket_name,
-                        'S3Key': object_filename,
-                    }
-                },
-            ],
-            TagSpecifications=[
-                {
-                    'ResourceType': 'import-image-task',
-                    'Tags': tag_dict_to_list(metadata),
-                },
-            ]
-        )
-        task_id = import_image_task['ImportTaskId']
+        with self.rate_limiter:
+            import_snapshot_task = self._import_snapshot(
+                DiskContainer={
+                    'Format': image_format,
+                    'UserBucket': {
+                        'S3Bucket': bucket_name,
+                        'S3Key': object_filename,
+                    },
+                },
+                TagSpecifications=[
+                    {
+                        'ResourceType': 'import-snapshot-task',
+                        'Tags': tag_dict_to_list(metadata),
+                    },
+                ]
+            )
+        task_id = import_snapshot_task['ImportTaskId']
 
-        paginator = self._get_paginator('describe_import_image_tasks')
+        paginator = self._get_paginator('describe_import_snapshot_tasks')
         done = False
         while not done:
             time.sleep(self.IMAGE_UPLOAD_SLEEP)
             with self.non_mutating_rate_limiter:
                 for page in paginator.paginate(ImportTaskIds=[task_id]):
-                    for task in page['ImportImageTasks']:
-                        if task['Status'].lower() in ('completed', 'deleted'):
+                    for task in page['ImportSnapshotTasks']:
+                        if task['SnapshotTaskDetail']['Status'].lower() in (
+                                'completed', 'deleted'):
                             done = True
                             break
@@ -405,31 +420,54 @@ class AwsAdapter(statemachine.Adapter):
         with self.rate_limiter:
             self.s3.Object(bucket_name, object_filename).delete()
 
-        if task['Status'].lower() != 'completed':
+        if task['SnapshotTaskDetail']['Status'].lower() != 'completed':
             raise Exception(f"Error uploading image: {task}")
 
-        # Tag the AMI
-        try:
-            with self.non_mutating_rate_limiter:
-                ami = self.ec2.Image(task['ImageId'])
-            with self.rate_limiter:
-                ami.create_tags(Tags=task['Tags'])
-        except Exception:
-            self.log.exception("Error tagging AMI:")
-
         # Tag the snapshot
         try:
             with self.non_mutating_rate_limiter:
                 snap = self.ec2.Snapshot(
-                    task['SnapshotDetails'][0]['SnapshotId'])
+                    task['SnapshotTaskDetail']['SnapshotId'])
             with self.rate_limiter:
                 snap.create_tags(Tags=task['Tags'])
         except Exception:
             self.log.exception("Error tagging snapshot:")
 
-        self.log.debug(f"Upload of {image_name} complete as {task['ImageId']}")
+        volume_size = provider_image.volume_size or snap.volume_size
+        # Register the snapshot as an AMI
+        with self.rate_limiter:
+            register_response = self.ec2_client.register_image(
+                Architecture=provider_image.architecture,
+                BlockDeviceMappings=[
+                    {
+                        'DeviceName': '/dev/sda1',
+                        'Ebs': {
+                            'DeleteOnTermination': True,
+                            'SnapshotId': task[
+                                'SnapshotTaskDetail']['SnapshotId'],
+                            'VolumeSize': volume_size,
+                            'VolumeType': provider_image.volume_type,
+                        },
+                    },
+                ],
+                RootDeviceName='/dev/sda1',
+                VirtualizationType='hvm',
+                Name=image_name,
+            )
+
+        # Tag the AMI
+        try:
+            with self.non_mutating_rate_limiter:
+                ami = self.ec2.Image(register_response['ImageId'])
+            with self.rate_limiter:
+                ami.create_tags(Tags=task['Tags'])
+        except Exception:
+            self.log.exception("Error tagging AMI:")
+
+        self.log.debug(f"Upload of {image_name} complete as "
+                       f"{register_response['ImageId']}")
         # Last task returned from paginator above
-        return task['ImageId']
+        return register_response['ImageId']
 
     def deleteImage(self, external_id):
         snaps = set()
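
Taken together, the upload path becomes a three-step sequence: import the file from S3 as an EBS snapshot, poll the import task, then register an AMI backed by that snapshot. A condensed standalone sketch of the flow (bucket, key, sizes, and names here are illustrative placeholders, not values from this change):

    import time

    import boto3

    ec2 = boto3.client('ec2', region_name='us-west-2')

    # 1. Import an uploaded disk file from S3 as an EBS snapshot.
    task = ec2.import_snapshot(
        DiskContainer={
            'Format': 'raw',
            'UserBucket': {'S3Bucket': 'example-bucket',
                           'S3Key': 'image.raw'},
        })
    task_id = task['ImportTaskId']

    # 2. Poll the import task until it finishes.
    while True:
        tasks = ec2.describe_import_snapshot_tasks(ImportTaskIds=[task_id])
        detail = tasks['ImportSnapshotTasks'][0]['SnapshotTaskDetail']
        if detail['Status'].lower() in ('completed', 'deleted'):
            break
        time.sleep(10)

    # 3. Register an AMI whose root device is backed by the new snapshot;
    #    this step is what gives control over root volume size and type.
    ec2.register_image(
        Architecture='x86_64',
        BlockDeviceMappings=[{
            'DeviceName': '/dev/sda1',
            'Ebs': {
                'DeleteOnTermination': True,
                'SnapshotId': detail['SnapshotId'],
                'VolumeSize': 20,  # GiB; the adapter falls back to the
                                   # snapshot's size when not configured
                'VolumeType': 'gp3',
            },
        }],
        RootDeviceName='/dev/sda1',
        VirtualizationType='hvm',
        Name='example-image',
    )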
@@ -449,58 +487,110 @@ class AwsAdapter(statemachine.Adapter):
     def _tagAmis(self):
         # There is no way to tag imported AMIs, so this routine
         # "eventually" tags them.  We look for any AMIs without tags
-        # which correspond to import tasks, and we copy the tags from
-        # those import tasks to the AMI.
+        # and we copy the tags from the associated snapshot import
+        # task.
+        to_examine = []
         for ami in self._listAmis():
-            if (ami.name.startswith('import-ami-') and
-                not ami.tags and
-                ami.id not in self.not_our_images):
-                # This image was imported but has no tags, which means
-                # it's either not a nodepool image, or it's a new one
-                # which doesn't have tags yet.  Copy over any tags
-                # from the import task; otherwise, mark it as an image
-                # we can ignore in future runs.
-                task = self._getImportImageTask(ami.name)
-                tags = tag_list_to_dict(task.get('Tags'))
-                if (tags.get('nodepool_provider_name') == self.provider.name):
-                    # Copy over tags
-                    self.log.debug(
-                        f"Copying tags from import task {ami.name} to AMI")
-                    with self.rate_limiter:
-                        ami.create_tags(Tags=task['Tags'])
-                else:
-                    self.not_our_images.add(ami.id)
+            if ami.id in self.not_our_images:
+                continue
+            try:
+                if ami.tags:
+                    continue
+            except (botocore.exceptions.ClientError, AttributeError):
+                continue
+            # This has no tags, which means it's either not a nodepool
+            # image, or it's a new one which doesn't have tags yet.
+            # Copy over any tags from the snapshot import task,
+            # otherwise, mark it as an image we can ignore in future
+            # runs.
+            if len(ami.block_device_mappings) < 1:
+                self.not_our_images.add(ami.id)
+                continue
+            bdm = ami.block_device_mappings[0]
+            ebs = bdm.get('Ebs')
+            if not ebs:
+                self.not_our_images.add(ami.id)
+                continue
+            snapshot_id = ebs.get('SnapshotId')
+            if not snapshot_id:
+                self.not_our_images.add(ami.id)
+                continue
+            to_examine.append((ami, snapshot_id))
+        if not to_examine:
+            return
+
+        # We have images to examine; get a list of import tasks so
+        # we can copy the tags from the import task that resulted in
+        # this image.
+        task_map = {}
+        for task in self._listImportSnapshotTasks():
+            detail = task['SnapshotTaskDetail']
+            task_snapshot_id = detail.get('SnapshotId')
+            if not task_snapshot_id:
+                continue
+            task_map[task_snapshot_id] = task['Tags']
+
+        for ami, snapshot_id in to_examine:
+            tags = task_map.get(snapshot_id)
+            if not tags:
+                self.not_our_images.add(ami.id)
+                continue
+            metadata = tag_list_to_dict(tags)
+            if (metadata.get('nodepool_provider_name') == self.provider.name):
+                # Copy over tags
+                self.log.debug(
+                    f"Copying tags from import task to image {ami.id}")
+                with self.rate_limiter:
+                    ami.create_tags(Tags=tags)
+            else:
+                self.not_our_images.add(ami.id)
 
     def _tagSnapshots(self):
         # See comments for _tagAmis
+        to_examine = []
         for snap in self._listSnapshots():
-            if ('import-ami-' in snap.description and
-                not snap.tags and
-                snap.id not in self.not_our_snapshots):
+            try:
+                if (snap.id not in self.not_our_snapshots and
+                    not snap.tags):
+                    to_examine.append(snap)
+            except botocore.exceptions.ClientError:
+                # We may have cached a snapshot that doesn't exist
+                continue
 
-                match = re.match(r'.*?(import-ami-\w*)', snap.description)
-                if not match:
-                    self.not_our_snapshots.add(snap.id)
-                    continue
-                task_id = match.group(1)
-                task = self._getImportImageTask(task_id)
-                tags = tag_list_to_dict(task.get('Tags'))
-                if (tags.get('nodepool_provider_name') == self.provider.name):
-                    # Copy over tags
-                    self.log.debug(
-                        f"Copying tags from import task {task_id} to snapshot")
-                    with self.rate_limiter:
-                        snap.create_tags(Tags=task['Tags'])
-                else:
-                    self.not_our_snapshots.add(snap.id)
+        if not to_examine:
+            return
 
-    def _getImportImageTask(self, task_id):
-        paginator = self._get_paginator('describe_import_image_tasks')
+        # We have snapshots to examine; get a list of import tasks so
+        # we can copy the tags from the import task that resulted in
+        # this snapshot.
+        task_map = {}
+        for task in self._listImportSnapshotTasks():
+            detail = task['SnapshotTaskDetail']
+            task_snapshot_id = detail.get('SnapshotId')
+            if not task_snapshot_id:
+                continue
+            task_map[task_snapshot_id] = task['Tags']
+
+        for snap in to_examine:
+            tags = task_map.get(snap.id)
+            if not tags:
+                self.not_our_snapshots.add(snap.id)
+                continue
+            metadata = tag_list_to_dict(tags)
+            if (metadata.get('nodepool_provider_name') == self.provider.name):
+                # Copy over tags
+                self.log.debug(
+                    f"Copying tags from import task to snapshot {snap.id}")
+                with self.rate_limiter:
+                    snap.create_tags(Tags=tags)
+            else:
+                self.not_our_snapshots.add(snap.id)
+
+    def _listImportSnapshotTasks(self):
+        paginator = self._get_paginator('describe_import_snapshot_tasks')
         with self.non_mutating_rate_limiter:
-            for page in paginator.paginate(ImportTaskIds=[task_id]):
-                for task in page['ImportImageTasks']:
-                    # Return the first and only task
-                    return task
+            for page in paginator.paginate():
+                for task in page['ImportSnapshotTasks']:
+                    yield task
 
     instance_key_re = re.compile(r'([a-z\-]+)\d.*')
@@ -809,8 +899,8 @@ class AwsAdapter(statemachine.Adapter):
     # These methods allow the tests to patch our use of boto to
     # compensate for missing methods in the boto mocks.
-    def _import_image(self, *args, **kw):
-        return self.ec2_client.import_image(*args, **kw)
+    def _import_snapshot(self, *args, **kw):
+        return self.ec2_client.import_snapshot(*args, **kw)
 
     def _get_paginator(self, *args, **kw):
         return self.ec2_client.get_paginator(*args, **kw)

View File

@@ -101,6 +101,8 @@ class AwsProviderDiskImage(ConfigValue):
                 default_port_mapping.get(self.connection_type, 22))
             self.meta = {}
             self.architecture = image.get('architecture', 'x86_64')
+            self.volume_size = image.get('volume-size', None)
+            self.volume_type = image.get('volume-type', 'gp2')
 
     @property
     def external_name(self):
@@ -117,6 +119,8 @@ class AwsProviderDiskImage(ConfigValue):
             'connection-port': int,
             'python-path': str,
             'shell-type': str,
+            'volume-size': int,
+            'volume-type': str,
         }
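
The effect of the two `image.get` defaults above, in a quick hypothetical illustration:

    image = {'volume-type': 'gp3'}                 # 'volume-size' omitted
    volume_size = image.get('volume-size', None)   # None: use snapshot size
    volume_type = image.get('volume-type', 'gp2')  # 'gp3'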

View File

@@ -22,34 +22,28 @@ import boto3
 def make_stage_1(task_id, user_bucket, tags):
     return {
         'Architecture': 'x86_64',
-        'ImportTaskId': f'import-ami-{task_id}',
+        'ImportTaskId': f'import-snap-{task_id}',
         'Progress': '19',
-        'SnapshotDetails': [{'DiskImageSize': 355024384.0,
-                             'Format': 'VMDK',
-                             'Status': 'active',
-                             'UserBucket': user_bucket}],
+        'SnapshotTaskDetail': {'DiskImageSize': 355024384.0,
+                               'Format': 'VMDK',
+                               'Status': 'active',
+                               'UserBucket': user_bucket},
         'Status': 'active',
         'StatusMessage': 'converting',
         'Tags': tags,
     }
 
 
-def make_stage_2(task_id, image_id, snap_id, task):
+def make_stage_2(task_id, snap_id, task):
     # Make a unique snapshot id that's different than the task id.
     return {
-        'Architecture': 'x86_64',
-        'BootMode': 'legacy_bios',
-        'ImageId': image_id,
-        'ImportTaskId': f'import-ami-{task_id}',
-        'LicenseType': 'BYOL',
-        'Platform': 'Linux',
-        'SnapshotDetails': [{'DeviceName': '/dev/sda1',
-                             'DiskImageSize': 355024384.0,
-                             'Format': 'VMDK',
-                             'SnapshotId': snap_id,
-                             'Status': 'completed',
-                             'UserBucket':
-                             task['SnapshotDetails'][0]['UserBucket']}],
+        'ImportTaskId': f'import-snap-{task_id}',
+        'SnapshotTaskDetail': {'DiskImageSize': 355024384.0,
+                               'Format': 'VMDK',
+                               'SnapshotId': snap_id,
+                               'Status': 'completed',
+                               'UserBucket':
+                               task['SnapshotTaskDetail']['UserBucket']},
         'Status': 'completed',
         'Tags': task['Tags'],
     }
@@ -67,12 +61,12 @@ class ImportTaskPaginator:
             tasks = [t for t in tasks
                      if t['ImportTaskId'] in kw['ImportTaskIds']]
 
         # A page of tasks
-        ret = [{'ImportImageTasks': tasks}]
+        ret = [{'ImportSnapshotTasks': tasks}]
 
         # Move the task along
         for task in tasks:
             if task['Status'] != 'completed':
-                self.fake.finish_import_image(task)
+                self.fake.finish_import_snapshot(task)
 
         return ret
@@ -84,46 +78,39 @@ class FakeAws:
         self.ec2 = boto3.resource('ec2', region_name='us-west-2')
         self.ec2_client = boto3.client('ec2', region_name='us-west-2')
 
-    def import_image(self, *args, **kw):
+    def import_snapshot(self, *args, **kw):
         task_id = uuid.uuid4().hex
         task = make_stage_1(
             task_id,
-            kw['DiskContainers'][0]['UserBucket'],
+            kw['DiskContainer']['UserBucket'],
             kw['TagSpecifications'][0]['Tags'])
         self.tasks[task_id] = task
         return task
 
-    def finish_import_image(self, task):
+    def finish_import_snapshot(self, task):
         task_id = task['ImportTaskId'].split('-')[-1]
 
-        # Make an AMI to simulate the import finishing
-        reservation = self.ec2_client.run_instances(
-            ImageId="ami-12c6146b", MinCount=1, MaxCount=1)
-        instance = reservation["Instances"][0]
-        instance_id = instance["InstanceId"]
-
-        response = self.ec2_client.create_image(
-            InstanceId=instance_id,
-            Name=f'import-ami-{task_id}',
-        )
-        image_id = response["ImageId"]
-        self.ec2_client.describe_images(ImageIds=[image_id])["Images"][0]
-
+        # Make a Volume to simulate the import finishing
         volume = self.ec2_client.create_volume(
             Size=80,
             AvailabilityZone='us-west-2')
         snap_id = self.ec2_client.create_snapshot(
             VolumeId=volume['VolumeId'],
-            Description=f'imported volume import-ami-{task_id}',
         )["SnapshotId"]
 
-        t2 = make_stage_2(task_id, image_id, snap_id, task)
+        t2 = make_stage_2(task_id, snap_id, task)
         self.tasks[task_id] = t2
 
-        return (image_id, snap_id)
+        return snap_id
+
+    def change_snapshot_id(self, task, snapshot_id):
+        # Given a task, update its snapshot id; the moto
+        # register_image mock doesn't honor the snapshot_id we pass
+        # in.
+        task_id = task['ImportTaskId'].split('-')[-1]
+        self.tasks[task_id]['SnapshotTaskDetail']['SnapshotId'] = snapshot_id
 
     def get_paginator(self, name):
-        if name == 'describe_import_image_tasks':
+        if name == 'describe_import_snapshot_tasks':
             return ImportTaskPaginator(self)
         raise NotImplementedError()
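
A sketch of how a test can drive the fake through both stages (this assumes a FakeAws instance constructed under active moto EC2 mocks, as in the driver test setUp, since `finish_import_snapshot` creates a fake volume and snapshot):

    fake = FakeAws()
    task = fake.import_snapshot(
        DiskContainer={'Format': 'raw',
                       'UserBucket': {'S3Bucket': 'nodepool',
                                      'S3Key': 'img'}},
        TagSpecifications=[{'ResourceType': 'import-snapshot-task',
                            'Tags': [{'Key': 'k', 'Value': 'v'}]}])
    paginator = fake.get_paginator('describe_import_snapshot_tasks')
    # The first paginate() reports the stage-1 task as 'active' and then
    # advances it; a later call reports it 'completed' with a SnapshotId.
    for page in paginator.paginate(ImportTaskIds=[task['ImportTaskId']]):
        print(page['ImportSnapshotTasks'][0]['Status'])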

View File

@@ -103,6 +103,9 @@ class TestDriverAws(tests.DBTestCase):
             Description='Zuul Nodes')
         self.security_group_id = self.security_group['GroupId']
         self.patch(nodepool.driver.statemachine, 'nodescan', fake_nodescan)
+        self.patch(AwsAdapter, '_import_snapshot',
+                   self.fake_aws.import_snapshot)
+        self.patch(AwsAdapter, '_get_paginator', self.fake_aws.get_paginator)
 
     def tearDown(self):
         self.mock_ec2.stop()
@@ -526,8 +529,6 @@ class TestDriverAws(tests.DBTestCase):
         self.assertTrue(response['EbsOptimized']['Value'])
 
     def test_aws_diskimage(self):
-        self.patch(AwsAdapter, '_import_image', self.fake_aws.import_image)
-        self.patch(AwsAdapter, '_get_paginator', self.fake_aws.get_paginator)
         configfile = self.setup_config('aws/diskimage.yaml')
         self.useBuilder(configfile)
@@ -558,8 +559,6 @@ class TestDriverAws(tests.DBTestCase):
                          {'key1': 'value1', 'key2': 'value2'})
 
     def test_aws_diskimage_removal(self):
-        self.patch(AwsAdapter, '_import_image', self.fake_aws.import_image)
-        self.patch(AwsAdapter, '_get_paginator', self.fake_aws.get_paginator)
         configfile = self.setup_config('aws/diskimage.yaml')
         self.useBuilder(configfile)
         self.waitForImage('ec2-us-west-2', 'fake-image')
@@ -601,19 +600,42 @@ class TestDriverAws(tests.DBTestCase):
         )
         instance_id = reservation['Instances'][0]['InstanceId']
 
-        task = self.fake_aws.import_image(
-            DiskContainers=[{
+        task = self.fake_aws.import_snapshot(
+            DiskContainer={
                 'Format': 'ova',
                 'UserBucket': {
                     'S3Bucket': 'nodepool',
                     'S3Key': 'testfile',
                 }
-            }],
+            },
             TagSpecifications=[{
-                'ResourceType': 'import-image-task',
+                'ResourceType': 'import-snapshot-task',
                 'Tags': image_tags,
             }])
-        image_id, snapshot_id = self.fake_aws.finish_import_image(task)
+        snapshot_id = self.fake_aws.finish_import_snapshot(task)
+
+        register_response = self.ec2_client.register_image(
+            Architecture='amd64',
+            BlockDeviceMappings=[
+                {
+                    'DeviceName': '/dev/sda1',
+                    'Ebs': {
+                        'DeleteOnTermination': True,
+                        'SnapshotId': snapshot_id,
+                        'VolumeSize': 20,
+                        'VolumeType': 'gp2',
+                    },
+                },
+            ],
+            RootDeviceName='/dev/sda1',
+            VirtualizationType='hvm',
+            Name='testimage',
+        )
+        image_id = register_response['ImageId']
+
+        ami = self.ec2.Image(image_id)
+        new_snapshot_id = ami.block_device_mappings[0]['Ebs']['SnapshotId']
+        self.fake_aws.change_snapshot_id(task, new_snapshot_id)
 
         # Note that the resulting image and snapshot do not have tags
         # applied, so we test the automatic retagging methods in the
@@ -684,7 +706,7 @@ class TestDriverAws(tests.DBTestCase):
                 break
 
         for _ in iterate_timeout(30, Exception, 'snapshot deletion'):
-            snap = self.ec2.Snapshot(snapshot_id)
+            snap = self.ec2.Snapshot(new_snapshot_id)
             try:
                 if snap.state == 'deleted':
                     break