Merge "AWS: Use snapshot instead of image import"

Zuul
2022-08-22 17:32:07 +00:00
committed by Gerrit Code Review
5 changed files with 252 additions and 136 deletions

View File

@@ -383,6 +383,19 @@ Selecting the ``aws`` driver adds the following options to the
       long-standing issue with ``ansible_shell_type`` in combination
       with ``become``.
 
+      .. attr:: volume-type
+         :type: str
+         :default: gp2
+
+         The root `EBS volume type`_ for the image.
+
+      .. attr:: volume-size
+         :type: int
+
+         The size of the root EBS volume, in GiB, for the image. If
+         omitted, the volume size reported for the imported snapshot
+         will be used.
+
    .. attr:: pools
       :type: list
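
A sketch of how these two attributes are meant to combine when the image is registered (the helper below is hypothetical; it mirrors the root device mapping the adapter builds later in this change):

    def root_device_mapping(snapshot_id, snapshot_size,
                            volume_size=None, volume_type='gp2'):
        # volume-size falls back to the imported snapshot's size (GiB),
        # and volume-type defaults to gp2, matching the documented
        # defaults above.
        return {
            'DeviceName': '/dev/sda1',
            'Ebs': {
                'DeleteOnTermination': True,
                'SnapshotId': snapshot_id,
                'VolumeSize': volume_size or snapshot_size,
                'VolumeType': volume_type,
            },
        }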

View File

@@ -28,6 +28,7 @@ from nodepool.driver.utils import QuotaInformation, RateLimiter
 from nodepool.driver import statemachine
 
 import boto3
+import botocore.exceptions
 
 
 def tag_dict_to_list(tagdict):
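
For orientation, the two tag helpers used throughout this file convert between plain dicts and the `[{'Key': ..., 'Value': ...}]` list format the EC2 API expects; their definitions sit outside the diff context, but they behave roughly like this sketch:

    def tag_dict_to_list(tagdict):
        # {'a': 'b'} -> [{'Key': 'a', 'Value': 'b'}]
        return [{'Key': k, 'Value': v} for k, v in tagdict.items()]

    def tag_list_to_dict(taglist):
        # [{'Key': 'a', 'Value': 'b'}] -> {'a': 'b'}; resources with no
        # tags report None, so treat that as an empty dict.
        if taglist is None:
            return {}
        return {t['Key']: t['Value'] for t in taglist}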
@@ -277,33 +278,48 @@ class AwsAdapter(statemachine.Adapter):
         return AwsDeleteStateMachine(self, external_id, log)
 
     def listResources(self):
-        self._tagAmis()
         self._tagSnapshots()
+        self._tagAmis()
         for instance in self._listInstances():
-            if instance.state["Name"].lower() == "terminated":
+            try:
+                if instance.state["Name"].lower() == "terminated":
+                    continue
+            except botocore.exceptions.ClientError:
                 continue
             yield AwsResource(tag_list_to_dict(instance.tags),
                               'instance', instance.id)
         for volume in self._listVolumes():
-            if volume.state.lower() == "deleted":
+            try:
+                if volume.state.lower() == "deleted":
+                    continue
+            except botocore.exceptions.ClientError:
                 continue
             yield AwsResource(tag_list_to_dict(volume.tags),
                               'volume', volume.id)
         for ami in self._listAmis():
-            if ami.state.lower() == "deleted":
+            try:
+                if ami.state.lower() == "deleted":
+                    continue
+            except (botocore.exceptions.ClientError, AttributeError):
                 continue
             yield AwsResource(tag_list_to_dict(ami.tags),
                               'ami', ami.id)
         for snap in self._listSnapshots():
-            if snap.state.lower() == "deleted":
+            try:
+                if snap.state.lower() == "deleted":
+                    continue
+            except botocore.exceptions.ClientError:
                 continue
             yield AwsResource(tag_list_to_dict(snap.tags),
                               'snapshot', snap.id)
         if self.provider.object_storage:
             for obj in self._listObjects():
                 with self.non_mutating_rate_limiter:
-                    tags = self.s3_client.get_object_tagging(
-                        Bucket=obj.bucket_name, Key=obj.key)
+                    try:
+                        tags = self.s3_client.get_object_tagging(
+                            Bucket=obj.bucket_name, Key=obj.key)
+                    except botocore.exceptions.ClientError:
+                        continue
                 yield AwsResource(tag_list_to_dict(tags['TagSet']),
                                   'object', obj.key)
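
The new try/except wrappers are there because boto3 resource objects are lazy: attributes such as `state` or `tags` are fetched on first access, and if the underlying resource was deleted between the list call and that access, botocore raises `ClientError` (for AMIs a missing attribute can also surface as `AttributeError`). A minimal illustration of the failure mode, with a hypothetical helper name:

    import boto3
    import botocore.exceptions

    def snapshot_state_or_none(snapshot_id, region='us-west-2'):
        snap = boto3.resource('ec2', region_name=region).Snapshot(snapshot_id)
        try:
            # First attribute access triggers the describe call; a deleted
            # snapshot raises ClientError (InvalidSnapshot.NotFound) here
            # rather than returning a 'deleted' state.
            return snap.state
        except botocore.exceptions.ClientError:
            return None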
@@ -368,36 +384,35 @@ class AwsAdapter(statemachine.Adapter):
             bucket.upload_fileobj(fobj, object_filename,
                                   ExtraArgs=extra_args)
 
-        # Import image as AMI
+        # Import snapshot
         self.log.debug(f"Importing {image_name}")
-        import_image_task = self._import_image(
-            Architecture=provider_image.architecture,
-            DiskContainers=[
-                {
-                    'Format': image_format,
-                    'UserBucket': {
-                        'S3Bucket': bucket_name,
-                        'S3Key': object_filename,
-                    }
-                },
-            ],
-            TagSpecifications=[
-                {
-                    'ResourceType': 'import-image-task',
-                    'Tags': tag_dict_to_list(metadata),
-                },
-            ]
-        )
-        task_id = import_image_task['ImportTaskId']
+        with self.rate_limiter:
+            import_snapshot_task = self._import_snapshot(
+                DiskContainer={
+                    'Format': image_format,
+                    'UserBucket': {
+                        'S3Bucket': bucket_name,
+                        'S3Key': object_filename,
+                    },
+                },
+                TagSpecifications=[
+                    {
+                        'ResourceType': 'import-snapshot-task',
+                        'Tags': tag_dict_to_list(metadata),
+                    },
+                ]
+            )
+        task_id = import_snapshot_task['ImportTaskId']
 
-        paginator = self._get_paginator('describe_import_image_tasks')
+        paginator = self._get_paginator('describe_import_snapshot_tasks')
         done = False
         while not done:
             time.sleep(self.IMAGE_UPLOAD_SLEEP)
             with self.non_mutating_rate_limiter:
                 for page in paginator.paginate(ImportTaskIds=[task_id]):
-                    for task in page['ImportImageTasks']:
-                        if task['Status'].lower() in ('completed', 'deleted'):
+                    for task in page['ImportSnapshotTasks']:
+                        if task['SnapshotTaskDetail']['Status'].lower() in (
+                                'completed', 'deleted'):
                             done = True
                             break
@@ -405,31 +420,54 @@ class AwsAdapter(statemachine.Adapter):
         with self.rate_limiter:
             self.s3.Object(bucket_name, object_filename).delete()
 
-        if task['Status'].lower() != 'completed':
+        if task['SnapshotTaskDetail']['Status'].lower() != 'completed':
             raise Exception(f"Error uploading image: {task}")
 
-        # Tag the AMI
-        try:
-            with self.non_mutating_rate_limiter:
-                ami = self.ec2.Image(task['ImageId'])
-            with self.rate_limiter:
-                ami.create_tags(Tags=task['Tags'])
-        except Exception:
-            self.log.exception("Error tagging AMI:")
-
         # Tag the snapshot
         try:
             with self.non_mutating_rate_limiter:
                 snap = self.ec2.Snapshot(
-                    task['SnapshotDetails'][0]['SnapshotId'])
+                    task['SnapshotTaskDetail']['SnapshotId'])
             with self.rate_limiter:
                 snap.create_tags(Tags=task['Tags'])
         except Exception:
             self.log.exception("Error tagging snapshot:")
 
-        self.log.debug(f"Upload of {image_name} complete as {task['ImageId']}")
+        volume_size = provider_image.volume_size or snap.volume_size
+        # Register the snapshot as an AMI
+        with self.rate_limiter:
+            register_response = self.ec2_client.register_image(
+                Architecture=provider_image.architecture,
+                BlockDeviceMappings=[
+                    {
+                        'DeviceName': '/dev/sda1',
+                        'Ebs': {
+                            'DeleteOnTermination': True,
+                            'SnapshotId': task[
+                                'SnapshotTaskDetail']['SnapshotId'],
+                            'VolumeSize': volume_size,
+                            'VolumeType': provider_image.volume_type,
+                        },
+                    },
+                ],
+                RootDeviceName='/dev/sda1',
+                VirtualizationType='hvm',
+                Name=image_name,
+            )
+
+        # Tag the AMI
+        try:
+            with self.non_mutating_rate_limiter:
+                ami = self.ec2.Image(register_response['ImageId'])
+            with self.rate_limiter:
+                ami.create_tags(Tags=task['Tags'])
+        except Exception:
+            self.log.exception("Error tagging AMI:")
+
+        self.log.debug(f"Upload of {image_name} complete as "
+                       f"{register_response['ImageId']}")
         # Last task returned from paginator above
-        return task['ImageId']
+        return register_response['ImageId']
 
     def deleteImage(self, external_id):
         snaps = set()
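
Taken together, the upload path becomes a three-step sequence: import the file from S3 as an EBS snapshot, poll the import task, then register an AMI backed by that snapshot. A condensed standalone sketch of the flow (bucket, key, sizes, and names here are illustrative placeholders, not values from this change):

    import time

    import boto3

    ec2 = boto3.client('ec2', region_name='us-west-2')

    # 1. Import an uploaded disk file from S3 as an EBS snapshot.
    task = ec2.import_snapshot(
        DiskContainer={
            'Format': 'raw',
            'UserBucket': {'S3Bucket': 'example-bucket',
                           'S3Key': 'image.raw'},
        })
    task_id = task['ImportTaskId']

    # 2. Poll the import task until it finishes.
    while True:
        tasks = ec2.describe_import_snapshot_tasks(ImportTaskIds=[task_id])
        detail = tasks['ImportSnapshotTasks'][0]['SnapshotTaskDetail']
        if detail['Status'].lower() in ('completed', 'deleted'):
            break
        time.sleep(10)

    # 3. Register an AMI whose root device is backed by the new snapshot;
    #    this step is what gives control over root volume size and type.
    ec2.register_image(
        Architecture='x86_64',
        BlockDeviceMappings=[{
            'DeviceName': '/dev/sda1',
            'Ebs': {
                'DeleteOnTermination': True,
                'SnapshotId': detail['SnapshotId'],
                'VolumeSize': 20,  # GiB; the adapter falls back to the
                                   # snapshot's size when not configured
                'VolumeType': 'gp3',
            },
        }],
        RootDeviceName='/dev/sda1',
        VirtualizationType='hvm',
        Name='example-image',
    )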
@@ -449,58 +487,110 @@ class AwsAdapter(statemachine.Adapter):
     def _tagAmis(self):
         # There is no way to tag imported AMIs, so this routine
         # "eventually" tags them.  We look for any AMIs without tags
-        # which correspond to import tasks, and we copy the tags from
-        # those import tasks to the AMI.
+        # and we copy the tags from the associated snapshot import
+        # task.
+        to_examine = []
         for ami in self._listAmis():
-            if (ami.name.startswith('import-ami-') and
-                not ami.tags and
-                ami.id not in self.not_our_images):
-                # This image was imported but has no tags, which means
-                # it's either not a nodepool image, or it's a new one
-                # which doesn't have tags yet.  Copy over any tags
-                # from the import task; otherwise, mark it as an image
-                # we can ignore in future runs.
-                task = self._getImportImageTask(ami.name)
-                tags = tag_list_to_dict(task.get('Tags'))
-                if (tags.get('nodepool_provider_name') == self.provider.name):
-                    # Copy over tags
-                    self.log.debug(
-                        f"Copying tags from import task {ami.name} to AMI")
-                    with self.rate_limiter:
-                        ami.create_tags(Tags=task['Tags'])
-                else:
-                    self.not_our_images.add(ami.id)
+            if ami.id in self.not_our_images:
+                continue
+            try:
+                if ami.tags:
+                    continue
+            except (botocore.exceptions.ClientError, AttributeError):
+                continue
+            # This has no tags, which means it's either not a nodepool
+            # image, or it's a new one which doesn't have tags yet.
+            # Copy over any tags from the snapshot import task,
+            # otherwise, mark it as an image we can ignore in future
+            # runs.
+            if len(ami.block_device_mappings) < 1:
+                self.not_our_images.add(ami.id)
+                continue
+            bdm = ami.block_device_mappings[0]
+            ebs = bdm.get('Ebs')
+            if not ebs:
+                self.not_our_images.add(ami.id)
+                continue
+            snapshot_id = ebs.get('SnapshotId')
+            if not snapshot_id:
+                self.not_our_images.add(ami.id)
+                continue
+            to_examine.append((ami, snapshot_id))
+        if not to_examine:
+            return
+
+        # We have images to examine; get a list of import tasks so
+        # we can copy the tags from the import task that resulted in
+        # this image.
+        task_map = {}
+        for task in self._listImportSnapshotTasks():
+            detail = task['SnapshotTaskDetail']
+            task_snapshot_id = detail.get('SnapshotId')
+            if not task_snapshot_id:
+                continue
+            task_map[task_snapshot_id] = task['Tags']
+
+        for ami, snapshot_id in to_examine:
+            tags = task_map.get(snapshot_id)
+            if not tags:
+                self.not_our_images.add(ami.id)
+                continue
+            metadata = tag_list_to_dict(tags)
+            if (metadata.get('nodepool_provider_name') == self.provider.name):
+                # Copy over tags
+                self.log.debug(
+                    f"Copying tags from import task to image {ami.id}")
+                with self.rate_limiter:
+                    ami.create_tags(Tags=tags)
+            else:
+                self.not_our_images.add(ami.id)
 
     def _tagSnapshots(self):
         # See comments for _tagAmis
+        to_examine = []
         for snap in self._listSnapshots():
-            if ('import-ami-' in snap.description and
-                not snap.tags and
-                snap.id not in self.not_our_snapshots):
+            try:
+                if (snap.id not in self.not_our_snapshots and
+                    not snap.tags):
+                    to_examine.append(snap)
+            except botocore.exceptions.ClientError:
+                # We may have cached a snapshot that doesn't exist
+                continue
 
-                match = re.match(r'.*?(import-ami-\w*)', snap.description)
-                if not match:
-                    self.not_our_snapshots.add(snap.id)
-                    continue
-                task_id = match.group(1)
-                task = self._getImportImageTask(task_id)
-                tags = tag_list_to_dict(task.get('Tags'))
-                if (tags.get('nodepool_provider_name') == self.provider.name):
-                    # Copy over tags
-                    self.log.debug(
-                        f"Copying tags from import task {task_id} to snapshot")
-                    with self.rate_limiter:
-                        snap.create_tags(Tags=task['Tags'])
-                else:
-                    self.not_our_snapshots.add(snap.id)
+        if not to_examine:
+            return
 
-    def _getImportImageTask(self, task_id):
-        paginator = self._get_paginator('describe_import_image_tasks')
+        # We have snapshots to examine; get a list of import tasks so
+        # we can copy the tags from the import task that resulted in
+        # this snapshot.
+        task_map = {}
+        for task in self._listImportSnapshotTasks():
+            detail = task['SnapshotTaskDetail']
+            task_snapshot_id = detail.get('SnapshotId')
+            if not task_snapshot_id:
+                continue
+            task_map[task_snapshot_id] = task['Tags']
+
+        for snap in to_examine:
+            tags = task_map.get(snap.id)
+            if not tags:
+                self.not_our_snapshots.add(snap.id)
+                continue
+            metadata = tag_list_to_dict(tags)
+            if (metadata.get('nodepool_provider_name') == self.provider.name):
+                # Copy over tags
+                self.log.debug(
+                    f"Copying tags from import task to snapshot {snap.id}")
+                with self.rate_limiter:
+                    snap.create_tags(Tags=tags)
+            else:
+                self.not_our_snapshots.add(snap.id)
+
+    def _listImportSnapshotTasks(self):
+        paginator = self._get_paginator('describe_import_snapshot_tasks')
         with self.non_mutating_rate_limiter:
-            for page in paginator.paginate(ImportTaskIds=[task_id]):
-                for task in page['ImportImageTasks']:
-                    # Return the first and only task
-                    return task
+            for page in paginator.paginate():
+                for task in page['ImportSnapshotTasks']:
+                    yield task
 
     instance_key_re = re.compile(r'([a-z\-]+)\d.*')
@@ -809,8 +899,8 @@ class AwsAdapter(statemachine.Adapter):
     # These methods allow the tests to patch our use of boto to
     # compensate for missing methods in the boto mocks.
-    def _import_image(self, *args, **kw):
-        return self.ec2_client.import_image(*args, **kw)
+    def _import_snapshot(self, *args, **kw):
+        return self.ec2_client.import_snapshot(*args, **kw)
 
     def _get_paginator(self, *args, **kw):
         return self.ec2_client.get_paginator(*args, **kw)

View File

@@ -101,6 +101,8 @@ class AwsProviderDiskImage(ConfigValue):
                 default_port_mapping.get(self.connection_type, 22))
             self.meta = {}
             self.architecture = image.get('architecture', 'x86_64')
+            self.volume_size = image.get('volume-size', None)
+            self.volume_type = image.get('volume-type', 'gp2')
 
     @property
     def external_name(self):
@@ -117,6 +119,8 @@ class AwsProviderDiskImage(ConfigValue):
             'connection-port': int,
             'python-path': str,
             'shell-type': str,
+            'volume-size': int,
+            'volume-type': str,
         }
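
The effect of the two `image.get` defaults above, in a quick hypothetical illustration:

    image = {'volume-type': 'gp3'}                 # 'volume-size' omitted
    volume_size = image.get('volume-size', None)   # None: use snapshot size
    volume_type = image.get('volume-type', 'gp2')  # 'gp3'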

View File

@@ -22,34 +22,28 @@ import boto3
 def make_stage_1(task_id, user_bucket, tags):
     return {
         'Architecture': 'x86_64',
-        'ImportTaskId': f'import-ami-{task_id}',
+        'ImportTaskId': f'import-snap-{task_id}',
         'Progress': '19',
-        'SnapshotDetails': [{'DiskImageSize': 355024384.0,
-                             'Format': 'VMDK',
-                             'Status': 'active',
-                             'UserBucket': user_bucket}],
+        'SnapshotTaskDetail': {'DiskImageSize': 355024384.0,
+                               'Format': 'VMDK',
+                               'Status': 'active',
+                               'UserBucket': user_bucket},
         'Status': 'active',
         'StatusMessage': 'converting',
         'Tags': tags,
     }
 
 
-def make_stage_2(task_id, image_id, snap_id, task):
+def make_stage_2(task_id, snap_id, task):
     # Make a unique snapshot id that's different than the task id.
     return {
-        'Architecture': 'x86_64',
-        'BootMode': 'legacy_bios',
-        'ImageId': image_id,
-        'ImportTaskId': f'import-ami-{task_id}',
-        'LicenseType': 'BYOL',
-        'Platform': 'Linux',
-        'SnapshotDetails': [{'DeviceName': '/dev/sda1',
-                             'DiskImageSize': 355024384.0,
-                             'Format': 'VMDK',
-                             'SnapshotId': snap_id,
-                             'Status': 'completed',
-                             'UserBucket':
-                             task['SnapshotDetails'][0]['UserBucket']}],
+        'ImportTaskId': f'import-snap-{task_id}',
+        'SnapshotTaskDetail': {'DiskImageSize': 355024384.0,
+                               'Format': 'VMDK',
+                               'SnapshotId': snap_id,
+                               'Status': 'completed',
+                               'UserBucket':
+                               task['SnapshotTaskDetail']['UserBucket']},
         'Status': 'completed',
         'Tags': task['Tags'],
     }
@@ -67,12 +61,12 @@ class ImportTaskPaginator:
             tasks = [t for t in tasks
                      if t['ImportTaskId'] in kw['ImportTaskIds']]
 
         # A page of tasks
-        ret = [{'ImportImageTasks': tasks}]
+        ret = [{'ImportSnapshotTasks': tasks}]
 
         # Move the task along
         for task in tasks:
             if task['Status'] != 'completed':
-                self.fake.finish_import_image(task)
+                self.fake.finish_import_snapshot(task)
 
         return ret
@@ -84,46 +78,39 @@ class FakeAws:
         self.ec2 = boto3.resource('ec2', region_name='us-west-2')
         self.ec2_client = boto3.client('ec2', region_name='us-west-2')
 
-    def import_image(self, *args, **kw):
+    def import_snapshot(self, *args, **kw):
         task_id = uuid.uuid4().hex
         task = make_stage_1(
             task_id,
-            kw['DiskContainers'][0]['UserBucket'],
+            kw['DiskContainer']['UserBucket'],
             kw['TagSpecifications'][0]['Tags'])
         self.tasks[task_id] = task
         return task
 
-    def finish_import_image(self, task):
+    def finish_import_snapshot(self, task):
         task_id = task['ImportTaskId'].split('-')[-1]
 
-        # Make an AMI to simulate the import finishing
-        reservation = self.ec2_client.run_instances(
-            ImageId="ami-12c6146b", MinCount=1, MaxCount=1)
-        instance = reservation["Instances"][0]
-        instance_id = instance["InstanceId"]
-
-        response = self.ec2_client.create_image(
-            InstanceId=instance_id,
-            Name=f'import-ami-{task_id}',
-        )
-        image_id = response["ImageId"]
-        self.ec2_client.describe_images(ImageIds=[image_id])["Images"][0]
-
+        # Make a Volume to simulate the import finishing
         volume = self.ec2_client.create_volume(
             Size=80,
             AvailabilityZone='us-west-2')
         snap_id = self.ec2_client.create_snapshot(
             VolumeId=volume['VolumeId'],
-            Description=f'imported volume import-ami-{task_id}',
         )["SnapshotId"]
 
-        t2 = make_stage_2(task_id, image_id, snap_id, task)
+        t2 = make_stage_2(task_id, snap_id, task)
         self.tasks[task_id] = t2
 
-        return (image_id, snap_id)
+        return snap_id
+
+    def change_snapshot_id(self, task, snapshot_id):
+        # Given a task, update its snapshot id; the moto
+        # register_image mock doesn't honor the snapshot_id we pass
+        # in.
+        task_id = task['ImportTaskId'].split('-')[-1]
+        self.tasks[task_id]['SnapshotTaskDetail']['SnapshotId'] = snapshot_id
 
     def get_paginator(self, name):
-        if name == 'describe_import_image_tasks':
+        if name == 'describe_import_snapshot_tasks':
             return ImportTaskPaginator(self)
         raise NotImplementedError()
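
A sketch of how a test can drive the fake through both stages (this assumes a FakeAws instance constructed under active moto EC2 mocks, as in the driver test setUp, since `finish_import_snapshot` creates a fake volume and snapshot):

    fake = FakeAws()
    task = fake.import_snapshot(
        DiskContainer={'Format': 'raw',
                       'UserBucket': {'S3Bucket': 'nodepool',
                                      'S3Key': 'img'}},
        TagSpecifications=[{'ResourceType': 'import-snapshot-task',
                            'Tags': [{'Key': 'k', 'Value': 'v'}]}])
    paginator = fake.get_paginator('describe_import_snapshot_tasks')
    # The first paginate() reports the stage-1 task as 'active' and then
    # advances it; a later call reports it 'completed' with a SnapshotId.
    for page in paginator.paginate(ImportTaskIds=[task['ImportTaskId']]):
        print(page['ImportSnapshotTasks'][0]['Status'])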

View File

@@ -103,6 +103,9 @@ class TestDriverAws(tests.DBTestCase):
             Description='Zuul Nodes')
         self.security_group_id = self.security_group['GroupId']
         self.patch(nodepool.driver.statemachine, 'nodescan', fake_nodescan)
+        self.patch(AwsAdapter, '_import_snapshot',
+                   self.fake_aws.import_snapshot)
+        self.patch(AwsAdapter, '_get_paginator', self.fake_aws.get_paginator)
 
     def tearDown(self):
         self.mock_ec2.stop()
@@ -526,8 +529,6 @@ class TestDriverAws(tests.DBTestCase):
         self.assertTrue(response['EbsOptimized']['Value'])
 
     def test_aws_diskimage(self):
-        self.patch(AwsAdapter, '_import_image', self.fake_aws.import_image)
-        self.patch(AwsAdapter, '_get_paginator', self.fake_aws.get_paginator)
         configfile = self.setup_config('aws/diskimage.yaml')
         self.useBuilder(configfile)
@@ -558,8 +559,6 @@ class TestDriverAws(tests.DBTestCase):
                          {'key1': 'value1', 'key2': 'value2'})
 
     def test_aws_diskimage_removal(self):
-        self.patch(AwsAdapter, '_import_image', self.fake_aws.import_image)
-        self.patch(AwsAdapter, '_get_paginator', self.fake_aws.get_paginator)
         configfile = self.setup_config('aws/diskimage.yaml')
         self.useBuilder(configfile)
         self.waitForImage('ec2-us-west-2', 'fake-image')
@@ -601,19 +600,42 @@ class TestDriverAws(tests.DBTestCase):
         )
         instance_id = reservation['Instances'][0]['InstanceId']
 
-        task = self.fake_aws.import_image(
-            DiskContainers=[{
+        task = self.fake_aws.import_snapshot(
+            DiskContainer={
                 'Format': 'ova',
                 'UserBucket': {
                     'S3Bucket': 'nodepool',
                     'S3Key': 'testfile',
                 }
-            }],
+            },
             TagSpecifications=[{
-                'ResourceType': 'import-image-task',
+                'ResourceType': 'import-snapshot-task',
                 'Tags': image_tags,
             }])
-        image_id, snapshot_id = self.fake_aws.finish_import_image(task)
+        snapshot_id = self.fake_aws.finish_import_snapshot(task)
+
+        register_response = self.ec2_client.register_image(
+            Architecture='amd64',
+            BlockDeviceMappings=[
+                {
+                    'DeviceName': '/dev/sda1',
+                    'Ebs': {
+                        'DeleteOnTermination': True,
+                        'SnapshotId': snapshot_id,
+                        'VolumeSize': 20,
+                        'VolumeType': 'gp2',
+                    },
+                },
+            ],
+            RootDeviceName='/dev/sda1',
+            VirtualizationType='hvm',
+            Name='testimage',
+        )
+        image_id = register_response['ImageId']
+
+        ami = self.ec2.Image(image_id)
+        new_snapshot_id = ami.block_device_mappings[0]['Ebs']['SnapshotId']
+        self.fake_aws.change_snapshot_id(task, new_snapshot_id)
 
         # Note that the resulting image and snapshot do not have tags
         # applied, so we test the automatic retagging methods in the
@@ -684,7 +706,7 @@ class TestDriverAws(tests.DBTestCase):
                 break
 
         for _ in iterate_timeout(30, Exception, 'snapshot deletion'):
-            snap = self.ec2.Snapshot(snapshot_id)
+            snap = self.ec2.Snapshot(new_snapshot_id)
             try:
                 if snap.state == 'deleted':
                     break