From 3422f6f3d651f59f54ca25340ec9956f69d7d1c8 Mon Sep 17 00:00:00 2001 From: Stephen Finucane Date: Fri, 26 Feb 2016 11:44:59 +0000 Subject: [PATCH] virt/hardware: Add diagnostic logs for scheduling There are many reasons why booting a pinned instance can fail. Expose some of these reasons to operators via the logs. Change-Id: Id99cca56df93573b0a0a192cbaf198d059c78e88 --- nova/virt/hardware.py | 62 +++++++++++++++++++++++++++++++++++-------- 1 file changed, 51 insertions(+), 11 deletions(-) diff --git a/nova/virt/hardware.py b/nova/virt/hardware.py index 376ce79067b3..94cf1aae28a8 100644 --- a/nova/virt/hardware.py +++ b/nova/virt/hardware.py @@ -766,6 +766,9 @@ def _pack_instance_onto_cores(available_siblings, fields.CPUThreadAllocationPolicy.ISOLATE): # make sure we have at least one fully free core if threads_per_core not in sibling_sets: + LOG.debug('Host does not have any fully free thread sibling sets.' + 'It is not possible to emulate a non-SMT behavior ' + 'for the isolate policy without this.') return pinning = _get_pinning(1, # we only want to "use" one thread per core @@ -817,24 +820,39 @@ def _numa_fit_instance_cell_with_pinning(host_cell, instance_cell): :returns: objects.InstanceNUMACell instance with pinning information, or None if instance cannot be pinned to the given host """ - if (host_cell.avail_cpus < len(instance_cell.cpuset) or - host_cell.avail_memory < instance_cell.memory): - # If we do not have enough CPUs available or not enough memory - # on the host cell, we quit early (no oversubscription). + if host_cell.avail_cpus < len(instance_cell.cpuset): + LOG.debug('Not enough available CPUs to schedule instance. ' + 'Oversubscription is not possible with pinned instances. ' + 'Required: %(required)s, actual: %(actual)s', + {'required': len(instance_cell.cpuset), + 'actual': host_cell.avail_cpus}) + return + + if host_cell.avail_memory < instance_cell.memory: + LOG.debug('Not enough available memory to schedule instance. ' + 'Oversubscription is not possible with pinned instances. ' + 'Required: %(required)s, actual: %(actual)s', + {'required': instance_cell.memory, + 'actual': host_cell.memory}) return if host_cell.siblings: # Try to pack the instance cell onto cores - return _pack_instance_onto_cores( + numa_cell = _pack_instance_onto_cores( host_cell.free_siblings, instance_cell, host_cell.id, max(map(len, host_cell.siblings))) else: # Straightforward to pin to available cpus when there is no # hyperthreading on the host free_cpus = [set([cpu]) for cpu in host_cell.free_cpus] - return _pack_instance_onto_cores( + numa_cell = _pack_instance_onto_cores( free_cpus, instance_cell, host_cell.id) + if not numa_cell: + LOG.debug('Failed to map instance cell CPUs to host cell CPUs') + + return numa_cell + def _numa_fit_instance_cell(host_cell, instance_cell, limit_cell=None): """Check if an instance cell can fit and set it's cell id @@ -851,9 +869,19 @@ def _numa_fit_instance_cell(host_cell, instance_cell, limit_cell=None): """ # NOTE (ndipanov): do not allow an instance to overcommit against # itself on any NUMA cell - if (instance_cell.memory > host_cell.memory or - len(instance_cell.cpuset) > len(host_cell.cpuset)): - return None + if instance_cell.memory > host_cell.memory: + LOG.debug('Not enough host cell memory to fit instance cell. ' + 'Required: %(required)d, actual: %(actual)d', + {'required': instance_cell.memory, + 'actual': host_cell.memory}) + return + + if len(instance_cell.cpuset) > len(host_cell.cpuset): + LOG.debug('Not enough host cell CPUs to fit instance cell. Required: ' + '%(required)d, actual: %(actual)d', + {'required': len(instance_cell.cpuset), + 'actual': len(host_cell.cpuset)}) + return if instance_cell.cpu_pinning_requested: new_instance_cell = _numa_fit_instance_cell_with_pinning( @@ -868,14 +896,26 @@ def _numa_fit_instance_cell(host_cell, instance_cell, limit_cell=None): cpu_usage = host_cell.cpu_usage + len(instance_cell.cpuset) cpu_limit = len(host_cell.cpuset) * limit_cell.cpu_allocation_ratio ram_limit = host_cell.memory * limit_cell.ram_allocation_ratio - if memory_usage > ram_limit or cpu_usage > cpu_limit: - return None + if memory_usage > ram_limit: + LOG.debug('Host cell has limitations on usable memory. There is ' + 'not enough free memory to schedule this instance. ' + 'Usage: %(usage)d, limit: %(limit)d', + {'usage': memory_usage, 'limit': ram_limit}) + return + if cpu_usage > cpu_limit: + LOG.debug('Host cell has limitations on usable CPUs. There are ' + 'not enough free CPUs to schedule this instance. ' + 'Usage: %(usage)d, limit: %(limit)d', + {'usage': memory_usage, 'limit': cpu_limit}) + return pagesize = None if instance_cell.pagesize: pagesize = _numa_cell_supports_pagesize_request( host_cell, instance_cell) if not pagesize: + LOG.debug('Host does not support requested memory pagesize. ' + 'Requested: %d kB', instance_cell.pagesize) return instance_cell.id = host_cell.id