From 3422f6f3d651f59f54ca25340ec9956f69d7d1c8 Mon Sep 17 00:00:00 2001
From: Stephen Finucane <stephen.finucane@intel.com>
Date: Fri, 26 Feb 2016 11:44:59 +0000
Subject: [PATCH] virt/hardware: Add diagnostic logs for scheduling

There are many reasons why booting a pinned instance can fail. Expose
some of these reasons to operators via the logs.

Change-Id: Id99cca56df93573b0a0a192cbaf198d059c78e88
---
 nova/virt/hardware.py | 62 +++++++++++++++++++++++++++++++++++--------
 1 file changed, 51 insertions(+), 11 deletions(-)

diff --git a/nova/virt/hardware.py b/nova/virt/hardware.py
index 376ce79067b3..94cf1aae28a8 100644
--- a/nova/virt/hardware.py
+++ b/nova/virt/hardware.py
@@ -766,6 +766,9 @@ def _pack_instance_onto_cores(available_siblings,
             fields.CPUThreadAllocationPolicy.ISOLATE):
         # make sure we have at least one fully free core
         if threads_per_core not in sibling_sets:
+            LOG.debug('Host does not have any fully free thread sibling '
+                      'sets. It is not possible to emulate a non-SMT '
+                      'behavior for the isolate policy without this.')
             return
 
         pinning = _get_pinning(1,  # we only want to "use" one thread per core
@@ -817,24 +820,39 @@ def _numa_fit_instance_cell_with_pinning(host_cell, instance_cell):
     :returns: objects.InstanceNUMACell instance with pinning information,
               or None if instance cannot be pinned to the given host
     """
-    if (host_cell.avail_cpus < len(instance_cell.cpuset) or
-        host_cell.avail_memory < instance_cell.memory):
-        # If we do not have enough CPUs available or not enough memory
-        # on the host cell, we quit early (no oversubscription).
+    if host_cell.avail_cpus < len(instance_cell.cpuset):
+        LOG.debug('Not enough available CPUs to schedule instance. '
+                  'Oversubscription is not possible with pinned instances. '
+                  'Required: %(required)s, actual: %(actual)s',
+                  {'required': len(instance_cell.cpuset),
+                   'actual': host_cell.avail_cpus})
+        return
+
+    if host_cell.avail_memory < instance_cell.memory:
+        LOG.debug('Not enough available memory to schedule instance. '
+                  'Oversubscription is not possible with pinned instances. '
+                  'Required: %(required)s, actual: %(actual)s',
+                  {'required': instance_cell.memory,
+                   'actual': host_cell.avail_memory})
         return
 
     if host_cell.siblings:
         # Try to pack the instance cell onto cores
-        return _pack_instance_onto_cores(
+        numa_cell = _pack_instance_onto_cores(
             host_cell.free_siblings, instance_cell, host_cell.id,
             max(map(len, host_cell.siblings)))
     else:
         # Straightforward to pin to available cpus when there is no
         # hyperthreading on the host
         free_cpus = [set([cpu]) for cpu in host_cell.free_cpus]
-        return _pack_instance_onto_cores(
+        numa_cell = _pack_instance_onto_cores(
             free_cpus, instance_cell, host_cell.id)
 
+    if not numa_cell:
+        LOG.debug('Failed to map instance cell CPUs to host cell CPUs')
+
+    return numa_cell
+
 
 def _numa_fit_instance_cell(host_cell, instance_cell, limit_cell=None):
     """Check if an instance cell can fit and set it's cell id
@@ -851,9 +869,19 @@ def _numa_fit_instance_cell(host_cell, instance_cell, limit_cell=None):
     """
     # NOTE (ndipanov): do not allow an instance to overcommit against
     # itself on any NUMA cell
-    if (instance_cell.memory > host_cell.memory or
-            len(instance_cell.cpuset) > len(host_cell.cpuset)):
-        return None
+    if instance_cell.memory > host_cell.memory:
+        LOG.debug('Not enough host cell memory to fit instance cell. '
+                  'Required: %(required)d, actual: %(actual)d',
+                  {'required': instance_cell.memory,
+                   'actual': host_cell.memory})
+        return
+
+    if len(instance_cell.cpuset) > len(host_cell.cpuset):
+        LOG.debug('Not enough host cell CPUs to fit instance cell. Required: '
+                  '%(required)d, actual: %(actual)d',
+                  {'required': len(instance_cell.cpuset),
+                   'actual': len(host_cell.cpuset)})
+        return
 
     if instance_cell.cpu_pinning_requested:
         new_instance_cell = _numa_fit_instance_cell_with_pinning(
@@ -868,14 +896,26 @@ def _numa_fit_instance_cell(host_cell, instance_cell, limit_cell=None):
         cpu_usage = host_cell.cpu_usage + len(instance_cell.cpuset)
         cpu_limit = len(host_cell.cpuset) * limit_cell.cpu_allocation_ratio
         ram_limit = host_cell.memory * limit_cell.ram_allocation_ratio
-        if memory_usage > ram_limit or cpu_usage > cpu_limit:
-            return None
+        if memory_usage > ram_limit:
+            LOG.debug('Host cell has limitations on usable memory. There is '
+                      'not enough free memory to schedule this instance. '
+                      'Usage: %(usage)d, limit: %(limit)d',
+                      {'usage': memory_usage, 'limit': ram_limit})
+            return
+        if cpu_usage > cpu_limit:
+            LOG.debug('Host cell has limitations on usable CPUs. There are '
+                      'not enough free CPUs to schedule this instance. '
+                      'Usage: %(usage)d, limit: %(limit)d',
+                      {'usage': cpu_usage, 'limit': cpu_limit})
+            return
 
     pagesize = None
     if instance_cell.pagesize:
         pagesize = _numa_cell_supports_pagesize_request(
             host_cell, instance_cell)
         if not pagesize:
+            LOG.debug('Host does not support requested memory pagesize. '
+                      'Requested: %d kB', instance_cell.pagesize)
             return
 
     instance_cell.id = host_cell.id