diff --git a/nova/scheduler/filters/numa_topology_filter.py b/nova/scheduler/filters/numa_topology_filter.py
index 680a97ad1845..2417f43927b0 100644
--- a/nova/scheduler/filters/numa_topology_filter.py
+++ b/nova/scheduler/filters/numa_topology_filter.py
@@ -13,6 +13,7 @@
 from oslo_log import log as logging
 
 from nova import objects
+from nova.objects import fields
 from nova.scheduler import filters
 from nova.virt import hardware
 
@@ -22,15 +23,57 @@ LOG = logging.getLogger(__name__)
 class NUMATopologyFilter(filters.BaseHostFilter):
     """Filter on requested NUMA topology."""
 
+    def _satisfies_cpu_policy(self, host_state, extra_specs, image_props):
+        """Check that the host_state provided satisfies any available
+        CPU policy requirements.
+        """
+        host_topology, _ = hardware.host_topology_and_format_from_host(
+            host_state)
+        # NOTE(stephenfin): There can be conflicts between the policy
+        # specified by the image and that specified by the instance, but this
+        # is not the place to resolve these. We do this during scheduling.
+        cpu_policy = [extra_specs.get('hw:cpu_policy'),
+                      image_props.get('hw_cpu_policy')]
+        cpu_thread_policy = [extra_specs.get('hw:cpu_thread_policy'),
+                             image_props.get('hw_cpu_thread_policy')]
+
+        if not host_topology:
+            return True
+
+        if fields.CPUAllocationPolicy.DEDICATED not in cpu_policy:
+            return True
+
+        if fields.CPUThreadAllocationPolicy.REQUIRE not in cpu_thread_policy:
+            return True
+
+        for cell in host_topology.cells:
+            # the presence of siblings indicates hyperthreading (HT)
+            if not cell.siblings:
+                LOG.debug("%(host_state)s fails CPU policy requirements. "
+                          "Host does not have hyperthreading or "
+                          "hyperthreading is disabled, but 'require' threads "
+                          "policy was requested.", {'host_state': host_state})
+                return False
+
+        return True
+
     def host_passes(self, host_state, spec_obj):
         ram_ratio = host_state.ram_allocation_ratio
         cpu_ratio = host_state.cpu_allocation_ratio
+        extra_specs = spec_obj.flavor.extra_specs
+        image_props = spec_obj.image.properties
         requested_topology = spec_obj.numa_topology
         host_topology, _fmt = hardware.host_topology_and_format_from_host(
                 host_state)
         pci_requests = spec_obj.pci_requests
+
         if pci_requests:
             pci_requests = pci_requests.requests
+
+        if not self._satisfies_cpu_policy(host_state, extra_specs,
+                                          image_props):
+            return False
+
         if requested_topology and host_topology:
             limits = objects.NUMATopologyLimits(
                 cpu_allocation_ratio=cpu_ratio,
diff --git a/nova/tests/unit/scheduler/fakes.py b/nova/tests/unit/scheduler/fakes.py
index c6991f8da7e8..646e6cdb9c24 100644
--- a/nova/tests/unit/scheduler/fakes.py
+++ b/nova/tests/unit/scheduler/fakes.py
@@ -32,6 +32,17 @@ NUMA_TOPOLOGY = objects.NUMATopology(
                                 cpu_usage=0, memory_usage=0, mempages=[],
                                siblings=[], pinned_cpus=set([]))])
 
+NUMA_TOPOLOGY_W_HT = objects.NUMATopology(cells=[
+    objects.NUMACell(
+        id=0, cpuset=set([1, 2, 5, 6]), memory=512,
+        cpu_usage=0, memory_usage=0, mempages=[],
+        siblings=[set([1, 5]), set([2, 6])], pinned_cpus=set([])),
+    objects.NUMACell(
+        id=1, cpuset=set([3, 4, 7, 8]), memory=512,
+        cpu_usage=0, memory_usage=0, mempages=[],
+        siblings=[set([3, 4]), set([7, 8])], pinned_cpus=set([]))
+])
+
 COMPUTE_NODES = [
         objects.ComputeNode(
             id=1, local_gb=1024, memory_mb=1024, vcpus=1,
diff --git a/nova/tests/unit/scheduler/filters/test_numa_topology_filters.py b/nova/tests/unit/scheduler/filters/test_numa_topology_filters.py
index fc57ad2a9341..abcce20e3aa0 100644
--- a/nova/tests/unit/scheduler/filters/test_numa_topology_filters.py
+++ b/nova/tests/unit/scheduler/filters/test_numa_topology_filters.py
@@ -10,9 +10,11 @@
 #    License for the specific language governing permissions and limitations
 #    under the License.
 
+import itertools
 import uuid
 
 from nova import objects
+from nova.objects import fields
 from nova.scheduler.filters import numa_topology_filter
 from nova import test
 from nova.tests.unit.scheduler import fakes
@@ -24,14 +26,21 @@ class TestNUMATopologyFilter(test.NoDBTestCase):
         super(TestNUMATopologyFilter, self).setUp()
         self.filt_cls = numa_topology_filter.NUMATopologyFilter()
 
+    def _get_spec_obj(self, numa_topology):
+        image_meta = objects.ImageMeta(properties=objects.ImageMetaProps())
+        spec_obj = objects.RequestSpec(numa_topology=numa_topology,
+                                       pci_requests=None,
+                                       instance_uuid=str(uuid.uuid4()),
+                                       flavor=objects.Flavor(extra_specs={}),
+                                       image=image_meta)
+        return spec_obj
+
     def test_numa_topology_filter_pass(self):
         instance_topology = objects.InstanceNUMATopology(
             cells=[objects.InstanceNUMACell(id=0, cpuset=set([1]), memory=512),
                    objects.InstanceNUMACell(id=1, cpuset=set([3]), memory=512)
                ])
-        spec_obj = objects.RequestSpec(numa_topology=instance_topology,
-                                       pci_requests=None,
-                                       instance_uuid=str(uuid.uuid4()))
+        spec_obj = self._get_spec_obj(numa_topology=instance_topology)
         host = fakes.FakeHostState('host1', 'node1',
                                    {'numa_topology': fakes.NUMA_TOPOLOGY,
                                     'pci_stats': None,
@@ -45,16 +54,12 @@ class TestNUMATopologyFilter(test.NoDBTestCase):
                    objects.InstanceNUMACell(id=1, cpuset=set([3]), memory=512)
                ])
 
-        spec_obj = objects.RequestSpec(numa_topology=instance_topology,
-                                       pci_requests=None,
-                                       instance_uuid=str(uuid.uuid4()))
+        spec_obj = self._get_spec_obj(numa_topology=instance_topology)
         host = fakes.FakeHostState('host1', 'node1', {'pci_stats': None})
         self.assertFalse(self.filt_cls.host_passes(host, spec_obj))
 
     def test_numa_topology_filter_numa_host_no_numa_instance_pass(self):
-        spec_obj = objects.RequestSpec(numa_topology=None,
-                                       pci_requests=None,
-                                       instance_uuid=str(uuid.uuid4()))
+        spec_obj = self._get_spec_obj(numa_topology=None)
         host = fakes.FakeHostState('host1', 'node1',
                                    {'numa_topology': fakes.NUMA_TOPOLOGY})
         self.assertTrue(self.filt_cls.host_passes(host, spec_obj))
@@ -65,9 +70,7 @@ class TestNUMATopologyFilter(test.NoDBTestCase):
                    objects.InstanceNUMACell(id=1, cpuset=set([2]), memory=512),
                    objects.InstanceNUMACell(id=2, cpuset=set([3]), memory=512)
                ])
-        spec_obj = objects.RequestSpec(numa_topology=instance_topology,
-                                       pci_requests=None,
-                                       instance_uuid=str(uuid.uuid4()))
+        spec_obj = self._get_spec_obj(numa_topology=instance_topology)
         host = fakes.FakeHostState('host1', 'node1',
                                    {'numa_topology': fakes.NUMA_TOPOLOGY,
                                     'pci_stats': None,
@@ -81,9 +84,7 @@ class TestNUMATopologyFilter(test.NoDBTestCase):
                                             memory=1024),
                    objects.InstanceNUMACell(id=1, cpuset=set([3]), memory=512)
                ])
-        spec_obj = objects.RequestSpec(numa_topology=instance_topology,
-                                       pci_requests=None,
-                                       instance_uuid=str(uuid.uuid4()))
+        spec_obj = self._get_spec_obj(numa_topology=instance_topology)
         host = fakes.FakeHostState('host1', 'node1',
                                    {'numa_topology': fakes.NUMA_TOPOLOGY,
                                     'pci_stats': None,
@@ -96,9 +97,7 @@ class TestNUMATopologyFilter(test.NoDBTestCase):
             cells=[objects.InstanceNUMACell(id=0, cpuset=set([1]), memory=512),
                    objects.InstanceNUMACell(id=1, cpuset=set([3, 4, 5]),
                                             memory=512)])
-        spec_obj = objects.RequestSpec(numa_topology=instance_topology,
-                                       pci_requests=None,
-                                       instance_uuid=str(uuid.uuid4()))
+        spec_obj = self._get_spec_obj(numa_topology=instance_topology)
         host = fakes.FakeHostState('host1', 'node1',
                                    {'numa_topology': fakes.NUMA_TOPOLOGY,
                                     'pci_stats': None,
@@ -111,9 +110,7 @@ class TestNUMATopologyFilter(test.NoDBTestCase):
             cells=[objects.InstanceNUMACell(id=0, cpuset=set([1]), memory=512),
                    objects.InstanceNUMACell(id=1, cpuset=set([3]), memory=512)
                ])
-        spec_obj = objects.RequestSpec(numa_topology=instance_topology,
-                                       pci_requests=None,
-                                       instance_uuid=str(uuid.uuid4()))
+        spec_obj = self._get_spec_obj(numa_topology=instance_topology)
         host = fakes.FakeHostState('host1', 'node1',
                                    {'numa_topology': fakes.NUMA_TOPOLOGY,
                                     'pci_stats': None,
@@ -123,3 +120,76 @@ class TestNUMATopologyFilter(test.NoDBTestCase):
         limits = host.limits['numa_topology']
         self.assertEqual(limits.cpu_allocation_ratio, 21)
         self.assertEqual(limits.ram_allocation_ratio, 1.3)
+
+    def _do_test_numa_topology_filter_cpu_policy(
+            self, numa_topology, cpu_policy, cpu_thread_policy, passes):
+        instance_topology = objects.InstanceNUMATopology(
+            cells=[objects.InstanceNUMACell(id=0, cpuset=set([1]), memory=512),
+                   objects.InstanceNUMACell(id=1, cpuset=set([3]), memory=512)
+               ])
+        spec_obj = objects.RequestSpec(numa_topology=instance_topology,
+                                       pci_requests=None,
+                                       instance_uuid=str(uuid.uuid4()))
+
+        extra_specs = [
+            {},
+            {
+                'hw:cpu_policy': cpu_policy,
+                'hw:cpu_thread_policy': cpu_thread_policy,
+            }
+        ]
+        image_props = [
+            {},
+            {
+                'hw_cpu_policy': cpu_policy,
+                'hw_cpu_thread_policy': cpu_thread_policy,
+            }
+        ]
+        host = fakes.FakeHostState('host1', 'node1', {
+            'numa_topology': numa_topology,
+            'pci_stats': None,
+            'cpu_allocation_ratio': 1,
+            'ram_allocation_ratio': 1.5})
+        assertion = self.assertTrue if passes else self.assertFalse
+
+        # test combinations of image properties and extra specs
+        for specs, props in itertools.product(extra_specs, image_props):
+            # ...except for the one where no policy is specified
+            if specs == props == {}:
+                continue
+
+            fake_flavor = objects.Flavor(memory_mb=1024, extra_specs=specs)
+            fake_image_props = objects.ImageMetaProps(**props)
+            fake_image = objects.ImageMeta(properties=fake_image_props)
+
+            spec_obj.image = fake_image
+            spec_obj.flavor = fake_flavor
+
+            assertion(self.filt_cls.host_passes(host, spec_obj))
+
+    def test_numa_topology_filter_fail_cpu_thread_policy_require(self):
+        cpu_policy = fields.CPUAllocationPolicy.DEDICATED
+        cpu_thread_policy = fields.CPUThreadAllocationPolicy.REQUIRE
+        numa_topology = fakes.NUMA_TOPOLOGY
+
+        self._do_test_numa_topology_filter_cpu_policy(
+            numa_topology, cpu_policy, cpu_thread_policy, False)
+
+    def test_numa_topology_filter_pass_cpu_thread_policy_require(self):
+        cpu_policy = fields.CPUAllocationPolicy.DEDICATED
+        cpu_thread_policy = fields.CPUThreadAllocationPolicy.REQUIRE
+        numa_topology = fakes.NUMA_TOPOLOGY_W_HT
+
+        self._do_test_numa_topology_filter_cpu_policy(
+            numa_topology, cpu_policy, cpu_thread_policy, True)
+
+    def test_numa_topology_filter_pass_cpu_thread_policy_others(self):
+        cpu_policy = fields.CPUAllocationPolicy.DEDICATED
+        cpu_thread_policy = fields.CPUThreadAllocationPolicy.PREFER
+        numa_topology = fakes.NUMA_TOPOLOGY
+
+        for cpu_thread_policy in [
+                fields.CPUThreadAllocationPolicy.PREFER,
+                fields.CPUThreadAllocationPolicy.ISOLATE]:
+            self._do_test_numa_topology_filter_cpu_policy(
+                numa_topology, cpu_policy, cpu_thread_policy, True)
diff --git a/nova/tests/unit/virt/test_hardware.py b/nova/tests/unit/virt/test_hardware.py
index 6f4c140fcb13..69579ad21246 100644
--- a/nova/tests/unit/virt/test_hardware.py
+++ b/nova/tests/unit/virt/test_hardware.py
@@ -2103,6 +2103,55 @@ class CPUPinningCellTestCase(test.NoDBTestCase, _CPUPinningTestCaseBase):
         got_pinning = {x: x for x in range(0, 4)}
         self.assertEqual(got_pinning, inst_pin.cpu_pinning)
 
+    def test_get_pinning_require_policy_too_few_siblings(self):
+        host_pin = objects.NUMACell(
+                id=0,
+                cpuset=set([0, 1, 2, 3, 4, 5, 6, 7]),
+                memory=4096, memory_usage=0,
+                pinned_cpus=set([0, 1, 2]),
+                siblings=[set([0, 4]), set([1, 5]), set([2, 6]), set([3, 7])],
+                mempages=[])
+        inst_pin = objects.InstanceNUMACell(
+                cpuset=set([0, 1, 2, 3]),
+                memory=2048,
+                cpu_policy=fields.CPUAllocationPolicy.DEDICATED,
+                cpu_thread_policy=fields.CPUThreadAllocationPolicy.REQUIRE)
+        inst_pin = hw._numa_fit_instance_cell_with_pinning(host_pin, inst_pin)
+        self.assertIsNone(inst_pin)
+
+    def test_get_pinning_require_policy_fits(self):
+        host_pin = objects.NUMACell(id=0, cpuset=set([0, 1, 2, 3]),
+                                    memory=4096, memory_usage=0,
+                                    siblings=[set([0, 1]), set([2, 3])],
+                                    mempages=[], pinned_cpus=set([]))
+        inst_pin = objects.InstanceNUMACell(
+                cpuset=set([0, 1, 2, 3]),
+                memory=2048,
+                cpu_policy=fields.CPUAllocationPolicy.DEDICATED,
+                cpu_thread_policy=fields.CPUThreadAllocationPolicy.REQUIRE)
+        inst_pin = hw._numa_fit_instance_cell_with_pinning(host_pin, inst_pin)
+        self.assertInstanceCellPinned(inst_pin)
+        got_topo = objects.VirtCPUTopology(sockets=1, cores=2, threads=2)
+        self.assertEqualTopology(got_topo, inst_pin.cpu_topology)
+
+    def test_get_pinning_require_policy_fits_w_usage(self):
+        host_pin = objects.NUMACell(
+                id=0,
+                cpuset=set([0, 1, 2, 3, 4, 5, 6, 7]),
+                memory=4096, memory_usage=0,
+                pinned_cpus=set([0, 1]),
+                siblings=[set([0, 4]), set([1, 5]), set([2, 6]), set([3, 7])],
+                mempages=[])
+        inst_pin = objects.InstanceNUMACell(
+                cpuset=set([0, 1, 2, 3]),
+                memory=2048,
+                cpu_policy=fields.CPUAllocationPolicy.DEDICATED,
+                cpu_thread_policy=fields.CPUThreadAllocationPolicy.REQUIRE)
+        inst_pin = hw._numa_fit_instance_cell_with_pinning(host_pin, inst_pin)
+        self.assertInstanceCellPinned(inst_pin)
+        got_topo = objects.VirtCPUTopology(sockets=1, cores=2, threads=2)
+        self.assertEqualTopology(got_topo, inst_pin.cpu_topology)
+
     def test_get_pinning_host_siblings_instance_odd_fit(self):
         host_pin = objects.NUMACell(id=0, cpuset=set([0, 1, 2, 3, 4, 5, 6, 7]),
                                     memory=4096, memory_usage=0,
diff --git a/nova/virt/hardware.py b/nova/virt/hardware.py
index f60d58e7d7d8..c51aebca4ff1 100644
--- a/nova/virt/hardware.py
+++ b/nova/virt/hardware.py
@@ -670,31 +670,27 @@ def _pack_instance_onto_cores(available_siblings, instance_cell, host_cell_id):
     topology, making sure that hyperthreads of the instance match up with
     those of the host when the pinning takes effect.
 
-    Currently the strategy for packing is to prefer siblings and try use
-    cores evenly, by using emptier cores first. This is achieved by the way we
-    order cores in the can_pack structure, and the order in which we iterate
+    Currently the strategy for packing is to prefer siblings and try use cores
+    evenly, by using emptier cores first. This is achieved by the way we order
+    cores in the sibling_sets structure, and the order in which we iterate
     through it.
 
-    The main packing loop that iterates over the can_pack dictionary will not
-    currently try to look for a fit that maximizes number of siblings, but will
-    simply rely on the iteration ordering and picking the first viable
+    The main packing loop that iterates over the sibling_sets dictionary will
+    not currently try to look for a fit that maximizes number of siblings, but
+    will simply rely on the iteration ordering and picking the first viable
     placement.
     """
 
-    # We build up a data structure 'can_pack' that answers the question:
-    # 'Given the number of threads I want to pack, give me a list of all
-    # the available sibling sets that can accommodate it'
-    can_pack = collections.defaultdict(list)
+    # We build up a data structure that answers the question: 'Given the
+    # number of threads I want to pack, give me a list of all the available
+    # sibling sets (or groups thereof) that can accommodate it'
+    sibling_sets = collections.defaultdict(list)
     for sib in available_siblings:
         for threads_no in range(1, len(sib) + 1):
-            can_pack[threads_no].append(sib)
+            sibling_sets[threads_no].append(sib)
 
-    def _can_pack_instance_cell(instance_cell, threads_per_core, cores_list):
-        """Determines if instance cell can fit an avail set of cores."""
-
-        if threads_per_core * len(cores_list) < len(instance_cell):
-            return False
-        return True
+    pinning = None
+    threads_no = 1
 
     def _orphans(instance_cell, threads_per_core):
         """Number of instance CPUs which will not fill up a host core.
@@ -702,7 +698,7 @@ def _pack_instance_onto_cores(available_siblings, instance_cell, host_cell_id):
         Best explained by an example: consider set of free host cores as such:
             [(0, 1), (3, 5), (6, 7, 8)]
         This would be a case of 2 threads_per_core AKA an entry for 2 in the
-        can_pack structure.
+        sibling_sets structure.
 
         If we attempt to pack a 5 core instance on it - due to the fact that we
         iterate the list in order, we will end up with a single core of the
@@ -733,25 +729,59 @@ def _pack_instance_onto_cores(available_siblings, instance_cell, host_cell_id):
         return fractions.gcd(threads_per_core, _orphans(instance_cell,
                                                         threads_per_core))
 
-    # We iterate over the can_pack dict in descending order of cores that
-    # can be packed - an attempt to get even distribution over time
-    for cores_per_sib, sib_list in sorted(
-            (t for t in can_pack.items()), reverse=True):
-        if _can_pack_instance_cell(instance_cell,
-                                   cores_per_sib, sib_list):
-            sliced_sibs = map(lambda s: list(s)[:cores_per_sib], sib_list)
-            pinning = zip(sorted(instance_cell.cpuset),
-                          itertools.chain(*sliced_sibs))
+    if (instance_cell.cpu_thread_policy ==
+            fields.CPUThreadAllocationPolicy.REQUIRE):
+        LOG.debug("Requested 'require' thread policy for %d cores",
+                  len(instance_cell))
+    elif (instance_cell.cpu_thread_policy ==
+            fields.CPUThreadAllocationPolicy.PREFER):
+        LOG.debug("Request 'prefer' thread policy for %d cores",
+                  len(instance_cell))
+    elif (instance_cell.cpu_thread_policy ==
+            fields.CPUThreadAllocationPolicy.ISOLATE):
+        LOG.debug("Requested 'isolate' thread policy for %d cores",
+                  len(instance_cell))
+        raise NotImplementedError("The 'isolate' policy is not supported.")
+    else:
+        LOG.debug("User did not specify a thread policy. Using default "
+                  "for %d cores", len(instance_cell))
 
-            threads = _threads(instance_cell, cores_per_sib)
-            cores = len(instance_cell) / threads
-            topology = objects.VirtCPUTopology(sockets=1,
-                                               cores=cores,
-                                               threads=threads)
-            instance_cell.pin_vcpus(*pinning)
-            instance_cell.cpu_topology = topology
-            instance_cell.id = host_cell_id
-            return instance_cell
+    # NOTE(ndipanov): We iterate over the sibling sets in descending order
+    # of cores that can be packed. This is an attempt to evenly distribute
+    # instances among physical cores
+    for threads_no, sibling_set in sorted(
+            (t for t in sibling_sets.items()), reverse=True):
+        if threads_no * len(sibling_set) < len(instance_cell):
+            continue
+
+        usable_cores = map(lambda s: list(s)[:threads_no], sibling_set)
+
+        threads_no = _threads(instance_cell, threads_no)
+
+        # NOTE(sfinucan): The key difference between the require and
+        # prefer policies is that require will not settle for non-siblings
+        # if this is all that is available. Enforce this by ensuring we're
+        # using sibling sets that contain at least one sibling
+        if (instance_cell.cpu_thread_policy ==
+                fields.CPUThreadAllocationPolicy.REQUIRE):
+            if threads_no <= 1:
+                continue
+
+        pinning = zip(sorted(instance_cell.cpuset),
+                      itertools.chain(*usable_cores))
+
+        break
+
+    if not pinning:
+        return
+
+    topology = objects.VirtCPUTopology(sockets=1,
+                                       cores=len(pinning) / threads_no,
+                                       threads=threads_no)
+    instance_cell.pin_vcpus(*pinning)
+    instance_cell.cpu_topology = topology
+    instance_cell.id = host_cell_id
+    return instance_cell
 
 
 def _numa_fit_instance_cell_with_pinning(host_cell, instance_cell):
@@ -1063,7 +1093,6 @@ def _add_cpu_pinning_constraint(flavor, image_meta, numa_topology):
         for cell in numa_topology.cells:
             cell.cpu_policy = cpu_policy
             cell.cpu_thread_policy = cpu_thread_policy
-        return numa_topology
     else:
         single_cell = objects.InstanceNUMACell(
                 id=0,
@@ -1072,7 +1101,8 @@ def _add_cpu_pinning_constraint(flavor, image_meta, numa_topology):
                 cpu_policy=cpu_policy,
                 cpu_thread_policy=cpu_thread_policy)
         numa_topology = objects.InstanceNUMATopology(cells=[single_cell])
-        return numa_topology
+
+    return numa_topology
 
 
 # TODO(sahid): Move numa related to hardward/numa.py