diff --git a/.zuul.yaml b/.zuul.yaml index 6de49f379809..b60ec352a44e 100644 --- a/.zuul.yaml +++ b/.zuul.yaml @@ -138,6 +138,44 @@ block_migrate_cinder_iscsi: true post-run: playbooks/nova-live-migration/post-run.yaml +- job: + name: nova-graceful-shutdown + parent: devstack-multinode + description: | + Run Nova graceful shutdown tests. + run: playbooks/nova-graceful-shutdown/run.yaml + timeout: 10800 + vars: + devstack_services: + neutron-trunk: true + openstack-cli-server: true + s-account: false + s-container: false + s-object: false + s-proxy: false + c-bak: false + tempest_test_regex: '' + devstack_localrc: + <<: *uec_image_vars + SERVICE_GRACEFUL_SHUTDOWN_TIMEOUT: 180 + NOVA_ALLOW_MOVE_TO_SAME_HOST: false + LIVE_MIGRATION_AVAILABLE: true + USE_BLOCK_MIGRATION_FOR_LIVE_MIGRATION: true + group-vars: + subnode: + devstack_services: + openstack-cli-server: true + s-account: false + s-container: false + s-object: false + s-proxy: false + c-bak: false + devstack_localrc: + SERVICE_GRACEFUL_SHUTDOWN_TIMEOUT: 180 + NOVA_ALLOW_MOVE_TO_SAME_HOST: false + LIVE_MIGRATION_AVAILABLE: true + USE_BLOCK_MIGRATION_FOR_LIVE_MIGRATION: true + - job: name: nova-alt-configurations parent: tempest-multinode-full-py3 @@ -829,6 +867,9 @@ - ^nova/network/.*$ - nova/virt/libvirt/vif.py - nova-live-migration + # NOTE(gmaan): We will be running the graceful shutdown testing in + # check pipeline only and not required to test in gate as such. 
+ - nova-graceful-shutdown - nova-live-migration-ceph - nova-lvm - nova-multi-cell diff --git a/nova/compute/manager.py b/nova/compute/manager.py index 867bc16ffb2c..4cbc6c69a500 100644 --- a/nova/compute/manager.py +++ b/nova/compute/manager.py @@ -642,7 +642,7 @@ class ComputeVirtAPI(virtapi.VirtAPI): class ComputeManager(manager.Manager): """Manages the running instances from creation to destruction.""" - target = messaging.Target(version='6.4') + target = messaging.Target(version='6.5') def __init__(self, compute_driver=None, *args, **kwargs): """Load configuration options and connect to the hypervisor.""" diff --git a/nova/compute/rpcapi.py b/nova/compute/rpcapi.py index 79cbc2ab6123..a97fdf58b891 100644 --- a/nova/compute/rpcapi.py +++ b/nova/compute/rpcapi.py @@ -406,6 +406,7 @@ class ComputeAPI(object): * 6.2 - Add target_state parameter to rebuild_instance() * 6.3 - Add delete_attachment parameter to remove_volume_connection * 6.4 - Add allow_share() and deny_share() + * 6.5 - Add 2nd RPC server with new topic 'compute-alt' ''' VERSION_ALIASES = { @@ -564,6 +565,33 @@ class ComputeAPI(object): serializer=serializer, call_monitor_timeout=cmt) + def prepare_for_alt_rpcserver( + self, client, server, version, **kwargs): + # NOTE(gmaan): By override the 'topic' in prepare() method, we make + # this rpc client to send the message to the different RPC server, + # which listen to RPC_TOPIC_ALT (the RPC server which is active during + # compute service graceful shutdown). + topic = RPC_TOPIC_ALT + msg = _("RPC: Sending the message to topic: %s") % topic + + # NOTE(gmann): The old compute will not have the new 2nd RPC server + # so we need to handle it with RPC versioning. For the old compute, + # it will fallback to send the message to the original RPC server, + # which listen to RPC_TOPIC. 
+ if not client.can_send_version('6.5'): + topic = RPC_TOPIC + msg = _("Fallback to send the message to original topic: %s as " + "RPC version is too old.") % topic + + LOG.debug(msg) + + params = { + 'server': server, + 'version': version, + 'topic': topic} + params.update(kwargs) + return client.prepare(**params) + def add_fixed_ip_to_instance(self, ctxt, instance, network_id): version = self._ver(ctxt, '5.0') cctxt = self.router.client(ctxt).prepare( @@ -604,6 +632,12 @@ class ComputeAPI(object): kwargs.pop('migration') kwargs.pop('limits') version = '5.0' + # NOTE(gmaan): Most of the live migration RPC methods use the + # 'compute-alt' topic, but this RPC method should use the 'compute' + # topic. If a shutdown is initiated on the destination compute, the + # RPC server for the 'compute' topic will be stopped. If a live + # migration request arrives after that, the destination compute node + # should not take it. cctxt = client.prepare(server=destination, version=version, call_monitor_timeout=CONF.rpc_response_timeout, timeout=CONF.long_rpc_timeout) @@ -613,6 +647,10 @@ class ComputeAPI(object): version = self._ver(ctxt, '5.0') client = self.router.client(ctxt) source = _compute_host(None, instance) + # NOTE(gmaan): Like check_can_live_migrate_destination, this RPC + # method should use topic 'compute'. If a shutdown is initiated + # on the source compute and, after that, a live migration request + # arrives, the source compute should not take it. cctxt = client.prepare(server=source, version=version) return cctxt.call(ctxt, 'check_can_live_migrate_source', instance=instance, @@ -859,8 +897,14 @@ class ComputeAPI(object): def validate_console_port(self, ctxt, instance, port, console_type): version = self._ver(ctxt, '5.0') - cctxt = self.router.client(ctxt).prepare( - server=_compute_host(None, instance), version=version) + client = self.router.client(ctxt) + # NOTE(gmaan): Send this RPC request to 'compute-alt' topic. 
This is + # called when the console is already requested. If shutdown is + # requested after that, compute should finish the port validation + # so that users can get their requested console. + cctxt = self.prepare_for_alt_rpcserver( + client, + server=_compute_host(None, instance), version=version) return cctxt.call(ctxt, 'validate_console_port', instance=instance, port=port, console_type=console_type) @@ -896,7 +940,13 @@ class ComputeAPI(object): migration, migrate_data=None): version = self._ver(ctxt, '5.0') client = self.router.client(ctxt) - cctxt = client.prepare(server=host, version=version) + # NOTE(gmaan): Send this RPC request to 'compute-alt' topic. At this + # stage, both the source and destination compute have already confirmed + # that live migration can proceed. If the shutdown is initiated after + # that, the compute should finish the live migration using the + # 'compute-alt' RPC server. + cctxt = self.prepare_for_alt_rpcserver( + client, server=host, version=version) cctxt.cast(ctxt, 'live_migration', instance=instance, dest=dest, block_migration=block_migration, migrate_data=migrate_data, migration=migration) @@ -925,7 +975,12 @@ class ComputeAPI(object): def post_live_migration_at_destination(self, ctxt, instance, block_migration, host): version = self._ver(ctxt, '5.0') - cctxt = self.router.client(ctxt).prepare( + client = self.router.client(ctxt) + # NOTE(gmaan): Send this RPC request to 'compute-alt' topic. If the + # shutdown is initiated during live migration, the compute should + # finish the live migration using the 'compute-alt' RPC server. 
+ cctxt = self.prepare_for_alt_rpcserver( + client, server=host, version=version, call_monitor_timeout=CONF.rpc_response_timeout, timeout=CONF.long_rpc_timeout) @@ -943,9 +998,14 @@ class ComputeAPI(object): version = '5.0' # We just need to honor the argument in the v5.0 RPC API method msg_args['block_migration'] = None - cctxt = client.prepare(server=host, version=version, - timeout=CONF.long_rpc_timeout, - call_monitor_timeout=CONF.rpc_response_timeout) + # NOTE(gmaan): Send this RPC request to 'compute-alt' topic. If the + # shutdown is initiated during live migration, the compute should + # finish the live migration using the 'compute-alt' RPC server. + cctxt = self.prepare_for_alt_rpcserver( + client, + server=host, version=version, + timeout=CONF.long_rpc_timeout, + call_monitor_timeout=CONF.rpc_response_timeout) return cctxt.call(ctxt, 'pre_live_migration', instance=instance, disk=disk, migrate_data=migrate_data, @@ -1153,8 +1213,12 @@ class ComputeAPI(object): if not client.can_send_version(version): kwargs.pop('delete_attachment') version = self._ver(ctxt, '5.0') - - cctxt = client.prepare(server=host, version=version) + # NOTE(gmaan): This is called during live migration rollback. Send + # this RPC request to 'compute-alt' topic. If the shutdown is initiated + # during live migration rollback, the compute should finish the it + # using the 'compute-alt' RPC server. + cctxt = self.prepare_for_alt_rpcserver( + client, server=host, version=version) return cctxt.call(ctxt, 'remove_volume_connection', **kwargs) def rescue_instance(self, ctxt, instance, rescue_password, @@ -1254,7 +1318,12 @@ class ComputeAPI(object): migrate_data): version = self._ver(ctxt, '5.0') client = self.router.client(ctxt) - cctxt = client.prepare(server=host, version=version) + # NOTE(gmaan): This is called during live migration rollback. Send + # this RPC request to 'compute-alt' topic. 
If the shutdown is initiated + # during live migration rollback, the compute should finish it using + # the 'compute-alt' RPC server. + cctxt = self.prepare_for_alt_rpcserver( + client, server=host, version=version) cctxt.cast(ctxt, 'rollback_live_migration_at_destination', instance=instance, destroy_disks=destroy_disks, migrate_data=migrate_data) @@ -1278,7 +1347,12 @@ class ComputeAPI(object): """ version = self._ver(ctxt, '5.3') client = self.router.client(ctxt) - cctxt = client.prepare(server=host, version=version) + # NOTE(gmaan): This is called during live migration rollback. Send + # this RPC request to 'compute-alt' topic. If the shutdown is initiated + # during live migration rollback, the compute should finish it using + # the 'compute-alt' RPC server. + cctxt = self.prepare_for_alt_rpcserver( + client, server=host, version=version) cctxt.call(ctxt, 'drop_move_claim_at_destination', instance=instance) def set_admin_password(self, ctxt, instance, new_pass): @@ -1515,8 +1589,13 @@ class ComputeAPI(object): def external_instance_event(self, ctxt, instances, events, host=None): instance = instances[0] version = self._ver(ctxt, '5.0') - cctxt = self.router.client(ctxt).prepare( - server=_compute_host(host, instance), + client = self.router.client(ctxt) + # NOTE(gmaan): This is initiated by the external services (for + # example, neutron send event for network change) and let's not block + # them during shutdown. Make this RPC request to 'compute-alt' topic. 
+ cctxt = self.prepare_for_alt_rpcserver( + client, + _compute_host(host, instance), version=version) cctxt.cast(ctxt, 'external_instance_event', instances=instances, events=events) diff --git a/nova/objects/service.py b/nova/objects/service.py index ba1dcd30b027..9b59d3ee57e5 100644 --- a/nova/objects/service.py +++ b/nova/objects/service.py @@ -37,7 +37,7 @@ __all__ = [ # NOTE(danms): This is the global service version counter -SERVICE_VERSION = 70 +SERVICE_VERSION = 71 # NOTE(danms): This is our SERVICE_VERSION history. The idea is that any @@ -249,6 +249,9 @@ SERVICE_VERSION_HISTORY = ( # Version 70: Compute RPC v6.4: # Compute manager supports USB controller model traits {'compute_rpc': '6.4'}, + # Version 71: Compute RPC v6.5: + # Add 2nd RPC server for compute service + {'compute_rpc': '6.5'}, ) # This is the version after which we can rely on having a persistent diff --git a/nova/test.py b/nova/test.py index ddbbd33c735c..6976f49bb9a1 100644 --- a/nova/test.py +++ b/nova/test.py @@ -461,6 +461,12 @@ class TestCase(base.BaseTestCase): if host is not None: # Make sure that CONF.host is relevant to the right hostname self.useFixture(nova_fixtures.ConfPatcher(host=host)) + # By default, service creates a RPC server for auto populated + # 'topic' from service binary name. For compute service, we need + # to create the 2nd RPC server which will be done by pass the + # 'topic_alt' explicitly. 
+ if name == 'compute' and 'topic_alt' not in kwargs: + kwargs['topic_alt'] = compute_rpcapi.RPC_TOPIC_ALT if name == 'compute' and self.USES_DB: # NOTE(danms): We need to create the HostMapping first, because diff --git a/nova/tests/unit/compute/test_rpcapi.py b/nova/tests/unit/compute/test_rpcapi.py index f6d6be0fc07d..2502291457ec 100644 --- a/nova/tests/unit/compute/test_rpcapi.py +++ b/nova/tests/unit/compute/test_rpcapi.py @@ -131,10 +131,13 @@ class ComputeRpcAPITestCase(test.NoDBTestCase): prepare_extra_kwargs = {} cm_timeout = kwargs.pop('call_monitor_timeout', None) timeout = kwargs.pop('timeout', None) + topic_alt = kwargs.pop('topic_alt', None) if cm_timeout: prepare_extra_kwargs['call_monitor_timeout'] = cm_timeout if timeout: prepare_extra_kwargs['timeout'] = timeout + if topic_alt: + prepare_extra_kwargs['topic'] = topic_alt # NOTE(sbauza): If expected args are provided, we need to use them # for the expected kwargs and just add the needed _return_value that @@ -368,7 +371,8 @@ class ComputeRpcAPITestCase(test.NoDBTestCase): def test_validate_console_port(self): self._test_compute_api('validate_console_port', 'call', instance=self.fake_instance_obj, port="5900", - console_type="novnc", version='6.0') + console_type="novnc", version='6.0', + topic_alt=compute_rpcapi.RPC_TOPIC_ALT) def test_host_maintenance_mode(self): self._test_compute_api('host_maintenance_mode', 'call', @@ -387,7 +391,8 @@ class ComputeRpcAPITestCase(test.NoDBTestCase): instance=self.fake_instance_obj, dest='dest', block_migration='blockity_block', host='tsoh', migration='migration', - migrate_data={}, version='6.0') + migrate_data={}, version='6.0', + topic_alt=compute_rpcapi.RPC_TOPIC_ALT) def test_live_migration_force_complete(self): migration = migration_obj.Migration() @@ -420,7 +425,8 @@ class ComputeRpcAPITestCase(test.NoDBTestCase): self._test_compute_api('post_live_migration_at_destination', 'call', instance=self.fake_instance_obj, block_migration='block_migration', 
host='host', version='6.0', - timeout=1234, call_monitor_timeout=60) + timeout=1234, call_monitor_timeout=60, + topic_alt=compute_rpcapi.RPC_TOPIC_ALT) def test_pause_instance(self): self._test_compute_api('pause_instance', 'cast', @@ -448,7 +454,8 @@ class ComputeRpcAPITestCase(test.NoDBTestCase): expected_args, instance=self.fake_instance_obj, block_migration='block_migration', disk='disk', host='host', migrate_data=None, version='6.0', - call_monitor_timeout=60, timeout=1234) + call_monitor_timeout=60, timeout=1234, + topic_alt=compute_rpcapi.RPC_TOPIC_ALT) def test_supports_numa_live_migration(self): mock_client = mock.MagicMock() @@ -506,10 +513,19 @@ class ComputeRpcAPITestCase(test.NoDBTestCase): instance=self.fake_instance_obj, block_migration=False, disk_over_commit=False) + def test_rollback_live_migration_at_destination(self): + self._test_compute_api('rollback_live_migration_at_destination', + 'cast', instance=self.fake_instance_obj, + host='host', destroy_disks=True, + migrate_data=None, version='6.0', + _return_value=None, + topic_alt=compute_rpcapi.RPC_TOPIC_ALT) + def test_drop_move_claim_at_destination(self): self._test_compute_api('drop_move_claim_at_destination', 'call', instance=self.fake_instance_obj, host='host', - version='6.0', _return_value=None) + version='6.0', _return_value=None, + topic_alt=compute_rpcapi.RPC_TOPIC_ALT) def test_prep_resize(self): self._test_compute_api('prep_resize', 'cast', @@ -965,7 +981,8 @@ class ComputeRpcAPITestCase(test.NoDBTestCase): def test_remove_volume_connection(self): self._test_compute_api('remove_volume_connection', 'call', instance=self.fake_instance_obj, volume_id='id', host='host', - delete_attachment=True, version='6.3') + delete_attachment=True, version='6.3', + topic_alt=compute_rpcapi.RPC_TOPIC_ALT) def test_rescue_instance(self): self._test_compute_api('rescue_instance', 'cast', @@ -1218,7 +1235,8 @@ class ComputeRpcAPITestCase(test.NoDBTestCase): 
self._test_compute_api('external_instance_event', 'cast', instances=[self.fake_instance_obj], events=['event'], - version='6.0') + version='6.0', + topic_alt=compute_rpcapi.RPC_TOPIC_ALT) def test_build_and_run_instance(self): # With rpcapi 5.11, when a list of accel_uuids is passed as a param, @@ -1354,3 +1372,45 @@ class ComputeRpcAPITestCase(test.NoDBTestCase): instance=self.fake_instance_obj, share_mapping=self.get_fake_share_mapping(), version='6.4') + + def test_prepare_for_alt_rpcserver_select_topic_alt(self): + rpcapi = compute_rpcapi.ComputeAPI() + mock_client = mock.MagicMock() + mock_client.can_send_version.return_value = True + rpcapi.prepare_for_alt_rpcserver( + mock_client, server='fake_host', version='6.5') + mock_client.can_send_version.assert_called_once_with('6.5') + mock_client.prepare.assert_called_once_with( + server='fake_host', + version='6.5', + topic=compute_rpcapi.RPC_TOPIC_ALT) + + def test_prepare_for_alt_rpcserver_fallback_topic_for_old_compute(self): + rpcapi = compute_rpcapi.ComputeAPI() + mock_client = mock.MagicMock() + mock_client.can_send_version.return_value = False + rpcapi.prepare_for_alt_rpcserver( + mock_client, server='fake_host', version='6.0') + mock_client.can_send_version.assert_called_once_with('6.5') + mock_client.prepare.assert_called_once_with( + server='fake_host', + version='6.0', + topic=compute_rpcapi.RPC_TOPIC) + + def test_prepare_for_alt_rpcserver_with_extra_kwargs(self): + rpcapi = compute_rpcapi.ComputeAPI() + mock_client = mock.MagicMock() + mock_client.can_send_version.return_value = True + rpcapi.prepare_for_alt_rpcserver( + mock_client, + server='fake_host', + version='6.5', + call_monitor_timeout=60, + timeout=120) + mock_client.can_send_version.assert_called_once_with('6.5') + mock_client.prepare.assert_called_once_with( + server='fake_host', + version='6.5', + topic=compute_rpcapi.RPC_TOPIC_ALT, + call_monitor_timeout=60, + timeout=120) diff --git a/playbooks/nova-graceful-shutdown/run.yaml 
b/playbooks/nova-graceful-shutdown/run.yaml new file mode 100644 index 000000000000..99130cb82b1f --- /dev/null +++ b/playbooks/nova-graceful-shutdown/run.yaml @@ -0,0 +1,7 @@ +--- +- hosts: all + roles: + - orchestrate-devstack +- hosts: controller + roles: + - run-graceful-shutdown-tests diff --git a/releasenotes/notes/nova-services-graceful-shutdown-564a321e2769152d.yaml b/releasenotes/notes/nova-services-graceful-shutdown-564a321e2769152d.yaml new file mode 100644 index 000000000000..744037fc381f --- /dev/null +++ b/releasenotes/notes/nova-services-graceful-shutdown-564a321e2769152d.yaml @@ -0,0 +1,55 @@ +--- +features: + - | + Nova services now support graceful shutdown on ``SIGTERM``. When a service + receives ``SIGTERM``, it will stop accepting new RPC requests and wait for + in-progress tasks to reach a safe termination point. + + The compute service creates a second RPC server on a ``compute-alt`` topic + which remains active during graceful shutdown, allowing the compute service + to finish in-progress tasks. + + Currently, the below operations use the second RPC server: + + * Live migration + * Server external event + * Get console output + + Nova added two new configuration options which control this behavior: + + * ``[DEFAULT]/graceful_shutdown_timeout`` - The overall time the service + waits before forcefully exiting. This defaults to 180 seconds for each + Nova service. + * ``[DEFAULT]/manager_shutdown_timeout`` - The time the service manager + waits for in-progress tasks to complete during graceful shutdown. This + defaults to 160 seconds for each service manager. This must be less + than ``graceful_shutdown_timeout``. + + You can increase these timeouts based on the traffic and how long the + long-running tasks (e.g. live migrations) take in your deployment. + + We plan to improve the graceful shutdown in future releases by task + tracking and transitioning resources to a recoverable state. Until then, + this feature is experimental. 
+upgrade: + - | + The default value of ``[DEFAULT]/graceful_shutdown_timeout`` has been + changed from 60 to 180 seconds for all Nova services. This means that + when a Nova service receives ``SIGTERM``, it will now wait up to 180 + seconds for a graceful shutdown before being forcefully terminated. + Operators using external systems (e.g. k8s, systemd) to manage the + Nova services should ensure that their service stop timeouts are set + to at least ``graceful_shutdown_timeout`` to avoid forcefully killing + the service before Nova finishes its graceful shutdown. For example, + the systemd ``TimeoutStopSec`` should be set to at least 180 seconds (or + greater) for Nova services. + - | + A new configuration option ``[DEFAULT]/manager_shutdown_timeout`` has been + added with a default value of 160 seconds. This controls how long the + service manager waits for in-progress tasks to finish during graceful + shutdown. Operators may want to tune this value based on how long their + typical long-running operations (e.g. live migrations) take to complete. + - | + The compute service now creates a second RPC server on the ``compute-alt`` + topic. This means each compute worker will create an additional RabbitMQ + queue. diff --git a/roles/run-graceful-shutdown-tests/README.rst b/roles/run-graceful-shutdown-tests/README.rst new file mode 100644 index 000000000000..f15e39b0c224 --- /dev/null +++ b/roles/run-graceful-shutdown-tests/README.rst @@ -0,0 +1 @@ +Run Nova graceful shutdown tests and verify the operations. 
diff --git a/roles/run-graceful-shutdown-tests/files/cleanup_test_servers.sh b/roles/run-graceful-shutdown-tests/files/cleanup_test_servers.sh new file mode 100755 index 000000000000..35dd69940394 --- /dev/null +++ b/roles/run-graceful-shutdown-tests/files/cleanup_test_servers.sh @@ -0,0 +1,47 @@ +#!/bin/bash +source /opt/stack/devstack/openrc admin +set -x +set -e + +confirm_resize() { + local server=$1 + + echo "Confirming resize on ${server}" + openstack server resize confirm "${server}" + + count=0 + while true; do + status=$(openstack server show "${server}" -f value -c status 2>/dev/null || echo "NOT_FOUND") + if [ "${status}" == "ACTIVE" ] || [ "${status}" == "ERROR" ]; then + break + fi + sleep 5 + count=$((count+1)) + if [ ${count} -eq 10 ]; then + echo "Timed out waiting for ${server} to be ACTIVE or Error after confirm resize" + break + fi + done +} + +cleanup_server() { + local server=$1 + + status=$(openstack server show "${server}" -f value -c status 2>/dev/null || echo "NOT_FOUND") + + if [ "${status}" == "VERIFY_RESIZE" ]; then + confirm_resize "${server}" + fi + + status=$(openstack server show "${server}" -f value -c status 2>/dev/null || echo "NOT_FOUND") + if [ "${status}" == "ACTIVE" ] || [ "${status}" == "ERROR" ]; then + echo "Deleting ${server} (status: ${status})" + openstack server delete --wait "${server}" + else + echo "Skipping ${server} deletion (status: ${status})" + fi +} + +for server in "$@"; do + cleanup_server "${server}" +done diff --git a/roles/run-graceful-shutdown-tests/files/start_and_verify_compute_service.sh b/roles/run-graceful-shutdown-tests/files/start_and_verify_compute_service.sh new file mode 100755 index 000000000000..7bcc3f7e6d17 --- /dev/null +++ b/roles/run-graceful-shutdown-tests/files/start_and_verify_compute_service.sh @@ -0,0 +1,39 @@ +#!/bin/bash +set -x +set -e + +COMPUTE_HOST=$1 +EXPECTED_STATE=${2:-active} + +get_service_status() { + local host=$1 + local status + status=$(ssh "${host}" systemctl 
is-active devstack@n-cpu || true) + echo "${status}" +} + +wait_for_service_state() { + local host=$1 + local expected=$2 + local timeout=${3:-30} + local count=0 + local status + + status=$(get_service_status "${host}") + while [ "${status}" != "${expected}" ]; do + sleep 5 + count=$((count+1)) + if [ ${count} -eq ${timeout} ]; then + echo "Timed out waiting for compute service on ${host} to be ${expected} (current: ${status})" + exit 5 + fi + status=$(get_service_status "${host}") + done + echo "Compute service on ${host} is ${expected}" +} + +if [ "${EXPECTED_STATE}" == "active" ] && [ "$(get_service_status "${COMPUTE_HOST}")" != "active" ]; then + ssh "${COMPUTE_HOST}" sudo systemctl start devstack@n-cpu +fi + +wait_for_service_state "${COMPUTE_HOST}" "${EXPECTED_STATE}" diff --git a/roles/run-graceful-shutdown-tests/files/start_live_migration.sh b/roles/run-graceful-shutdown-tests/files/start_live_migration.sh new file mode 100755 index 000000000000..5bdefa8954b4 --- /dev/null +++ b/roles/run-graceful-shutdown-tests/files/start_live_migration.sh @@ -0,0 +1,49 @@ +#!/bin/bash +source /opt/stack/devstack/openrc admin +set -x +set -e + +timeout=196 + +server_lm=$1 + +image_id=$(openstack image list -f value -c ID | awk 'NR==1{print $1}') +flavor_id=$(openstack flavor list -f value -c ID | awk 'NR==1{print $1}') +network_id=$(openstack network list --no-share -f value -c ID | awk 'NR==1{print $1}') + +echo "Creating test server on subnode for graceful shutdown live migration test" +openstack --os-compute-api-version 2.74 server create --image ${image_id} --flavor ${flavor_id} \ +--nic net-id=${network_id} --host ${SUBNODE_HOSTNAME} --wait ${server_lm} + +echo "Starting live migration of ${server_lm} to ${CONTROLLER_HOSTNAME}" +openstack server migrate --live-migration \ +--host ${CONTROLLER_HOSTNAME} ${server_lm} + +# Wait for the migration to be in progress before returning so that the +# SIGTERM can be sent while the migrations are in progress. 
+count=0 +while true; do + migration_status=$(openstack server migration list ${server_lm} \ + -f value -c Status 2>/dev/null | head -1) + server_status=$(openstack server show ${server_lm} \ + -f value -c status 2>/dev/null) + task_state=$(openstack server show ${server_lm} \ + -f value -c OS-EXT-STS:task_state 2>/dev/null) + if [ "${migration_status}" == "preparing" ] || \ + [ "${migration_status}" == "running" ] || \ + [ "${task_state}" == "migrating" ]; then + echo "Live migration is in progress (status: ${migration_status}, task_state: ${task_state})" + break + elif [ "${migration_status}" == "completed" ] || \ + { [ "${server_status}" == "ACTIVE" ] && \ + { [ "${task_state}" == "None" ] || [ -z "${task_state}" ]; }; }; then + echo "Live migration has already completed" + exit 2 + fi + + count=$((count+1)) + if [ ${count} -eq ${timeout} ]; then + echo "Timed out waiting for migrations to start" + exit 2 + fi +done diff --git a/roles/run-graceful-shutdown-tests/files/verify_live_migration.sh b/roles/run-graceful-shutdown-tests/files/verify_live_migration.sh new file mode 100755 index 000000000000..77ac265f2bb2 --- /dev/null +++ b/roles/run-graceful-shutdown-tests/files/verify_live_migration.sh @@ -0,0 +1,45 @@ +#!/bin/bash +source /opt/stack/devstack/openrc admin +set -x +set -e + +server=$1 + +# Wait for the server to finish live migration and become ACTIVE with +# no task_state, which indicates the migration has completed. +timeout=360 +count=0 +migration_start=$(date +%s) +while true; do + status=$(openstack server show ${server} -f value -c status) + task_state=$(openstack server show ${server} -f value -c OS-EXT-STS:task_state) + + if [ "${status}" == "ACTIVE" ] && { [ "${task_state}" == "None" ] || [ -z "${task_state}" ]; }; then + migration_end=$(date +%s) + migration_duration=$((migration_end - migration_start)) + echo "Migration is completed in ${migration_duration} seconds." 
+ break + fi + + if [ "${status}" == "ERROR" ]; then + echo "Server went to ERROR status during live migration" + exit 3 + fi + + sleep 5 + count=$((count+1)) + if [ ${count} -eq ${timeout} ]; then + echo "Timed out waiting for live migration to complete" + exit 5 + fi +done + +# Make sure the server moved to the controller. +host=$(openstack server show ${server} -f value -c OS-EXT-SRV-ATTR:host) +if [[ ${host} != ${CONTROLLER_HOSTNAME} ]]; then + echo "Unexpected host ${host} for server after live migration during graceful shutdown." + exit 4 +fi + +echo "Live migration during graceful shutdown completed successfully" +echo "Server ${server} is ACTIVE on ${host}" diff --git a/roles/run-graceful-shutdown-tests/tasks/main.yaml b/roles/run-graceful-shutdown-tests/tasks/main.yaml new file mode 100644 index 000000000000..87b41cafd91a --- /dev/null +++ b/roles/run-graceful-shutdown-tests/tasks/main.yaml @@ -0,0 +1,56 @@ +- name: Graceful shutdown source compute live migration + block: + - name: Start live migrations of test servers + become: true + become_user: stack + script: "start_live_migration.sh server-lm1" + environment: + SUBNODE_HOSTNAME: "{{ hostvars['compute1']['ansible_hostname'] }}" + CONTROLLER_HOSTNAME: "{{ hostvars['controller']['ansible_hostname'] }}" + register: start_live_migrations_result + failed_when: start_live_migrations_result.rc not in [0, 2] + + - name: Set fact if migrations completed or timed out before SIGTERM to source compute + set_fact: + live_migrations_completed_or_timeout: "{{ start_live_migrations_result.rc == 2 }}" + + - name: Run graceful shutdown tests + when: not live_migrations_completed_or_timeout + block: + - name: Send SIGTERM to source compute to start the source compute graceful shutdown + delegate_to: compute1 + become: true + shell: "kill -15 $(systemctl show devstack@n-cpu -p MainPID --value)" + + - name: Verify live migration is completed during graceful shutdown + become: true + become_user: stack + script: 
"verify_live_migration.sh server-lm1" + environment: + CONTROLLER_HOSTNAME: "{{ hostvars['controller']['ansible_hostname'] }}" + + # Sleep for 180 sec: default graceful_shutdown_timeout + - name: Sleep for 180 seconds to allow source compute graceful shutdown to complete + pause: + seconds: 180 + + - name: Verify compute service is stopped after graceful shutdown + become: true + become_user: stack + script: "start_and_verify_compute_service.sh {{ hostvars['compute1']['ansible_hostname'] }} inactive" + + - name: Start and verify subnode compute service is running + become: true + become_user: stack + script: "start_and_verify_compute_service.sh {{ hostvars['compute1']['ansible_hostname'] }}" + + - name: Cleanup test servers + become: true + become_user: stack + script: "cleanup_test_servers.sh server-lm1" + ignore_errors: true + +- name: Fail if any test is skipped + fail: + msg: "One or more tests were skipped because the operation either completed or timed out before the SIGTERM signal." + when: live_migrations_completed_or_timeout