From 244fb3ced0b6f64aeb7bfa0b910f59a9480225f1 Mon Sep 17 00:00:00 2001
From: Tomi Juvonen <tomi.juvonen@nokia.com>
Date: Fri, 17 Apr 2020 12:31:15 +0300
Subject: [PATCH] Detailed session information and enhancements

- Add GET /v1/maintenance/{session_id}/detail
- Add 'maintenance.session' event. This can be used
  to track workflow progress. It gives the percentage
  of hosts maintained.
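  Example of querying the new endpoint (endpoint address and token
  handling here are illustrative):

    curl -H "X-Auth-Token: $TOKEN" \
        "$FENIX_ENDPOINT/v1/maintenance/$SESSION_ID/detail"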

Other enhancements:
- Add Sample VNFM for OpenStack: vnfm.py
  (Kubernetes renamed to vnfm_k8s.py)
- Add Sample VNF for OpenStack:
  maintenance_hot_tpl.yaml
- Update testing instructions (tools)
- Update documentation
- Add more tools for testing:
  - fenix_db_reset (flushes the database)
  - set_config.py (sets the AODH / Ceilometer config)
- Add admin tool: infra_admin.py
  This tool can run the maintenance workflow and
  track its progress
- Make sure everything is written to the database.
  If Fenix is restarted, it initializes existing
  'ongoing' workflows from the database. More functions
  added to the database API and used in example workflows.

Story: 2004336
Task: #27922

Change-Id: I794b11a8684f5fc513cb8f5affcd370ec70f3dbc
Signed-off-by: Tomi Juvonen <tomi.juvonen@nokia.com>
---
 README.rst                                    |   1 +
 doc/source/api-ref/index.rst                  |   6 +-
 doc/source/api-ref/v1/index.rst               |  31 +-
 doc/source/api-ref/v1/maintenance.inc         |  60 +-
 doc/source/api-ref/v1/parameters.yaml         |  85 ++-
 doc/source/api-ref/v1/project.inc             |  18 +-
 .../maintenance-session-detail-get-200.json   | 212 +++++++
 ....json => maintenance-session-get-200.json} |   0
 ...json => maintenance-sessions-get-200.json} |   0
 ...project-maintenance-session-post-200.json} |   0
 doc/source/api-ref/v1/status.yaml             |  44 +-
 doc/source/user/notifications.rst             |  34 +-
 fenix/api/v1/controllers/__init__.py          |   6 +-
 fenix/api/v1/controllers/maintenance.py       |  13 +-
 fenix/api/v1/maintenance.py                   |   6 +-
 fenix/db/api.py                               |  52 ++
 .../versions/001_initial.py                   |   2 -
 fenix/db/sqlalchemy/api.py                    |  75 ++-
 fenix/db/sqlalchemy/models.py                 |   2 -
 .../db/sqlalchemy/test_sqlalchemy_api.py      |   4 +-
 fenix/tools/README.md                         | 180 +++++-
 fenix/tools/fenix_db_reset                    |   9 +
 fenix/tools/infra_admin.py                    | 320 ++++++++++
 fenix/tools/maintenance_hot_tpl.yaml          | 108 ++++
 fenix/tools/session.json                      |   6 +
 fenix/tools/set_config.py                     | 185 ++++++
 fenix/tools/vnfm.py                           | 570 ++++++++++--------
 fenix/tools/vnfm_k8s.py                       | 561 +++++++++++++++++
 fenix/utils/service.py                        |  52 +-
 fenix/workflow/actions/dummy.py               |   4 +
 fenix/workflow/workflow.py                    |  86 ++-
 fenix/workflow/workflows/default.py           |  41 +-
 fenix/workflow/workflows/k8s.py               |  29 +-
 fenix/workflow/workflows/vnf.py               |  58 +-
 34 files changed, 2481 insertions(+), 379 deletions(-)
 create mode 100644 doc/source/api-ref/v1/samples/maintenance-session-detail-get-200.json
 rename doc/source/api-ref/v1/samples/{get-maintenance-session-get-200.json => maintenance-session-get-200.json} (100%)
 rename doc/source/api-ref/v1/samples/{get-maintenance-sessions-get-200.json => maintenance-sessions-get-200.json} (100%)
 rename doc/source/api-ref/v1/samples/{get-project-maintenance-session-post-200.json => project-maintenance-session-post-200.json} (100%)
 create mode 100644 fenix/tools/fenix_db_reset
 create mode 100644 fenix/tools/infra_admin.py
 create mode 100644 fenix/tools/maintenance_hot_tpl.yaml
 create mode 100644 fenix/tools/session.json
 create mode 100644 fenix/tools/set_config.py
 create mode 100644 fenix/tools/vnfm_k8s.py

diff --git a/README.rst b/README.rst
index 6316bdc..6e42154 100644
--- a/README.rst
+++ b/README.rst
@@ -27,6 +27,7 @@ would also be telling about adding or removing a host.
 * Documentation: https://fenix.readthedocs.io/en/latest/index.html
 * Developer Documentation: https://wiki.openstack.org/wiki/Fenix
 * Source: https://opendev.org/x/fenix
+* Running sample workflows: https://opendev.org/x/fenix/src/branch/master/fenix/tools/README.md
 * Bug tracking and Blueprints: https://storyboard.openstack.org/#!/project/x/fenix
 * How to contribute: https://docs.openstack.org/infra/manual/developers.html
 * `Fenix Specifications <specifications/index.html>`_
diff --git a/doc/source/api-ref/index.rst b/doc/source/api-ref/index.rst
index 6af2316..26133ff 100644
--- a/doc/source/api-ref/index.rst
+++ b/doc/source/api-ref/index.rst
@@ -1,6 +1,6 @@
-####################
-Host Maintenance API
-####################
+###
+API
+###
 
 .. toctree::
    :maxdepth: 2
diff --git a/doc/source/api-ref/v1/index.rst b/doc/source/api-ref/v1/index.rst
index f502630..c206d71 100644
--- a/doc/source/api-ref/v1/index.rst
+++ b/doc/source/api-ref/v1/index.rst
@@ -1,28 +1,29 @@
 :tocdepth: 2
 
-#######################
-Host Maintenance API v1
-#######################
+######
+API v1
+######
 
 .. rest_expand_all::
 
-#####
-Admin
-#####
+#########
+Admin API
+#########
 
 These APIs are meant for infrastructure admin who is in charge of triggering
-the rolling maintenance and upgrade workflows.
+the rolling maintenance and upgrade workflow sessions.
 
 .. include:: maintenance.inc
 
-#######
-Project
-#######
+###########
+Project API
+###########
 
-These APIs are meant for projects having instances on top of the infrastructure
-under corresponding rolling maintenance or upgrade session. Usage of these APIs
-expects there is an application manager (VNFM) that can interact with Fenix
-workflow via these APIs. If this is not the case, workflow should have a default
-behavior for instances owned by projects, that are not interacting with Fenix.
+These APIs are meant for projects (tenant/VNF) having instances on top of the
+infrastructure under corresponding rolling maintenance or upgrade session.
+Usage of these APIs expects there to be an application manager (VNFM) that can
+interact with the Fenix workflow via these APIs. If this is not the case, the
+workflow should have a default behavior for instances owned by projects that
+are not interacting with Fenix.
 
 .. include:: project.inc
diff --git a/doc/source/api-ref/v1/maintenance.inc b/doc/source/api-ref/v1/maintenance.inc
index bd77612..43ec09d 100644
--- a/doc/source/api-ref/v1/maintenance.inc
+++ b/doc/source/api-ref/v1/maintenance.inc
@@ -1,13 +1,13 @@
 .. -*- rst -*-
 
-===========
-Maintenance
-===========
+==========================
+Admin workflow session API
+==========================
 
 Create maintenance session
 ==========================
 
-.. rest_method:: POST /v1/maintenance/
+.. rest_method:: POST /v1/maintenance
 
 Create a new maintenance session. You can specify a list of 'hosts' to be
 maintained or have an empty list to indicate those should be self-discovered.
@@ -49,7 +49,7 @@ Response codes
 Update maintenance session (planned future functionality)
 =========================================================
 
-.. rest_method:: PUT /v1/maintenance/{session_id}/
+.. rest_method:: PUT /v1/maintenance/{session_id}
 
 Update existing maintenance session. This can be used to continue a failed
 session after manually fixing what failed. Workflow should then run
@@ -79,7 +79,7 @@ Response codes
 Get maintenance sessions
 ========================
 
-.. rest_method:: GET /v1/maintenance/
+.. rest_method:: GET /v1/maintenance
 
 Get all ongoing maintenance sessions.
 
@@ -88,7 +88,7 @@ Response codes
 
 .. rest_status_code:: success status.yaml
 
-    - 200: get-maintenance-sessions-get
+    - 200: maintenance-sessions-get
 
 .. rest_status_code:: error status.yaml
 
@@ -98,7 +98,7 @@ Response codes
 Get maintenance session
 =======================
 
-.. rest_method:: GET /v1/maintenance/{session_id}/
+.. rest_method:: GET /v1/maintenance/{session_id}
 
 Get a maintenance session state.
 
@@ -114,7 +114,38 @@ Response codes
 
 .. rest_status_code:: success status.yaml
 
-    - 200: get-maintenance-session-get
+    - 200: maintenance-session-get
+
+.. rest_status_code:: error status.yaml
+
+    - 400
+    - 404
+    - 422
+    - 500
+
+Get maintenance session details
+===============================
+
+.. rest_method:: GET /v1/maintenance/{session_id}/detail
+
+Get maintenance session details. This information can be useful to see the
+detailed status of a maintenance session or to troubleshoot a failed session.
+Usually a session fails on a simple problem that can quickly be fixed manually.
+One can then update the maintenance session state to continue from 'prev_state'.
+
+Request
+-------
+
+.. rest_parameters:: parameters.yaml
+
+    - session_id: session_id
+
+Response codes
+--------------
+
+.. rest_status_code:: success status.yaml
+
+    - 200: maintenance-session-detail-get
 
 .. rest_status_code:: error status.yaml
 
@@ -126,7 +157,7 @@ Response codes
 Delete maintenance session
 ==========================
 
-.. rest_method:: DELETE /v1/maintenance/{session_id}/
+.. rest_method:: DELETE /v1/maintenance/{session_id}
 
 Delete a maintenance session. Usually called after the session is successfully
 finished.
@@ -141,12 +172,3 @@ finished.
     - 400
     - 422
     - 500
-
-Future
-======
-
-On top of some expected changes mentioned above, it will also be handy to get
-detailed information about the steps run already in the maintenance session.
-This will be helpful when need to figure out any correcting actions to
-successfully finish a failed session. For now admin can update failed session
-state to previous or his wanted state to try continue a failed session.
diff --git a/doc/source/api-ref/v1/parameters.yaml b/doc/source/api-ref/v1/parameters.yaml
index d29ed58..68d716f 100644
--- a/doc/source/api-ref/v1/parameters.yaml
+++ b/doc/source/api-ref/v1/parameters.yaml
@@ -36,7 +36,7 @@ uuid-path:
 #############################################################################
 action-metadata:
   description: |
-    Metadata; hints to plug-ins
+    Metadata; hints to plug-ins.
   in: body
   required: true
   type: dictionary
@@ -44,7 +44,17 @@ action-metadata:
 action-plugin-name:
   description: |
     plug-in name. Default workflow executes same type of plug-ins in an
-    alphabetical order
+    alphabetical order.
+  in: body
+  required: true
+  type: string
+
+action-plugin-state:
+  description: |
+    Action plug-in state. This is workflow and action plug-in specific
+    information to be passed from the action plug-in to the workflow. It
+    helps in understanding how the action plug-in was executed and in
+    troubleshooting accordingly.
   in: body
   required: true
   type: string
@@ -77,6 +87,20 @@ boolean:
   required: true
   type: boolean
 
+datetime-string:
+  description: |
+    Date and time string according to ISO 8601.
+  in: body
+  required: true
+  type: string
+
+details:
+  description: |
+    Workflow internal detail for special usage, e.g. a nova-compute service id.
+  in: body
+  required: true
+  type: string
+
 group-uuid:
   description: |
     Instance group uuid. Should match with OpenStack server group if one exists.
@@ -84,6 +108,21 @@ group-uuid:
   required: true
   type: string
 
+host-type:
+  description: |
+    Host type as it is meant to be used in the workflow implementation.
+    Example workflows use the values 'compute' and 'controller'.
+  in: body
+  required: false
+  type: list of strings
+
+hostname:
+  description: |
+    Name of the host.
+  in: body
+  required: true
+  type: string
+
 hosts:
   description: |
     Hosts to be maintained. An empty list can indicate hosts are to be
@@ -102,7 +141,7 @@ instance-action:
 instance-actions:
   description: |
     instance ID : action string. This variable is not needed in reply to state
-    MAINTENANCE, SCALE_IN or MAINTENANCE_COMPLETE
+    MAINTENANCE, SCALE_IN or MAINTENANCE_COMPLETE.
   in: body
   required: true
   type: dictionary
@@ -128,6 +167,14 @@ instance-name:
   required: true
   type: string
 
+instance-state:
+  description: |
+    State of the instance as in the underlying cloud. Can differ between
+    clouds such as OpenStack and Kubernetes.
+  in: body
+  required: true
+  type: string
+
 lead-time:
   description: |
     How long lead time VNF needs for 'migration_type' operation. VNF needs to
@@ -177,30 +224,50 @@ max-interruption-time:
 
 metadata:
   description: |
-    Metadata; like hints to projects
+    Hint for the project/tenant/VNF about what capability the infrastructure
+    offers to an instance when it moves to an already maintained host in the
+    'PLANNED_MAINTENANCE' state action. This may affect how the instance is
+    to be moved, or whether the instance is to be upgraded and the VNF needs
+    to re-instantiate it as its 'OWN_ACTION'. This could be the case with new
+    hardware, or when the instance should anyhow be upgraded at the same time
+    as the infrastructure maintenance.
   in: body
   required: true
   type: dictionary
 
 migration-type:
   description: |
-    LIVE_MIGRATION, MIGRATION or OWN_ACTION
+    'LIVE_MIGRATE', 'MIGRATE' or 'OWN_ACTION'
     Own action is create new and delete old instance.
     Note! VNF need to obey resource_mitigation with own action
     This affects to order of delete old and create new to not over
-    commit the resources. In Kubernetes also EVICTION supported. There admin
+    commit the resources. In Kubernetes also 'EVICTION' is supported. There the admin
     will delete instance and VNF automation like ReplicaSet will make a new
-    instance
+    instance.
   in: body
   required: true
   type: string
 
+percent_done:
+  description: |
+    Percentage of hosts already maintained.
+  in: body
+  required: true
+  type: integer
+
+plugin:
+  description: |
+    Action plugin name.
+  in: body
+  required: true
+  type: string
+
 recovery-time:
   description: |
     VNF recovery time after operation to instance. Workflow needs to take
     into account recovery_time for previous instance moved and only then
     start moving next obyeing  max_impacted_members
-    Note! regardless anti_affinity group or not
+    Note! regardless anti_affinity group or not.
   in: body
   required: true
   type: integer
@@ -255,7 +322,7 @@ workflow-name:
 
 workflow-state:
   description: |
-    Maintenance workflow state.
+    Maintenance workflow state (states are explained in the user guide).
   in: body
   required: true
   type: string
diff --git a/doc/source/api-ref/v1/project.inc b/doc/source/api-ref/v1/project.inc
index d6441fa..25716ec 100644
--- a/doc/source/api-ref/v1/project.inc
+++ b/doc/source/api-ref/v1/project.inc
@@ -1,8 +1,8 @@
 .. -*- rst -*-
 
-=======
-Project
-=======
+============================
+Project workflow session API
+============================
 
 These APIs are generic for any cloud as instance ID should be something that can
 be matched to virtual machines or containers regardless of the cloud underneath.
@@ -10,7 +10,7 @@ be matched to virtual machines or containers regardless of the cloud underneath.
 Get project maintenance session
 ===============================
 
-.. rest_method:: GET /v1/maintenance/{session_id}/{project_id}/
+.. rest_method:: GET /v1/maintenance/{session_id}/{project_id}
 
 Get project instances belonging to the current state of maintenance session.
 the Project-manager receives an AODH event alarm telling about different
@@ -31,7 +31,7 @@ Response codes
 
 .. rest_status_code:: success status.yaml
 
-    - 200: get-project-maintenance-session-post
+    - 200: project-maintenance-session-post
 
 .. rest_status_code:: error status.yaml
 
@@ -42,7 +42,7 @@ Response codes
 Input from project to maintenance session
 =========================================
 
-.. rest_method:: PUT /v1/maintenance/{session_id}/{project_id}/
+.. rest_method:: PUT /v1/maintenance/{session_id}/{project_id}
 
 Project having instances on top of the infrastructure handled by a maintenance
 session might need to make own action for its instances on top of a host going
@@ -78,9 +78,9 @@ Response codes
     - 422
     - 500
 
-============================
-Project with NFV constraints
-============================
+===========================
+Project NFV constraints API
+===========================
 
 These APIs are for VNFs, VNMF and EM that are made to support ETSI defined
 standard VIM interface for sophisticated interaction to optimize rolling
diff --git a/doc/source/api-ref/v1/samples/maintenance-session-detail-get-200.json b/doc/source/api-ref/v1/samples/maintenance-session-detail-get-200.json
new file mode 100644
index 0000000..ae51ebb
--- /dev/null
+++ b/doc/source/api-ref/v1/samples/maintenance-session-detail-get-200.json
@@ -0,0 +1,212 @@
+{
+   "session_id": "47479bca-7f0e-11ea-99c9-2c600c9893ee",
+   "instances": [
+      {
+         "instance_id": "da8f96ae-a1fe-4e6b-a852-6951d513a440",
+         "action_done": false,
+         "host": "overcloud-novacompute-2",
+         "created_at": "2020-04-15T11:43:09.000000",
+         "project_state": "INSTANCE_ACTION_DONE",
+         "updated_at": null,
+         "session_id": "47479bca-7f0e-11ea-99c9-2c600c9893ee",
+         "instance_name": "demo_nonha_app_2",
+         "state": "active",
+         "details": null,
+         "action": null,
+         "project_id": "444b05e6f4764189944f00a7288cd281",
+         "id": "73190018-eab0-4074-bed0-4b0c274a1c8b"
+      },
+      {
+         "instance_id": "22d869d7-2a67-4d70-bb3c-dcc14a014d78",
+         "action_done": false,
+         "host": "overcloud-novacompute-4",
+         "created_at": "2020-04-15T11:43:09.000000",
+         "project_state": "ACK_PLANNED_MAINTENANCE",
+         "updated_at": null,
+         "session_id": "47479bca-7f0e-11ea-99c9-2c600c9893ee",
+         "instance_name": "demo_nonha_app_3",
+         "state": "active",
+         "details": null,
+         "action": "MIGRATE",
+         "project_id": "444b05e6f4764189944f00a7288cd281",
+         "id": "c0930990-65ac-4bca-88cb-7cb0e7d5c420"
+      },
+      {
+         "instance_id": "89467f5c-d5f8-461f-8b5c-236ce54138be",
+         "action_done": false,
+         "host": "overcloud-novacompute-2",
+         "created_at": "2020-04-15T11:43:09.000000",
+         "project_state": "INSTANCE_ACTION_DONE",
+         "updated_at": null,
+         "session_id": "47479bca-7f0e-11ea-99c9-2c600c9893ee",
+         "instance_name": "demo_nonha_app_1",
+         "state": "active",
+         "details": null,
+         "action": null,
+         "project_id": "444b05e6f4764189944f00a7288cd281",
+         "id": "c6eba3ae-cb9e-4a1f-af10-13c66f61e4d9"
+      },
+      {
+         "instance_id": "5243f1a4-9f7b-4c91-abd5-533933bb9c90",
+         "action_done": false,
+         "host": "overcloud-novacompute-3",
+         "created_at": "2020-04-15T11:43:09.000000",
+         "project_state": "INSTANCE_ACTION_DONE",
+         "updated_at": null,
+         "session_id": "47479bca-7f0e-11ea-99c9-2c600c9893ee",
+         "instance_name": "demo_ha_app_0",
+         "state": "active",
+         "details": "floating_ip",
+         "action": null,
+         "project_id": "444b05e6f4764189944f00a7288cd281",
+         "id": "d67176ff-e2e4-45e3-9a52-c069a3a66c5e"
+      },
+      {
+         "instance_id": "4e2e24d7-0e5d-4a92-8edc-e343b33b9f10",
+         "action_done": false,
+         "host": "overcloud-novacompute-3",
+         "created_at": "2020-04-15T11:43:09.000000",
+         "project_state": "INSTANCE_ACTION_DONE",
+         "updated_at": null,
+         "session_id": "47479bca-7f0e-11ea-99c9-2c600c9893ee",
+         "instance_name": "demo_nonha_app_0",
+         "state": "active",
+         "details": null,
+         "action": null,
+         "project_id": "444b05e6f4764189944f00a7288cd281",
+         "id": "f2f7fd7f-8900-4b24-91dc-098f797790e1"
+      },
+      {
+         "instance_id": "92aa44f9-7ce4-4ba4-a29c-e03096ad1047",
+         "action_done": false,
+         "host": "overcloud-novacompute-4",
+         "created_at": "2020-04-15T11:43:09.000000",
+         "project_state": "ACK_PLANNED_MAINTENANCE",
+         "updated_at": null,
+         "session_id": "47479bca-7f0e-11ea-99c9-2c600c9893ee",
+         "instance_name": "demo_ha_app_1",
+         "state": "active",
+         "details": null,
+         "action": "MIGRATE",
+         "project_id": "444b05e6f4764189944f00a7288cd281",
+         "id": "f35c9ba5-e5f7-4843-bae5-7df9bac2a33c"
+      },
+      {
+         "instance_id": "afa2cf43-6a1f-4508-ba59-12b773f8b926",
+         "action_done": false,
+         "host": "overcloud-novacompute-0",
+         "created_at": "2020-04-15T11:43:09.000000",
+         "project_state": "ACK_PLANNED_MAINTENANCE",
+         "updated_at": null,
+         "session_id": "47479bca-7f0e-11ea-99c9-2c600c9893ee",
+         "instance_name": "demo_nonha_app_4",
+         "state": "active",
+         "details": null,
+         "action": "MIGRATE",
+         "project_id": "444b05e6f4764189944f00a7288cd281",
+         "id": "fea38e9b-3d7c-4358-ba2e-06e9c340342d"
+      }
+   ],
+   "state": "PLANNED_MAINTENANCE",
+   "session": {
+      "workflow": "vnf",
+      "created_at": "2020-04-15T11:43:09.000000",
+      "updated_at": "2020-04-15T11:44:04.000000",
+      "session_id": "47479bca-7f0e-11ea-99c9-2c600c9893ee",
+      "maintenance_at": "2020-04-15T11:43:28.000000",
+      "state": "PLANNED_MAINTENANCE",
+      "prev_state": "START_MAINTENANCE",
+      "meta": "{'openstack': 'upgrade'}"
+   },
+   "hosts": [
+      {
+         "created_at": "2020-04-15T11:43:09.000000",
+         "hostname": "overcloud-novacompute-3",
+         "updated_at": null,
+         "session_id": "47479bca-7f0e-11ea-99c9-2c600c9893ee",
+         "disabled": false,
+         "maintained": true,
+         "details": "3de22382-5500-4d13-b9a2-470cc21002ee",
+         "type": "compute",
+         "id": "426ea4b9-4438-44ee-9849-1b3ffcc42ad6",
+      },
+      {
+         "created_at": "2020-04-15T11:43:09.000000",
+         "hostname": "overcloud-novacompute-2",
+         "updated_at": null,
+         "session_id": "47479bca-7f0e-11ea-99c9-2c600c9893ee",
+         "disabled": false,
+         "maintained": true,
+         "details": "91457572-dabf-4aff-aab9-e12a5c6656cd",
+         "type": "compute",
+         "id": "74f0f6d1-520a-4e5b-b69c-c3265d874b14",
+      },
+      {
+         "created_at": "2020-04-15T11:43:09.000000",
+         "hostname": "overcloud-novacompute-5",
+         "updated_at": null,
+         "session_id": "47479bca-7f0e-11ea-99c9-2c600c9893ee",
+         "disabled": false,
+         "maintained": true,
+         "details": "87921762-0c70-4d3e-873a-240cb2e5c0bf",
+         "type": "compute",
+         "id": "8d0f764e-11e8-4b96-8f6a-9c8fc0eebca2",
+      },
+      {
+         "created_at": "2020-04-15T11:43:09.000000",
+         "hostname": "overcloud-novacompute-1",
+         "updated_at": null,
+         "session_id": "47479bca-7f0e-11ea-99c9-2c600c9893ee",
+         "disabled": false,
+         "maintained": true,
+         "details": "52c7270a-cfc2-41dd-a574-f4c4c54aa78d",
+         "type": "compute",
+         "id": "be7fd08c-0c5f-4bf4-a95b-bc3b3c01d918",
+      },
+      {
+         "created_at": "2020-04-15T11:43:09.000000",
+         "hostname": "overcloud-novacompute-0",
+         "updated_at": null,
+         "session_id": "47479bca-7f0e-11ea-99c9-2c600c9893ee",
+         "disabled": true,
+         "maintained": false,
+         "details": "ea68bd0d-a5b6-4f06-9bff-c6eb0b248530",
+         "type": "compute",
+         "id": "ce46f423-e485-4494-8bb7-e1a2b038bb8e",
+      },
+      {
+         "created_at": "2020-04-15T11:43:09.000000",
+         "hostname": "overcloud-novacompute-4",
+         "updated_at": null,
+         "session_id": "47479bca-7f0e-11ea-99c9-2c600c9893ee",
+         "disabled": true,
+         "maintained": false,
+         "details": "d5271d60-db14-4011-9497-b1529486f62b",
+         "type": "compute",
+         "id": "efdf668c-b1cc-4539-bdb6-aea9afbcc897",
+      },
+      {
+         "created_at": "2020-04-15T11:43:09.000000",
+         "hostname": "overcloud-controller-0",
+         "updated_at": null,
+         "session_id": "47479bca-7f0e-11ea-99c9-2c600c9893ee",
+         "disabled": false,
+         "maintained": true,
+         "details": "9a68c85e-42f7-4e40-b64a-2e7a9e2ccd03",
+         "type": "controller",
+         "id": "f4631941-8a51-44ee-b814-11a898729f3c",
+      }
+   ],
+   "percent_done": 71,
+   "action_plugin_instances": [
+      {
+         "created_at": "2020-04-15 11:12:16",
+         "updated_at": null,
+         "id": "4e864972-b692-487b-9204-b4d6470db266",
+         "session_id": "47479bca-7f0e-11ea-99c9-2c600c9893ee",
+         "hostname": "overcloud-novacompute-4",
+         "plugin": "dummy",
+         "state": null
+      }
+   ]
+}
diff --git a/doc/source/api-ref/v1/samples/get-maintenance-session-get-200.json b/doc/source/api-ref/v1/samples/maintenance-session-get-200.json
similarity index 100%
rename from doc/source/api-ref/v1/samples/get-maintenance-session-get-200.json
rename to doc/source/api-ref/v1/samples/maintenance-session-get-200.json
diff --git a/doc/source/api-ref/v1/samples/get-maintenance-sessions-get-200.json b/doc/source/api-ref/v1/samples/maintenance-sessions-get-200.json
similarity index 100%
rename from doc/source/api-ref/v1/samples/get-maintenance-sessions-get-200.json
rename to doc/source/api-ref/v1/samples/maintenance-sessions-get-200.json
diff --git a/doc/source/api-ref/v1/samples/get-project-maintenance-session-post-200.json b/doc/source/api-ref/v1/samples/project-maintenance-session-post-200.json
similarity index 100%
rename from doc/source/api-ref/v1/samples/get-project-maintenance-session-post-200.json
rename to doc/source/api-ref/v1/samples/project-maintenance-session-post-200.json
diff --git a/doc/source/api-ref/v1/status.yaml b/doc/source/api-ref/v1/status.yaml
index 5354736..f6dc2f2 100644
--- a/doc/source/api-ref/v1/status.yaml
+++ b/doc/source/api-ref/v1/status.yaml
@@ -19,28 +19,60 @@
     .. literalinclude:: samples/maintenance-session-put-200.json
       :language: javascript
 
-  get-maintenance-sessions-get: |
+  maintenance-sessions-get: |
     .. rest_parameters:: parameters.yaml
 
       - session_id: uuid-list
 
-    .. literalinclude:: samples/get-maintenance-sessions-get-200.json
+    .. literalinclude:: samples/maintenance-sessions-get-200.json
       :language: javascript
 
-  get-maintenance-session-get: |
+  maintenance-session-get: |
     .. rest_parameters:: parameters.yaml
 
       - state: workflow-state
 
-    .. literalinclude:: samples/get-maintenance-session-get-200.json
+    .. literalinclude:: samples/maintenance-session-get-200.json
       :language: javascript
 
-  get-project-maintenance-session-post: |
+  maintenance-session-detail-get: |
+    .. rest_parameters:: parameters.yaml
+
+      - action: migration-type
+      - action_done: boolean
+      - created_at: datetime-string
+      - details: details
+      - disabled: boolean
+      - host: hostname
+      - hostname: hostname
+      - id: uuid
+      - instance_id: uuid
+      - instance_name: instance-name
+      - maintained: boolean
+      - maintenance_at: datetime-string
+      - meta: metadata
+      - percent_done: percent_done
+      - plugin: plugin
+      - prev_state: workflow-state
+      - project_id: uuid
+      - project_state: workflow-state-reply
+      - session_id: uuid
+      - state(action_plugin_instances): action-plugin-state
+      - state(instances): instance-state
+      - state: workflow-state
+      - type: host-type
+      - updated_at: datetime-string
+      - workflow: workflow-name
+
+    .. literalinclude:: samples/maintenance-session-detail-get-200.json
+      :language: javascript
+
+  project-maintenance-session-post: |
     .. rest_parameters:: parameters.yaml
 
       - instance_ids: instance-ids
 
-    .. literalinclude:: samples/get-project-maintenance-session-post-200.json
+    .. literalinclude:: samples/project-maintenance-session-post-200.json
       :language: javascript
 
 201:
diff --git a/doc/source/user/notifications.rst b/doc/source/user/notifications.rst
index ea2bcc9..60bfd00 100644
--- a/doc/source/user/notifications.rst
+++ b/doc/source/user/notifications.rst
@@ -77,12 +77,38 @@ Example:
 Event type 'maintenance.session'
 --------------------------------
 
---Not yet implemented--
-
 This event type is meant for infrastructure admin to know the changes in the
-ongoing maintenance workflow session. When implemented, there will not be a need
-for polling the state through an API.
+ongoing maintenance workflow session. This can be used instead of polling the
+API. Via the API you get more detailed information if you need to troubleshoot.
 
+payload
+~~~~~~~~
+
++--------------+--------+------------------------------------------------------------------------------+
+| Name         | Type   | Description                                                                  |
++==============+========+==============================================================================+
+| service      | string | Origin service name: Fenix                                                   |
++--------------+--------+------------------------------------------------------------------------------+
+| state        | string | Maintenance workflow state (States explained in the user guide)              |
++--------------+--------+------------------------------------------------------------------------------+
+| session_id   | string | UUID of the related maintenance session                                      |
++--------------+--------+------------------------------------------------------------------------------+
+| percent_done | string | How many percent of hosts are maintained                                     |
++--------------+--------+------------------------------------------------------------------------------+
+| project_id   | string | workflow admin project ID                                                    |
++--------------+--------+------------------------------------------------------------------------------+
+
+Example:
+
+.. code-block:: json
+
+    {
+        "service": "fenix",
+        "state": "IN_MAINTENANCE",
+        "session_id": "76e55df8-1c51-11e8-9928-0242ac110002",
+        "percent_done": 34,
+        "project_id": "ead0dbcaf3564cbbb04842e3e54960e3"
+    }
 
 Project
 =======
diff --git a/fenix/api/v1/controllers/__init__.py b/fenix/api/v1/controllers/__init__.py
index 0c24c86..4062a68 100644
--- a/fenix/api/v1/controllers/__init__.py
+++ b/fenix/api/v1/controllers/__init__.py
@@ -66,7 +66,11 @@ class V1Controller(rest.RestController):
                 else:
                     args[0] = 'http404-nonexistingcontroller'
             elif depth == 3 and route == "maintenance":
-                args[0] = "project"
+                last = self._routes.get(args[2], args[2])
+                if last == "detail":
+                    args[0] = "session"
+                else:
+                    args[0] = "project"
             elif depth == 4 and route == "maintenance":
                 args[0] = "project_instance"
             else:
diff --git a/fenix/api/v1/controllers/maintenance.py b/fenix/api/v1/controllers/maintenance.py
index 19dc0dc..53cf609 100644
--- a/fenix/api/v1/controllers/maintenance.py
+++ b/fenix/api/v1/controllers/maintenance.py
@@ -160,9 +160,10 @@ class SessionController(BaseController):
         self.engine_rpcapi = maintenance.EngineRPCAPI()
 
     # GET /v1/maintenance/<session_id>
+    # GET /v1/maintenance/<session_id>/detail
     @policy.authorize('maintenance:session', 'get')
     @expose(content_type='application/json')
-    def get(self, session_id):
+    def get(self, session_id, detail=None):
         try:
             jsonschema.validate(session_id, schema.uid)
         except jsonschema.exceptions.ValidationError as e:
@@ -173,7 +174,15 @@ class SessionController(BaseController):
             LOG.error("Unexpected data")
             abort(400)
         try:
-            session = self.engine_rpcapi.admin_get_session(session_id)
+            if detail:
+                if detail != "detail":
+                    description = "Invalid path %s" % detail
+                    LOG.error(description)
+                    abort(400, six.text_type(description))
+                session = (
+                    self.engine_rpcapi.admin_get_session_detail(session_id))
+            else:
+                session = self.engine_rpcapi.admin_get_session(session_id)
         except RemoteError as e:
             self.handle_remote_error(e)
         if session is None:
diff --git a/fenix/api/v1/maintenance.py b/fenix/api/v1/maintenance.py
index 956de21..f4b4c38 100644
--- a/fenix/api/v1/maintenance.py
+++ b/fenix/api/v1/maintenance.py
@@ -37,9 +37,13 @@ class EngineRPCAPI(service.RPCClient):
         return self.call('admin_create_session', data=data)
 
     def admin_get_session(self, session_id):
-        """Get maintenance workflow session details"""
+        """Get maintenance workflow session state"""
         return self.call('admin_get_session', session_id=session_id)
 
+    def admin_get_session_detail(self, session_id):
+        """Get maintenance workflow session details"""
+        return self.call('admin_get_session_detail', session_id=session_id)
+
     def admin_delete_session(self, session_id):
         """Delete maintenance workflow session thread"""
         return self.call('admin_delete_session', session_id=session_id)
diff --git a/fenix/db/api.py b/fenix/db/api.py
index 79250b0..68278ea 100644
--- a/fenix/db/api.py
+++ b/fenix/db/api.py
@@ -115,11 +115,23 @@ def create_session(values):
     return IMPL.create_session(values)
 
 
+def update_session(values):
+    return IMPL.update_session(values)
+
+
 def remove_session(session_id):
     """Remove a session from the tables."""
     return IMPL.remove_session(session_id)
 
 
+def get_session(session_id):
+    return IMPL.maintenance_session_get(session_id)
+
+
+def get_sessions():
+    return IMPL.maintenance_session_get_all()
+
+
 def create_action_plugin(values):
     """Create a action from the values."""
     return IMPL.create_action_plugin(values)
@@ -129,10 +141,22 @@ def create_action_plugins(session_id, action_dict_list):
     return IMPL.create_action_plugins(action_dict_list)
 
 
+def get_action_plugins(session_id):
+    return IMPL.action_plugins_get_all(session_id)
+
+
 def create_action_plugin_instance(values):
     return IMPL.create_action_plugin_instance(values)
 
 
+def get_action_plugin_instances(session_id):
+    return IMPL.action_plugin_instances_get_all(session_id)
+
+
+def update_action_plugin_instance(values):
+    return IMPL.update_action_plugin_instance(values)
+
+
 def remove_action_plugin_instance(ap_instance):
     return IMPL.remove_action_plugin_instance(ap_instance)
 
@@ -141,11 +165,19 @@ def create_downloads(download_dict_list):
     return IMPL.create_downloads(download_dict_list)
 
 
+def get_downloads(session_id):
+    return IMPL.download_get_all(session_id)
+
+
 def create_host(values):
     """Create a host from the values."""
     return IMPL.create_host(values)
 
 
+def update_host(values):
+    return IMPL.update_host(values)
+
+
 def create_hosts(session_id, hostnames):
     hosts = []
     for hostname in hostnames:
@@ -174,6 +206,10 @@ def create_hosts_by_details(session_id, hosts_dict_list):
     return IMPL.create_hosts(hosts)
 
 
+def get_hosts(session_id):
+    return IMPL.hosts_get(session_id)
+
+
 def create_projects(session_id, project_ids):
     projects = []
     for project_id in project_ids:
@@ -185,6 +221,18 @@ def create_projects(session_id, project_ids):
     return IMPL.create_projects(projects)
 
 
+def update_project(values):
+    return IMPL.update_project(values)
+
+
+def get_projects(session_id):
+    return IMPL.projects_get(session_id)
+
+
+def update_instance(values):
+    return IMPL.update_instance(values)
+
+
 def create_instance(values):
     """Create a instance from the values."""
     return IMPL.create_instance(values)
@@ -199,6 +247,10 @@ def remove_instance(session_id, instance_id):
     return IMPL.remove_instance(session_id, instance_id)
 
 
+def get_instances(session_id):
+    return IMPL.instances_get(session_id)
+
+
 def update_project_instance(values):
     return IMPL.update_project_instance(values)
 
diff --git a/fenix/db/migration/alembic_migrations/versions/001_initial.py b/fenix/db/migration/alembic_migrations/versions/001_initial.py
index abd87d2..006ef53 100644
--- a/fenix/db/migration/alembic_migrations/versions/001_initial.py
+++ b/fenix/db/migration/alembic_migrations/versions/001_initial.py
@@ -58,8 +58,6 @@ def upgrade():
         sa.Column('maintained', sa.Boolean, default=False),
         sa.Column('disabled', sa.Boolean, default=False),
         sa.Column('details', sa.String(length=255), nullable=True),
-        sa.Column('plugin', sa.String(length=255), nullable=True),
-        sa.Column('plugin_state', sa.String(length=32), nullable=True),
         sa.UniqueConstraint('session_id', 'hostname', name='_session_host_uc'),
         sa.PrimaryKeyConstraint('id'))
 
diff --git a/fenix/db/sqlalchemy/api.py b/fenix/db/sqlalchemy/api.py
index 62f9f9a..b18c7fa 100644
--- a/fenix/db/sqlalchemy/api.py
+++ b/fenix/db/sqlalchemy/api.py
@@ -135,6 +135,15 @@ def maintenance_session_get(session_id):
     return _maintenance_session_get(get_session(), session_id)
 
 
+def _maintenance_session_get_all(session):
+    query = model_query(models.MaintenanceSession, session)
+    return query
+
+
+def maintenance_session_get_all():
+    return _maintenance_session_get_all(get_session())
+
+
 def create_session(values):
     values = values.copy()
     msession = models.MaintenanceSession()
@@ -152,6 +161,18 @@ def create_session(values):
     return maintenance_session_get(msession.session_id)
 
 
+def update_session(values):
+    session = get_session()
+    session_id = values.session_id
+    with session.begin():
+        msession = _maintenance_session_get(session,
+                                            session_id)
+        msession.update(values)
+        msession.save(session=session)
+
+    return maintenance_session_get(session_id)
+
+
 def remove_session(session_id):
     session = get_session()
     with session.begin():
@@ -276,6 +297,22 @@ def action_plugin_instances_get_all(session_id):
     return _action_plugin_instances_get_all(get_session(), session_id)
 
 
+def update_action_plugin_instance(values):
+    session = get_session()
+    session_id = values.session_id
+    plugin = values.plugin
+    hostname = values.hostname
+    with session.begin():
+        ap_instance = _action_plugin_instance_get(session,
+                                                  session_id,
+                                                  plugin,
+                                                  hostname)
+        ap_instance.update(values)
+        ap_instance.save(session=session)
+
+    return action_plugin_instance_get(session_id, plugin, hostname)
+
+
 def create_action_plugin_instance(values):
     values = values.copy()
     ap_instance = models.MaintenanceActionPluginInstance()
@@ -402,6 +439,18 @@ def create_host(values):
     return host_get(mhost.session_id, mhost.hostname)
 
 
+def update_host(values):
+    session = get_session()
+    session_id = values.session_id
+    hostname = values.hostname
+    with session.begin():
+        mhost = _host_get(session, session_id, hostname)
+        mhost.update(values)
+        mhost.save(session=session)
+
+    return host_get(session_id, hostname)
+
+
 def create_hosts(values_list):
     for values in values_list:
         vals = values.copy()
@@ -468,6 +517,18 @@ def create_project(values):
     return project_get(mproject.session_id, mproject.project_id)
 
 
+def update_project(values):
+    session = get_session()
+    session_id = values.session_id
+    project_id = values.project_id
+    with session.begin():
+        mproject = _project_get(session, session_id, project_id)
+        mproject.update(values)
+        mproject.save(session=session)
+
+    return project_get(session_id, project_id)
+
+
 def create_projects(values_list):
     for values in values_list:
         vals = values.copy()
@@ -476,7 +537,7 @@ def create_projects(values_list):
             mproject = models.MaintenanceProject()
             mproject.update(vals)
             if _project_get(session, mproject.session_id,
-                             mproject.project_id):
+                            mproject.project_id):
                 selected = ['project_id']
                 raise db_exc.FenixDBDuplicateEntry(
                           model=mproject.__class__.__name__,
@@ -512,6 +573,18 @@ def instances_get(session_id):
     return _instances_get(get_session(), session_id)
 
 
+def update_instance(values):
+    session = get_session()
+    session_id = values.session_id
+    instance_id = values.instance_id
+    with session.begin():
+        minstance = _instance_get(session, session_id, instance_id)
+        minstance.update(values)
+        minstance.save(session=session)
+
+    return instance_get(session_id, instance_id)
+
+
 def create_instance(values):
     values = values.copy()
     minstance = models.MaintenanceInstance()
diff --git a/fenix/db/sqlalchemy/models.py b/fenix/db/sqlalchemy/models.py
index 87b17ef..8f98068 100644
--- a/fenix/db/sqlalchemy/models.py
+++ b/fenix/db/sqlalchemy/models.py
@@ -99,8 +99,6 @@ class MaintenanceHost(mb.FenixBase):
     maintained = sa.Column(sa.Boolean, default=False)
     disabled = sa.Column(sa.Boolean, default=False)
     details = sa.Column(sa.String(length=255), nullable=True)
-    plugin = sa.Column(sa.String(length=255), nullable=True)
-    plugin_state = sa.Column(sa.String(length=32), nullable=True)
 
     def to_dict(self):
         return super(MaintenanceHost, self).to_dict()
diff --git a/fenix/tests/db/sqlalchemy/test_sqlalchemy_api.py b/fenix/tests/db/sqlalchemy/test_sqlalchemy_api.py
index 9429661..0722082 100644
--- a/fenix/tests/db/sqlalchemy/test_sqlalchemy_api.py
+++ b/fenix/tests/db/sqlalchemy/test_sqlalchemy_api.py
@@ -117,9 +117,7 @@ def _get_fake_host_values(uuid=_get_fake_uuid(),
              'type': 'compute',
              'maintained': False,
              'disabled': False,
-             'details': None,
-             'plugin': None,
-             'plugin_state': None}
+             'details': None}
     return hdict
 
 
diff --git a/fenix/tools/README.md b/fenix/tools/README.md
index 65ba20b..5256e41 100644
--- a/fenix/tools/README.md
+++ b/fenix/tools/README.md
@@ -10,7 +10,18 @@ Files:
 
 - 'demo-ha.yaml': demo-ha ReplicaSet to make 2 anti-affinity PODS.
 - 'demo-nonha.yaml': demo-nonha ReplicaSet to make n nonha PODS.
-- 'vnfm.py': VNFM to test k8s.py workflow.
+- 'vnfm_k8s.py': VNFM to test k8s.py (Kubernetes example) workflow.
+- 'vnfm.py': VNFM to test vnf.py (OpenStack example) workflow.
+- 'infra_admin.py': Tool to act as the infrastructure admin. The tool also
+  catches the 'maintenance.session' and 'maintenance.host' events to keep
+  track of where the maintenance is going. You will see when a certain host
+  is maintained and what percentage of hosts has been maintained.
+- 'session.json': Example of defining maintenance session parameters as a JSON
+  file to be given as input to 'infra_admin.py'. The example is for the vnf.py
+  workflow. This could be used for any advanced workflow testing, providing
+  software downloads and real action plugins.
+- 'set_config.py': You can use this to set Fenix AODH/Ceilometer configuration.
+- 'fenix_db_reset': Flush the Fenix database.
 
 ## Kubernetes workflow (k8s.py)
 
@@ -92,7 +103,7 @@ kluster. Under here is what you can run in different terminals. Terminals
 should be running in master node. Here is short description:
 
 - Term1: Used for logging Fenix
-- Term2: Infrastructure admin commands
+- Term2: Infrastructure admin
 - Term3: VNFM logging for testing and setting up the VNF
 
 #### Term1: Fenix-engine logging
@@ -114,6 +125,8 @@ Debugging and other configuration changes to '.conf' files under '/etc/fenix'
 
 #### Term2: Infrastructure admin window
 
+##### Admin commands as command line and curl
+
 Use DevStack admin as user. Set your variables needed accordingly
 
 ```sh
@@ -148,12 +161,42 @@ If maintenance run till the end with 'MAINTENANCE_DONE', you are ready to run it
 again if you wish. 'MAINTENANCE_FAILED' or in case of exceptions, you should
 recover system before trying to test again. This is covered in Term3 below.
 
-#### Term3: VNFM (fenix/tools/vnfm.py)
+##### Admin commands using admin tool
 
-Use DevStack admin as user.
+Go to Fenix tools directory
 
 ```sh
-. ~/devstack/operc admin admin
+cd /opt/stack/fenix/fenix/tools
+```
+Call the admin tool and it will run the maintenance workflow. The admin tool
+defaults to the 'OpenStack' cloud type and the 'nfv' workflow, so you can
+override those by exporting environment variables
+
+```sh
+. ~/devstack/openrc admin admin
+export WORKFLOW=k8s
+export CLOUD_TYPE=k8s
+python infra_admin.py
+```
+
+If you want to choose the maintenance workflow session parameters freely,
+you can give the session.json file as input. With this option infra_admin.py
+will only override 'maintenance_at' to be 20 seconds in the future when
+Fenix is called.
+
+```sh
+python infra_admin.py --file session.json
+```
+
+Maintenance will start when you press enter; just follow the instructions on
+the console.
+
+#### Term3: VNFM (fenix/tools/vnfm_k8s.py)
+
+Use DevStack as the demo user for testing the demo application
+
+```sh
+. ~/devstack/openrc demo demo
 ```
 
 Go to Fenix Kubernetes tool directory for testing
@@ -181,7 +224,7 @@ is 32 cpus, so value is "15" in both yaml files. Replicas can be changed in
 demo-nonha.yaml. Minimum 2 (if minimum of 3 worker nodes) to maximum
 '(amount_of_worker_nodes-1)*2'. Greater amount means more scaling needed and
 longer maintenance window as less parallel actions possible. Surely constraints
-in vnfm.py also can be changed for different behavior.
+in vnfm_k8s.py also can be changed for different behavior.
 
 You can delete pods used like this
 
@@ -192,11 +235,11 @@ kubectl delete replicaset.apps demo-ha demo-nonha --namespace=demo
 Start Kubernetes VNFM that we need for testing
 
 ```sh
-python vnfm.py
+python vnfm_k8s.py
 ```
 
 Now you can start maintenance session in Term2. When workflow failed or
-completed; you first kill vnfm.py with "ctrl+c" and delete maintenance session
+completed, you first kill vnfm_k8s.py with "ctrl+c" and delete the maintenance session
 in Term2.
 
 If workflow failed something might need to be manually fixed. Here you
@@ -221,7 +264,8 @@ kubectl delete replicaset.apps demo-ha demo-nonha --namespace=demo;sleep 15;kube
 
 ## OpenStack workflows (default.py and nvf.py)
 
-OpenStack workflows can be tested by using OPNFV Doctor project for testing.
+OpenStack workflows can be tested by using the OPNFV Doctor project
+or by using Fenix's own tools.
 Workflows:
 
 - default.py is the first example workflow with VNFM interaction.
@@ -290,7 +334,7 @@ cpu_allocation_ratio = 1.0
 allow_resize_to_same_host = False
 ```
 
-### Workflow default.py
+### Workflow default.py testing with Doctor
 
 On controller node clone Doctor to be able to test. Doctor currently requires
 Python 3.6:
@@ -331,13 +375,13 @@ sudo systemctl restart devstack@fenix*
 
 You can also make changed to Doctor before running Doctor test
 
-### Workflow vnf.py
+### Workflow vnf.py testing with Doctor
 
 This workflow differs from above as it expects ETSI FEAT03 constraints.
 In Doctor testing it means we also need to use different application manager (VNFM)
 
 Where default.py worklow used the sample.py application manager vnf.py
-workflow uses vnfm.py workflow (doctor/doctor_tests/app_manager/vnfm.py)
+workflow uses vnfm_k8s.py workflow (doctor/doctor_tests/app_manager/vnfm_k8s.py)
 
 Only change to testing is that you should export variable to use different
 application manager.
@@ -354,3 +398,115 @@ export APP_MANAGER_TYPE=sample
 ```
 Doctor modifies the message where it calls maintenance accordingly to use
 either 'default' or 'nfv' as workflow in Fenix side
+
+### Workflow vnf.py testing with Fenix
+
+Where Doctor is made to automate everything as a test case, Fenix provides
+different tools for admin and VNFM:
+
+- 'vnfm.py': VNFM to test vnf.py.
+- 'infra_admin.py': Tool to act as infrastructure admin.
+
+Use 3 terminal windows (Term1, Term2 and Term3) to test Fenix. Below is what
+you can run in the different terminals. Terminals should be running on the
+controller node. Here is a short description:
+
+- Term1: Used for logging Fenix
+- Term2: Infrastructure admin
+- Term3: VNFM logging for testing and setting up the VNF
+
+#### Term1: Fenix-engine logging
+
+If any changes to Fenix make them under '/opt/stack/fenix'; restart Fenix and
+see logs
+
+```sh
+sudo systemctl restart devstack@fenix*;sudo journalctl -f --unit devstack@fenix-engine
+```
+
+API logs can also be seen
+
+```sh
+sudo journalctl -f --unit devstack@fenix-api
+```
+
+Debugging and other configuration changes to '.conf' files under '/etc/fenix'
+
+#### Term2: Infrastructure admin window
+
+Go to Fenix tools directory for testing
+
+```sh
+cd /opt/stack/fenix/fenix/tools
+```
+
+Make a flavor for testing that takes half of the VCPUs on a single
+compute node (here we have 48 VCPUs on each compute). This is required by
+the current example 'vnfm.py' and the VNF 'maintenance_hot_tpl.yaml' that
+is used in testing. The 'vnf.py' workflow is not bound to these in any way,
+but can be used with different VNFs and VNFMs.
+
+```sh
+openstack flavor create --ram 512 --vcpus 24 --disk 1 --public demo_maint_flavor
+```
+
+Call the admin tool and it will run the vnf.py workflow.
+
+```sh
+. ~/devstack/openrc admin admin
+python infra_admin.py
+```
+
+If you want to choose the maintenance workflow session parameters freely,
+you can give the 'session.json' file as input (a sketch of its content is
+shown below). With this option 'infra_admin.py' will only override
+'maintenance_at' to be 20 seconds in the future when Fenix is called.
+
+```sh
+python infra_admin.py --file session.json
+```
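+
+As a sketch, such a file could look roughly like the request that
+'infra_admin.py' builds by default ('maintenance_at' is overridden by the
+tool; the actual fenix/tools/session.json may differ):
+
+```json
+{
+    "state": "MAINTENANCE",
+    "workflow": "vnf",
+    "metadata": {"openstack": "upgrade"},
+    "actions": [
+        {"plugin": "dummy", "type": "host", "metadata": {"foo": "bar"}}
+    ]
+}
+```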
+
+Maintenance will start when you press enter; just follow the instructions on
+the console.
+
+In case you failed to remove the maintenance workflow session, you can do it
+manually as instructed above in 'Admin commands as command line and curl'.
+
+#### Term3: VNFM (fenix/tools/vnfm.py)
+
+Use DevStack as the demo user for testing the demo application
+
+```sh
+. ~/devstack/openrc demo demo
+```
+
+Go to Fenix tools directory for testing
+
+```sh
+cd /opt/stack/fenix/fenix/tools
+```
+
+Start VNFM that we need for testing
+
+```sh
+python vnfm.py
+```
+
+Now you can start a maintenance session in Term2. When the workflow has failed
+or completed, you first kill vnfm.py with "ctrl+c" and then delete the
+maintenance session in Term2.
+
+If the workflow failed, something might need to be fixed manually.
+Here you can remove the heat stack if vnfm.py failed to do that:
+
+```sh
+openstack stack delete -y --wait demo_stack
+```
+
+It may also be that the workflow failed somewhere in the middle and some
+'nova-compute' services are disabled. You can enable those (an example
+command follows below the listing). Here you can see the states:
+
+```sh
+openstack compute service list
+```
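+
+A disabled compute service can be enabled again, for example:
+
+```sh
+openstack compute service set --enable <hostname> nova-compute
+```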
diff --git a/fenix/tools/fenix_db_reset b/fenix/tools/fenix_db_reset
new file mode 100644
index 0000000..06d2182
--- /dev/null
+++ b/fenix/tools/fenix_db_reset
@@ -0,0 +1,9 @@
+MYSQLPW=admin
+# Fenix DB
+[ `mysql -uroot -p$MYSQLPW -e "SELECT host, user FROM mysql.user;" | grep fenix | wc -l` -eq 0 ] && {
+    mysql -uroot -p$MYSQLPW -hlocalhost -e "CREATE USER 'fenix'@'localhost' IDENTIFIED BY 'fenix';"
+    mysql -uroot -p$MYSQLPW -hlocalhost -e "GRANT ALL PRIVILEGES ON fenix.* TO 'fenix'@'' identified by 'fenix';FLUSH PRIVILEGES;"
+}
+mysql -ufenix -pfenix -hlocalhost -e "DROP DATABASE IF EXISTS fenix;"
+mysql -ufenix -pfenix -hlocalhost -e "CREATE DATABASE fenix CHARACTER SET utf8;"
+
diff --git a/fenix/tools/infra_admin.py b/fenix/tools/infra_admin.py
new file mode 100644
index 0000000..517ce01
--- /dev/null
+++ b/fenix/tools/infra_admin.py
@@ -0,0 +1,320 @@
+# Copyright (c) 2020 Nokia Corporation.
+# All Rights Reserved.
+#
+#    Licensed under the Apache License, Version 2.0 (the "License"); you may
+#    not use this file except in compliance with the License. You may obtain
+#    a copy of the License at
+#
+#         http://www.apache.org/licenses/LICENSE-2.0
+#
+#    Unless required by applicable law or agreed to in writing, software
+#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+#    License for the specific language governing permissions and limitations
+#    under the License.
+import aodhclient.client as aodhclient
+import argparse
+import datetime
+from flask import Flask
+from flask import request
+import json
+from keystoneauth1 import loading
+from keystoneclient import client as ks_client
+import logging as lging
+import os
+from oslo_config import cfg
+from oslo_log import log as logging
+import requests
+import sys
+from threading import Thread
+import time
+import yaml
+
+try:
+    import fenix.utils.identity_auth as identity_auth
+except ValueError:
+    sys.path.append('../utils')
+    import identity_auth
+
+try:
+    input = raw_input
+except NameError:
+    pass
+
+LOG = logging.getLogger(__name__)
+streamlog = lging.StreamHandler(sys.stdout)
+formatter = lging.Formatter("%(asctime)s: %(message)s")
+streamlog.setFormatter(formatter)
+LOG.logger.addHandler(streamlog)
+LOG.logger.setLevel(logging.INFO)
+
+
+def get_identity_auth(conf, project=None, username=None, password=None):
+    loader = loading.get_plugin_loader('password')
+    return loader.load_from_options(
+        auth_url=conf.service_user.os_auth_url,
+        username=(username or conf.service_user.os_username),
+        password=(password or conf.service_user.os_password),
+        user_domain_name=conf.service_user.os_user_domain_name,
+        project_name=(project or conf.service_user.os_project_name),
+        tenant_name=(project or conf.service_user.os_project_name),
+        project_domain_name=conf.service_user.os_project_domain_name)
+
+
+class InfraAdmin(object):
+
+    def __init__(self, conf, log):
+        self.conf = conf
+        self.log = log
+        self.app = None
+
+    def start(self):
+        self.log.info('InfraAdmin start...')
+        self.app = InfraAdminManager(self.conf, self.log)
+        self.app.start()
+
+    def stop(self):
+        self.log.info('InfraAdmin stop...')
+        if not self.app:
+            return
+        headers = {
+            'Content-Type': 'application/json',
+            'Accept': 'application/json',
+        }
+        url = 'http://%s:%d/shutdown'\
+              % (self.conf.host,
+                 self.conf.port)
+        requests.post(url, data='', headers=headers)
+
+
+class InfraAdminManager(Thread):
+
+    def __init__(self, conf, log, project='service'):
+        Thread.__init__(self)
+        self.conf = conf
+        self.log = log
+        self.project = project
+        # By default we act as admin:admin:admin. This means we listen to
+        # notifications/events as admin.
+        # This means the Fenix service user needs to be admin:admin:admin
+        # self.auth = identity_auth.get_identity_auth(conf,
+        # project=self.project)
+        self.auth = get_identity_auth(conf,
+                                      project='service',
+                                      username='fenix',
+                                      password='admin')
+        self.session = identity_auth.get_session(auth=self.auth)
+        self.keystone = ks_client.Client(version='v3', session=self.session)
+        self.aodh = aodhclient.Client(2, self.session)
+        self.headers = {
+            'Content-Type': 'application/json',
+            'Accept': 'application/json'}
+        self.project_id = self.keystone.projects.list(name=self.project)[0].id
+        self.headers['X-Auth-Token'] = self.session.get_token()
+        self.create_alarm()
+        services = self.keystone.services.list()
+        for service in services:
+            if service.type == 'maintenance':
+                LOG.info('maintenance service: %s:%s type %s'
+                         % (service.name, service.id, service.type))
+                maint_id = service.id
+        self.endpoint = [ep.url for ep in self.keystone.endpoints.list()
+                         if ep.service_id == maint_id and
+                         ep.interface == 'public'][0]
+        self.log.info('maintenance endpoint: %s' % self.endpoint)
+
+        if self.conf.workflow_file:
+            with open(self.conf.workflow_file) as json_file:
+                self.session_request = yaml.safe_load(json_file)
+        else:
+            if self.conf.cloud_type == 'openstack':
+                metadata = {'openstack': 'upgrade'}
+            elif self.conf.cloud_type in ['k8s', 'kubernetes']:
+                metadata = {'kubernetes': 'upgrade'}
+            else:
+                metadata = {}
+            self.session_request = {'state': 'MAINTENANCE',
+                                    'workflow': self.conf.workflow,
+                                    'metadata': metadata,
+                                    'actions': [
+                                        {"plugin": "dummy",
+                                         "type": "host",
+                                         "metadata": {"foo": "bar"}}]}
+
+        self.start_maintenance()
+
+    def create_alarm(self):
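+        # Subscribe to the Fenix 'maintenance.session' and 'maintenance.host'
+        # events via AODH event alarms whose actions point back to the HTTP
+        # endpoints served by this tool.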
+        alarms = {alarm['name']: alarm for alarm in self.aodh.alarm.list()}
+        alarm_name = "%s_MAINTENANCE_SESSION" % self.project
+        if alarm_name not in alarms:
+            alarm_request = dict(
+                name=alarm_name,
+                description=alarm_name,
+                enabled=True,
+                alarm_actions=[u'http://%s:%d/maintenance_session'
+                               % (self.conf.host,
+                                  self.conf.port)],
+                repeat_actions=True,
+                severity=u'moderate',
+                type=u'event',
+                event_rule=dict(event_type=u'maintenance.session'))
+            self.aodh.alarm.create(alarm_request)
+        alarm_name = "%s_MAINTENANCE_HOST" % self.project
+        if alarm_name not in alarms:
+            alarm_request = dict(
+                name=alarm_name,
+                description=alarm_name,
+                enabled=True,
+                alarm_actions=[u'http://%s:%d/maintenance_host'
+                               % (self.conf.host,
+                                  self.conf.port)],
+                repeat_actions=True,
+                severity=u'moderate',
+                type=u'event',
+                event_rule=dict(event_type=u'maintenance.host'))
+            self.aodh.alarm.create(alarm_request)
+
+    def start_maintenance(self):
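+        # POST the session creation request to the Fenix maintenance
+        # endpoint with 'maintenance_at' set roughly 20 seconds ahead;
+        # progress is then followed via the AODH event alarms.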
+        self.log.info('Waiting AODH to initialize...')
+        time.sleep(5)
+        input('--Press ENTER to start maintenance session--')
+
+        maintenance_at = (datetime.datetime.utcnow() +
+                          datetime.timedelta(seconds=20)
+                          ).strftime('%Y-%m-%d %H:%M:%S')
+
+        self.session_request['maintenance_at'] = maintenance_at
+
+        self.headers['X-Auth-Token'] = self.session.get_token()
+        url = self.endpoint + "/maintenance"
+        self.log.info('Start maintenance session: %s\n%s\n%s' %
+                      (url, self.headers, self.session_request))
+        ret = requests.post(url, data=json.dumps(self.session_request),
+                            headers=self.headers)
+        session_id = ret.json()['session_id']
+        self.log.info('--== Maintenance session %s instantiated ==--'
+                      % session_id)
+
+    def _alarm_data_decoder(self, data):
+        if "[" in data or "{" in data:
+            # string to list or dict removing unicode
+            data = yaml.safe_load(data.replace("u'", "'"))
+        return data
+
+    def _alarm_traits_decoder(self, data):
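+        # Event traits come as [name, type, value] triples; build a
+        # name -> decoded value dict from the alarm reason data.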
+        return ({str(t[0]): self._alarm_data_decoder(str(t[2]))
+                for t in data['reason_data']['event']['traits']})
+
+    def run(self):
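+        # Flask application handling the AODH alarm callbacks:
+        #   /maintenance_host     per-host state changes
+        #   /maintenance_session  overall workflow progress (percent_done)
+        #   /shutdown             stops this HTTP server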
+        app = Flask('InfraAdmin')
+
+        @app.route('/maintenance_host', methods=['POST'])
+        def maintenance_host():
+            data = json.loads(request.data.decode('utf8'))
+            try:
+                payload = self._alarm_traits_decoder(data)
+            except Exception:
+                payload = ({t[0]: t[2] for t in
+                           data['reason_data']['event']['traits']})
+                self.log.error('cannot parse alarm data: %s' % payload)
+                raise Exception('InfraAdmin cannot parse alarm. '
+                                'Possibly trait data over 256 char')
+
+            state = payload['state']
+            host = payload['host']
+            session_id = payload['session_id']
+            self.log.info("%s: Host: %s %s" % (session_id, host, state))
+            return 'OK'
+
+        @app.route('/maintenance_session', methods=['POST'])
+        def maintenance_session():
+            data = json.loads(request.data.decode('utf8'))
+            try:
+                payload = self._alarm_traits_decoder(data)
+            except Exception:
+                payload = ({t[0]: t[2] for t in
+                           data['reason_data']['event']['traits']})
+                self.log.error('cannot parse alarm data: %s' % payload)
+                raise Exception('InfraAdmin cannot parse alarm. '
+                                'Possibly trait data over 256 char')
+            state = payload['state']
+            percent_done = payload['percent_done']
+            session_id = payload['session_id']
+            self.log.info("%s: %s%% done in state %s" % (session_id,
+                                                         percent_done,
+                                                         state))
+            if state in ['MAINTENANCE_FAILED', 'MAINTENANCE_DONE']:
+                self.headers['X-Auth-Token'] = self.session.get_token()
+                input('--Press ENTER to remove session %s--' %
+                      session_id)
+                self.log.info('Remove maintenance session %s....' % session_id)
+
+                url = ('%s/maintenance/%s' % (self.endpoint, session_id))
+                self.headers['X-Auth-Token'] = self.session.get_token()
+
+                ret = requests.delete(url, data=None, headers=self.headers)
+                if ret.status_code != 200:
+                    raise Exception(ret.text)
+                self.log.info('Press CTRL + C to quit')
+
+            return 'OK'
+
+        @app.route('/shutdown', methods=['POST'])
+        def shutdown():
+            self.log.info('shutdown InfraAdmin server at %s' % time.time())
+            func = request.environ.get('werkzeug.server.shutdown')
+            if func is None:
+                raise RuntimeError('Not running with the Werkzeug Server')
+            func()
+            return 'InfraAdmin shutting down...'
+
+        app.run(host=self.conf.host, port=self.conf.port)
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description='Workflow Admin tool')
+
+    parser.add_argument('--file', type=str, default=None,
+                        help='Workflow session creation arguments file')
+
+    parser.add_argument('--host', type=str, default=None,
+                        help='the ip of InfraAdmin')
+
+    parser.add_argument('--port', type=int, default=None,
+                        help='the port of InfraAdmin')
+
+    args = parser.parse_args()
+
+    opts = [
+        cfg.StrOpt('host',
+                   default=(args.host or '127.0.0.1'),
+                   help='the ip of InfraAdmin',
+                   required=True),
+        cfg.IntOpt('port',
+                   default=(args.port or 12349),
+                   help='the port of InfraAdmin',
+                   required=True),
+        cfg.StrOpt('workflow',
+                   default=os.environ.get('WORKFLOW', 'vnf'),
+                   help='Workflow to be used',
+                   required=True),
+        cfg.StrOpt('cloud_type',
+                   default=os.environ.get('CLOUD_TYPE', 'openstack'),
+                   help='Cloud type for metadata',
+                   required=True),
+        cfg.StrOpt('workflow_file',
+                   default=(args.file or None),
+                   help='Workflow session creation arguments file',
+                   required=True)]
+
+    CONF = cfg.CONF
+    CONF.register_opts(opts)
+    CONF.register_opts(identity_auth.os_opts, group='service_user')
+
+    app = InfraAdmin(CONF, LOG)
+    app.start()
+    try:
+        LOG.info('Press CTRL + C to quit')
+        while True:
+            time.sleep(2)
+    except KeyboardInterrupt:
+        app.stop()
diff --git a/fenix/tools/maintenance_hot_tpl.yaml b/fenix/tools/maintenance_hot_tpl.yaml
new file mode 100644
index 0000000..211b4c7
--- /dev/null
+++ b/fenix/tools/maintenance_hot_tpl.yaml
@@ -0,0 +1,108 @@
+---
+heat_template_version: 2017-02-24
+description: Demo VNF test case
+
+parameters:
+  ext_net:
+    type: string
+    default: public
+#  flavor_vcpus:
+#    type: number
+#    default: 24
+  maint_image:
+    type: string
+    default: cirros-0.4.0-x86_64-disk
+  ha_intances:
+    type: number
+    default: 2
+  nonha_intances:
+    type: number
+    default: 10
+  app_manager_alarm_url:
+    type: string
+    default: http://0.0.0.0:12348/maintenance
+
+
+resources:
+  int_net:
+    type: OS::Neutron::Net
+
+  int_subnet:
+    type: OS::Neutron::Subnet
+    properties:
+      network_id: {get_resource: int_net}
+      cidr: "9.9.9.0/24"
+      dns_nameservers: ["8.8.8.8"]
+      ip_version: 4
+
+  int_router:
+    type: OS::Neutron::Router
+    properties:
+      external_gateway_info: {network: {get_param: ext_net}}
+
+  int_interface:
+    type: OS::Neutron::RouterInterface
+    properties:
+      router_id: {get_resource: int_router}
+      subnet: {get_resource: int_subnet}
+
+#  maint_instance_flavor:
+#    type: OS::Nova::Flavor
+#    properties:
+#      name: demo_maint_flavor
+#      ram: 512
+#      vcpus: {get_param: flavor_vcpus}
+#      disk: 1
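+# NOTE: the server resources below assume a pre-created 'demo_maint_flavor'
+# flavor, since the OS::Nova::Flavor resource above is left commented out.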
+
+  ha_app_svrgrp:
+    type: OS::Nova::ServerGroup
+    properties:
+      name: demo_ha_app_group
+      policies: ['anti-affinity']
+
+  floating_ip:
+    type: OS::Nova::FloatingIP
+    properties:
+      pool: {get_param: ext_net}
+
+  multi_ha_instances:
+    type: OS::Heat::ResourceGroup
+    properties:
+      count: {get_param: ha_intances}
+      resource_def:
+        type: OS::Nova::Server
+        properties:
+          name: demo_ha_app_%index%
+          flavor: demo_maint_flavor
+          image: {get_param: maint_image}
+          networks:
+            - network: {get_resource: int_net}
+          scheduler_hints:
+            group: {get_resource: ha_app_svrgrp}
+
+  multi_nonha_instances:
+    type: OS::Heat::ResourceGroup
+    properties:
+      count: {get_param: nonha_intances}
+      resource_def:
+        type: OS::Nova::Server
+        properties:
+          name: demo_nonha_app_%index%
+          flavor: demo_maint_flavor
+          image: {get_param: maint_image}
+          networks:
+            - network: {get_resource: int_net}
+
+  association:
+    type: OS::Nova::FloatingIPAssociation
+    properties:
+      floating_ip: {get_resource: floating_ip}
+      server_id: {get_attr: [multi_ha_instances, resource.0]}
+
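+  # AODH event alarm that forwards Fenix 'maintenance.scheduled' events to
+  # the application manager (see vnfm.py) at app_manager_alarm_url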
+  app_manager_alarm:
+    type: OS::Aodh::EventAlarm
+    properties:
+      alarm_actions:
+        - {get_param: app_manager_alarm_url}
+      event_type: "maintenance.scheduled"
+      repeat_actions: true
diff --git a/fenix/tools/session.json b/fenix/tools/session.json
new file mode 100644
index 0000000..e525e34
--- /dev/null
+++ b/fenix/tools/session.json
@@ -0,0 +1,6 @@
+{
+  "state": "MAINTENANCE",
+  "metadata": {"openstack": "upgrade"},
+  "actions": [{"metadata": {"os": "upgrade"}, "type": "host", "plugin": "dummy"}],
+  "workflow": "vnf"
+}
diff --git a/fenix/tools/set_config.py b/fenix/tools/set_config.py
new file mode 100644
index 0000000..4b96c35
--- /dev/null
+++ b/fenix/tools/set_config.py
@@ -0,0 +1,185 @@
+# Copyright (c) 2020 ZTE and others.
+# All Rights Reserved.
+#
+#    Licensed under the Apache License, Version 2.0 (the "License"); you may
+#    not use this file except in compliance with the License. You may obtain
+#    a copy of the License at
+#
+#         http://www.apache.org/licenses/LICENSE-2.0
+#
+#    Unless required by applicable law or agreed to in writing, software
+#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+#    License for the specific language governing permissions and limitations
+#    under the License.
+import os
+import shutil
+import yaml
+
+
+cbase = "/var/lib/config-data/puppet-generated/ceilometer"
+if not os.path.isdir(cbase):
+    cbase = ""
+
+
+def set_notifier_topic():
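+    # Make sure the Ceilometer event pipeline publishes events to the
+    # 'alarm.all' notifier topic so AODH can evaluate event alarms.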
+    ep_file = cbase + '/etc/ceilometer/event_pipeline.yaml'
+    ep_file_bak = cbase + '/etc/ceilometer/event_pipeline.yaml.bak'
+    event_notifier_topic = 'notifier://?topic=alarm.all'
+    config_modified = False
+
+    if not os.path.isfile(ep_file):
+        raise Exception("File doesn't exist: %s." % ep_file)
+
+    with open(ep_file, 'r') as file:
+        config = yaml.safe_load(file)
+
+    sinks = config['sinks']
+    for sink in sinks:
+        if sink['name'] == 'event_sink':
+            publishers = sink['publishers']
+            if event_notifier_topic not in publishers:
+                print('Add event notifier in ceilometer')
+                publishers.append(event_notifier_topic)
+                config_modified = True
+            else:
+                print('NOTE: event notifier is already configured '
+                      'in ceilometer as needed')
+
+    if config_modified:
+        shutil.copyfile(ep_file, ep_file_bak)
+        with open(ep_file, 'w+') as file:
+            file.write(yaml.safe_dump(config))
+
+
+def set_event_definitions():
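+    # Add trait definitions for 'compute.instance.update' and the Fenix
+    # 'maintenance.*' events so the traits are available to AODH alarms.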
+    ed_file = cbase + '/etc/ceilometer/event_definitions.yaml'
+    ed_file_bak = cbase + '/etc/ceilometer/event_definitions.bak'
+    orig_ed_file_exist = True
+    modify_config = False
+
+    if not os.path.isfile(ed_file):
+        # Deployment did not modify file, so it did not exist
+        src_file = '/etc/ceilometer/event_definitions.yaml'
+        if not os.path.isfile(src_file):
+            config = []
+            orig_ed_file_exist = False
+        else:
+            shutil.copyfile(src_file, ed_file)
+    if orig_ed_file_exist:
+        with open(ed_file, 'r') as file:
+            config = yaml.safe_load(file)
+
+    et_list = [et['event_type'] for et in config]
+
+    if 'compute.instance.update' in et_list:
+        print('NOTE: compute.instance.update already configured')
+    else:
+        print('NOTE: add compute.instance.update to event_definitions.yaml')
+        modify_config = True
+        instance_update = {
+            'event_type': 'compute.instance.update',
+            'traits': {
+                'deleted_at': {'fields': 'payload.deleted_at',
+                               'type': 'datetime'},
+                'disk_gb': {'fields': 'payload.disk_gb',
+                            'type': 'int'},
+                'display_name': {'fields': 'payload.display_name'},
+                'ephemeral_gb': {'fields': 'payload.ephemeral_gb',
+                                 'type': 'int'},
+                'host': {'fields': 'publisher_id.`split(., 1, 1)`'},
+                'instance_id': {'fields': 'payload.instance_id'},
+                'instance_type': {'fields': 'payload.instance_type'},
+                'instance_type_id': {'fields': 'payload.instance_type_id',
+                                     'type': 'int'},
+                'launched_at': {'fields': 'payload.launched_at',
+                                'type': 'datetime'},
+                'memory_mb': {'fields': 'payload.memory_mb',
+                              'type': 'int'},
+                'old_state': {'fields': 'payload.old_state'},
+                'os_architecture': {
+                    'fields':
+                    "payload.image_meta.'org.openstack__1__architecture'"},
+                'os_distro': {
+                    'fields':
+                    "payload.image_meta.'org.openstack__1__os_distro'"},
+                'os_version': {
+                    'fields':
+                    "payload.image_meta.'org.openstack__1__os_version'"},
+                'resource_id': {'fields': 'payload.instance_id'},
+                'root_gb': {'fields': 'payload.root_gb',
+                            'type': 'int'},
+                'service': {'fields': 'publisher_id.`split(., 0, -1)`'},
+                'state': {'fields': 'payload.state'},
+                'tenant_id': {'fields': 'payload.tenant_id'},
+                'user_id': {'fields': 'payload.user_id'},
+                'vcpus': {'fields': 'payload.vcpus', 'type': 'int'}
+                }
+            }
+        config.append(instance_update)
+
+    if 'maintenance.scheduled' in et_list:
+        print('NOTE: maintenance.scheduled already configured')
+    else:
+        print('NOTE: add maintenance.scheduled to event_definitions.yaml')
+        modify_config = True
+        mscheduled = {
+            'event_type': 'maintenance.scheduled',
+            'traits': {
+                'allowed_actions': {'fields': 'payload.allowed_actions'},
+                'instance_ids': {'fields': 'payload.instance_ids'},
+                'reply_url': {'fields': 'payload.reply_url'},
+                'actions_at': {'fields': 'payload.actions_at',
+                               'type': 'datetime'},
+                'reply_at': {'fields': 'payload.reply_at', 'type': 'datetime'},
+                'state': {'fields': 'payload.state'},
+                'session_id': {'fields': 'payload.session_id'},
+                'project_id': {'fields': 'payload.project_id'},
+                'metadata': {'fields': 'payload.metadata'}
+                }
+            }
+        config.append(mscheduled)
+
+    if 'maintenance.host' in et_list:
+        print('NOTE: maintenance.host already configured')
+    else:
+        print('NOTE: add maintenance.host to event_definitions.yaml')
+        modify_config = True
+        mhost = {
+            'event_type': 'maintenance.host',
+            'traits': {
+                'host': {'fields': 'payload.host'},
+                'project_id': {'fields': 'payload.project_id'},
+                'state': {'fields': 'payload.state'},
+                'session_id': {'fields': 'payload.session_id'}
+                }
+            }
+        config.append(mhost)
+
+    if 'maintenance.session' in et_list:
+        print('NOTE: maintenance.session already configured')
+    else:
+        print('NOTE: add maintenance.session to event_definitions.yaml')
+        modify_config = True
+        mhost = {
+            'event_type': 'maintenance.session',
+            'traits': {
+                'percent_done': {'fields': 'payload.percent_done'},
+                'project_id': {'fields': 'payload.project_id'},
+                'state': {'fields': 'payload.state'},
+                'session_id': {'fields': 'payload.session_id'}
+                }
+            }
+        config.append(mhost)
+
+    if modify_config:
+        if orig_ed_file_exist:
+            shutil.copyfile(ed_file, ed_file_bak)
+        else:
+            open(ed_file_bak, 'w+').close()
+        with open(ed_file, 'w+') as file:
+            file.write(yaml.safe_dump(config))
+
+set_notifier_topic()
+set_event_definitions()
diff --git a/fenix/tools/vnfm.py b/fenix/tools/vnfm.py
index e5573f8..7ad0d82 100644
--- a/fenix/tools/vnfm.py
+++ b/fenix/tools/vnfm.py
@@ -12,21 +12,25 @@
 #    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 #    License for the specific language governing permissions and limitations
 #    under the License.
-import aodhclient.client as aodhclient
-import datetime
 from flask import Flask
 from flask import request
+import heatclient.client as heatclient
+from heatclient.common.template_utils import get_template_contents
+from heatclient import exc as heat_excecption
 import json
+from keystoneauth1 import loading
 from keystoneclient import client as ks_client
-from kubernetes import client
-from kubernetes import config
 import logging as lging
+from neutronclient.v2_0 import client as neutronclient
+import novaclient.client as novaclient
+import os
 from oslo_config import cfg
 from oslo_log import log as logging
 import requests
 import sys
 from threading import Thread
 import time
+import uuid
 import yaml
 
 try:
@@ -56,6 +60,120 @@ CONF.register_opts(opts)
 CONF.register_opts(identity_auth.os_opts, group='service_user')
 
 
+class Stack(object):
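+    """Helper for creating, updating and deleting the demo VNF Heat stack."""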
+
+    def __init__(self, conf, log, project='demo'):
+        self.conf = conf
+        self.log = log
+        self.project = project
+        self.auth = identity_auth.get_identity_auth(conf, project=self.project)
+        self.session = identity_auth.get_session(self.auth)
+        self.heat = heatclient.Client(version='1', session=self.session)
+        self.stack_name = None
+        self.stack_id = None
+        self.template = None
+        self.parameters = {}
+        self.files = {}
+
+    # standard yaml.load does not work for a HOT template because the date
+    # in heat_template_version is not parsed as a string
+    def get_hot_tpl(self, template_file):
+        if not os.path.isfile(template_file):
+            raise Exception('File(%s) does not exist' % template_file)
+        return get_template_contents(template_file=template_file)
+
+    def _wait_stack_action_complete(self, action):
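+        # Poll the stack status every 2 seconds, up to 160 retries
+        # (roughly 5 minutes), until the action leaves *_IN_PROGRESS.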
+        action_in_progress = '%s_IN_PROGRESS' % action
+        action_complete = '%s_COMPLETE' % action
+        action_failed = '%s_FAILED' % action
+
+        status = action_in_progress
+        stack_retries = 160
+        while status == action_in_progress and stack_retries > 0:
+            time.sleep(2)
+            try:
+                stack = self.heat.stacks.get(self.stack_name)
+            except heat_excecption.HTTPNotFound:
+                if action == 'DELETE':
+                    # The stack may already be gone, so the DELETE_COMPLETE
+                    # status might never be seen
+                    status = action_complete
+                    break
+                else:
+                    raise Exception('unable to get stack')
+            status = stack.stack_status
+            stack_retries = stack_retries - 1
+        if stack_retries == 0 and status != action_complete:
+            raise Exception("stack %s not completed within 5min, status:"
+                            " %s" % (action, status))
+        elif status == action_complete:
+            self.log.info('stack %s %s' % (self.stack_name, status))
+        elif status == action_failed:
+            raise Exception("stack %s failed" % action)
+        else:
+            self.log.error('stack %s %s' % (self.stack_name, status))
+            raise Exception("stack %s unknown result" % action)
+
+    def wait_stack_delete(self):
+        self._wait_stack_action_complete('DELETE')
+
+    def wait_stack_create(self):
+        self._wait_stack_action_complete('CREATE')
+
+    def wait_stack_update(self):
+        self._wait_stack_action_complete('UPDATE')
+
+    def create(self, stack_name, template, parameters={}, files={}):
+        self.stack_name = stack_name
+        self.template = template
+        self.parameters = parameters
+        self.files = files
+        stack = self.heat.stacks.create(stack_name=self.stack_name,
+                                        files=files,
+                                        template=template,
+                                        parameters=parameters)
+        self.stack_id = stack['stack']['id']
+        try:
+            self.wait_stack_create()
+        except Exception:
+            # It might not always work at first
+            self.log.info('retry creating maintenance stack.......')
+            self.delete()
+            time.sleep(5)
+            stack = self.heat.stacks.create(stack_name=self.stack_name,
+                                            files=files,
+                                            template=template,
+                                            parameters=parameters)
+            self.stack_id = stack['stack']['id']
+            self.wait_stack_create()
+
+    def update(self, stack_name, stack_id, template, parameters={}, files={}):
+        self.heat.stacks.update(stack_name=stack_name,
+                                stack_id=stack_id,
+                                files=files,
+                                template=template,
+                                parameters=parameters)
+        self.wait_stack_update()
+
+    def delete(self):
+        if self.stack_id is not None:
+            self.heat.stacks.delete(self.stack_name)
+            self.wait_stack_delete()
+        else:
+            self.log.info('no stack to delete')
+
+
+def get_identity_auth(conf, project=None, username=None, password=None):
+    loader = loading.get_plugin_loader('password')
+    return loader.load_from_options(
+        auth_url=conf.service_user.os_auth_url,
+        username=(username or conf.service_user.os_username),
+        password=(password or conf.service_user.os_password),
+        user_domain_name=conf.service_user.os_user_domain_name,
+        project_name=(project or conf.service_user.os_project_name),
+        tenant_name=(project or conf.service_user.os_project_name),
+        project_domain_name=conf.service_user.os_project_domain_name)
+
+
 class VNFM(object):
 
     def __init__(self, conf, log):
@@ -64,16 +182,18 @@ class VNFM(object):
         self.app = None
 
     def start(self):
-        LOG.info('VNFM start......')
+        self.log.info('VNFM start...')
         self.app = VNFManager(self.conf, self.log)
         self.app.start()
 
     def stop(self):
-        LOG.info('VNFM stop......')
+        self.log.info('VNFM stop...')
         if not self.app:
             return
-        self.app.headers['X-Auth-Token'] = self.app.session.get_token()
+        self.log.info('delete VNF constraints...')
         self.app.delete_constraints()
+        self.log.info('VNF delete start...')
+        self.app.stack.delete()
         headers = {
             'Content-Type': 'application/json',
             'Accept': 'application/json',
@@ -86,29 +206,38 @@ class VNFM(object):
 
 class VNFManager(Thread):
 
-    def __init__(self, conf, log):
+    def __init__(self, conf, log, project='demo'):
         Thread.__init__(self)
         self.conf = conf
-        self.log = log
         self.port = self.conf.port
+        self.log = log
         self.intance_ids = None
-        # VNFM is started with OS_* exported as admin user
-        # We need that to query Fenix endpoint url
-        # Still we work with our tenant/poroject/vnf as demo
-        self.project = "demo"
-        LOG.info('VNFM project: %s' % self.project)
+        self.project = project
         self.auth = identity_auth.get_identity_auth(conf, project=self.project)
         self.session = identity_auth.get_session(auth=self.auth)
-        self.ks = ks_client.Client(version='v3', session=self.session)
-        self.aodh = aodhclient.Client(2, self.session)
-        # Subscribe to mainenance event alarm from Fenix via AODH
-        self.create_alarm()
-        config.load_kube_config()
-        self.kaapi = client.AppsV1Api()
-        self.kapi = client.CoreV1Api()
+        self.keystone = ks_client.Client(version='v3', session=self.session)
+        auth = get_identity_auth(conf,
+                                 project='service',
+                                 username='fenix',
+                                 password='admin')
+        session = identity_auth.get_session(auth=auth)
+        keystone = ks_client.Client(version='v3', session=session)
+        self.nova = novaclient.Client(version='2.34', session=self.session)
+        self.neutron = neutronclient.Client(session=self.session)
         self.headers = {
             'Content-Type': 'application/json',
             'Accept': 'application/json'}
+        self.project_id = self.session.get_project_id()
+        self.stack = Stack(self.conf, self.log, self.project)
+        files, template = self.stack.get_hot_tpl('maintenance_hot_tpl.yaml')
+        ext_net = self.get_external_network()
+        parameters = {'ext_net': ext_net}
+        self.log.info('creating VNF...')
+        self.log.info('parameters: %s' % parameters)
+        self.stack.create('%s_stack' % self.project,
+                          template,
+                          parameters=parameters,
+                          files=files)
         self.headers['X-Auth-Token'] = self.session.get_token()
         self.orig_number_of_instances = self.number_of_instances()
         # List of instances
@@ -118,66 +247,58 @@ class VNFManager(Thread):
         self.instance_constraints = None
         # Update existing instances to instance lists
         self.update_instances()
-        # How many instances needs to exists (with current VNF load)
-        # max_impacted_members need to be updated accordingly
-        # if number of instances is scaled. example for demo-ha:
-        # max_impacted_members = len(self.ha_instances) - ha_group_limit
-        self.ha_group_limit = 2
-        self.nonha_group_limit = 2
+        nonha_instances = len(self.nonha_instances)
+        if nonha_instances < 7:
+            self.scale = 2
+        else:
+            self.scale = int((nonha_instances) / 2)
+        self.log.info('Init nonha_instances: %s scale: %s: max_impacted %s' %
+                      (nonha_instances, self.scale, nonha_instances - 1))
         # Different instance groups constraints dict
         self.ha_group = None
         self.nonha_group = None
-        # VNF project_id (VNF ID)
-        self.project_id = None
-        # HA instance_id that is active has active label
+        self.nonha_group_id = str(uuid.uuid4())
+        self.ha_group_id = [sg.id for sg in self.nova.server_groups.list()
+                            if sg.name == "%s_ha_app_group" % self.project][0]
+        # Floating IP used in HA instance
+        self.floating_ip = None
+        # HA instance_id that is active / has floating IP
         self.active_instance_id = self.active_instance_id()
 
-        services = self.ks.services.list()
+        services = keystone.services.list()
         for service in services:
             if service.type == 'maintenance':
-                LOG.info('maintenance service: %s:%s type %s'
-                         % (service.name, service.id, service.type))
+                self.log.info('maintenance service: %s:%s type %s'
+                              % (service.name, service.id, service.type))
                 maint_id = service.id
-        self.maint_endpoint = [ep.url for ep in self.ks.endpoints.list()
+        self.maint_endpoint = [ep.url for ep in keystone.endpoints.list()
                                if ep.service_id == maint_id and
                                ep.interface == 'public'][0]
-        LOG.info('maintenance endpoint: %s' % self.maint_endpoint)
+        self.log.info('maintenance endpoint: %s' % self.maint_endpoint)
         self.update_constraints_lock = False
         self.update_constraints()
-        # Instances waiting action to be done
-        self.pending_actions = {}
 
-    def create_alarm(self):
-        alarms = {alarm['name']: alarm for alarm in self.aodh.alarm.list()}
-        alarm_name = "%s_MAINTENANCE_ALARM" % self.project
-        if alarm_name in alarms:
-            return
-        alarm_request = dict(
-            name=alarm_name,
-            description=alarm_name,
-            enabled=True,
-            alarm_actions=[u'http://%s:%d/maintenance'
-                           % (self.conf.ip,
-                              self.conf.port)],
-            repeat_actions=True,
-            severity=u'moderate',
-            type=u'event',
-            event_rule=dict(event_type=u'maintenance.scheduled'))
-        self.aodh.alarm.create(alarm_request)
+    def get_external_network(self):
+        ext_net = None
+        networks = self.neutron.list_networks()['networks']
+        for network in networks:
+            if network['router:external']:
+                ext_net = network['name']
+                break
+        if ext_net is None:
+            raise Exception("external network not defined")
+        return ext_net
 
     def delete_remote_instance_constraints(self, instance_id):
         url = "%s/instance/%s" % (self.maint_endpoint, instance_id)
-        LOG.info('DELETE: %s' % url)
+        self.log.info('DELETE: %s' % url)
         ret = requests.delete(url, data=None, headers=self.headers)
         if ret.status_code != 200 and ret.status_code != 204:
-            if ret.status_code == 404:
-                LOG.info('Already deleted: %s' % instance_id)
-            else:
-                raise Exception(ret.text)
+            raise Exception(ret.text)
 
     def update_remote_instance_constraints(self, instance):
         url = "%s/instance/%s" % (self.maint_endpoint, instance["instance_id"])
-        LOG.info('PUT: %s' % url)
+        self.log.info('PUT: %s' % url)
         ret = requests.put(url, data=json.dumps(instance),
                            headers=self.headers)
         if ret.status_code != 200 and ret.status_code != 204:
@@ -186,7 +307,7 @@ class VNFManager(Thread):
     def delete_remote_group_constraints(self, instance_group):
         url = "%s/instance_group/%s" % (self.maint_endpoint,
                                         instance_group["group_id"])
-        LOG.info('DELETE: %s' % url)
+        self.log.info('DELETE: %s' % url)
         ret = requests.delete(url, data=None, headers=self.headers)
         if ret.status_code != 200 and ret.status_code != 204:
             raise Exception(ret.text)
@@ -194,13 +315,14 @@ class VNFManager(Thread):
     def update_remote_group_constraints(self, instance_group):
         url = "%s/instance_group/%s" % (self.maint_endpoint,
                                         instance_group["group_id"])
-        LOG.info('PUT: %s' % url)
+        self.log.info('PUT: %s' % url)
         ret = requests.put(url, data=json.dumps(instance_group),
                            headers=self.headers)
         if ret.status_code != 200 and ret.status_code != 204:
             raise Exception(ret.text)
 
     def delete_constraints(self):
+        self.headers['X-Auth-Token'] = self.session.get_token()
         for instance_id in self.instance_constraints:
             self.delete_remote_instance_constraints(instance_id)
         self.delete_remote_group_constraints(self.nonha_group)
@@ -208,73 +330,82 @@ class VNFManager(Thread):
 
     def update_constraints(self):
         while self.update_constraints_lock:
-            LOG.info('Waiting update_constraints_lock...')
+            self.log.info('Waiting update_constraints_lock...')
             time.sleep(1)
         self.update_constraints_lock = True
-        LOG.info('Update constraints')
-        if self.project_id is None:
-            self.project_id = self.ks.projects.list(name=self.project)[0].id
-        # Pods groupped by ReplicaSet, so we use that id
-        rs = {r.metadata.name: r.metadata.uid for r in
-              self.kaapi.list_namespaced_replica_set('demo').items}
+        self.log.info('Update constraints')
+
+        # Nova does not support grouping instances that do not belong to
+        # an anti-affinity server group, but Fenix needs all instances
+        # grouped, so a group id is generated for the non-HA instances
         max_impacted_members = len(self.nonha_instances) - 1
         nonha_group = {
-            "group_id": rs['demo-nonha'],
+            "group_id": self.nonha_group_id,
             "project_id": self.project_id,
-            "group_name": "demo-nonha",
+            "group_name": "%s_nonha_app_group" % self.project,
             "anti_affinity_group": False,
             "max_instances_per_host": 0,
             "max_impacted_members": max_impacted_members,
-            "recovery_time": 10,
+            "recovery_time": 2,
             "resource_mitigation": True}
-        LOG.info('create demo-nonha constraints: %s'
-                 % nonha_group)
+        self.log.info('create %s_nonha_app_group constraints: %s'
+                      % (self.project, nonha_group))
+
         ha_group = {
-            "group_id": rs['demo-ha'],
+            "group_id": self.ha_group_id,
             "project_id": self.project_id,
-            "group_name": "demo-ha",
+            "group_name": "%s_ha_app_group" % self.project,
             "anti_affinity_group": True,
             "max_instances_per_host": 1,
             "max_impacted_members": 1,
-            "recovery_time": 10,
+            "recovery_time": 4,
             "resource_mitigation": True}
-        LOG.info('create demo-ha constraints: %s'
-                 % ha_group)
+        self.log.info('create %s_ha_app_group constraints: %s'
+                      % (self.project, ha_group))
+        if not self.ha_group or self.ha_group != ha_group:
+            self.log.info('ha instance group needs update')
+            self.update_remote_group_constraints(ha_group)
+            self.ha_group = ha_group.copy()
+        if not self.nonha_group or self.nonha_group != nonha_group:
+            self.log.info('nonha instance group needs update')
+            self.update_remote_group_constraints(nonha_group)
+            self.nonha_group = nonha_group.copy()
 
         instance_constraints = {}
         for ha_instance in self.ha_instances:
             instance = {
-                "instance_id": ha_instance.metadata.uid,
+                "instance_id": ha_instance.id,
                 "project_id": self.project_id,
                 "group_id": ha_group["group_id"],
-                "instance_name": ha_instance.metadata.name,
+                "instance_name": ha_instance.name,
                 "max_interruption_time": 120,
-                "migration_type": "EVICTION",
+                "migration_type": "MIGRATE",
                 "resource_mitigation": True,
                 "lead_time": 40}
-            LOG.info('create ha instance constraints: %s' % instance)
-            instance_constraints[ha_instance.metadata.uid] = instance
+            self.log.info('create ha instance constraints: %s'
+                          % instance)
+            instance_constraints[ha_instance.id] = instance
         for nonha_instance in self.nonha_instances:
             instance = {
-                "instance_id": nonha_instance.metadata.uid,
+                "instance_id": nonha_instance.id,
                 "project_id": self.project_id,
                 "group_id": nonha_group["group_id"],
-                "instance_name": nonha_instance.metadata.name,
+                "instance_name": nonha_instance.name,
                 "max_interruption_time": 120,
-                "migration_type": "EVICTION",
+                "migration_type": "MIGRATE",
                 "resource_mitigation": True,
                 "lead_time": 40}
-            LOG.info('create nonha instance constraints: %s' % instance)
-            instance_constraints[nonha_instance.metadata.uid] = instance
+            self.log.info('create nonha instance constraints: %s'
+                          % instance)
+            instance_constraints[nonha_instance.id] = instance
         if not self.instance_constraints:
             # Initial instance constraints
-            LOG.info('create initial instances constraints...')
+            self.log.info('create initial instances constraints...')
             for instance in [instance_constraints[i] for i
                              in instance_constraints]:
                 self.update_remote_instance_constraints(instance)
             self.instance_constraints = instance_constraints.copy()
         else:
-            LOG.info('check instances constraints changes...')
+            self.log.info('check instances constraints changes...')
             added = [i for i in instance_constraints.keys()
                      if i not in self.instance_constraints]
             deleted = [i for i in self.instance_constraints.keys()
@@ -291,64 +422,55 @@ class VNFManager(Thread):
             if updated or deleted:
                 # Some instance constraints have changed
                 self.instance_constraints = instance_constraints.copy()
-        if not self.ha_group or self.ha_group != ha_group:
-            LOG.info('ha instance group need update')
-            self.update_remote_group_constraints(ha_group)
-            self.ha_group = ha_group.copy()
-        if not self.nonha_group or self.nonha_group != nonha_group:
-            LOG.info('nonha instance group need update')
-            self.update_remote_group_constraints(nonha_group)
-            self.nonha_group = nonha_group.copy()
         self.update_constraints_lock = False
 
     def active_instance_id(self):
-        # We digtate the active in the beginning
-        instance = self.ha_instances[0]
-        LOG.info('Initially Active instance: %s %s' %
-                 (instance.metadata.name, instance.metadata.uid))
-        name = instance.metadata.name
-        namespace = instance.metadata.namespace
-        body = {"metadata": {"labels": {"active": "True"}}}
-        self.kapi.patch_namespaced_pod(name, namespace, body)
-        self.active_instance_id = instance.metadata.uid
-
-    def switch_over_ha_instance(self, instance_id):
-        if instance_id == self.active_instance_id:
-            # Need to switchover as instance_id will be affected and is active
+        # Retry needed: it takes a while after the Heat stack is complete
+        # before the floating IP is in place
+        retry = 5
+        while retry > 0:
             for instance in self.ha_instances:
-                if instance_id == instance.metadata.uid:
-                    LOG.info('Active to Standby: %s %s' %
-                             (instance.metadata.name, instance.metadata.uid))
-                    name = instance.metadata.name
-                    namespace = instance.metadata.namespace
-                    body = client.UNKNOWN_BASE_TYPE()
-                    body.metadata.labels = {"ative": None}
-                    self.kapi.patch_namespaced_pod(name, namespace, body)
-                else:
-                    LOG.info('Standby to Active: %s %s' %
-                             (instance.metadata.name, instance.metadata.uid))
-                    name = instance.metadata.name
-                    namespace = instance.metadata.namespace
-                    body = client.UNKNOWN_BASE_TYPE()
-                    body.metadata.labels = {"ative": "True"}
-                    self.kapi.patch_namespaced_pod(name, namespace, body)
-                    self.active_instance_id = instance.metadata.uid
+                network_interfaces = next(iter(instance.addresses.values()))
+                for network_interface in network_interfaces:
+                    _type = network_interface.get('OS-EXT-IPS:type')
+                    if _type == "floating":
+                        if not self.floating_ip:
+                            self.floating_ip = network_interface.get('addr')
+                        self.log.debug('active_instance: %s %s' %
+                                       (instance.name, instance.id))
+                        return instance.id
+            time.sleep(2)
             self.update_instances()
+            retry -= 1
+        raise Exception("No active instance found")
+
+    def switch_over_ha_instance(self):
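+        # Move the floating IP to the standby HA instance, making it the
+        # active one before the currently active instance is affected.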
+        for instance in self.ha_instances:
+            if instance.id != self.active_instance_id:
+                self.log.info('Switch over to: %s %s' % (instance.name,
+                                                         instance.id))
+                # nova add_floating_ip is deprecated; use neutron instead
+                # instance.add_floating_ip(self.floating_ip)
+                port = self.neutron.list_ports(device_id=instance.id)['ports'][0]['id']  # noqa
+                floating_id = self.neutron.list_floatingips(floating_ip_address=self.floating_ip)['floatingips'][0]['id']  # noqa
+                self.neutron.update_floatingip(floating_id, {'floatingip': {'port_id': port}})  # noqa
+                # Have to update ha_instances as floating_ip changed
+                self.update_instances()
+                self.active_instance_id = instance.id
+                break
 
     def get_instance_ids(self):
-        instances = self.kapi.list_pod_for_all_namespaces().items
-        return [i.metadata.uid for i in instances
-                if i.metadata.name.startswith("demo-")
-                and i.metadata.namespace == "demo"]
+        ret = list()
+        for instance in self.nova.servers.list(detailed=False):
+            ret.append(instance.id)
+        return ret
 
     def update_instances(self):
-        instances = self.kapi.list_pod_for_all_namespaces().items
+        instances = self.nova.servers.list(detailed=True)
         self.ha_instances = [i for i in instances
-                             if i.metadata.name.startswith("demo-ha")
-                             and i.metadata.namespace == "demo"]
+                             if "%s_ha_app_" % self.project in i.name]
         self.nonha_instances = [i for i in instances
-                                if i.metadata.name.startswith("demo-nonha")
-                                and i.metadata.namespace == "demo"]
+                                if "%s_nonha_app_" % self.project in i.name]
 
     def _alarm_data_decoder(self, data):
         if "[" in data or "{" in data:
@@ -364,77 +486,38 @@ class VNFManager(Thread):
         ret = requests.get(url, data=None, headers=self.headers)
         if ret.status_code != 200:
             raise Exception(ret.text)
-        LOG.info('get_instance_ids %s' % ret.json())
+        self.log.info('get_instance_ids %s' % ret.json())
         return ret.json()['instance_ids']
 
-    def scale_instances(self, scale_instances):
+    def scale_instances(self, number_of_instances):
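+        # Scale the non-HA instances by updating the Heat stack's
+        # 'nonha_intances' parameter; 'number_of_instances' is negative
+        # when scaling in.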
+        # number_of_instances_before = self.number_of_instances()
         number_of_instances_before = len(self.nonha_instances)
-        replicas = number_of_instances_before + scale_instances
+        parameters = self.stack.parameters
+        parameters['nonha_intances'] = (number_of_instances_before +
+                                        number_of_instances)
+        self.stack.update(self.stack.stack_name,
+                          self.stack.stack_id,
+                          self.stack.template,
+                          parameters=parameters,
+                          files=self.stack.files)
 
-        # We only scale nonha apps
-        namespace = "demo"
-        name = "demo-nonha"
-        body = {'spec': {"replicas": replicas}}
-        self.kaapi.patch_namespaced_replica_set_scale(name, namespace, body)
-        time.sleep(3)
-
-        # Let's check if scale has taken effect
+        # number_of_instances_after = self.number_of_instances()
         self.update_instances()
+        self.update_constraints()
         number_of_instances_after = len(self.nonha_instances)
-        check = 20
-        while number_of_instances_after == number_of_instances_before:
-            if check == 0:
-                LOG.error('scale_instances with: %d failed, still %d instances'
-                          % (scale_instances, number_of_instances_after))
-                raise Exception('scale_instances failed')
-            check -= 1
-            time.sleep(1)
-            self.update_instances()
-            number_of_instances_after = len(self.nonha_instances)
+        if (number_of_instances_before + number_of_instances !=
+           number_of_instances_after):
+            self.log.error('scale_instances with: %d from: %d ends up to: %d'
+                           % (number_of_instances, number_of_instances_before,
+                              number_of_instances_after))
+            raise Exception('scale_instances failed')
 
-        LOG.info('scaled instances from %d to %d' %
-                 (number_of_instances_before, number_of_instances_after))
+        self.log.info('scaled nonha_intances from %d to %d' %
+                      (number_of_instances_before,
+                       number_of_instances_after))
 
     def number_of_instances(self):
-        instances = self.kapi.list_pod_for_all_namespaces().items
-        return len([i for i in instances
-                    if i.metadata.name.startswith("demo-")])
-
-    def instance_action(self, instance_id, allowed_actions):
-        # We should keep instance constraint in our internal structur
-        # and match instance_id specific allowed action. Now we assume EVICTION
-        if 'EVICTION' not in allowed_actions:
-            LOG.error('Action for %s not foudn from %s' %
-                      (instance_id, allowed_actions))
-            return None
-        return 'EVICTION'
-
-    def instance_action_started(self, instance_id, action):
-        time_now = datetime.datetime.utcnow()
-        max_interruption_time = (
-            self.instance_constraints[instance_id]['max_interruption_time'])
-        self.pending_actions[instance_id] = {
-            'started': time_now,
-            'max_interruption_time': max_interruption_time,
-            'action': action}
-
-    def was_instance_action_in_time(self, instance_id):
-        time_now = datetime.datetime.utcnow()
-        started = self.pending_actions[instance_id]['started']
-        limit = self.pending_actions[instance_id]['max_interruption_time']
-        action = self.pending_actions[instance_id]['action']
-        td = time_now - started
-        if td.total_seconds() > limit:
-            LOG.error('%s %s took too long: %ds' %
-                      (instance_id, action, td.total_seconds()))
-            LOG.error('%s max_interruption_time %ds might be too short' %
-                      (instance_id, limit))
-            raise Exception('%s %s took too long: %ds' %
-                            (instance_id, action, td.total_seconds()))
-        else:
-            LOG.info('%s %s with recovery time took %ds' %
-                     (instance_id, action, td.total_seconds()))
-        del self.pending_actions[instance_id]
+        return len(self.nova.servers.list(detailed=False))
 
     def run(self):
         app = Flask('VNFM')
@@ -447,85 +530,86 @@ class VNFManager(Thread):
             except Exception:
                 payload = ({t[0]: t[2] for t in
                            data['reason_data']['event']['traits']})
-                LOG.error('cannot parse alarm data: %s' % payload)
+                self.log.error('cannot parse alarm data: %s' % payload)
                 raise Exception('VNFM cannot parse alarm.'
                                 'Possibly trait data over 256 char')
 
-            LOG.info('VNFM received data = %s' % payload)
+            self.log.info('VNFM received data = %s' % payload)
 
             state = payload['state']
             reply_state = None
             reply = dict()
 
-            LOG.info('VNFM state: %s' % state)
+            self.log.info('VNFM state: %s' % state)
 
             if state == 'MAINTENANCE':
-                self.headers['X-Auth-Token'] = self.session.get_token()
                 instance_ids = (self.get_session_instance_ids(
                                 payload['instance_ids'],
                                 payload['session_id']))
-                reply['instance_ids'] = instance_ids
-                reply_state = 'ACK_MAINTENANCE'
+                my_instance_ids = self.get_instance_ids()
+                invalid_instances = (
+                    [instance_id for instance_id in instance_ids
+                     if instance_id not in my_instance_ids])
+                if invalid_instances:
+                    self.log.error('Invalid instances: %s' % invalid_instances)
+                    reply_state = 'NACK_MAINTENANCE'
+                else:
+                    reply_state = 'ACK_MAINTENANCE'
 
             elif state == 'SCALE_IN':
-                # scale down only nonha instances
-                nonha_instances = len(self.nonha_instances)
-                scale_in = nonha_instances / 2
-                self.scale_instances(-scale_in)
-                self.update_constraints()
-                reply['instance_ids'] = self.get_instance_ids()
+                # scale down "self.scale" instances that is VCPUS equaling
+                # at least a single compute node
+                self.scale_instances(-self.scale)
                 reply_state = 'ACK_SCALE_IN'
 
             elif state == 'MAINTENANCE_COMPLETE':
                 # possibly need to upscale
-                number_of_instances = self.number_of_instances()
-                if self.orig_number_of_instances > number_of_instances:
-                    scale_instances = (self.orig_number_of_instances -
-                                       number_of_instances)
-                    self.scale_instances(scale_instances)
-                    self.update_constraints()
+                self.scale_instances(self.scale)
                 reply_state = 'ACK_MAINTENANCE_COMPLETE'
 
-            elif (state == 'PREPARE_MAINTENANCE'
-                  or state == 'PLANNED_MAINTENANCE'):
-                instance_id = payload['instance_ids'][0]
-                instance_action = (self.instance_action(instance_id,
-                                   payload['allowed_actions']))
-                if not instance_action:
-                    raise Exception('Allowed_actions not supported for %s' %
-                                    instance_id)
+            elif state == 'PREPARE_MAINTENANCE':
+                # TBD from constraints
+                if "MIGRATE" not in payload['allowed_actions']:
+                    raise Exception('MIGRATE not supported')
+                instance_ids = payload['instance_ids'][0]
+                self.log.info('VNFM got instance: %s' % instance_ids)
+                if instance_ids == self.active_instance_id:
+                    self.switch_over_ha_instance()
+                # optional; also in constraints
+                reply['instance_action'] = "MIGRATE"
+                reply_state = 'ACK_PREPARE_MAINTENANCE'
 
-                LOG.info('VNFM got instance: %s' % instance_id)
-                self.switch_over_ha_instance(instance_id)
-
-                reply['instance_action'] = instance_action
-                reply_state = 'ACK_%s' % state
-                self.instance_action_started(instance_id, instance_action)
+            elif state == 'PLANNED_MAINTENANCE':
+                # TBD from constraints
+                if "MIGRATE" not in payload['allowed_actions']:
+                    raise Exception('MIGRATE not supported')
+                instance_ids = payload['instance_ids'][0]
+                self.log.info('VNFM got instance: %s' % instance_ids)
+                if instance_ids == self.active_instance_id:
+                    self.switch_over_ha_instance()
+                # optional; also in constraints
+                reply['instance_action'] = "MIGRATE"
+                reply_state = 'ACK_PLANNED_MAINTENANCE'
 
             elif state == 'INSTANCE_ACTION_DONE':
-                # TBD was action done in max_interruption_time (live migration)
-                # NOTE, in EVICTION instance_id reported that was in evicted
-                # node. New instance_id might be different
-                LOG.info('%s' % payload['instance_ids'])
-                self.was_instance_action_in_time(payload['instance_ids'][0])
-                self.update_instances()
-                self.update_constraints()
+                # TBD was action done in allowed window
+                self.log.info('%s' % payload['instance_ids'])
             else:
                 raise Exception('VNFM received event with'
                                 ' unknown state %s' % state)
 
             if reply_state:
-                reply['session_id'] = payload['session_id']
+                self.headers['X-Auth-Token'] = self.session.get_token()
                 reply['state'] = reply_state
                 url = payload['reply_url']
-                LOG.info('VNFM reply: %s' % reply)
+                self.log.info('VNFM reply: %s' % reply)
                 requests.put(url, data=json.dumps(reply), headers=self.headers)
 
             return 'OK'
 
         @app.route('/shutdown', methods=['POST'])
         def shutdown():
-            LOG.info('shutdown VNFM server at %s' % time.time())
+            self.log.info('shutdown VNFM server at %s' % time.time())
             func = request.environ.get('werkzeug.server.shutdown')
             if func is None:
                 raise RuntimeError('Not running with the Werkzeug Server')
@@ -543,3 +627,5 @@ if __name__ == '__main__':
             time.sleep(2)
     except KeyboardInterrupt:
         app_manager.stop()
+    except Exception:
+        app_manager.app.stack.delete()
diff --git a/fenix/tools/vnfm_k8s.py b/fenix/tools/vnfm_k8s.py
new file mode 100644
index 0000000..572394b
--- /dev/null
+++ b/fenix/tools/vnfm_k8s.py
@@ -0,0 +1,561 @@
+# Copyright (c) 2020 Nokia Corporation.
+# All Rights Reserved.
+#
+#    Licensed under the Apache License, Version 2.0 (the "License"); you may
+#    not use this file except in compliance with the License. You may obtain
+#    a copy of the License at
+#
+#         http://www.apache.org/licenses/LICENSE-2.0
+#
+#    Unless required by applicable law or agreed to in writing, software
+#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+#    License for the specific language governing permissions and limitations
+#    under the License.
+import aodhclient.client as aodhclient
+import datetime
+from flask import Flask
+from flask import request
+import json
+from keystoneauth1 import loading
+from keystoneclient import client as ks_client
+from kubernetes import client
+from kubernetes import config
+import logging as lging
+from oslo_config import cfg
+from oslo_log import log as logging
+import requests
+import sys
+from threading import Thread
+import time
+import yaml
+
+try:
+    import fenix.utils.identity_auth as identity_auth
+except ValueError:
+    sys.path.append('../utils')
+    import identity_auth
+
+LOG = logging.getLogger(__name__)
+streamlog = lging.StreamHandler(sys.stdout)
+LOG.logger.addHandler(streamlog)
+LOG.logger.setLevel(logging.INFO)
+
+opts = [
+    cfg.StrOpt('ip',
+               default='127.0.0.1',
+               help='the ip of VNFM',
+               required=True),
+    cfg.IntOpt('port',
+               default=12348,
+               help='the port of VNFM',
+               required=True),
+]
+
+CONF = cfg.CONF
+CONF.register_opts(opts)
+CONF.register_opts(identity_auth.os_opts, group='service_user')
+
+
+def get_identity_auth(conf, project=None, username=None, password=None):
+    loader = loading.get_plugin_loader('password')
+    return loader.load_from_options(
+        auth_url=conf.service_user.os_auth_url,
+        username=(username or conf.service_user.os_username),
+        password=(password or conf.service_user.os_password),
+        user_domain_name=conf.service_user.os_user_domain_name,
+        project_name=(project or conf.service_user.os_project_name),
+        tenant_name=(project or conf.service_user.os_project_name),
+        project_domain_name=conf.service_user.os_project_domain_name)
+
+
+class VNFM(object):
+
+    def __init__(self, conf, log):
+        self.conf = conf
+        self.log = log
+        self.app = None
+
+    def start(self):
+        LOG.info('VNFM start......')
+        self.app = VNFManager(self.conf, self.log)
+        self.app.start()
+
+    def stop(self):
+        LOG.info('VNFM stop......')
+        if not self.app:
+            return
+        self.app.headers['X-Auth-Token'] = self.app.session.get_token()
+        self.app.delete_constraints()
+        headers = {
+            'Content-Type': 'application/json',
+            'Accept': 'application/json',
+        }
+        url = 'http://%s:%d/shutdown'\
+              % (self.conf.ip,
+                 self.conf.port)
+        requests.post(url, data='', headers=headers)
+
+
+class VNFManager(Thread):
+
+    def __init__(self, conf, log):
+        Thread.__init__(self)
+        self.conf = conf
+        self.log = log
+        self.port = self.conf.port
+        self.instance_ids = None
+        # VNFM is started with OS_* exported as admin user
+        # We need that to query Fenix endpoint url
+        # Still we work with our tenant/project/VNF as demo
+        self.project = "demo"
+        LOG.info('VNFM project: %s' % self.project)
+        self.auth = identity_auth.get_identity_auth(conf, project=self.project)
+        self.session = identity_auth.get_session(auth=self.auth)
+        self.ks = ks_client.Client(version='v3', session=self.session)
+        self.aodh = aodhclient.Client(2, self.session)
+        # Subscribe to maintenance event alarms from Fenix via AODH
+        self.create_alarm()
+        config.load_kube_config()
+        self.kaapi = client.AppsV1Api()
+        self.kapi = client.CoreV1Api()
+        self.headers = {
+            'Content-Type': 'application/json',
+            'Accept': 'application/json'}
+        self.headers['X-Auth-Token'] = self.session.get_token()
+        self.orig_number_of_instances = self.number_of_instances()
+        # List of instances
+        self.ha_instances = []
+        self.nonha_instances = []
+        # Per-instance constraints {instance_id: {}, ...}
+        self.instance_constraints = None
+        # Update existing instances to instance lists
+        self.update_instances()
+        # How many instances need to exist (with the current VNF load).
+        # max_impacted_members needs to be updated accordingly
+        # if the number of instances is scaled. Example for demo-ha:
+        # max_impacted_members = len(self.ha_instances) - ha_group_limit
+        self.ha_group_limit = 2
+        self.nonha_group_limit = 2
+        # Different instance groups constraints dict
+        self.ha_group = None
+        self.nonha_group = None
+        auth = get_identity_auth(conf,
+                                 project='service',
+                                 username='fenix',
+                                 password='admin')
+        session = identity_auth.get_session(auth=auth)
+        keystone = ks_client.Client(version='v3', session=session)
+        # VNF project_id (VNF ID)
+        self.project_id = self.session.get_project_id()
+        # The HA instance that is active carries the 'active' label
+        self.active_instance_id = self.active_instance_id()
+        services = keystone.services.list()
+        for service in services:
+            if service.type == 'maintenance':
+                LOG.info('maintenance service: %s:%s type %s'
+                         % (service.name, service.id, service.type))
+                maint_id = service.id
+        self.maint_endpoint = [ep.url for ep in keystone.endpoints.list()
+                               if ep.service_id == maint_id and
+                               ep.interface == 'public'][0]
+        LOG.info('maintenance endpoint: %s' % self.maint_endpoint)
+        self.update_constraints_lock = False
+        self.update_constraints()
+        # Instances waiting action to be done
+        self.pending_actions = {}
+
+    def create_alarm(self):
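+        # Subscribe to Fenix 'maintenance.scheduled' events by creating an
+        # AODH event alarm targeting our /maintenance endpoint, unless one
+        # already exists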
+        alarms = {alarm['name']: alarm for alarm in self.aodh.alarm.list()}
+        alarm_name = "%s_MAINTENANCE_ALARM" % self.project
+        if alarm_name in alarms:
+            return
+        alarm_request = dict(
+            name=alarm_name,
+            description=alarm_name,
+            enabled=True,
+            alarm_actions=[u'http://%s:%d/maintenance'
+                           % (self.conf.ip,
+                              self.conf.port)],
+            repeat_actions=True,
+            severity=u'moderate',
+            type=u'event',
+            event_rule=dict(event_type=u'maintenance.scheduled'))
+        self.aodh.alarm.create(alarm_request)
+
+    def delete_remote_instance_constraints(self, instance_id):
+        url = "%s/instance/%s" % (self.maint_endpoint, instance_id)
+        LOG.info('DELETE: %s' % url)
+        ret = requests.delete(url, data=None, headers=self.headers)
+        if ret.status_code != 200 and ret.status_code != 204:
+            if ret.status_code == 404:
+                LOG.info('Already deleted: %s' % instance_id)
+            else:
+                raise Exception(ret.text)
+
+    def update_remote_instance_constraints(self, instance):
+        url = "%s/instance/%s" % (self.maint_endpoint, instance["instance_id"])
+        LOG.info('PUT: %s' % url)
+        ret = requests.put(url, data=json.dumps(instance),
+                           headers=self.headers)
+        if ret.status_code != 200 and ret.status_code != 204:
+            raise Exception(ret.text)
+
+    def delete_remote_group_constraints(self, instance_group):
+        url = "%s/instance_group/%s" % (self.maint_endpoint,
+                                        instance_group["group_id"])
+        LOG.info('DELETE: %s' % url)
+        ret = requests.delete(url, data=None, headers=self.headers)
+        if ret.status_code != 200 and ret.status_code != 204:
+            raise Exception(ret.text)
+
+    def update_remote_group_constraints(self, instance_group):
+        url = "%s/instance_group/%s" % (self.maint_endpoint,
+                                        instance_group["group_id"])
+        LOG.info('PUT: %s' % url)
+        ret = requests.put(url, data=json.dumps(instance_group),
+                           headers=self.headers)
+        if ret.status_code != 200 and ret.status_code != 204:
+            raise Exception(ret.text)
+
+    def delete_constraints(self):
+        for instance_id in self.instance_constraints:
+            self.delete_remote_instance_constraints(instance_id)
+        self.delete_remote_group_constraints(self.nonha_group)
+        self.delete_remote_group_constraints(self.ha_group)
+
+    def update_constraints(self):
+        while self.update_constraints_lock:
+            LOG.info('Waiting update_constraints_lock...')
+            time.sleep(1)
+        self.update_constraints_lock = True
+        LOG.info('Update constraints')
+        # Pods are grouped by ReplicaSet, so we use its id
+        rs = {r.metadata.name: r.metadata.uid for r in
+              self.kaapi.list_namespaced_replica_set('demo').items}
+        max_impacted_members = len(self.nonha_instances) - 1
+        nonha_group = {
+            "group_id": rs['demo-nonha'],
+            "project_id": self.project_id,
+            "group_name": "demo-nonha",
+            "anti_affinity_group": False,
+            "max_instances_per_host": 0,
+            "max_impacted_members": max_impacted_members,
+            "recovery_time": 10,
+            "resource_mitigation": True}
+        LOG.info('create demo-nonha constraints: %s'
+                 % nonha_group)
+        ha_group = {
+            "group_id": rs['demo-ha'],
+            "project_id": self.project_id,
+            "group_name": "demo-ha",
+            "anti_affinity_group": True,
+            "max_instances_per_host": 1,
+            "max_impacted_members": 1,
+            "recovery_time": 10,
+            "resource_mitigation": True}
+        LOG.info('create demo-ha constraints: %s'
+                 % ha_group)
+        if not self.ha_group or self.ha_group != ha_group:
+            LOG.info('ha instance group need update')
+            self.update_remote_group_constraints(ha_group)
+            self.ha_group = ha_group.copy()
+        if not self.nonha_group or self.nonha_group != nonha_group:
+            LOG.info('nonha instance group need update')
+            self.update_remote_group_constraints(nonha_group)
+            self.nonha_group = nonha_group.copy()
+
+        instance_constraints = {}
+        for ha_instance in self.ha_instances:
+            instance = {
+                "instance_id": ha_instance.metadata.uid,
+                "project_id": self.project_id,
+                "group_id": ha_group["group_id"],
+                "instance_name": ha_instance.metadata.name,
+                "max_interruption_time": 120,
+                "migration_type": "EVICTION",
+                "resource_mitigation": True,
+                "lead_time": 40}
+            LOG.info('create ha instance constraints: %s' % instance)
+            instance_constraints[ha_instance.metadata.uid] = instance
+        for nonha_instance in self.nonha_instances:
+            instance = {
+                "instance_id": nonha_instance.metadata.uid,
+                "project_id": self.project_id,
+                "group_id": nonha_group["group_id"],
+                "instance_name": nonha_instance.metadata.name,
+                "max_interruption_time": 120,
+                "migration_type": "EVICTION",
+                "resource_mitigation": True,
+                "lead_time": 40}
+            LOG.info('create nonha instance constraints: %s' % instance)
+            instance_constraints[nonha_instance.metadata.uid] = instance
+        if not self.instance_constraints:
+            # Initial instance constraints
+            LOG.info('create initial instances constraints...')
+            for instance in [instance_constraints[i] for i
+                             in instance_constraints]:
+                self.update_remote_instance_constraints(instance)
+            self.instance_constraints = instance_constraints.copy()
+        else:
+            LOG.info('check instances constraints changes...')
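+            # Compare against the previously sent constraints: only send
+            # updates for added or modified instances; delete removed ones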
+            added = [i for i in instance_constraints.keys()
+                     if i not in self.instance_constraints]
+            deleted = [i for i in self.instance_constraints.keys()
+                       if i not in instance_constraints]
+            modified = [i for i in instance_constraints.keys()
+                        if (i not in added and i not in deleted and
+                            instance_constraints[i] !=
+                            self.instance_constraints[i])]
+            for instance_id in deleted:
+                self.delete_remote_instance_constraints(instance_id)
+            updated = added + modified
+            for instance in [instance_constraints[i] for i in updated]:
+                self.update_remote_instance_constraints(instance)
+            if updated or deleted:
+                # Some instance constraints have changed
+                self.instance_constraints = instance_constraints.copy()
+        self.update_constraints_lock = False
+
+    def active_instance_id(self):
+        # We dictate the active instance in the beginning
+        instance = self.ha_instances[0]
+        LOG.info('Initially Active instance: %s %s' %
+                 (instance.metadata.name, instance.metadata.uid))
+        name = instance.metadata.name
+        namespace = instance.metadata.namespace
+        body = {"metadata": {"labels": {"active": "True"}}}
+        self.kapi.patch_namespaced_pod(name, namespace, body)
+        return instance.metadata.uid
+
+    def switch_over_ha_instance(self, instance_id):
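+        # If the affected instance is the active HA member, move the
+        # 'active' label to the standby instance before it is impacted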
+        if instance_id == self.active_instance_id:
+            # Need to switchover as instance_id will be affected and is active
+            for instance in self.ha_instances:
+                if instance_id == instance.metadata.uid:
+                    LOG.info('Active to Standby: %s %s' %
+                             (instance.metadata.name, instance.metadata.uid))
+                    name = instance.metadata.name
+                    namespace = instance.metadata.namespace
+                    body = {"metadata": {"labels": {"active": None}}}
+                    self.kapi.patch_namespaced_pod(name, namespace, body)
+                else:
+                    LOG.info('Standby to Active: %s %s' %
+                             (instance.metadata.name, instance.metadata.uid))
+                    name = instance.metadata.name
+                    namespace = instance.metadata.namespace
+                    body = {"metadata": {"labels": {"active": "True"}}}
+                    self.kapi.patch_namespaced_pod(name, namespace, body)
+                    self.active_instance_id = instance.metadata.uid
+            self.update_instances()
+
+    def get_instance_ids(self):
+        instances = self.kapi.list_pod_for_all_namespaces().items
+        return [i.metadata.uid for i in instances
+                if i.metadata.name.startswith("demo-") and
+                i.metadata.namespace == "demo"]
+
+    def update_instances(self):
+        instances = self.kapi.list_pod_for_all_namespaces().items
+        self.ha_instances = [i for i in instances
+                             if i.metadata.name.startswith("demo-ha") and
+                             i.metadata.namespace == "demo"]
+        self.nonha_instances = [i for i in instances
+                                if i.metadata.name.startswith("demo-nonha") and
+                                i.metadata.namespace == "demo"]
+
+    def _alarm_data_decoder(self, data):
+        if "[" in data or "{" in data:
+            # string to list or dict removing unicode
+            data = yaml.load(data.replace("u'", "'"))
+        return data
+
+    def _alarm_traits_decoder(self, data):
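+        # Flatten AODH event traits [name, type, value] into a
+        # {name: value} dict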
+        return ({str(t[0]): self._alarm_data_decoder(str(t[2]))
+                for t in data['reason_data']['event']['traits']})
+
+    def get_session_instance_ids(self, url, session_id):
+        ret = requests.get(url, data=None, headers=self.headers)
+        if ret.status_code != 200:
+            raise Exception(ret.text)
+        LOG.info('get_instance_ids %s' % ret.json())
+        return ret.json()['instance_ids']
+
+    def scale_instances(self, scale_instances):
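+        # Scale the demo-nonha ReplicaSet by 'scale_instances' (negative
+        # scales in) and wait until Kubernetes reports the new pod count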
+        number_of_instances_before = len(self.nonha_instances)
+        replicas = number_of_instances_before + scale_instances
+
+        # We only scale nonha apps
+        namespace = "demo"
+        name = "demo-nonha"
+        body = {'spec': {"replicas": replicas}}
+        self.kaapi.patch_namespaced_replica_set_scale(name, namespace, body)
+        time.sleep(3)
+
+        # Let's check if scale has taken effect
+        self.update_instances()
+        number_of_instances_after = len(self.nonha_instances)
+        check = 20
+        while number_of_instances_after == number_of_instances_before:
+            if check == 0:
+                LOG.error('scale_instances with: %d failed, still %d instances'
+                          % (scale_instances, number_of_instances_after))
+                raise Exception('scale_instances failed')
+            check -= 1
+            time.sleep(1)
+            self.update_instances()
+            number_of_instances_after = len(self.nonha_instances)
+
+        LOG.info('scaled instances from %d to %d' %
+                 (number_of_instances_before, number_of_instances_after))
+
+    def number_of_instances(self):
+        instances = self.kapi.list_pod_for_all_namespaces().items
+        return len([i for i in instances
+                    if i.metadata.name.startswith("demo-")])
+
+    def instance_action(self, instance_id, allowed_actions):
+        # We should keep the instance constraints in our internal structure
+        # and match the instance-specific allowed action. For now we assume
+        # EVICTION
+        if 'EVICTION' not in allowed_actions:
+            LOG.error('Action for %s not found from %s' %
+                      (instance_id, allowed_actions))
+            return None
+        return 'EVICTION'
+
+    def instance_action_started(self, instance_id, action):
+        time_now = datetime.datetime.utcnow()
+        max_interruption_time = (
+            self.instance_constraints[instance_id]['max_interruption_time'])
+        self.pending_actions[instance_id] = {
+            'started': time_now,
+            'max_interruption_time': max_interruption_time,
+            'action': action}
+
+    def was_instance_action_in_time(self, instance_id):
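+        # Check that the action completed within the instance's
+        # max_interruption_time recorded when the action started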
+        time_now = datetime.datetime.utcnow()
+        started = self.pending_actions[instance_id]['started']
+        limit = self.pending_actions[instance_id]['max_interruption_time']
+        action = self.pending_actions[instance_id]['action']
+        td = time_now - started
+        if td.total_seconds() > limit:
+            LOG.error('%s %s took too long: %ds' %
+                      (instance_id, action, td.total_seconds()))
+            LOG.error('%s max_interruption_time %ds might be too short' %
+                      (instance_id, limit))
+            raise Exception('%s %s took too long: %ds' %
+                            (instance_id, action, td.total_seconds()))
+        else:
+            LOG.info('%s %s with recovery time took %ds' %
+                     (instance_id, action, td.total_seconds()))
+        del self.pending_actions[instance_id]
+
+    def run(self):
+        app = Flask('VNFM')
+
+        @app.route('/maintenance', methods=['POST'])
+        def maintenance_alarm():
+            data = json.loads(request.data.decode('utf8'))
+            try:
+                payload = self._alarm_traits_decoder(data)
+            except Exception:
+                payload = ({t[0]: t[2] for t in
+                           data['reason_data']['event']['traits']})
+                LOG.error('cannot parse alarm data: %s' % payload)
+                raise Exception('VNFM cannot parse alarm. '
+                                'Possibly trait data over 256 chars')
+
+            LOG.info('VNFM received data = %s' % payload)
+
+            state = payload['state']
+            reply_state = None
+            reply = dict()
+
+            LOG.info('VNFM state: %s' % state)
+
+            if state == 'MAINTENANCE':
+                self.headers['X-Auth-Token'] = self.session.get_token()
+                instance_ids = (self.get_session_instance_ids(
+                                payload['instance_ids'],
+                                payload['session_id']))
+                reply['instance_ids'] = instance_ids
+                reply_state = 'ACK_MAINTENANCE'
+
+            elif state == 'SCALE_IN':
+                # scale down only nonha instances
+                nonha_instances = len(self.nonha_instances)
+                scale_in = nonha_instances // 2
+                self.scale_instances(-scale_in)
+                self.update_constraints()
+                reply['instance_ids'] = self.get_instance_ids()
+                reply_state = 'ACK_SCALE_IN'
+
+            elif state == 'MAINTENANCE_COMPLETE':
+                # possibly need to upscale
+                number_of_instances = self.number_of_instances()
+                if self.orig_number_of_instances > number_of_instances:
+                    scale_instances = (self.orig_number_of_instances -
+                                       number_of_instances)
+                    self.scale_instances(scale_instances)
+                    self.update_constraints()
+                reply_state = 'ACK_MAINTENANCE_COMPLETE'
+
+            elif (state == 'PREPARE_MAINTENANCE' or
+                  state == 'PLANNED_MAINTENANCE'):
+                instance_id = payload['instance_ids'][0]
+                instance_action = (self.instance_action(instance_id,
+                                   payload['allowed_actions']))
+                if not instance_action:
+                    raise Exception('Allowed_actions not supported for %s' %
+                                    instance_id)
+
+                LOG.info('VNFM got instance: %s' % instance_id)
+                self.switch_over_ha_instance(instance_id)
+
+                reply['instance_action'] = instance_action
+                reply_state = 'ACK_%s' % state
+                self.instance_action_started(instance_id, instance_action)
+
+            elif state == 'INSTANCE_ACTION_DONE':
+                # Check that the action was done within max_interruption_time
+                # NOTE: with EVICTION the reported instance_id is the one on
+                # the evicted node; the new instance_id might be different
+                LOG.info('%s' % payload['instance_ids'])
+                self.was_instance_action_in_time(payload['instance_ids'][0])
+                self.update_instances()
+                self.update_constraints()
+            else:
+                raise Exception('VNFM received event with'
+                                ' unknown state %s' % state)
+
+            if reply_state:
+                reply['session_id'] = payload['session_id']
+                reply['state'] = reply_state
+                url = payload['reply_url']
+                LOG.info('VNFM reply: %s' % reply)
+                requests.put(url, data=json.dumps(reply), headers=self.headers)
+
+            return 'OK'
+
+        @app.route('/shutdown', methods=['POST'])
+        def shutdown():
+            LOG.info('shutdown VNFM server at %s' % time.time())
+            func = request.environ.get('werkzeug.server.shutdown')
+            if func is None:
+                raise RuntimeError('Not running with the Werkzeug Server')
+            func()
+            return 'VNFM shutting down...'
+
+        app.run(host="0.0.0.0", port=self.port)
+
+if __name__ == '__main__':
+    app_manager = VNFM(CONF, LOG)
+    app_manager.start()
+    try:
+        LOG.info('Press CTRL + C to quit')
+        while True:
+            time.sleep(2)
+    except KeyboardInterrupt:
+        app_manager.stop()
diff --git a/fenix/utils/service.py b/fenix/utils/service.py
index b1ad738..3311980 100644
--- a/fenix/utils/service.py
+++ b/fenix/utils/service.py
@@ -94,7 +94,36 @@ class RPCClient(object):
 class EngineEndpoint(object):
 
     def __init__(self):
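+        # Initialize any workflow sessions found in the database so they
+        # continue running in this engine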
+        sessions = db_api.get_sessions()
         self.workflow_sessions = {}
+        if sessions:
+            LOG.info("Initialize workflows from DB")
+            for session in sessions:
+                session_id = session.session_id
+                LOG.info("Session %s from DB" % session.session_id)
+                workflow = "fenix.workflow.workflows.%s" % session.workflow
+                LOG.info("Workflow plugin module: %s" % workflow)
+                try:
+                    wf_plugin = getattr(import_module(workflow), 'Workflow')
+                    self.workflow_sessions[session_id] = wf_plugin(CONF,
+                                                                   session_id,
+                                                                   None)
+                except ImportError:
+                    session_dir = "%s/%s" % (CONF.local_cache_dir, session_id)
+                    download_plugin_dir = session_dir + "/workflow/"
+                    download_plugin_file = "%s/%s.py" % (download_plugin_dir,
+                                                         session.workflow)
+                    if os.path.isfile(download_plugin_file):
+                        self.workflow_sessions[session_id] = (
+                            source_loader_workflow_instance(
+                                workflow,
+                                download_plugin_file,
+                                CONF,
+                                session_id,
+                                None))
+                    else:
+                        raise Exception('%s: could not find workflow plugin %s'
+                                        % (session_id, session.workflow))
 
     def _validate_session(self, session_id):
         if session_id not in self.workflow_sessions.keys():
@@ -144,7 +173,7 @@ class EngineEndpoint(object):
                                                     data))
             else:
                 raise Exception('%s: could not find workflow plugin %s' %
-                                (self.session_id, data["workflow"]))
+                                (session_id, data["workflow"]))
 
         self.workflow_sessions[session_id].start()
         return {"session_id": session_id}
@@ -154,8 +183,23 @@ class EngineEndpoint(object):
         if not self._validate_session(session_id):
             return None
         LOG.info("EngineEndpoint: admin_get_session")
-        return ({"session_id": session_id, "state":
-                self.workflow_sessions[session_id].session.state})
+        return {"session_id": session_id, "state":
+                self.workflow_sessions[session_id].session.state}
+
+    def admin_get_session_detail(self, ctx, session_id):
+        """Get maintenance workflow session details"""
+        if not self._validate_session(session_id):
+            return None
+        LOG.info("EngineEndpoint: admin_get_session_detail")
+        sess = self.workflow_sessions[session_id]
+        return {"session_id": session_id,
+                "state": sess.session.state,
+                "percent_done": sess.session_report["last_percent"],
+                "session": sess.session,
+                "hosts": sess.hosts,
+                "instances": sess.instances,
+                "action_plugin_instances": db_api.get_action_plugin_instances(
+                    session_id)}
 
     def admin_delete_session(self, ctx, session_id):
         """Delete maintenance workflow session thread"""
@@ -198,6 +242,7 @@ class EngineEndpoint(object):
         session_obj = self.workflow_sessions[session_id]
         project = session_obj.project(project_id)
         project.state = data["state"]
+        db_api.update_project(project)
         if "instance_actions" in data:
             session_obj.proj_instance_actions[project_id] = (
                 data["instance_actions"].copy())
@@ -212,6 +257,7 @@ class EngineEndpoint(object):
         instance.project_state = data["state"]
         if "instance_action" in data:
             instance.action = data["instance_action"]
+        db_api.update_instance(instance)
         return data
 
     def get_instance(self, ctx, instance_id):
diff --git a/fenix/workflow/actions/dummy.py b/fenix/workflow/actions/dummy.py
index 1a34038..068e55d 100644
--- a/fenix/workflow/actions/dummy.py
+++ b/fenix/workflow/actions/dummy.py
@@ -12,8 +12,10 @@
 #    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 #    License for the specific language governing permissions and limitations
 #    under the License.
+from fenix.db import api as db_api
 from oslo_log import log as logging
 import subprocess
+import time
 
 LOG = logging.getLogger(__name__)
 
@@ -32,10 +34,12 @@ class ActionPlugin(object):
             output = subprocess.check_output("echo Dummy running in %s" %
                                              self.hostname,
                                              shell=True)
+            time.sleep(1)
             self.ap_dbi.state = "DONE"
         except subprocess.CalledProcessError:
             self.ap_dbi.state = "FAILED"
         finally:
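+            # Persist the resulting action plugin state to the database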
+            db_api.update_action_plugin_instance(self.ap_dbi)
             LOG.debug("%s: OUTPUT: %s" % (self.wf.session_id, output))
             LOG.info("%s: Dummy action plugin state: %s" % (self.wf.session_id,
                                                             self.ap_dbi.state))
diff --git a/fenix/workflow/workflow.py b/fenix/workflow/workflow.py
index 7e48263..10d3603 100644
--- a/fenix/workflow/workflow.py
+++ b/fenix/workflow/workflow.py
@@ -34,31 +34,55 @@ LOG = logging.getLogger(__name__)
 
 class BaseWorkflow(Thread):
 
-    def __init__(self, conf, session_id, data):
+    def __init__(self, conf, session_id, data=None):
+        # if data not set, we initialize from DB
         Thread.__init__(self)
         self.conf = conf
         self.session_id = session_id
         self.stopped = False
         self.thg = threadgroup.ThreadGroup()
         self.timer = {}
-        self.session = self._init_session(data)
+
+        if data:
+            self.session = self._init_session(data)
+        else:
+            self.session = db_api.get_session(session_id)
+            LOG.info('%s session from DB: %s' % (self.session_id,
+                                                 self.session.state))
+
         self.hosts = []
-        if "hosts" in data and data['hosts']:
+        if not data:
+            self.hosts = db_api.get_hosts(session_id)
+        elif "hosts" in data and data['hosts']:
             # Hosts given as input, not to be discovered in workflow
             self.hosts = self.init_hosts(self.convert(data['hosts']))
         else:
             LOG.info('%s: No hosts as input' % self.session_id)
-        if "actions" in data:
+
+        if not data:
+            self.actions = db_api.get_action_plugins(session_id)
+        elif "actions" in data:
             self.actions = self._init_action_plugins(data["actions"])
         else:
             self.actions = []
-        if "download" in data:
+
+        if not data:
+            self.downloads = db_api.get_downloads(session_id)
+        elif "download" in data:
             self.downloads = self._init_downloads(data["download"])
         else:
             self.downloads = []
 
-        self.projects = []
-        self.instances = []
+        if not data:
+            self.projects = db_api.get_projects(session_id)
+        else:
+            self.projects = []
+
+        if not data:
+            self.instances = db_api.get_instances(session_id)
+        else:
+            self.instances = []
+
         self.proj_instance_actions = {}
 
         self.states_methods = {'MAINTENANCE': 'maintenance',
@@ -72,6 +96,7 @@ class BaseWorkflow(Thread):
         self.url = "http://%s:%s" % (conf.host, conf.port)
         self.auth = get_identity_auth(conf)
         self.auth_session = get_session(auth=self.auth)
+        self.project_id = self.auth_session.get_project_id()
         self.aodh = aodhclient.Client('2', self.auth_session)
         transport = messaging.get_transport(self.conf)
         self.notif_proj = messaging.Notifier(transport,
@@ -84,6 +109,13 @@ class BaseWorkflow(Thread):
                                               driver='messaging',
                                               topics=['notifications'])
         self.notif_admin = self.notif_admin.prepare(publisher_id='fenix')
+        self.notif_sess = messaging.Notifier(transport,
+                                             'maintenance.session',
+                                             driver='messaging',
+                                             topics=['notifications'])
+        self.notif_sess = self.notif_sess.prepare(publisher_id='fenix')
+
+        self.session_report = {'last_percent': 0, 'last_state': None}
 
     def init_hosts(self, hostnames):
         LOG.info('%s:  init_hosts: %s' % (self.session_id, hostnames))
@@ -174,6 +206,12 @@ class BaseWorkflow(Thread):
         return [host.hostname for host in self.hosts if host.maintained and
                 host.type == host_type]
 
+    def get_maintained_percent(self):
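+        # Percentage of hosts already maintained in this session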
+        maintained_hosts = float(len([host for host in self.hosts
+                                     if host.maintained]))
+        all_hosts = float(len(self.hosts))
+        return int(maintained_hosts / all_hosts * 100)
+
     def get_disabled_hosts(self):
         return [host for host in self.hosts if host.disabled]
 
@@ -195,6 +233,7 @@ class BaseWorkflow(Thread):
         if host_obj:
             if len(host_obj) == 1:
                 host_obj[0].maintained = True
+                db_api.update_host(host_obj[0])
             else:
                 raise Exception('host_maintained: %s has duplicate entries' %
                                 hostname)
@@ -230,8 +269,10 @@ class BaseWorkflow(Thread):
     def set_projets_state(self, state):
         for project in self.projects:
             project.state = state
+            db_api.update_project(project)
         for instance in self.instances:
             instance.project_state = None
+            db_api.update_instance(instance)
 
     def project_has_state_instances(self, project_id):
         instances = ([instance.instance_id for instance in self.instances if
@@ -254,11 +295,13 @@ class BaseWorkflow(Thread):
                     instance.project_state = state
                 else:
                     instance.project_state = None
+                db_api.update_instance(instance)
             if state_instances:
                 some_project_has_instances = True
                 project.state = state
             else:
                 project.state = None
+            db_api.update_project(project)
         if not some_project_has_instances:
             LOG.error('%s: No project has instances on hosts %s' %
                       (self.session_id, hosts))
@@ -410,6 +453,10 @@ class BaseWorkflow(Thread):
         # TBD we could notify admin for workflow state change
         self.session.prev_state = self.session.state
         self.session.state = state
+        self.session = db_api.update_session(self.session)
+        self._session_notify(state,
+                             self.get_maintained_percent(),
+                             self.session_id)
         if state in ["MAINTENANCE_DONE", "MAINTENANCE_FAILED"]:
             try:
                 statefunc = (getattr(self,
@@ -481,14 +528,35 @@ class BaseWorkflow(Thread):
         self.notif_proj.info({'some': 'context'}, 'maintenance.scheduled',
                              payload)
 
-    def _admin_notify(self, project, host, state, session_id):
-        payload = dict(project_id=project, host=host, state=state,
+    def _admin_notify(self, host, state, session_id):
+        payload = dict(project_id=self.project_id, host=host, state=state,
                        session_id=session_id)
 
         LOG.info('Sending "maintenance.host": %s' % payload)
 
         self.notif_admin.info({'some': 'context'}, 'maintenance.host', payload)
 
+    def _session_notify(self, state, percent_done, session_id):
+        # Threads race to send this message; another thread may already
+        # have reported further progress, so never report a lower percentage
+        if self.session_report['last_percent'] > percent_done:
+            percent_done = self.session_report['last_percent']
+            if self.session_report['last_state'] == state:
+                return
+        else:
+            self.session_report['last_percent'] = percent_done
+        self.session_report['last_state'] = state
+        payload = dict(project_id=self.project_id,
+                       state=state,
+                       percent_done=percent_done,
+                       session_id=session_id)
+
+        LOG.info('Sending "maintenance.session": %s' % payload)
+
+        self.notif_sess.info({'some': 'context'},
+                             'maintenance.session',
+                             payload)
+
     def projects_answer(self, state, projects):
         state_ack = 'ACK_%s' % state
         state_nack = 'NACK_%s' % state
diff --git a/fenix/workflow/workflows/default.py b/fenix/workflow/workflows/default.py
index 3e1da9e..3691b45 100644
--- a/fenix/workflow/workflows/default.py
+++ b/fenix/workflow/workflows/default.py
@@ -140,6 +140,7 @@ class Workflow(BaseWorkflow):
                 host.type = 'controller'
                 continue
             host.type = 'other'
+            db_api.update_host(host)
 
     def disable_host_nova_compute(self, hostname):
         LOG.info('%s: disable nova-compute on host %s' % (self.session_id,
@@ -153,6 +154,7 @@ class Workflow(BaseWorkflow):
             self.nova.services.disable_log_reason(hostname, "nova-compute",
                                                   "maintenance")
         host.disabled = True
+        db_api.update_host(host)
 
     def enable_host_nova_compute(self, hostname):
         LOG.info('%s: enable nova-compute on host %s' % (self.session_id,
@@ -165,6 +167,7 @@ class Workflow(BaseWorkflow):
                       (self.session_id, hostname))
             self.nova.services.enable(hostname, "nova-compute")
         host.disabled = False
+        db_api.update_host(host)
 
     def get_compute_hosts(self):
         return [host.hostname for host in self.hosts
@@ -408,8 +411,8 @@ class Workflow(BaseWorkflow):
 
     def get_free_vcpus_by_host(self, host, hvisors):
         hvisor = ([h for h in hvisors if
-                  h.__getattr__('hypervisor_hostname').split(".", 1)[0]
-                  == host][0])
+                  h.__getattr__(
+                      'hypervisor_hostname').split(".", 1)[0] == host][0])
         vcpus = hvisor.__getattr__('vcpus')
         vcpus_used = hvisor.__getattr__('vcpus_used')
         return vcpus - vcpus_used
@@ -547,6 +550,7 @@ class Workflow(BaseWorkflow):
         reply_at = None
         state = "INSTANCE_ACTION_DONE"
         instance.project_state = state
+        db_api.update_instance(instance)
         metadata = "{}"
         self._project_notify(project, instance_ids, allowed_actions,
                              actions_at, reply_at, state, metadata)
@@ -561,6 +565,7 @@ class Workflow(BaseWorkflow):
                                    project, instance.instance_id))
                 LOG.info('Action %s instance %s ' % (instance.action,
                                                      instance.instance_id))
+                db_api.update_instance(instance)
                 if instance.action == 'MIGRATE':
                     if not self.migrate_server(instance):
                         return False
@@ -576,6 +581,12 @@ class Workflow(BaseWorkflow):
                                     '%s not supported' %
                                     (self.session_id, instance.instance_id,
                                      instance.action))
+                server = self.nova.servers.get(instance.instance_id)
+                instance.host = (
+                    str(server.__dict__.get('OS-EXT-SRV-ATTR:host')))
+                instance.state = server.__dict__.get('OS-EXT-STS:vm_state')
+                instance.action = None
+                db_api.update_instance(instance)
         return self._wait_host_empty(host)
 
     def _wait_host_empty(self, host):
@@ -625,6 +636,7 @@ class Workflow(BaseWorkflow):
                 if instance.state == 'error':
                     LOG.error('instance %s live migration failed'
                               % server_id)
+                    db_api.update_instance(instance)
                     return False
                 elif orig_vm_state != instance.state:
                     LOG.info('instance %s state changed: %s' % (server_id,
@@ -632,6 +644,7 @@ class Workflow(BaseWorkflow):
                 elif host != orig_host:
                     LOG.info('instance %s live migrated to host %s' %
                              (server_id, host))
+                    db_api.update_instance(instance)
                     return True
                 migration = (
                     self.nova.migrations.list(instance_uuid=server_id)[0])
@@ -664,6 +677,7 @@ class Workflow(BaseWorkflow):
         except Exception as e:
             LOG.error('server %s live migration failed, Exception=%s' %
                       (server_id, e))
+        db_api.update_instance(instance)
         return False
 
     def migrate_server(self, instance):
@@ -693,6 +707,7 @@ class Workflow(BaseWorkflow):
                         LOG.info('instance %s migration resized to host %s' %
                                  (server_id, host))
                         instance.host = host
+                        db_api.update_instance(instance)
                         return True
                     if last_vm_state != instance.state:
                         LOG.info('instance %s state changed: %s' % (server_id,
@@ -701,6 +716,7 @@ class Workflow(BaseWorkflow):
                         LOG.error('instance %s migration failed, state: %s'
                                   % (server_id, instance.state))
                         instance.host = host
+                        db_api.update_instance(instance)
                         return False
                     time.sleep(5)
                     retries = retries - 1
@@ -712,6 +728,7 @@ class Workflow(BaseWorkflow):
                 if retry_migrate == 0:
                     LOG.error('server %s migrate failed after retries' %
                               server_id)
+                    db_api.update_instance(instance)
                     return False
                 # Might take time for scheduler to sync inconsistent instance
                 # list for host
@@ -723,11 +740,13 @@ class Workflow(BaseWorkflow):
             except Exception as e:
                 LOG.error('server %s migration failed, Exception=%s' %
                           (server_id, e))
+                db_api.update_instance(instance)
                 return False
             finally:
                 retry_migrate = retry_migrate - 1
         LOG.error('instance %s migration timeout, state: %s' %
                   (server_id, instance.state))
+        db_api.update_instance(instance)
         return False
 
     def maintenance_by_plugin_type(self, hostname, plugin_type):
@@ -889,13 +908,11 @@ class Workflow(BaseWorkflow):
                 self.disable_host_nova_compute(compute)
             for host in self.get_controller_hosts():
                 LOG.info('IN_MAINTENANCE controller %s' % host)
-                self._admin_notify(self.conf.service_user.os_project_name,
-                                   host,
+                self._admin_notify(host,
                                    'IN_MAINTENANCE',
                                    self.session_id)
                 self.host_maintenance(host)
-                self._admin_notify(self.conf.service_user.os_project_name,
-                                   host,
+                self._admin_notify(host,
                                    'MAINTENANCE_COMPLETE',
                                    self.session_id)
                 LOG.info('MAINTENANCE_COMPLETE controller %s' % host)
@@ -908,13 +925,11 @@ class Workflow(BaseWorkflow):
                 self._wait_host_empty(host)
 
                 LOG.info('IN_MAINTENANCE compute %s' % host)
-                self._admin_notify(self.conf.service_user.os_project_name,
-                                   host,
+                self._admin_notify(host,
                                    'IN_MAINTENANCE',
                                    self.session_id)
                 self.host_maintenance(host)
-                self._admin_notify(self.conf.service_user.os_project_name,
-                                   host,
+                self._admin_notify(host,
                                    'MAINTENANCE_COMPLETE',
                                    self.session_id)
 
@@ -929,13 +944,11 @@ class Workflow(BaseWorkflow):
                 self._wait_host_empty(host)
 
                 LOG.info('IN_MAINTENANCE host %s' % host)
-                self._admin_notify(self.conf.service_user.os_project_name,
-                                   host,
+                self._admin_notify(host,
                                    'IN_MAINTENANCE',
                                    self.session_id)
                 self.host_maintenance(host)
-                self._admin_notify(self.conf.service_user.os_project_name,
-                                   host,
+                self._admin_notify(host,
                                    'MAINTENANCE_COMPLETE',
                                    self.session_id)
 
diff --git a/fenix/workflow/workflows/k8s.py b/fenix/workflow/workflows/k8s.py
index 50ad1ac..e04b836 100644
--- a/fenix/workflow/workflows/k8s.py
+++ b/fenix/workflow/workflows/k8s.py
@@ -63,11 +63,12 @@ class Workflow(BaseWorkflow):
         LOG.info("%s: initialized with Kubernetes: %s" %
                  (self.session_id,
                   v_api.get_code_with_http_info()[0].git_version))
-
-        self.hosts = self._init_hosts_by_services()
-
-        LOG.info('%s: Execute pre action plugins' % (self.session_id))
-        self.maintenance_by_plugin_type("localhost", "pre")
+        if not data:
+            self.hosts = db_api.get_hosts(session_id)
+        else:
+            self.hosts = self._init_hosts_by_services()
+            LOG.info('%s: Execute pre action plugins' % (self.session_id))
+            self.maintenance_by_plugin_type("localhost", "pre")
         self.group_impacted_members = {}
 
     def _init_hosts_by_services(self):
@@ -106,6 +107,7 @@ class Workflow(BaseWorkflow):
         body = {"apiVersion": "v1", "spec": {"unschedulable": True}}
         self.kapi.patch_node(node_name, body)
         host.disabled = True
+        db_api.update_host(host)
 
     def uncordon(self, node_name):
         LOG.info("%s: uncordon %s" % (self.session_id, node_name))
@@ -113,6 +115,7 @@ class Workflow(BaseWorkflow):
         body = {"apiVersion": "v1", "spec": {"unschedulable": None}}
         self.kapi.patch_node(node_name, body)
         host.disabled = False
+        db_api.update_host(host)
 
     def _pod_by_id(self, pod_id):
         return [p for p in self.kapi.list_pod_for_all_namespaces().items
@@ -667,6 +670,7 @@ class Workflow(BaseWorkflow):
         actions_at = reply_time_str(wait_time)
         reply_at = actions_at
         instance.project_state = state
+        db_api.update_instance(instance)
         metadata = self.session.meta
         retry = 2
         replied = False
@@ -737,6 +741,7 @@ class Workflow(BaseWorkflow):
         reply_at = None
         state = "INSTANCE_ACTION_DONE"
         instance.project_state = state
+        db_api.update_instance(instance)
         metadata = "{}"
         self._project_notify(project, instance_ids, allowed_actions,
                              actions_at, reply_at, state, metadata)
@@ -814,22 +819,24 @@ class Workflow(BaseWorkflow):
         if host.type == "compute":
             self._wait_host_empty(hostname)
         LOG.info('IN_MAINTENANCE %s' % hostname)
-        self._admin_notify(self.conf.service_user.os_project_name,
-                           hostname,
+        self._admin_notify(hostname,
                            'IN_MAINTENANCE',
                            self.session_id)
         for plugin_type in ["host", host.type]:
             LOG.info('%s: Execute %s action plugins' % (self.session_id,
                                                         plugin_type))
             self.maintenance_by_plugin_type(hostname, plugin_type)
-        self._admin_notify(self.conf.service_user.os_project_name,
-                           hostname,
+        self._admin_notify(hostname,
                            'MAINTENANCE_COMPLETE',
                            self.session_id)
         if host.type == "compute":
             self.uncordon(hostname)
         LOG.info('MAINTENANCE_COMPLETE %s' % hostname)
         host.maintained = True
+        db_api.update_host(host)
+        self._session_notify(self.session.state,
+                             self.get_maintained_percent(),
+                             self.session_id)
 
     def maintenance(self):
         LOG.info("%s: maintenance called" % self.session_id)
@@ -919,6 +926,10 @@ class Workflow(BaseWorkflow):
             return
         for host_name in self.get_compute_hosts():
             self.cordon(host_name)
+        for host in self.get_controller_hosts():
+            # TBD one might need to change this. Currently maintenance of
+            # all controllers is serialized
+            self.host_maintenance(host)
         thrs = []
         for host_name in empty_hosts:
             # LOG.info("%s: Maintaining %s" % (self.session_id, host_name))
diff --git a/fenix/workflow/workflows/vnf.py b/fenix/workflow/workflows/vnf.py
index 93500dc..4097658 100644
--- a/fenix/workflow/workflows/vnf.py
+++ b/fenix/workflow/workflows/vnf.py
@@ -66,15 +66,20 @@ class Workflow(BaseWorkflow):
                 nova_version = max_nova_server_ver
             self.nova = novaclient.Client(nova_version,
                                           session=self.auth_session)
-        if not self.hosts:
+
+        if not data:
+            self.hosts = db_api.get_hosts(session_id)
+        elif not self.hosts:
             self.hosts = self._init_hosts_by_services()
         else:
             self._init_update_hosts()
         LOG.info("%s: initialized. Nova version %f" % (self.session_id,
                                                        nova_version))
 
-        LOG.info('%s: Execute pre action plugins' % (self.session_id))
-        self.maintenance_by_plugin_type("localhost", "pre")
+        if data:
+            # If initialized from DB, we expect this was already done
+            LOG.info('%s: Execute pre action plugins' % (self.session_id))
+            self.maintenance_by_plugin_type("localhost", "pre")
         # How many members of each instance group are currently affected
         self.group_impacted_members = {}
 
@@ -144,6 +149,7 @@ class Workflow(BaseWorkflow):
                 host.type = 'controller'
                 continue
             host.type = 'other'
+            db_api.update_host(host)
 
     def disable_host_nova_compute(self, hostname):
         LOG.info('%s: disable nova-compute on host %s' % (self.session_id,
@@ -157,6 +163,7 @@ class Workflow(BaseWorkflow):
             self.nova.services.disable_log_reason(hostname, "nova-compute",
                                                   "maintenance")
         host.disabled = True
+        db_api.update_host(host)
 
     def enable_host_nova_compute(self, hostname):
         LOG.info('%s: enable nova-compute on host %s' % (self.session_id,
@@ -169,6 +176,7 @@ class Workflow(BaseWorkflow):
                       (self.session_id, hostname))
             self.nova.services.enable(hostname, "nova-compute")
         host.disabled = False
+        db_api.update_host(host)
 
     def get_instance_details(self, instance):
         network_interfaces = next(iter(instance.addresses.values()))
@@ -413,17 +421,17 @@ class Workflow(BaseWorkflow):
             prev_hostname = hostname
         if free_vcpus >= vcpus:
             # TBD vcpu capacity might be too scattered so moving instances from
-            # one host to other host still might not succeed. At least with
+            # one host to another host still might not succeed. At least with
             # NUMA and CPU pinning, one should calculate and ask specific
-            # instances
+            # instances to be moved to free a host while obeying pinning.
             return False
         else:
             return True
 
     def get_vcpus_by_host(self, host, hvisors):
         hvisor = ([h for h in hvisors if
-                  h.__getattr__('hypervisor_hostname').split(".", 1)[0]
-                  == host][0])
+                  h.__getattr__(
+                      'hypervisor_hostname').split(".", 1)[0] == host][0])
         vcpus = hvisor.__getattr__('vcpus')
         vcpus_used = hvisor.__getattr__('vcpus_used')
         return vcpus, vcpus_used
@@ -535,6 +543,7 @@ class Workflow(BaseWorkflow):
         actions_at = reply_time_str(wait_time)
         reply_at = actions_at
         instance.project_state = state
+        db_api.update_instance(instance)
         metadata = self.session.meta
         retry = 2
         replied = False
@@ -605,6 +614,7 @@ class Workflow(BaseWorkflow):
         reply_at = None
         state = "INSTANCE_ACTION_DONE"
         instance.project_state = state
+        db_api.update_instance(instance)
         metadata = "{}"
         self._project_notify(project, instance_ids, allowed_actions,
                              actions_at, reply_at, state, metadata)
@@ -697,6 +707,11 @@ class Workflow(BaseWorkflow):
                   % (instance.instance_id,
                      self.group_impacted_members[group_id],
                      max_parallel))
+        server = self.nova.servers.get(instance.instance_id)
+        instance.host = str(server.__dict__.get('OS-EXT-SRV-ATTR:host'))
+        instance.state = server.__dict__.get('OS-EXT-STS:vm_state')
+        instance.action = None
+        db_api.update_instance(instance)
 
     @run_async
     def actions_to_have_empty_host(self, host, state, target_host=None):
@@ -759,6 +774,7 @@ class Workflow(BaseWorkflow):
                 if instance.state == 'error':
                     LOG.error('instance %s live migration failed'
                               % server_id)
+                    db_api.update_instance(instance)
                     return False
                 elif orig_vm_state != instance.state:
                     LOG.info('instance %s state changed: %s' % (server_id,
@@ -766,6 +782,7 @@ class Workflow(BaseWorkflow):
                 elif host != orig_host:
                     LOG.info('instance %s live migrated to host %s' %
                              (server_id, host))
+                    db_api.update_instance(instance)
                     return True
                 migration = (
                     self.nova.migrations.list(instance_uuid=server_id)[0])
@@ -775,6 +792,7 @@ class Workflow(BaseWorkflow):
                                   '%d retries' %
                                   (server_id,
                                    self.conf.live_migration_retries))
+                        db_api.update_instance(instance)
                         return False
                     # A failed live migration can fail fast after the call.
                     # To give Nova time to prepare for the next live migration
@@ -793,17 +811,20 @@ class Workflow(BaseWorkflow):
                 waited = waited + 1
                 last_migration_status = migration.status
                 last_vm_status = vm_status
+            db_api.update_instance(instance)
             LOG.error('instance %s live migration did not finish in %ss, '
                       'state: %s' % (server_id, waited, instance.state))
         except Exception as e:
             LOG.error('server %s live migration failed, Exception=%s' %
                       (server_id, e))
+            db_api.update_instance(instance)
         return False
 
     def migrate_server(self, instance, target_host=None):
         server_id = instance.instance_id
         server = self.nova.servers.get(server_id)
-        instance.state = server.__dict__.get('OS-EXT-STS:vm_state')
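+        # Record the server's vm_state before starting the migration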
+        orig_state = server.__dict__.get('OS-EXT-STS:vm_state')
+        instance.state = orig_state
         orig_host = str(server.__dict__.get('OS-EXT-SRV-ATTR:host'))
         LOG.info('migrate_server %s state %s host %s to %s' %
                  (server_id, instance.state, orig_host, target_host))
@@ -823,7 +844,12 @@ class Workflow(BaseWorkflow):
                         server.confirm_resize()
                         LOG.info('instance %s migration resized to host %s' %
                                  (server_id, host))
-                        instance.host = host
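+                        # Re-read the server to get its final host and state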
+                        server = self.nova.servers.get(server_id)
+                        instance.host = (
+                            str(server.__dict__.get('OS-EXT-SRV-ATTR:host')))
+                        instance.state = (
+                            server.__dict__.get('OS-EXT-STS:vm_state'))
+                        db_api.update_instance(instance)
                         return True
                     if last_vm_state != instance.state:
                         LOG.info('instance %s state changed: %s' % (server_id,
@@ -832,6 +858,7 @@ class Workflow(BaseWorkflow):
                         LOG.error('instance %s migration failed, state: %s'
                                   % (server_id, instance.state))
                         instance.host = host
+                        db_api.update_instance(instance)
                         return False
                     time.sleep(5)
                     retries = retries - 1
@@ -843,6 +870,7 @@ class Workflow(BaseWorkflow):
                 if retry_migrate == 0:
                     LOG.error('server %s migrate failed after retries' %
                               server_id)
+                    db_api.update_instance(instance)
                     return False
                 # The scheduler might take time to sync the inconsistent
                 # instance list for the host.
@@ -855,11 +883,13 @@ class Workflow(BaseWorkflow):
             except Exception as e:
                 LOG.error('server %s migration failed, Exception=%s' %
                           (server_id, e))
+                db_api.update_instance(instance)
                 return False
             finally:
                 retry_migrate = retry_migrate - 1
         LOG.error('instance %s migration timeout, state: %s' %
                   (server_id, instance.state))
+        db_api.update_instance(instance)
         return False
 
     def maintenance_by_plugin_type(self, hostname, plugin_type):
@@ -922,22 +952,24 @@ class Workflow(BaseWorkflow):
         if host.type == "compute":
             self._wait_host_empty(hostname)
         LOG.info('IN_MAINTENANCE %s' % hostname)
-        self._admin_notify(self.conf.service_user.os_project_name,
-                           hostname,
+        self._admin_notify(hostname,
                            'IN_MAINTENANCE',
                            self.session_id)
         for plugin_type in ["host", host.type]:
             LOG.info('%s: Execute %s action plugins' % (self.session_id,
                                                         plugin_type))
             self.maintenance_by_plugin_type(hostname, plugin_type)
-        self._admin_notify(self.conf.service_user.os_project_name,
-                           hostname,
+        self._admin_notify(hostname,
                            'MAINTENANCE_COMPLETE',
                            self.session_id)
         if host.type == "compute":
             self.enable_host_nova_compute(hostname)
         LOG.info('MAINTENANCE_COMPLETE %s' % hostname)
         host.maintained = True
+        db_api.update_host(host)
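+        # Notify session progress as the percentage of maintained hosts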
+        self._session_notify(self.session.state,
+                             self.get_maintained_percent(),
+                             self.session_id)
 
     def maintenance(self):
         LOG.info("%s: maintenance called" % self.session_id)