PyMI subscribes to event providers, setting a callback that will be invoked when an event arrives. When called, the event watcher will block until an event is retrieved. A threading.Event is used internally by PyMI for the event waiting workflow. The issue is that the threading module will be monkey patched by eventlet. The event callback is performed from a different thread, so attempting to set the event for which the event watcher waits will sometimes raise the "error: cannot switch to a different thread" greenlet exception, thus breaking the event listener. This issue can be fixed at the os-win level, without PyMI having to know about eventlet monkey patching. We can make sure that PyMI uses the unpatched threading module, also ensuring that the blocking calls are run in a separate thread, so that we don't block the calling thread. eventlet.tpool can be used to easily achieve this. Closes-Bug: #1568824 Change-Id: I9be6c1a1b144bdf565f25517c52439a5fb10cb51
234 lines
8.1 KiB
Python
234 lines
8.1 KiB
Python
# Copyright 2016 Cloudbase Solutions Srl
|
|
# All Rights Reserved.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
|
# not use this file except in compliance with the License. You may obtain
|
|
# a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
|
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
|
# License for the specific language governing permissions and limitations
|
|
# under the License.
|
|
|
|
"""
|
|
Utility class for VM related operations on Hyper-V Clusters.
|
|
"""
|
|
|
|
import re
|
|
import sys
|
|
|
|
if sys.platform == 'win32':
|
|
import wmi
|
|
|
|
from eventlet import tpool
|
|
from oslo_log import log as logging
|
|
|
|
from os_win._i18n import _, _LE
|
|
from os_win import exceptions
|
|
from os_win.utils import baseutils
|
|
|
|
|
|
LOG = logging.getLogger(__name__)
|
|
|
|
|
|
class ClusterUtils(baseutils.BaseUtils):
    """Utility class for VM related operations on Hyper-V (MS) clusters.

    All operations go through the MSCluster WMI namespace of the given
    host. The WMI connection is only established on Windows ('win32');
    on other platforms the instance is created without one.
    """

    _MSCLUSTER_NODE = 'MSCluster_Node'
    _MSCLUSTER_RES = 'MSCluster_Resource'

    _VM_BASE_NAME = 'Virtual Machine %s'
    _VM_TYPE = 'Virtual Machine'
    _VM_GROUP_TYPE = 111

    _MS_CLUSTER_NAMESPACE = '//%s/root/MSCluster'

    _LIVE_MIGRATION_TYPE = 4
    _IGNORE_LOCKED = 1
    _DESTROY_GROUP = 1

    _FAILBACK_TRUE = 1
    _FAILBACK_WINDOW_MIN = 0
    _FAILBACK_WINDOW_MAX = 23

    # How long a single (blocking) WMI event listen call may last, and
    # how often the WMI event query polls for new events.
    _WMI_EVENT_TIMEOUT_MS = 100
    _WMI_EVENT_CHECK_INTERVAL = 2

    def __init__(self, host='.'):
        self._instance_name_regex = re.compile('Virtual Machine (.*)')

        if sys.platform == 'win32':
            self._init_hyperv_conn(host)
            self._watcher = self._get_failover_watcher()

    def _init_hyperv_conn(self, host):
        """Connect to the MSCluster WMI namespace on the given host.

        Caches the cluster object and the name of the local cluster
        node (parsed out of the cluster object's WMI path).

        :raises HyperVClusterException: if the cluster WMI connection
            could not be initialized.
        """
        try:
            self._conn_cluster = self._get_wmi_conn(
                self._MS_CLUSTER_NAMESPACE % host)
            self._cluster = self._conn_cluster.MSCluster_Cluster()[0]

            # extract this node name from cluster's path
            path = self._cluster.path_()
            self._this_node = re.search(r'\\\\(.*)\\root', path,
                                        re.IGNORECASE).group(1)
        except AttributeError:
            raise exceptions.HyperVClusterException(
                _("Could not initialize cluster wmi connection."))

    def _get_failover_watcher(self):
        """Return a WMI event watcher for VM resource owner node changes.

        The watcher matches modification events on cluster VM resources
        whose OwnerNode changed, i.e. VM failovers / migrations.
        """
        raw_query = (
            "SELECT * FROM __InstanceModificationEvent "
            "WITHIN %(wmi_check_interv)s WHERE TargetInstance ISA "
            "'%(cluster_res)s' AND "
            "TargetInstance.Type='%(cluster_res_type)s' AND "
            "TargetInstance.OwnerNode != PreviousInstance.OwnerNode" %
            {'wmi_check_interv': self._WMI_EVENT_CHECK_INTERVAL,
             'cluster_res': self._MSCLUSTER_RES,
             'cluster_res_type': self._VM_TYPE})
        return self._conn_cluster.watch_for(raw_wql=raw_query)

    def check_cluster_state(self):
        """Raise if the cluster does not have at least one node."""
        if len(self._get_cluster_nodes()) < 1:
            raise exceptions.HyperVClusterException(
                _("Not enough cluster nodes."))

    def get_node_name(self):
        """Return the name of the local cluster node."""
        return self._this_node

    def _get_cluster_nodes(self):
        """Return the MSCluster_Node objects belonging to this cluster."""
        cluster_assoc = self._conn_cluster.MSCluster_ClusterToNode(
            Antecedent=self._cluster.path_())
        return [x.Dependent for x in cluster_assoc]

    def _get_vm_groups(self):
        """Yield the cluster resource groups that represent VMs.

        Groups lacking a GroupType attribute or having a different
        group type are filtered out.
        """
        assocs = self._conn_cluster.MSCluster_ClusterToResourceGroup(
            GroupComponent=self._cluster.path_())
        resources = [a.PartComponent for a in assocs]
        return (r for r in resources if
                hasattr(r, 'GroupType') and
                r.GroupType == self._VM_GROUP_TYPE)

    def _lookup_vm_group_check(self, vm_name):
        """Return the VM's resource group, raising if it does not exist.

        :raises HyperVVMNotFoundException: if no group named vm_name exists.
        """
        vm = self._lookup_vm_group(vm_name)
        if not vm:
            raise exceptions.HyperVVMNotFoundException(vm_name=vm_name)
        return vm

    def _lookup_vm_group(self, vm_name):
        """Return the VM's resource group, or None if not found."""
        return self._lookup_res(self._conn_cluster.MSCluster_ResourceGroup,
                                vm_name)

    def _lookup_vm_check(self, vm_name):
        """Return the VM's cluster resource, raising if it does not exist.

        :raises HyperVVMNotFoundException: if no such VM resource exists.
        """
        vm = self._lookup_vm(vm_name)
        if not vm:
            raise exceptions.HyperVVMNotFoundException(vm_name=vm_name)
        return vm

    def _lookup_vm(self, vm_name):
        """Return the VM's cluster resource, or None if not found."""
        vm_name = self._VM_BASE_NAME % vm_name
        return self._lookup_res(self._conn_cluster.MSCluster_Resource, vm_name)

    def _lookup_res(self, resource_source, res_name):
        """Look up a single cluster resource by name.

        :param resource_source: WMI class queried with Name=res_name.
        :param res_name: resource name to look for.
        :returns: the matching resource, or None if none was found.
        :raises HyperVClusterException: if the name is ambiguous.
        """
        res = resource_source(Name=res_name)
        n = len(res)
        if n == 0:
            return None
        elif n > 1:
            raise exceptions.HyperVClusterException(
                _('Duplicate resource name %s found.') % res_name)
        else:
            return res[0]

    def get_cluster_node_names(self):
        """Return the names of all cluster nodes."""
        nodes = self._get_cluster_nodes()
        return [n.Name for n in nodes]

    def get_vm_host(self, vm_name):
        """Return the name of the node currently owning the given VM."""
        return self._lookup_vm_group_check(vm_name).OwnerNode

    def list_instances(self):
        """Return the names of all clustered VMs."""
        return [r.Name for r in self._get_vm_groups()]

    def list_instance_uuids(self):
        """Return the ids of all clustered VMs."""
        return [r.Id for r in self._get_vm_groups()]

    def add_vm_to_cluster(self, vm_name):
        """Cluster the given VM, enabling automatic failback.

        :raises HyperVVMNotFoundException: if the VM group cannot be
            found after being added to the cluster.
        """
        # NOTE: use lazy %-args instead of eager interpolation so the
        # message is only formatted when debug logging is enabled.
        LOG.debug("Add vm to cluster called for vm %s", vm_name)
        self._cluster.AddVirtualMachine(vm_name)

        vm_group = self._lookup_vm_group_check(vm_name)
        vm_group.PersistentState = True
        vm_group.AutoFailbackType = self._FAILBACK_TRUE
        # set the earliest and latest time that the group can be moved
        # back to its preferred node. The unit is in hours.
        vm_group.FailbackWindowStart = self._FAILBACK_WINDOW_MIN
        vm_group.FailbackWindowEnd = self._FAILBACK_WINDOW_MAX
        vm_group.put()

    def bring_online(self, vm_name):
        """Bring the given clustered VM online."""
        vm = self._lookup_vm_check(vm_name)
        vm.BringOnline()

    def take_offline(self, vm_name):
        """Take the given clustered VM offline."""
        vm = self._lookup_vm_check(vm_name)
        vm.TakeOffline()

    def delete(self, vm_name):
        """Remove the given VM's group from the cluster."""
        vm = self._lookup_vm_group_check(vm_name)
        vm.DestroyGroup(self._DESTROY_GROUP)

    def vm_exists(self, vm_name):
        """Return True if a cluster resource exists for the given VM."""
        return self._lookup_vm(vm_name) is not None

    def live_migrate_vm(self, vm_name, new_host):
        """Live migrate the given VM to the requested node."""
        self._migrate_vm(vm_name, new_host, self._LIVE_MIGRATION_TYPE)

    def _migrate_vm(self, vm_name, new_host, migration_type):
        """Move the VM's group to another node.

        Failures are logged but deliberately not re-raised (best-effort).
        """
        vm_group = self._lookup_vm_group_check(vm_name)
        try:
            vm_group.MoveToNewNodeParams(self._IGNORE_LOCKED, new_host,
                                         [migration_type])
        except Exception as e:
            LOG.error(_LE('Exception during cluster live migration of '
                          '%(vm_name)s to %(host)s: %(exception)s'),
                      {'vm_name': vm_name,
                       'host': new_host,
                       'exception': e})

    def monitor_vm_failover(self, callback):
        """Creates a monitor to check for new WMI MSCluster_Resource
        events.

        This method will poll the last _WMI_EVENT_CHECK_INTERVAL + 1
        seconds for new events and listens for _WMI_EVENT_TIMEOUT_MS
        miliseconds, since listening is a thread blocking action.

        Any event object caught will then be processed.

        :param callback: called as callback(vm_name, old_host, new_host)
            for each detected failover; its exceptions are logged and
            swallowed so the monitor keeps running.
        """
        vm_name = None
        new_host = None
        try:
            # wait for new event for _WMI_EVENT_TIMEOUT_MS miliseconds.
            # NOTE: the watcher call blocks, so it is dispatched through
            # eventlet's tpool to avoid blocking (and breaking) the
            # calling green thread.
            wmi_object = tpool.execute(self._watcher,
                                       self._WMI_EVENT_TIMEOUT_MS)
            old_host = wmi_object.previous.OwnerNode
            new_host = wmi_object.OwnerNode
            # wmi_object.Name field is of the form:
            # 'Virtual Machine nova-instance-template'
            # wmi_object.Name filed is a key and as such is not affected
            # by locale, so it will always be 'Virtual Machine'
            match = self._instance_name_regex.search(wmi_object.Name)
            if match:
                vm_name = match.group(1)

            if vm_name:
                try:
                    callback(vm_name, old_host, new_host)
                except Exception:
                    LOG.exception(
                        _LE("Exception during failover callback."))
        except wmi.x_wmi_timed_out:
            # No event arrived within the timeout; nothing to process.
            pass