Merge "Implement Pci Interrupt Affinity Agent"
This commit is contained in:
commit
9f12913b75
|
@ -0,0 +1,7 @@
|
|||
Metadata-Version: 1.2
|
||||
Name: PCIInterruptAffinityAgent
|
||||
Version: 1.0
|
||||
Summary: PCI Interrupt Affinity Agent Package
|
||||
Author: StarlingX
|
||||
License: Apache-2.0
|
||||
Platform: UNKNOWN
|
|
@ -0,0 +1,3 @@
|
|||
SRC_DIR="pci_irq_affinity"
|
||||
COPY_LIST_TO_TAR="files/*"
|
||||
TIS_PATCH_VER=1
|
|
@ -0,0 +1,70 @@
|
|||
Summary: StarlingX PCI Interrupt Affinity Agent Package
|
||||
Name: pci-irq-affinity-agent
|
||||
Version: 1.0
|
||||
Release: %{tis_patch_ver}%{?_tis_dist}
|
||||
License: Apache-2.0
|
||||
Group: base
|
||||
Packager: StarlingX
|
||||
URL: unknown
|
||||
|
||||
Source0: %{name}-%{version}.tar.gz
|
||||
|
||||
Requires: python-novaclient
|
||||
BuildRequires: python-setuptools
|
||||
BuildRequires: systemd-devel
|
||||
|
||||
%description
|
||||
StarlingX PCI Interrupt Affinity Agent Package
|
||||
|
||||
%define local_etc_initd /etc/init.d/
|
||||
%define local_etc_pmond /etc/pmon.d/
|
||||
%define pythonroot /usr/lib64/python2.7/site-packages
|
||||
%define debug_package %{nil}
|
||||
|
||||
%prep
|
||||
%setup
|
||||
|
||||
# Remove bundled egg-info
|
||||
rm -rf *.egg-info
|
||||
|
||||
%build
|
||||
%{__python} setup.py build
|
||||
|
||||
%install
|
||||
%{__python} setup.py install --root=%{buildroot} \
|
||||
--install-lib=%{pythonroot} \
|
||||
--prefix=/usr \
|
||||
--install-data=/usr/share \
|
||||
--single-version-externally-managed
|
||||
|
||||
%{__install} -d -m 755 %{buildroot}%{local_etc_initd}
|
||||
%{__install} -p -D -m 755 pci-irq-affinity-agent %{buildroot}%{local_etc_initd}/pci-irq-affinity-agent
|
||||
|
||||
%{__install} -d -m 755 %{buildroot}%{local_etc_pmond}
|
||||
%{__install} -p -D -m 644 pci-irq-affinity-agent.conf %{buildroot}%{local_etc_pmond}/pci-irq-affinity-agent.conf
|
||||
%{__install} -p -D -m 644 pci-irq-affinity-agent.service %{buildroot}%{_unitdir}/pci-irq-affinity-agent.service
|
||||
|
||||
%{__install} -d %{buildroot}%{_bindir}
|
||||
%{__install} -p -D -m 755 nova-sriov %{buildroot}%{_bindir}/nova-sriov
|
||||
|
||||
%{__install} -d %{buildroot}%{_sysconfdir}/pci_irq_affinity
|
||||
%{__install} -p -D -m 600 config.ini %{buildroot}%{_sysconfdir}/pci_irq_affinity/config.ini
|
||||
|
||||
%post
|
||||
/usr/bin/systemctl enable pci-irq-affinity-agent.service >/dev/null 2>&1
|
||||
|
||||
%clean
|
||||
rm -rf $RPM_BUILD_ROOT
|
||||
|
||||
%files
|
||||
%defattr(-,root,root,-)
|
||||
%doc LICENSE
|
||||
%{local_etc_initd}/pci-irq-affinity-agent
|
||||
%{local_etc_pmond}/pci-irq-affinity-agent.conf
|
||||
%{_unitdir}/pci-irq-affinity-agent.service
|
||||
%{pythonroot}/pci_irq_affinity/*
|
||||
%{pythonroot}/pci_irq_affinity_agent-%{version}*.egg-info
|
||||
|
||||
%{_bindir}/pci-irq-affinity-agent
|
||||
%{_bindir}/nova-sriov
|
||||
%config(noreplace) %{_sysconfdir}/pci_irq_affinity/config.ini
|
|
@ -0,0 +1,202 @@
|
|||
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright [yyyy] [name of copyright owner]
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
|
@ -0,0 +1,22 @@
|
|||
#
|
||||
# Copyright (c) 2019 StarlingX.
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
[openstack]
|
||||
openstack_enabled=False
|
||||
username=admin
|
||||
tenant=admin
|
||||
authorization_protocol=http
|
||||
authorization_ip=192.168.204.2
|
||||
authorization_port=5000
|
||||
user_domain_name=Default
|
||||
project_domain_name=Default
|
||||
keyring_service=CGCS
|
||||
|
||||
[amqp]
|
||||
host=192.168.204.2
|
||||
port=5672
|
||||
user_id=guest
|
||||
password=guest
|
||||
virt_host=/
|
|
@ -0,0 +1,117 @@
|
|||
#! /usr/bin/python
|
||||
|
||||
#
|
||||
# Copyright (c) 2015 Wind River Systems, Inc.
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
|
||||
import sys
|
||||
import os
|
||||
import json
|
||||
import fnmatch
|
||||
|
||||
|
||||
def usage():
    """Print command-line usage for this script to stdout.

    The program name shown in the text is the basename of ``sys.argv[0]``.
    """
    argv0 = os.path.basename(sys.argv[0])
    # A single parenthesised argument prints identically under the Python 2
    # print statement and the Python 3 print function.
    print("""
Usage:
------
%(argv0)s pci_pt_whitelist pci_sriov_whitelist

Where pci_pt_whitelist is a list of passthrough devices and the
pci_sriov_whitelist is a list of SR-IOV interfaces. The format of the lists
are as follows:

pci_pt_whitelist:
[{"address": "0000:09:00.0"}, ..]

pci_sriov_whitelist:
[{"sriov_numvfs": 16, "physical_network": "group0-nic0",
"address": "0000:02:00.0"}, ..]

""" % {'argv0': argv0})
|
||||
|
||||
|
||||
def get_vf_whitelist(sriov_if):
    '''Build the VF based whitelist for one SR-IOV physical function.

    sriov_if is a dict with the PF PCI 'address' and, optionally,
    'sriov_numvfs' and 'physical_network'.

    Returns a list of dicts, one per VF, each holding the VF PCI 'address'
    and, when the PF has one, its 'physical_network'.

    Exits the process with status 1 when the number of VFs present differs
    from the configured number, or when an expected virtfn link is missing.
    '''

    pf_addr = sriov_if.get('address')
    dirpcidev = '/sys/bus/pci/devices/' + pf_addr

    # Attempt to configure the requested number of VFs if the device supports
    # setting the number of VFs via sysfs
    # Need to write 0 to sriov_numvfs before writing a new value.
    numvfs = sriov_if.get('sriov_numvfs')
    if numvfs is not None:
        numvfs_path = os.path.join(dirpcidev, 'sriov_numvfs')
        if os.path.isfile(numvfs_path):
            with open(numvfs_path, 'w') as f:
                f.write('0')
                f.flush()
                f.write(str(numvfs))

    virtfn_links = len(fnmatch.filter(os.listdir(dirpcidev), 'virtfn*'))

    # Some devices (for e.g. Coleto Creek) don't support configuration of the
    # number of VFs. Use all the VFs present in this case.
    if numvfs is None:
        numvfs = virtfn_links
    elif virtfn_links != numvfs:
        # One pre-formatted string prints the same text under Python 2 and 3.
        print('Configured number of VFs is different than the present ones '
              '(if:%s conf:%d present:%d)' % (pf_addr, numvfs, virtfn_links))
        sys.exit(1)

    # Some devices (for e.g. Coleto Creek) are not associated with a
    # physical network.
    providernets = sriov_if.get('physical_network')

    pci_sriov_vf_whitelist = []
    for i in range(int(numvfs)):
        lvf = dirpcidev + '/virtfn' + str(i)
        try:
            vf_addr = os.path.basename(os.readlink(lvf))
        except OSError:
            # Only a missing/unreadable link is expected here; anything else
            # should surface as a real traceback.
            print("virtfn link %s non-existent (numvfs=%s)" % (lvf, numvfs))
            sys.exit(1)

        device = {'address': vf_addr}
        if providernets:
            device['physical_network'] = providernets
        pci_sriov_vf_whitelist.append(device)

    return pci_sriov_vf_whitelist
|
||||
|
||||
|
||||
def main():
    '''Discover SR-IOV VF PCI addresses for interfaces configured for SR-IOV.

    It is used by the nova-compute puppet manifest and is run at manifest
    application time. This script should be run after the VF driver is
    loaded and the VF PCI addresses are visible in the system.

    Reads two JSON lists from sys.argv[1] (passthrough whitelist) and
    sys.argv[2] (SR-IOV whitelist) and returns the passthrough list extended
    with one entry per discovered VF.

    Prints usage and exits with status 1 when fewer than two arguments are
    given or either argument is not valid JSON.
    '''

    if len(sys.argv) < 3:
        usage()
        sys.exit(1)

    try:
        pci_pt_whitelist = json.loads(sys.argv[1])
        pci_sriov_whitelist = json.loads(sys.argv[2])
    except ValueError:
        # json.loads reports malformed input as ValueError; a bare except
        # would also swallow SystemExit and KeyboardInterrupt.
        usage()
        sys.exit(1)

    for sriov_if in pci_sriov_whitelist:
        pci_pt_whitelist.extend(get_vf_whitelist(sriov_if))

    return pci_pt_whitelist


if __name__ == "__main__":
    print(json.dumps(main()))
|
|
@ -0,0 +1,120 @@
|
|||
#! /bin/sh
#
# Copyright (c) 2019 StarlingX.
#
# SPDX-License-Identifier: Apache-2.0
#

#
# chkconfig: 2345 75 25
#
### BEGIN INIT INFO
# Provides:          pci-irq-affinity-agent
### END INIT INFO

# Init script for the PCI interrupt affinity agent.
# start/stop only act when nodetype in platform.conf is "worker";
# the daemon pid is tracked in /var/run/pci-irq-affinity-agent.pid.

# "." is the POSIX spelling of "source" and works under any /bin/sh.
. /etc/init.d/functions

PLATFORM_CONF="/etc/platform/platform.conf"
NODETYPE=""
DAEMON_NAME="pci-irq-affinity-agent"
AFFINITYAGENT="/usr/bin/${DAEMON_NAME}"

daemon_pidfile="/var/run/${DAEMON_NAME}.pid"

if [ -f "${PLATFORM_CONF}" ] ; then
    . "${PLATFORM_CONF}"
    NODETYPE=${nodetype}
else
    logger "$0: ${PLATFORM_CONF} is missing"
    exit 1
fi

if [ ! -f "${AFFINITYAGENT}" ] ; then
    logger "$0: ${AFFINITYAGENT} is missing"
    exit 1
fi

RETVAL=0

PATH=/sbin:/usr/sbin:/bin:/usr/bin:/usr/local/bin
export PATH

case "$1" in
    start)
        # Check for installation failure
        if [ -f /etc/platform/installation_failed ] ; then
            logger "$0: /etc/platform/installation_failed flag is set. Aborting."
            exit 1
        fi

        # NODETYPE is quoted so an empty/unset nodetype compares as a
        # string instead of breaking the test expression.
        if [ "${NODETYPE}" = "worker" ] ; then
            echo -n "Setting up config for pci-irq-affinity-agent: "

            if [ -e "${daemon_pidfile}" ] ; then
                echo "Killing existing process before starting new"
                pid=`cat "${daemon_pidfile}"`
                kill -TERM $pid
                rm -f "${daemon_pidfile}"
            fi

            echo -n "Starting pci-irq-affinity-agent: "
            # Launch the agent in the background and record the pid of the
            # backgrounded process in the pidfile.
            /bin/sh -c "${AFFINITYAGENT}"' >> /dev/null 2>&1 & echo $!' > "${daemon_pidfile}"
            RETVAL=$?
            if [ $RETVAL -eq 0 ] ; then
                echo "OK"
                touch /var/lock/subsys/${DAEMON_NAME}
            else
                echo "FAIL"
            fi
        fi
        ;;

    stop)
        if [ "${NODETYPE}" = "worker" ] ; then
            echo -n "Stopping pci-irq-affinity-agent: "

            if [ -e "${daemon_pidfile}" ] ; then
                pid=`cat "${daemon_pidfile}"`
                kill -TERM $pid
                rm -f "${daemon_pidfile}"
                rm -f /var/lock/subsys/${DAEMON_NAME}
                echo "OK"
            else
                echo "FAIL"
            fi
        fi
        ;;

    restart)
        $0 stop
        sleep 1
        $0 start
        ;;

    status)
        if [ -e "${daemon_pidfile}" ] ; then
            pid=`cat "${daemon_pidfile}"`
            # Drop the ps header line; grep succeeds only if a process row remains.
            ps -p $pid | grep -v "PID TTY" >> /dev/null 2>&1
            if [ $? -eq 0 ] ; then
                echo "pci-irq-affinity-agent is running"
                RETVAL=0
            else
                echo "pci-irq-affinity-agent is not running"
                RETVAL=1
            fi
        else
            echo "pci-irq-affinity-agent is not running ; no pidfile"
            RETVAL=1
        fi
        ;;

    condrestart)
        [ -f /var/lock/subsys/$DAEMON_NAME ] && $0 restart
        ;;

    *)
        echo "usage: $0 { start | stop | status | restart | condrestart }"
        ;;
esac

exit $RETVAL
|
|
@ -0,0 +1,9 @@
|
|||
[process]
|
||||
process = pci-irq-affinity-agent
|
||||
pidfile = /var/run/pci-irq-affinity-agent.pid
|
||||
script = /etc/init.d/pci-irq-affinity-agent
|
||||
style = lsb ; ocf or lsb
|
||||
severity = major ; minor, major, critical
|
||||
restarts = 3 ; restarts before error assertion
|
||||
interval = 5 ; number of seconds to wait between restarts
|
||||
debounce = 20 ; number of seconds to wait before degrade clear
|
|
@ -0,0 +1,14 @@
|
|||
[Unit]
|
||||
Description=StarlingX PCI Interrupt Affinity Agent
|
||||
After=sysinv-agent.service
|
||||
Before=pmon.service
|
||||
|
||||
[Service]
|
||||
Type=forking
|
||||
RemainAfterExit=yes
|
||||
ExecStart=/etc/init.d/pci-irq-affinity-agent start
|
||||
ExecStop=/etc/init.d/pci-irq-affinity-agent stop
|
||||
PIDFile=/var/run/pci-irq-affinity-agent.pid
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
|
@ -0,0 +1,92 @@
|
|||
#
|
||||
# Copyright (c) 2019 StarlingX.
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
|
||||
# vim: tabstop=4 shiftwidth=4 softtabstop=4
|
||||
|
||||
# All Rights Reserved.
|
||||
#
|
||||
|
||||
""" Define pci_irq_affinity_provider class"""
|
||||
|
||||
import utils as pci_utils
|
||||
from driver import AffinePciIrqDriver
|
||||
from nova_provider import novaClient
|
||||
from log import LOG
|
||||
|
||||
|
||||
class pci_irq_affinity_provider:
    """Apply, track and reset PCI IRQ CPU affinity per instance.

    ``inst_dict`` maps an instance uuid to ``[irqs, msi_irqs, cpulist]`` as
    last recorded by ``instance_irq_pcpulist_update``.
    """

    def __init__(self):
        self.affinePciIrqDriver = AffinePciIrqDriver()
        self.inst_dict = {}

    def reset_irq_affinity(self, uuid, irqs=None, msi_irqs=None):
        """Reset irq affinity for instance

        The instance has already been deleted or
        related PCI not used by it anymore.

        :param uuid: instance uuid, used for lookup and logging
        :param irqs: irqs to reset; when both irqs and msi_irqs are empty
                     the irqs recorded for uuid in inst_dict are used
        :param msi_irqs: msi irqs to reset, same fallback as irqs
        """
        if irqs or msi_irqs:
            # reset irq affinity for specified irqs
            _irqs = irqs
            _msi_irqs = msi_irqs
        elif uuid in self.inst_dict:
            # reset all irq affinity for deleted instance
            # NOTE(review): the inst_dict entry is left in place here, so
            # entries for deleted instances accumulate - confirm whether it
            # should be dropped once the reset succeeds.
            _irqs = self.inst_dict[uuid][0]
            _msi_irqs = self.inst_dict[uuid][1]
        else:
            LOG.debug("No pci affinity need to be reset for instance=%s!" % uuid)
            return

        try:
            with open('/proc/irq/default_smp_affinity') as f:
                cpulist = f.readline().strip()
            LOG.debug("default smp affinity bitmap:%s" % cpulist)

            for x in [_irqs, _msi_irqs]:
                # Truthiness also skips None, which is what a caller gets
                # when it passes only one of irqs / msi_irqs.
                if x:
                    pci_utils.set_irq_affinity(True, x, cpulist)
        except Exception as e:
            LOG.error("Failed to reset smp affinity! error=%s" % e)
        else:
            # Only report completion when nothing failed above.
            LOG.info("Reset smp affinity done for instance=%s!" % uuid)

    def instance_irq_pcpulist_update(self, uuid, irqs, msi_irqs, cpulist):
        """Record the irqs/cpulist now used by an instance.

        If the instance was recorded before, irqs that no longer appear are
        reset first. ``irqs`` and ``msi_irqs`` must support
        ``set.difference`` against the previously stored values.
        """
        if uuid in self.inst_dict:
            _prev = self.inst_dict[uuid]
            # get irqs that not appear anymore.
            _irqs = _prev[0].difference(irqs)
            _msi_irqs = _prev[1].difference(msi_irqs)

            # reset pci affinity for those pcis not used by instance anymore
            if _irqs or _msi_irqs:
                self.reset_irq_affinity(uuid, _irqs, _msi_irqs)

        self.inst_dict[uuid] = [irqs, msi_irqs, cpulist]
        LOG.debug(self.inst_dict)

    def affine_pci_dev_instance(self, instance, wait_for_irqs=True):
        """Affine PCI irqs for an instance with dedicated cpus and PCI devices.

        Does nothing when instance is None, its cpu policy is not
        'dedicated', or it has no PCI devices.
        """
        if instance is None:
            return
        if instance.get_cpu_policy() == 'dedicated' and instance.get_pci_devices():
            LOG.debug("Instance=%s use dedicated cpu policy!!!" % instance.uuid)
            irqs, msi_irqs, cpulist = \
                self.affinePciIrqDriver.affine_pci_dev_irqs(instance, wait_for_irqs)
            # record instance on which pci affinity has been applied
            self.instance_irq_pcpulist_update(instance.uuid, irqs, msi_irqs, cpulist)

    def audit_pci_irq_affinity(self):
        """Re-apply PCI irq affinity for every active instance, without waiting for irqs."""
        # audit instance PCI devices periodically
        filters = {'vm_state': 'active',
                   'task_state': None,
                   'deleted': False}
        instances = novaClient.get_instances(filters)
        for inst in instances:
            self.affine_pci_dev_instance(inst, wait_for_irqs=False)


# Module-level singleton shared by importers of this module.
pciIrqAffinity = pci_irq_affinity_provider()
|
|
@ -0,0 +1,206 @@
|
|||
#
|
||||
# Copyright (c) 2019 StarlingX.
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
|
||||
# vim: tabstop=4 shiftwidth=4 softtabstop=4
|
||||
|
||||
# All Rights Reserved.
|
||||
#
|
||||
|
||||
""" Pci interrupt affinity agent daemon entry"""
|
||||
|
||||
import six
|
||||
import json
|
||||
import sys
|
||||
import signal
|
||||
import re
|
||||
import eventlet
|
||||
import threading
|
||||
import time
|
||||
|
||||
from oslo_service import periodic_task
|
||||
from oslo_service import service
|
||||
import oslo_messaging
|
||||
|
||||
from config import CONF
|
||||
from config import sysconfig
|
||||
from nova_provider import novaClient
|
||||
from affinity import pciIrqAffinity
|
||||
from log import LOG
|
||||
|
||||
# Main-loop flag; process_signal_handler sets it to False to request exit.
stay_on = True


class EventType:
    """Nova notification event types this agent subscribes to."""

    # Instance finished building.
    CREATE = 'compute.instance.create.end'
    # Instance finished deleting.
    DELETE = 'compute.instance.delete.end'
    # Instance resize was confirmed.
    RESIZE = 'compute.instance.resize.confirm.end'
|
||||
|
||||
|
||||
def process_signal_handler(signum, frame):
    """Process Signal Handler

    Clears the module-level ``stay_on`` flag on SIGTERM, SIGINT or SIGTSTP
    so the main loop exits; any other signal is only logged.

    :param signum: number of the signal received
    :param frame: current stack frame (unused)
    """
    global stay_on

    if signum in (signal.SIGTERM, signal.SIGINT, signal.SIGTSTP):
        stay_on = False
    else:
        # The message needs a format specifier; without one the "%"
        # operator raises TypeError inside the signal handler.
        LOG.info("Ignoring signal %s" % signum)
|
||||
|
||||
|
||||
def get_inst(instance_uuid, callback):
    """Look up an instance through novaClient and pass it to callback.

    The callback is not invoked when the lookup returns None.
    """
    instance = novaClient.get_instance(instance_uuid)
    if instance is None:
        return

    LOG.debug("inst:%s" % instance)
    callback(instance)
|
||||
|
||||
|
||||
def query_instance_callback(inst):
    """Log the queried instance, then affine its PCI device irqs."""
    message = "query inst:%s" % inst
    LOG.debug(message)
    pciIrqAffinity.affine_pci_dev_instance(inst)
|
||||
|
||||
|
||||
@periodic_task.periodic_task(spacing=CONF.pci_affine_interval)
def audit_affinity(self, context):
    """Periodic task: audit PCI irq affinity of running instances.

    Scheduled every ``CONF.pci_affine_interval`` seconds; neither ``self``
    nor ``context`` is used by the body.
    """
    pciIrqAffinity.audit_pci_irq_affinity()
|
||||
|
||||
|
||||
def audit_work(srv, callback):
    """Register callback as a dynamic timer on srv's thread group and wait on the group."""
    thread_group = srv.tg
    thread_group.add_dynamic_timer(callback, None, None, None)
    thread_group.wait()
|
||||
|
||||
|
||||
def audits_initialize():
    """Init periodic audit task for pci interrupt affinity check

    Creates a Service, registers audit_affinity as a periodic task and runs
    the periodic tasks from a new thread. Returns the Service.
    """
    audit_service = service.Service()

    tasks = periodic_task.PeriodicTasks(CONF)
    tasks.add_periodic_task(audit_affinity)

    worker = threading.Thread(
        target=audit_work,
        args=(audit_service, tasks.run_periodic_tasks))
    worker.start()

    return audit_service
|
||||
|
||||
|
||||
class InstCreateNotificationEp(object):
    """Notification endpoint for instance-created events."""

    filter_rule = oslo_messaging.NotificationFilter(event_type=EventType.CREATE)

    def info(self, ctxt, publisher_id, event_type, payload, metadata):
        """Handle an info-priority notification; only payload['instance_id'] is read."""
        self.instance_create_handler(payload.get('instance_id'))

    def instance_create_handler(self, instance_uuid):
        """Query the created instance and affine its PCI irqs; no-op for a None uuid."""
        if instance_uuid is None:
            return

        LOG.info("instance_created: uuid=%s." % instance_uuid)
        # Run the lookup in a green thread and wait for it to finish.
        green_thread = eventlet.spawn(get_inst, instance_uuid, query_instance_callback)
        green_thread.wait()
|
||||
|
||||
|
||||
class InstResizeNotificationEp(object):
    """Notification endpoint for confirmed instance-resize events."""

    filter_rule = oslo_messaging.NotificationFilter(event_type=EventType.RESIZE)

    def info(self, ctxt, publisher_id, event_type, payload, metadata):
        """Handle an info-priority notification; only payload['instance_id'] is read."""
        self.instance_resize_handler(payload.get('instance_id'))

    def instance_resize_handler(self, instance_uuid):
        """Query the resized instance and affine its PCI irqs; no-op for a None uuid."""
        if instance_uuid is None:
            return

        LOG.info("instance_resized: uuid=%s." % instance_uuid)
        # Run the lookup in a green thread and wait for it to finish.
        green_thread = eventlet.spawn(get_inst, instance_uuid, query_instance_callback)
        green_thread.wait()
|
||||
|
||||
|
||||
class InstDelNotificationEp(object):
    """Notification endpoint for instance-deleted events."""

    filter_rule = oslo_messaging.NotificationFilter(event_type=EventType.DELETE)

    def info(self, ctxt, publisher_id, event_type, payload, metadata):
        """Handle an info-priority notification; only payload['instance_id'] is read."""
        self.instance_delete_handler(payload.get('instance_id'))

    def instance_delete_handler(self, instance_uuid):
        """Reset the irq affinity recorded for the deleted instance; no-op for a None uuid."""
        if instance_uuid is None:
            return

        LOG.info("instance_deleted: uuid=%s." % instance_uuid)
        pciIrqAffinity.reset_irq_affinity(instance_uuid)
|
||||
|
||||
|
||||
def get_rabbit_config():
    """Get rabbit config info from specific system config file.

    Returns a dict with the 'host', 'port', 'user_id', 'password' and
    'virt_host' options of the 'amqp' section, or an empty dict if any of
    them cannot be read.
    """
    section = 'amqp'
    option_names = ('host', 'port', 'user_id', 'password', 'virt_host')

    try:
        return dict((name, sysconfig.get(section, name)) for name in option_names)
    except Exception as e:
        LOG.error("Could not read all required rabbitmq configuration! Err=%s" % e)
        return {}
|
||||
|
||||
|
||||
def rpc_work(srv):
    """Thread target: start the given server, then block in its wait()."""
    for step in (srv.start, srv.wait):
        step()
|
||||
|
||||
|
||||
def start_rabbitmq_client():
    """Start Rabbitmq client to listen instance notifications from Nova

    Builds the transport URL from get_rabbit_config(), creates a
    notification listener with the create/resize/delete endpoints and runs
    it from a new thread.

    :returns: the notification listener
    :raises KeyError: if a required rabbit option is missing from the config
    """
    cfg = get_rabbit_config()
    rabbit_url = "rabbit://%s:%s@%s:%s/%s" % (cfg['user_id'], cfg['password'],
                                              cfg['host'], cfg['port'], cfg['virt_host'])
    # Log the connection target with the password masked; the full URL
    # would write the broker credential into the log.
    LOG.info("rabbit://%s:***@%s:%s/%s" % (cfg['user_id'], cfg['host'],
                                           cfg['port'], cfg['virt_host']))

    target = oslo_messaging.Target(exchange="nova", topic="notifications", server="info",
                                   version="2.1", fanout=True)
    transport = oslo_messaging.get_notification_transport(CONF, url=rabbit_url)
    endpoints = [InstCreateNotificationEp(),
                 InstResizeNotificationEp(),
                 InstDelNotificationEp()]

    server = oslo_messaging.get_notification_listener(transport, [target],
                                                      endpoints, "threading")
    thread = threading.Thread(target=rpc_work, args=(server,))
    thread.start()
    LOG.info("Rabbitmq Client Started!")

    return server
|
||||
|
||||
|
||||
def process_main():
    """Entry function for PCI Interrupt Affinity Agent

    Installs the SIGTSTP handler, and when openstack is enabled in the
    system config opens the libvirt connection, starts the audit service
    and the rabbitmq listener. Then idles until ``stay_on`` becomes false.
    Any unexpected error is logged and the process exits with status 200.
    """

    LOG.info("Enter PCIInterruptAffinity Agent")

    # Pre-bind everything the finally clause looks at, so a failure early
    # in start-up is reported as itself instead of being masked by a
    # NameError raised during cleanup.
    libvirt_opened = False
    audit_srv = None
    rabbit_client = None

    try:
        signal.signal(signal.SIGTSTP, process_signal_handler)
        openstack_enabled = sysconfig.get('openstack', 'openstack_enabled')
        if openstack_enabled == 'true':
            novaClient.open_libvirt_connect()
            libvirt_opened = True
            audit_srv = audits_initialize()
            rabbit_client = start_rabbitmq_client()

        while stay_on:
            time.sleep(1)

    except KeyboardInterrupt:
        LOG.info("keyboard Interrupt received.")

    except Exception as e:
        LOG.info("%s" % e)
        sys.exit(200)

    finally:
        LOG.error("proces_main finalized!!!")
        # Only tear down what was actually brought up.
        if libvirt_opened:
            novaClient.close_libvirt_connect()
        if audit_srv is not None:
            audit_srv.tg.stop()
        if rabbit_client is not None:
            rabbit_client.stop()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
process_main()
|
|
@ -0,0 +1,45 @@
|
|||
#
|
||||
# Copyright (c) 2019 StarlingX.
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
|
||||
# vim: tabstop=4 shiftwidth=4 softtabstop=4
|
||||
|
||||
# All Rights Reserved.
|
||||
#
|
||||
|
||||
""" Define configuration info for pci-irq-affinity-agent"""
|
||||
|
||||
from six.moves import configparser
|
||||
from oslo_config import cfg
|
||||
|
||||
pci_irq_affinity_opts = [
|
||||
cfg.IntOpt('pci_affine_interval',
|
||||
default=60,
|
||||
help='Number of seconds between pci affinity updates'),
|
||||
cfg.IntOpt('msi_irq_timeout',
|
||||
default=45,
|
||||
help='Number of seconds to wait for msi irq configuration'),
|
||||
cfg.IntOpt('msi_irq_since',
|
||||
default=6,
|
||||
help='Number of seconds to wait for msi irqs to stabilize.'),
|
||||
cfg.IntOpt('msi_irq_check_interval',
|
||||
default=2,
|
||||
help='Check interval in seconds for msi irqs to stabilize.'),
|
||||
cfg.StrOpt('config_file',
|
||||
default='/etc/pci_irq_affinity/config.ini',
|
||||
help='Get config info from specific config file.'),
|
||||
]
|
||||
|
||||
CONF = cfg.CONF
|
||||
|
||||
|
||||
def register_opts(conf):
    """Register ``pci_irq_affinity_opts`` on the given config object."""
    options = pci_irq_affinity_opts
    conf.register_opts(options)
|
||||
|
||||
|
||||
register_opts(CONF)
|
||||
|
||||
sysconfig = configparser.ConfigParser()
|
||||
sysconfig.read(CONF.config_file)
|
|
@ -0,0 +1,141 @@
|
|||
#
|
||||
# Copyright (c) 2019 StarlingX.
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
|
||||
# vim: tabstop=4 shiftwidth=4 softtabstop=4
|
||||
|
||||
# All Rights Reserved.
|
||||
#
|
||||
|
||||
""" Define AffinePciIrqDriver class"""
|
||||
|
||||
from oslo_service import loopingcall
|
||||
from oslo_concurrency import lockutils
|
||||
import utils as pci_utils
|
||||
import instance
|
||||
from config import CONF
|
||||
from log import LOG
|
||||
from nova_provider import novaClient
|
||||
|
||||
synchronized = lockutils.synchronized_with_prefix('pci_irq_affinity-')
|
||||
|
||||
|
||||
class AffinePciIrqDriver:
    """Affine the irqs of an instance's PCI devices to the instance's pcpus.

    Per PCI address the driver tracks the last seen msi irq count, how long
    that count has been stable and how long it has been waited on.
    """

    def __init__(self):
        self._msi_irq_count = {}    # pci address -> last observed msi irq count
        self._msi_irq_since = {}    # pci address -> seconds the count was stable
        self._msi_irq_elapsed = {}  # pci address -> seconds spent waiting

    def affine_pci_dev_irqs(self, inst, wait_for_irqs=True):
        """Affine PCI device irqs to VM's pcpus.

        :param inst: instance object exposing ``uuid``, ``pci_devices`` and
                     ``get_pci_devices()``
        :param wait_for_irqs: when True, first poll until the msi irq
                     counts are stable (or the timeout hits) and re-read
                     the instance through novaClient before affining
        :returns: ``(irqs, msi_irqs, cpulist)`` of the last affined device,
                  or ``None`` when the instance has no PCI devices or can
                  no longer be fetched
        """

        def _wait_for_msi_irqs(self, inst):
            """Check if each pci device has the expected number of msi irqs."""
            _prev = self._msi_irq_count.copy()
            addrs = set()

            for pci_dev in inst.pci_devices:
                addr = pci_dev.address
                addrs.update([addr])
                try:
                    irqs, msi_irqs = pci_utils.get_irqs_by_pci_address(addr)
                except Exception as e:
                    msi_irqs = set()
                    LOG.error('_wait_for_msi_irqs: pci_addr=%(A)s, error=%(E)s' %
                              {'A': addr, 'E': e})
                self._msi_irq_count[addr] = len(msi_irqs)
                self._msi_irq_elapsed[addr] += \
                    CONF.msi_irq_check_interval
                if _prev[addr] == self._msi_irq_count[addr]:
                    self._msi_irq_since[addr] += \
                        CONF.msi_irq_check_interval
                else:
                    self._msi_irq_since[addr] = 0

            # Done when msi irq counts have not changed for some time
            if all((self._msi_irq_count[k] > 0) and
                   (self._msi_irq_since[k] >= CONF.msi_irq_since)
                   for k in addrs):
                raise loopingcall.LoopingCallDone()

            # Abort due to timeout
            if all(self._msi_irq_elapsed[k] >= CONF.msi_irq_timeout
                   for k in addrs):
                msg = ("reached %(timeout)s seconds timeout, waiting for "
                       "msi irqs of pci_addrs: %(addrs)s") % {
                    'timeout': CONF.msi_irq_timeout,
                    'addrs': list(addrs)}
                LOG.warning(msg)
                raise loopingcall.LoopingCallDone()

        # Determine how many msi irqs we expect to be configured.
        if len(inst.get_pci_devices()) == 0:
            return

        # Initialize msi irq tracking.
        for pci_dev in inst.pci_devices:
            if wait_for_irqs or (pci_dev.address not in self._msi_irq_count):
                self._msi_irq_count[pci_dev.address] = 0
                self._msi_irq_since[pci_dev.address] = 0
                self._msi_irq_elapsed[pci_dev.address] = 0

        # Wait for msi irqs to be configured.
        if wait_for_irqs:
            timer = loopingcall.FixedIntervalLoopingCall(
                _wait_for_msi_irqs, self, inst)
            timer.start(interval=CONF.msi_irq_check_interval).wait()

        @synchronized(inst.uuid)
        def do_affine_pci_dev_instance(refresh_need):
            """Set pci device irq affinity for this instance."""

            _irqs = set()
            _msi_irqs = set()
            # Bound up front so the return below is valid even when the
            # (possibly refreshed) instance has no pci devices.
            pci_cpulist = ''
            # Default to the instance we were given; without this the
            # non-refresh path would reference an unbound name.
            _inst = inst
            # refresh instance info.
            if refresh_need:
                _inst = novaClient.get_instance(inst.uuid)
                if _inst is None:
                    return

            numa_topology = _inst.get_numa_topology()
            extra_spec = _inst.get_extra_spec()
            for pci_dev in _inst.pci_devices:
                try:
                    irqs, msi_irqs, pci_numa_node, pci_cpulist = \
                        pci_utils.set_irqs_affinity_by_pci_address(
                            pci_dev.address, extra_spec, numa_topology)
                except Exception as e:
                    irqs = set()
                    msi_irqs = set()
                    pci_numa_node = None
                    pci_cpulist = ''
                    LOG.error("Could not affine irqs for pci_addr:%(A)s, "
                              "error: %(E)s" % {"A": pci_dev.address, "E": e})

                # Log irqs affined when there is a change in the counts.
                # .get(): a refreshed instance may expose an address that
                # was not part of the tracking initialised above.
                msi_irq_count = len(msi_irqs)
                if ((msi_irq_count != self._msi_irq_count.get(pci_dev.address)) or
                        wait_for_irqs):
                    self._msi_irq_count[pci_dev.address] = msi_irq_count
                    LOG.info(("Instance=%(U)s: IRQs affined for pci_addr=%(A)s, "
                              "dev_id=%(D)s, dev_type=%(T)s, "
                              "vendor_id=%(V)s, product_id=%(P)s, "
                              "irqs=%(I)s, msi_irqs=%(M)s, "
                              "numa_node=%(N)s, cpulist=%(C)s")
                             % {'U': inst.uuid,
                                'A': pci_dev.address,
                                'D': pci_dev.dev_id,
                                'T': pci_dev.dev_type,
                                'V': pci_dev.vendor_id,
                                'P': pci_dev.product_id,
                                'I': ', '.join(map(str, irqs)),
                                'M': ', '.join(map(str, msi_irqs)),
                                'N': pci_numa_node, 'C': pci_cpulist})
                _irqs.update(irqs)
                _msi_irqs.update(msi_irqs)
            return (_irqs, _msi_irqs, pci_cpulist)
        return do_affine_pci_dev_instance(wait_for_irqs)
|
||||
|
|
@ -0,0 +1,265 @@
|
|||
#
|
||||
# Copyright (c) 2019 StarlingX.
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
|
||||
# vim: tabstop=4 shiftwidth=4 softtabstop=4
|
||||
|
||||
# All Rights Reserved.
|
||||
#
|
||||
|
||||
""" Encapsulate libvirt related interfaces"""
|
||||
|
||||
import libvirt
|
||||
import os
|
||||
import sys
|
||||
import signal
|
||||
from xml.dom import minidom
|
||||
from xml.etree import ElementTree
|
||||
from log import LOG
|
||||
|
||||
debug = 0
|
||||
# libvirt timeout parameters
|
||||
LIBVIRT_TIMEOUT_SEC = 5.0
|
||||
total_cpus = 0
|
||||
|
||||
|
||||
def range_to_list(csv_range=None):
    """Convert a string of comma separate ranges into an expanded list of integers.

    E.g., '1-3,8-9,15' is converted to [1,2,3,8,9,15]

    :param csv_range: comma separated items, each 'N' or 'N-M'
    :returns: list of ints; empty list for None or an empty string
    :raises ValueError: when an item is not an integer or integer range
    """
    if not csv_range:
        return []
    expanded = []
    for item in csv_range.split(','):
        # Materialise the bounds as a list: map() is a non-subscriptable
        # iterator on Python 3.
        bounds = [int(part) for part in item.split('-')]
        expanded.extend(range(bounds[0], bounds[-1] + 1))
    return expanded
|
||||
|
||||
|
||||
def _translate_virDomainState(state):
|
||||
"""Return human readable virtual domain state string."""
|
||||
states = {}
|
||||
states[0] = 'NOSTATE'
|
||||
states[1] = 'Running'
|
||||
states[2] = 'Blocked'
|
||||
states[3] = 'Paused'
|
||||
states[4] = 'Shutdown'
|
||||
states[5] = 'Shutoff'
|
||||
states[6] = 'Crashed'
|
||||
states[7] = 'pmSuspended'
|
||||
states[8] = 'Last'
|
||||
return states[state]
|
||||
|
||||
|
||||
def _mask_to_cpulist(mask=0):
|
||||
"""Create cpulist from mask, list in socket-core-thread enumerated order.
|
||||
|
||||
:param extended: extended info
|
||||
:param mask: cpuset mask
|
||||
:returns cpulist: list of cpus in socket-core-thread enumerated order
|
||||
"""
|
||||
cpulist = []
|
||||
if mask is None or mask <= 0:
|
||||
return cpulist
|
||||
|
||||
# Assume max number of cpus for now...
|
||||
max_cpus = 1024
|
||||
for cpu in range(max_cpus):
|
||||
if ((1 << cpu) & mask):
|
||||
cpulist.append(cpu)
|
||||
return cpulist
|
||||
|
||||
|
||||
class suppress_stdout_stderr(object):
    """A context manager for doing a "deep suppression" of stdout and stderr in Python

    i.e. will suppress all print, even if the print originates in a compiled C/Fortran
    sub-function.
    This works at file-descriptor level: fds 1 and 2 are pointed at the
    null device on entry and restored on exit. Exceptions raised inside
    the block are not suppressed.

    Each instance is single use: every descriptor it opened is closed on
    exit.
    """
    def __init__(self):
        # Open a pair of null files
        self.null_fds = [os.open(os.devnull, os.O_RDWR) for x in range(2)]
        # Save the actual stdout (1) and stderr (2) file descriptors.
        self.save_fds = (os.dup(1), os.dup(2))

    def __enter__(self):
        # Assign the null pointers to stdout and stderr.
        os.dup2(self.null_fds[0], 1)
        os.dup2(self.null_fds[1], 2)
        return self

    def __exit__(self, *_):
        # Re-assign the real stdout/stderr back to (1) and (2)
        os.dup2(self.save_fds[0], 1)
        os.dup2(self.save_fds[1], 2)
        # Close the null files AND the saved duplicates; leaving the
        # duplicates open would leak two descriptors per use.
        for fd in tuple(self.null_fds) + tuple(self.save_fds):
            os.close(fd)
|
||||
|
||||
|
||||
class TimeoutError(Exception):
    """Raised by timeout_handler when the alarm signal fires."""
|
||||
|
||||
|
||||
def timeout_handler(signum, frame):
    """Signal handler that turns the signal into a TimeoutError('timeout')."""
    error = TimeoutError('timeout')
    raise error
|
||||
|
||||
|
||||
def connect_to_libvirt():
    """Open a read-only connection to the local qemu:///system hypervisor.

    SIGALRM is armed for LIBVIRT_TIMEOUT_SEC seconds around the open call
    with timeout_handler installed as its handler; the alarm is always
    cleared before leaving. Any error, including the timeout, propagates
    to the caller.
    """
    try:
        signal.signal(signal.SIGALRM, timeout_handler)
        signal.setitimer(signal.ITIMER_REAL, LIBVIRT_TIMEOUT_SEC)
        with suppress_stdout_stderr():
            connection = libvirt.openReadOnly("qemu:///system")
        signal.alarm(0)
    finally:
        signal.alarm(0)
    return connection
|
||||
|
||||
|
||||
def get_host_cpu_topology():
    """Enumerate logical cpu topology using socket_id, core_id, thread_id.

    This generates the following dictionary:
    topology[socket_id][core_id][thread_id] = cpu_id

    As a side effect the module-level ``total_cpus`` is set to the number
    of <cpu> elements found under the host <cells> element of the libvirt
    capabilities document.
    """
    global total_cpus

    # Connect to local libvirt hypervisor; the connection is only needed
    # to fetch the capabilities document, so release it even on failure.
    conn = connect_to_libvirt()
    try:
        # Get host capabilities
        caps_str = conn.getCapabilities()
    finally:
        conn.close()

    doc = ElementTree.fromstring(caps_str)
    caps = minidom.parseString(caps_str)
    caps_host = caps.getElementsByTagName('host')[0]
    caps_cells = caps_host.getElementsByTagName('cells')[0]
    total_cpus = caps_cells.getElementsByTagName('cpu').length

    thread_cnt = {}
    topology = {}
    cells = doc.findall('./host/topology/cells/cell')
    for cell in cells:
        for cpu in cell.findall('./cpus/cpu'):
            # obtain core_id, cpu_id, and socket_id; ignore 'siblings' since
            # that can be inferred by enumeration of thread_id.
            core_id = int(cpu.get('core_id'))
            cpu_id = int(cpu.get('id'))
            socket_id = int(cpu.get('socket_id'))

            # thread_id's are enumerated assuming cpu_id is already sorted
            if socket_id not in thread_cnt:
                thread_cnt[socket_id] = {}
            if core_id not in thread_cnt[socket_id]:
                thread_cnt[socket_id][core_id] = 0
            else:
                thread_cnt[socket_id][core_id] += 1
            thread_id = thread_cnt[socket_id][core_id]

            # save topology[socket_id][core_id][thread_id]
            if socket_id not in topology:
                topology[socket_id] = {}
            if core_id not in topology[socket_id]:
                topology[socket_id][core_id] = {}
            topology[socket_id][core_id][thread_id] = cpu_id
    return topology
|
||||
|
||||
|
||||
def get_guest_domain_info(dom):
    """Obtain cpulist of pcpus in the order of vcpus.

    This applies to either pinned or floating vcpus, Note that the cpuinfo
    pcpu value can be stale if we scale down cpus since it reports cpu-last-run.
    For this reason use cpumap = d_vcpus[1][vcpu], instead of cpuinfo
    (i.e., vcpu, state, cpuTime, pcpu = d_vcpus[0][vcpu]).

    :param dom: libvirt domain object
    :returns: dict with keys 'uuid', 'state', 'IsCpuPinned', 'nr_vcpus',
              'nodelist', 'cpulist', 'cpu_pinning' and 'pci_addrs'
              (set of 'dddd:bb:ss.f' addresses of vfio-driven interfaces)
    """
    uuid = dom.UUIDString()
    d_state, d_maxMem_KiB, d_memory_KiB, \
        d_nrVirtCpu, d_cpuTime = dom.info()
    try:
        with suppress_stdout_stderr():
            d_vcpus = dom.vcpus()
    except Exception:
        # The vcpu query failed; fall back to a cpumap with no cpu set
        # for any vcpu.
        d_vcpus = tuple([d_nrVirtCpu * [],
                         d_nrVirtCpu * [tuple(total_cpus * [False])]])

    cpulist_d = {}
    cpuset_total = 0
    up_total = 0
    for vcpu in range(d_nrVirtCpu):
        cpuset_b = d_vcpus[1][vcpu]
        cpuset = 0
        for cpu, up in enumerate(cpuset_b):
            if up:
                cpulist_d[vcpu] = cpu
                aff = 1 << cpu
                cpuset |= aff
                up_total += 1
        cpuset_total |= cpuset
    cpulist_f = _mask_to_cpulist(mask=cpuset_total)
    cpulist_p = [cpulist_d[key] for key in sorted(cpulist_d)]

    # Determine if floating or pinned, display appropriate cpulist
    if up_total > d_nrVirtCpu:
        d_cpulist = cpulist_f
        cpu_pinned = False
    else:
        d_cpulist = cpulist_p
        cpu_pinned = True

    # Determine list of numa nodes (the hard way)
    dom_xml = ElementTree.fromstring(dom.XMLDesc(0))
    nodeset = set([])
    for elem in dom_xml.findall('./numatune/memnode'):
        nodes = range_to_list(elem.get('nodeset'))
        nodeset.update(nodes)
    d_nodelist = list(sorted(nodeset))

    # Get pci info.
    pci_addrs = set()
    for interface in dom_xml.findall('./devices/interface'):
        # Interfaces without a <driver> element or name are not vfio.
        driver = interface.find('driver')
        if driver is None or not (driver.get('name') or '').startswith('vfio'):
            continue
        addr_tag = interface.find('source/address')
        if addr_tag is None or addr_tag.get('type') != 'pci':
            continue
        fields = [addr_tag.get(key)
                  for key in ('domain', 'bus', 'slot', 'function')]
        if None in fields:
            continue
        # XML attributes are strings (assumed hex such as '0x0000' - TODO
        # confirm); convert to int before applying the %x formats.
        pci_addr = "%04x:%02x:%02x.%01x" % tuple(
            int(value, 16) for value in fields)
        pci_addrs.update([pci_addr])

    # Update dictionary with per-domain information
    domain = {
        'uuid': uuid,
        'state': _translate_virDomainState(d_state),
        'IsCpuPinned': cpu_pinned,
        'nr_vcpus': d_nrVirtCpu,
        'nodelist': d_nodelist,
        'cpulist': d_cpulist,
        'cpu_pinning': cpulist_d,
        'pci_addrs': pci_addrs
    }
    return domain
|
||||
|
||||
|
||||
def get_guest_domain_by_uuid(conn, uuid):
    """Look up a domain by uuid and return its get_guest_domain_info() dict.

    Returns None (after logging a warning) when the lookup fails.
    """
    try:
        found = conn.lookupByUUIDString(uuid)
    except Exception as err:
        LOG.warning("Failed to get domain for uuid=%s! error=%s" % (uuid, err))
        return None
    return get_guest_domain_info(found)
|
|
@ -0,0 +1,82 @@
|
|||
#
|
||||
# Copyright (c) 2019 StarlingX.
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
|
||||
# vim: tabstop=4 shiftwidth=4 softtabstop=4
|
||||
|
||||
# All Rights Reserved.
|
||||
#
|
||||
|
||||
""" Define instance related class"""
|
||||
|
||||
from log import LOG
|
||||
|
||||
|
||||
class numa_cell:
    """Plain record for one numa cell: id, vcpu set and vcpu->pcpu pinning."""

    def __init__(self, id, cpuset, cpu_pinning):
        self.id, self.cpuset, self.cpu_pinning = id, cpuset, cpu_pinning
|
||||
|
||||
|
||||
class numa_topology:
    """Numa cells of one instance plus vcpu to pcpu lookup."""

    def __init__(self, uuid, cells):
        self.cells = cells
        self.instance_uuid = uuid

    def vcpu_to_pcpu(self, vcpu):
        """Return ``(cell, pcpu)`` for the first cell pinning ``vcpu``.

        Raises KeyError when no cell has a pinning entry for it.
        """
        for candidate in self.cells:
            pinning = candidate.cpu_pinning
            if vcpu in pinning:
                return candidate, pinning[vcpu]
        raise KeyError('Unable to find pCPU for vCPU %d' % vcpu)
|
||||
|
||||
|
||||
class pci_device:
    """PCI device record; only the address is known at construction."""

    def __init__(self, addr):
        # Descriptive fields start out empty.
        self.dev_id = self.dev_type = ""
        self.vendor_id = self.product_id = ""
        self.address = addr
|
||||
|
||||
|
||||
class instance:
    """An instance as seen by the agent: identity, flavor extra specs,
    pci devices, numa topology and cpu policy.
    """

    def __init__(self, uuid, name, extra_spec):
        self.uuid = uuid
        self.name = name
        self.extra_spec = extra_spec
        # Filled in by update(); until then no devices, no topology and
        # the 'shared' policy.
        self.pci_devices = set()
        self.numa_topology = None
        self.cpu_policy = 'shared'

    def update(self, domain):
        """Populate topology, cpu policy and pci devices from a domain dict.

        Reads the 'nodelist', 'nr_vcpus', 'cpu_pinning', 'IsCpuPinned' and
        'pci_addrs' entries of ``domain``.
        """
        cells = set()
        for node_id in domain['nodelist']:
            cells.add(numa_cell(node_id, range(domain['nr_vcpus']),
                                domain['cpu_pinning']))
            LOG.debug("cell_id=%s, vcpuset=%s, cpu_pinning=%s"
                      % (node_id, range(domain['nr_vcpus']), domain['cpu_pinning']))

        self.numa_topology = numa_topology(self.uuid, cells)
        self.cpu_policy = 'dedicated' if domain['IsCpuPinned'] else 'shared'

        for pci_addr in domain['pci_addrs']:
            self.pci_devices.add(pci_device(pci_addr))

    def get_cpu_policy(self):
        return self.cpu_policy

    def get_numa_topology(self):
        return self.numa_topology

    def get_extra_spec(self):
        return self.extra_spec

    def get_pci_devices(self):
        return self.pci_devices
|
|
@ -0,0 +1,28 @@
|
|||
#
|
||||
# Copyright (c) 2019 StarlingX.
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
|
||||
# vim: tabstop=4 shiftwidth=4 softtabstop=4
|
||||
|
||||
# All Rights Reserved.
|
||||
#
|
||||
|
||||
""" Define Logger class for this agent"""
|
||||
|
||||
import logging
|
||||
import logging.handlers
|
||||
|
||||
_syslog_facility = 'local1'
|
||||
|
||||
|
||||
LOG = logging.getLogger("pci-interrupt-affinity")
|
||||
formatter = logging.Formatter("%(asctime)s %(threadName)s[%(process)d] "
|
||||
"%(name)s.%(pathname)s.%(lineno)d - %(levelname)s "
|
||||
"%(message)s")
|
||||
handler = logging.handlers.SysLogHandler(address='/dev/log',
|
||||
facility=_syslog_facility)
|
||||
handler.setFormatter(formatter)
|
||||
LOG.addHandler(handler)
|
||||
LOG.setLevel(logging.INFO)
|
|
@ -0,0 +1,139 @@
|
|||
#
|
||||
# Copyright (c) 2019 StarlingX.
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
|
||||
# vim: tabstop=4 shiftwidth=4 softtabstop=4
|
||||
|
||||
# All Rights Reserved.
|
||||
#
|
||||
|
||||
""" Define NovaProvider class
|
||||
This class wraps novaclient access interface and expose get_instance() and
|
||||
get_instances() to other agent classes.
|
||||
"""
|
||||
|
||||
import keyring
|
||||
from novaclient import client
|
||||
from keystoneauth1 import loading
|
||||
from keystoneauth1 import session
|
||||
import socket
|
||||
from log import LOG
|
||||
from config import CONF
|
||||
from config import sysconfig
|
||||
import instance
|
||||
import guest
|
||||
|
||||
|
||||
class NovaProvider:
    """Wrap novaclient and libvirt access for the agent.

    Credentials come from the 'openstack' section of the system config
    plus the keyring; instance details are completed with numa/pci data
    read from libvirt.
    """

    def __init__(self):
        self._creds = self._get_keystone_creds()
        self._auth = self._get_auth(self._creds)
        self._hostname = self.get_hostname()
        self._conn = None

    def get_hostname(self):
        """Return this host's name as reported by socket.gethostname()."""
        return socket.gethostname()

    def _get_keystone_creds(self):
        """Collect keystone credentials; return None when any is missing."""
        creds = {}
        openstackSession = 'openstack'
        options = ['username', 'user_domain_name', 'project_name',
                   'project_domain_name', 'keyring_service', 'auth_url']

        try:
            for option in options:
                creds[option] = sysconfig.get(openstackSession, option)

            # keyring_service is only used to look the password up; it is
            # removed so creds can be passed straight to the auth loader.
            creds['password'] = keyring.get_password(creds.pop('keyring_service'),
                                                     creds['username'])

        except Exception as e:
            LOG.error("Could not get keystone creds configuration! Err=%s" % e)
            creds = None

        return creds

    def _get_auth(self, creds):
        """Build a password auth plugin from creds; None when creds is None."""
        if creds is not None:
            loader = loading.get_plugin_loader('password')
            auth = loader.load_from_options(**creds)
            return auth
        return None

    def get_nova(self):
        """Return a nova client (API 2.1) on a fresh session.

        :raises Exception: when the client cannot be created
        """
        try:
            sess = session.Session(auth=self._auth)
            nova = client.Client('2.1', session=sess)
            return nova
        except Exception as e:
            # Keep the underlying reason in the log; the raised message is
            # unchanged for callers.
            LOG.warning("Failed to connect to nova! error=%s" % e)
            raise Exception("could not connect nova!")

    def open_libvirt_connect(self):
        """Open the libvirt connection and read the host cpu topology."""
        self._conn = guest.connect_to_libvirt()
        guest.get_host_cpu_topology()

    def close_libvirt_connect(self):
        """Close the libvirt connection; no-op when none was opened."""
        if self._conn is not None:
            self._conn.close()

    def get_instance(self, uuid):
        """Fetch one instance from nova and complete it from libvirt.

        :returns: instance object, or None when nova cannot provide it or
                  it is not hosted on this node
        """
        try:
            nova = self.get_nova()
            server = nova.servers.get(uuid)
            flavor_info = nova.flavors.get(server.flavor["id"])
            hostname = server.__dict__['OS-EXT-SRV-ATTR:host']
        except Exception as e:
            LOG.warning("Could not get instance=%s from Nova! error=%s" % (uuid, e))
            return None

        LOG.debug('GET VM:%s in node:%s' % (server.name, hostname))

        if hostname == self._hostname:
            inst = instance.instance(uuid, server.name, flavor_info.get_keys())
            # get numa topology and pci info from libvirt
            try:
                domain = guest.get_guest_domain_by_uuid(self._conn, uuid)
                if domain:
                    inst.update(domain)
            except Exception as e:
                LOG.warning("Failed to access libvirt! error=%s" % e)
            return inst
        else:
            LOG.debug('The VM is not in current host!')
            return None

    def get_instances(self, filters):
        """List this host's instances whose flavor uses dedicated cpus.

        :param filters: nova search options; a copy with 'host' set to
                        this host is sent, the caller's dict is untouched
        :returns: set of instance objects (possibly partial or empty when
                  an error occurred; the error is logged)
        """
        instances = set()
        try:
            nova = self.get_nova()
            search_opts = dict(filters or {})
            search_opts['host'] = self._hostname
            servers = nova.servers.list(detailed=True, search_opts=search_opts)
            flavors = nova.flavors.list()

            for server in servers:
                for flavor in flavors:
                    if flavor.id == server.flavor["id"]:
                        extra_spec = flavor.get_keys()
                        if 'hw:cpu_policy' in extra_spec \
                                and extra_spec['hw:cpu_policy'] == 'dedicated':
                            inst = instance.instance(server.id, server.name, extra_spec)
                            instances.update([inst])
            # get numa topology and pci info from libvirt
            for inst in instances:
                domain = guest.get_guest_domain_by_uuid(self._conn, inst.uuid)
                # A failed lookup yields None; skip it (as get_instance
                # does) so the remaining instances still get updated.
                if domain:
                    inst.update(domain)
        except Exception as e:
            LOG.warning("Failed to get instances info! error=%s" % e)

        return instances
|
||||
|
||||
|
||||
if sysconfig.get('openstack', 'openstack_enabled') == 'true':
|
||||
novaClient = NovaProvider()
|
||||
else:
|
||||
novaClient = None
|
|
@ -0,0 +1,291 @@
|
|||
#
|
||||
# Copyright (c) 2019 StarlingX.
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
|
||||
# vim: tabstop=4 shiftwidth=4 softtabstop=4
|
||||
|
||||
# All Rights Reserved.
|
||||
#
|
||||
|
||||
""" Define utility functions for this agent"""
|
||||
|
||||
import os
|
||||
import errno
|
||||
from itertools import groupby
|
||||
|
||||
from log import LOG
|
||||
import instance
|
||||
|
||||
|
||||
def list_to_range(input_list=None):
    """Convert a list into a string of comma separate ranges.

    E.g., [1,2,3,8,9,15] is converted to '1-3,8-9,15'

    Runs are detected between neighbouring items only, so the input is
    expected in ascending order. Lists shorter than three items are
    emitted as plain comma separated values.

    :param input_list: list of integers, or None
    :returns: range string; '' for None or an empty list
    """
    if input_list is None:
        return ''
    if len(input_list) < 3:
        return ','.join(str(x) for x in input_list)

    # Items of one consecutive run share the same (index - value).
    # groupby calls the key with ONE argument, the (index, value) pair,
    # so the key must take the pair rather than two parameters.
    runs = (list(members) for _, members in
            groupby(enumerate(input_list), lambda pair: pair[0] - pair[1]))
    # Slicing to len(run) collapses a single-item run to just that item.
    return ','.join(
        '-'.join(map(str, (run[0][1], run[-1][1])[:len(run)]))
        for run in runs)
|
||||
|
||||
|
||||
def parse_cpu_spec(spec):
    """Turn a CPU set specification into a set of CPU indexes.

    The specification is a comma separated list whose items are a single
    CPU number, a 'start-end' range, or either of those prefixed with a
    caret to exclude it again. Empty items are ignored.

    :param spec: cpu set string eg "1-4,^3,6"

    :returns: a set of CPU indexes
    :raises Exception: on a malformed item or a range with start > end
    """
    included = set()
    excluded = set()
    for raw in spec.split(','):
        token = raw.strip()
        # Handle multi ','
        if not token:
            continue

        # Split on the first dash only
        pieces = token.split('-', 1)
        if len(pieces) > 1:
            negated = pieces[0][:1] == '^'
            if negated:
                pieces[0] = str(pieces[0][1:])

            try:
                start, end = [int(piece.strip()) for piece in pieces]
            except ValueError:
                raise Exception("Invalid range expression %r" % token)
            if start > end:
                raise Exception("Invalid range expression %r" % token)

            bucket = excluded if negated else included
            bucket.update(range(start, end + 1))
        elif token[0] == '^':
            # Single CPU to exclude
            try:
                excluded.add(int(token[1:].strip()))
            except ValueError:
                raise Exception("Invalid exclusion expression %r" % token)
        else:
            # Single CPU to include
            try:
                included.add(int(token))
            except ValueError:
                raise Exception("Invalid inclusion expression %r" % token)

    # Exclusions win regardless of where they appear in the spec
    included -= excluded
    return included
|
||||
|
||||
|
||||
def _get_pci_irq_affinity_mask(extra_spec):
    """Read 'hw:pci_irq_affinity_mask' from the flavor extra-spec.

    :returns: set of vcpu ids parsed from the mask, or None when the
              extra-spec does not carry the key
    :raises Exception: when the mask parses to an empty set
    """
    if 'hw:pci_irq_affinity_mask' not in extra_spec:
        LOG.info('Not set pci_irq_affinity_mask!')
        return None

    pci_irq_affinity_mask = extra_spec['hw:pci_irq_affinity_mask']
    LOG.info("pci_irq_affinity_mask: %s" % pci_irq_affinity_mask)

    vcpu_ids = parse_cpu_spec(pci_irq_affinity_mask)
    if vcpu_ids:
        return vcpu_ids
    raise Exception("No CPUs available after parsing %r" % pci_irq_affinity_mask)
|
||||
|
||||
|
||||
def get_irqs_by_pci_address(pci_addr):
    """Get the irqs and msi irqs of the PCI device at ``pci_addr``.

    Reads /sys/bus/pci/devices/<addr>/irq and .../msi_irqs and keeps only
    the irq numbers that have a /proc/irq/<n> directory. When the
    msi_irqs directory does not exist, two empty sets are returned.

    :param pci_addr: PCI address
    :return: irqs, msi_irqs
    :raises Exception: ("PciDeviceNotFoundById ...") when the device
             directory is missing or its irq information cannot be read
    """
    irqs = set()
    msi_irqs = set()

    dev_path = "/sys/bus/pci/devices/%s" % (pci_addr)
    if not os.path.isdir(dev_path):
        raise Exception("PciDeviceNotFoundById id = %r" % pci_addr)

    found_irqs = set()
    irq_path = "%s/irq" % (dev_path)
    try:
        with open(irq_path) as f:
            found_irqs.update([int(x) for x in f.readline().split() if int(x) > 0])
    except Exception as e:
        LOG.error('get_irqs_by_pci_address: '
                  'pci_addr=%(A)s: irq_path=%(P)s; error=%(E)s',
                  {'A': pci_addr, 'P': irq_path, 'E': e})
        raise Exception("PciDeviceNotFoundById id = %r" % pci_addr)

    found_msi_irqs = set()
    msi_path = "%s/msi_irqs" % (dev_path)
    try:
        found_msi_irqs.update([int(x) for x in os.listdir(msi_path) if int(x) > 0])
    except Exception as e:
        # msi_path disappears during configuration; do not treat
        # non-existence as fatal
        if isinstance(e, OSError) and e.errno == errno.ENOENT:
            return (irqs, msi_irqs)
        LOG.error('get_irqs_by_pci_address: '
                  'pci_addr=%(A)s: msi_path=%(P)s; error=%(E)s',
                  {'A': pci_addr, 'P': msi_path, 'E': e})
        raise Exception("PciDeviceNotFoundById id = %r" % pci_addr)

    # Return only configured irqs, ignore any that are missing.
    irqs.update(irq for irq in found_irqs
                if os.path.isdir("/proc/irq/%s" % (irq)))
    msi_irqs.update(irq for irq in found_msi_irqs
                    if os.path.isdir("/proc/irq/%s" % (irq)))
    return (irqs, msi_irqs)
|
||||
|
||||
|
||||
def get_pci_irqs_pinned_cpuset(extra_spec=None, numa_topology=None,
                               pci_numa_node=None):
    """Work out the pcpus that a PCI device's irqs should be affined to.

    Starts from the pinned pcpus of the cells whose id equals the
    device's numa node. If that set is non-empty and the extra-spec
    carries a pci irq affinity mask, the result is instead the pcpus of
    the masked vcpus across all pinned cells.

    :param extra_spec: extra_spec
    :param pci_numa_node: numa node of a specific PCI device
    :param numa_topology: instance numa topology
    :return: cpuset, cpulist
    """
    LOG.debug("extra_spec:%s, topo:%s, numa_node:%s" % (extra_spec, numa_topology, pci_numa_node))
    if numa_topology is None or pci_numa_node is None or pci_numa_node < 0:
        return (set(), '')

    # Determine full affinity cpuset, but restrict to pci's numa node
    cpuset = set()
    for cell in numa_topology.cells:
        if cell.id == pci_numa_node and cell.cpu_pinning is not None:
            cpuset |= set(cell.cpu_pinning.values())
    LOG.info("pinning pcpu list:%s" % cpuset)

    # Use extra-spec hw:pci_irq_affinity_mask only when the instance is pinned.
    if cpuset:
        masked_vcpus = _get_pci_irq_affinity_mask(extra_spec)
        if masked_vcpus:
            cpuset = set()
            for cell in numa_topology.cells:
                if cell.cpu_pinning is None:
                    continue
                for vcpu in cell.cpuset:
                    if vcpu not in masked_vcpus:
                        continue
                    _cell, pcpu = numa_topology.vcpu_to_pcpu(vcpu)
                    cpuset.add(pcpu)

    return (cpuset, list_to_range(input_list=list(cpuset)))
|
||||
|
||||
|
||||
def set_irq_affinity(set_bitmap, irqs, cpulist):
    """Write ``cpulist`` into the affinity file of every irq in ``irqs``.

    :param set_bitmap: True: set bitmap file, False: set list file
    :param irqs: irq list
    :param cpulist: cpu list
    :return: set of irqs whose file was written; failures are logged as
             warnings and skipped
    """
    filename = 'smp_affinity' if set_bitmap else 'smp_affinity_list'
    written = set()

    for irq in irqs:
        irq_aff_path = "/proc/irq/%s/%s" % (irq, filename)
        try:
            with open(irq_aff_path, 'w') as f:
                f.write(cpulist)
            written.add(irq)
        except Exception as e:
            LOG.warning("Failed to write pci affine file:%(F)s, irq:%(I)s, "
                        "error=%(E)s"
                        % {"F": filename, "I": irq, "E": e})
    return written
|
||||
|
||||
|
||||
def set_irqs_affinity_by_pci_address(pci_addr, extra_spec=None,
                                     numa_topology=None):
    """Set cpu affinity for list of PCI IRQs with a VF's pci address.

    Restrict cpuset to the numa node of the PCI.

    No irq is configured, and empty irq sets are returned, when no numa
    topology is given, when the device reports a negative numa node, or
    when no pinned cpus are found for the device.

    :param pci_addr: PCI address
    :param extra_spec: extra_spec
    :param numa_topology: instance numa topology
    :return: tuple (irqs, msi_irqs, numa_node, cpulist) where irqs and
             msi_irqs are the sets of irqs that were actually affined
    :raises Exception: with a "PciDeviceNotFoundById" message when the numa
             node of the pci device cannot be read. Anything raised by
             get_irqs_by_pci_address is not caught here and propagates.
    """
    irqs = set()
    msi_irqs = set()
    numa_node = None
    cpulist = ''

    if numa_topology is None:
        return (irqs, msi_irqs, numa_node, cpulist)

    # Get the irqs associated with pci addr
    _irqs, _msi_irqs = get_irqs_by_pci_address(pci_addr)
    LOG.debug("pci: %s, irqs: %s, msi_irqs: %s", pci_addr, _irqs, _msi_irqs)

    # Obtain physical numa_node for this pci addr
    numa_path = "/sys/bus/pci/devices/%s/numa_node" % (pci_addr)
    try:
        with open(numa_path) as f:
            numa_node = [int(x) for x in f.readline().split()][0]
    except Exception as e:
        LOG.error('set_irqs_affinity_by_pci_address: '
                  'pci_addr=%(A)s: numa_path=%(P)s; error=%(E)s',
                  {'A': pci_addr, 'P': numa_path, 'E': e})
        raise Exception("PciDeviceNotFoundById id = %r" % pci_addr)

    # Skip irq configuration if there is no associated numa node.
    # numa_node is always an int here: the read above either succeeded or
    # raised.
    if numa_node < 0:
        return (irqs, msi_irqs, numa_node, cpulist)

    # Determine the pinned cpuset where irqs are to be affined
    cpuset, cpulist = get_pci_irqs_pinned_cpuset(extra_spec,
                                                 numa_topology,
                                                 numa_node)

    LOG.debug("cpuset where irqs are to be affined:%s or %s", cpuset, cpulist)

    # Skip irq configuration if there are no pinned cpus
    if not cpuset:
        return (irqs, msi_irqs, numa_node, cpulist)

    # Set IRQ affinity, but do not treat errors as fatal.
    irqs = set_irq_affinity(False, _irqs, cpulist)
    msi_irqs = set_irq_affinity(False, _msi_irqs, cpulist)
    return (irqs, msi_irqs, numa_node, cpulist)
|
|
@ -0,0 +1,35 @@
|
|||
#
|
||||
# Copyright (c) 2019 StarlingX.
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
# flake8: noqa
|
||||
#
|
||||
from setuptools import setup, find_packages
|
||||
|
||||
# Packaging metadata for the PCI interrupt affinity agent; installs the
# ``pci-irq-affinity-agent`` console script.
setup(
    name='pci-irq-affinity-agent',
    description='PCI Interrupt Affinity Agent',
    version='1.0.0',
    classifiers=[
        'Environment :: OpenStack',
        'Intended Audience :: Information Technology',
        'Intended Audience :: System Administrators',
        'License :: OSI Approved :: Apache Software License',
        'Operating System :: POSIX :: Linux',
        'Programming Language :: Python',
        'Programming Language :: Python :: 2',
        'Programming Language :: Python :: 2.7',
        'Programming Language :: Python :: 2.6',
    ],
    license='Apache-2.0',
    platforms=['any'],
    # ``provides`` is iterated element by element, so it must be a list:
    # a bare string would be recorded as one entry per character.
    provides=['pci_irq_affinity_agent'],
    packages=find_packages(),
    include_package_data=False,
    entry_points={
        'console_scripts': [
            'pci-irq-affinity-agent = pci_irq_affinity.agent:process_main',
        ],
    }
)
|
Loading…
Reference in New Issue