Merge "Storage Automation with Pbench-fio"

Zuul 2023-09-29 06:31:14 +00:00 committed by Gerrit Code Review
commit b454d258ed
10 changed files with 650 additions and 0 deletions

View File

@@ -72,6 +72,7 @@ rally:
    - dynamic-workloads: rally/rally-plugins/dynamic-workloads
    - reports: rally/rally-plugins/reports
    - manila: rally/rally-plugins/manila
    - storage: rally/rally-plugins/pbench-fio
shaker:
  server: 1.1.1.1
  port: 5555
@@ -875,3 +876,36 @@ workloads:
        provider_net_id:
        ext_net_id:
        file: rally/rally-plugins/manila/create-share-boot-vm-mount-share-write-delete-vm-share.yml
  - name: storage
    enabled: false
    type: rally
    rally_deployment: overcloud
    concurrency:
      - 1
    times: 1
    scenarios:
      - name: pbench-fio
        enabled: true
        image_name: rhel-9
        flavor_name: m1.small
        num_vms_per_compute: 1
        user: cloud-user
        pbench_key_url: http://example.com/some/path/to/agent/{{ pbench_configuration_environment }}/ssh
        pbench_config_url: http://example.com/some/path/to/agent/{{ pbench_configuration_environment }}/config
        public_net_name: public
        pbench_repo_name: pbench-0.72
        # do not append a trailing '/' to pbench_repo_dir_path
        pbench_repo_dir_path: /home/stack/pbench_repos
        volume_size: 10
        job_input: rwrwr
        # job parameters
        runtime: 3000
        io_depth: 16
        start_delay: 10
        block_size: 4,8  # comma-separated list
        workload_size: 1G
        num_jobs: 1
        sample: 1
        ansible_forks: 5
        file: rally/rally-plugins/pbench-fio/pbench-fio.yml
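A note on `job_input`: the plugin consumes the string one character at a time, running a read pass for each `r` and a write pass for each `w`, and it prepends a priming write when the string starts with a read so there is data on disk to read back (see `handle_jobs` in the plugin below). A minimal sketch of that expansion; `expand_job_input` is a hypothetical helper name, not part of the plugin:

def expand_job_input(job_input):
    """Expand a job_input string such as 'rwrwr' into the ordered
    list of pbench-fio passes, mirroring the plugin's handle_jobs()."""
    job_input = job_input.lower()
    if not job_input:
        raise ValueError("Job input required")
    passes = []
    if job_input[0] == 'r':
        passes.append('write')  # priming write before a leading read
    for job in job_input:
        passes.append('read' if job == 'r' else 'write')
    return passes


print(expand_job_input('rwrwr'))
# ['write', 'read', 'write', 'read', 'write', 'read']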

View File

@@ -235,6 +235,9 @@ class Rally(base.WorkloadBase):
workload["name"], scenario_name
)
with open("../rally_result_dir_path", "w") as file:
file.write(result_dir)
self.logger.debug("Created result directory: {}".format(result_dir))
self.workload_logger(self.__class__.__name__)
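This write is one half of a small file-based handshake: rally.py records the per-scenario result directory in `../rally_result_dir_path`, and the pbench-fio plugin reads it back at the start of `run()` to place its `pbench/results` and `pbench/logs` subdirectories. A minimal sketch of the round trip, with a placeholder directory name:

import os

# Writer side (rally.py): persist the result directory for the plugin.
result_dir = "results/pbench-fio-example"  # placeholder value
with open("../rally_result_dir_path", "w") as file:
    file.write(result_dir)

# Reader side (plugin run()): recover it and derive the pbench paths.
with open("../rally_result_dir_path") as f:
    rally_result_dir_path = f.readline()
pbench_result_dir = rally_result_dir_path + "/pbench/results"
pbench_log_dir = rally_result_dir_path + "/pbench/logs"
# exist_ok=True only in this sketch, so it can be re-run safely
os.makedirs(pbench_result_dir, exist_ok=True)
os.makedirs(pbench_log_dir, exist_ok=True)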

View File

@@ -0,0 +1,14 @@
Block Storage Performance Testing with Pbench FIO
===================================================
Overview
--------
This workload is designed to perform storage-based tests to analyze the read and write throughput achieved by OpenStack VMs backed by Ceph. Cinder provides volumes as additional mounts to the guest machines, and the scenario uses Pbench FIO to perform these tests.
Pbench FIO is utilized to simulate block-based I/O to drives, whether virtual or physical. These drives are presented to the virtual machines using Cinder and Nova.
Customizing FIO Parameters
--------------------------
To customize FIO parameters and better understand their usage, please refer to the [Pbench FIO documentation](https://github.com/distributed-system-analysis/pbench/blob/main/agent/bench-scripts/pbench-fio.md). This documentation provides detailed information on how to configure and pass FIO parameters for your specific testing requirements.
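For example, the job-file parameters exposed in browbeat-config.yaml (`runtime`, `io_depth`, `workload_size`, `num_jobs`, `start_delay`) are substituted into the `read.job.j2`/`write.job.j2` templates with plain Jinja2, the same way the plugin's `render_template` does it. A minimal sketch, assuming it runs from the Browbeat checkout root and using the sample values from the config above:

import os

from jinja2 import Environment, FileSystemLoader

env = Environment(loader=FileSystemLoader(os.getcwd()))
template = env.get_template(
    'rally/rally-plugins/pbench-fio/templates/read.job.j2')
# Values mirror the sample storage workload entry in browbeat-config.yaml.
rendered = template.render(runtime=3000, io_depth=16, workload_size='1G',
                           num_jobs=1, start_delay=10)
# The plugin writes this one directory above the checkout as ../read.job.
with open('read.job', 'w') as job_file:
    job_file.write(rendered)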

View File

@@ -0,0 +1,88 @@
---
- hosts: localhost
  gather_facts: false
  tasks:
    - name: copy necessary files, repos and update dns server
      shell: |
        sudo cp ~/pbench_repos/* /etc/yum.repos.d
        sudo cp ~/resolv.conf /etc/resolv.conf
        sudo cp ~/client_file /root/client_file

- hosts: servers
  remote_user: cloud-user
  become: true
  vars:
    ansible_ssh_common_args: '-o StrictHostKeyChecking=no'
  tasks:
    - name: create directory
      file:
        path: "{{ item }}"
        state: directory
        mode: '1777'
      with_items:
        - /var/lib/pbench-agent
        - /var/lib/pbench-agent/tmp
    - name: copy dns server
      copy:
        src: /etc/resolv.conf
        dest: /etc/resolv.conf
        mode: 0644
    - name: Add drop-cache file
      copy:
        dest: /root/drop-cache.sh
        content: |
          sync
          echo 3 > /proc/sys/vm/drop_caches
        mode: '0777'
    - name: copy read.job
      copy:
        src: /home/cloud-user/read.job
        dest: /root/read.job
    - name: copy write.job
      copy:
        src: /home/cloud-user/write.job
        dest: /root/write.job
    - name: copy necessary repos
      copy:
        src: /etc/yum.repos.d/
        dest: /etc/yum.repos.d/
        mode: 0644
    - name: copy .ssh dir to root
      copy:
        src: /home/cloud-user/.ssh/
        dest: /root/.ssh/
        mode: 0600
    - name: install ansible-core and pbench.agent
      shell: |
        yum install ansible-core -y
        LANG=C.UTF-8 ansible-galaxy collection install pbench.agent
    - name: install fio, perf, firewalld and sysstat
      yum:
        name: '{{ item }}'
        state: latest
      with_items:
        - fio
        - sysstat
        - perf
        - firewalld
    - name: start firewalld
      systemd:
        name: firewalld
        state: started
    - name: mount cinder volume on vms
      shell: |
        mkfs.xfs -f /dev/vdb
        mkdir /mnt/fio
        mount /dev/vdb /mnt/fio
        echo "/dev/vdb /mnt/fio xfs rw,seclabel,relatime,attr2,inode64,logbufs=8,logbsize=32k,noquota 0 0" >> /etc/fstab
        mount -a
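The `~/client_file` staged to `/root/client_file` in the first play is the list of guest IPs that `pbench-fio --client-file` fans the jobs out to, one address per line. A minimal sketch of how the plugin generates it (the IPs here are placeholders):

# Mirrors the client-file step in the plugin's run(): one guest IP per
# line, trailing newline included, consumed by pbench-fio --client-file.
server_ips = ["192.0.2.10", "192.0.2.11"]  # placeholder guest IPs
client_file_str = "\n".join(str(ip) for ip in server_ips) + "\n"
with open("../client_file", "w") as file:
    file.write(client_file_str)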

View File

@@ -0,0 +1,24 @@
---
- name: Install pbench-agent
  hosts: servers
  remote_user: cloud-user
  become: yes
  module_defaults:
    ansible.builtin.get_url:
      validate_certs: false
  # The default value ('production') can be overridden by cenv, a host-specific
  # inventory variable.
  vars:
    pbench_configuration_environment: "{{ cenv | default('production') }}"
  roles:
    - pbench_repo_install
    - pbench_agent_install
    - pbench_agent_config
  tasks:
    - name: register pbench across servers
      shell: |
        source /etc/profile.d/pbench-agent.sh
        pbench-register-tool-set
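The `servers` group and the `pbench_*` variables this play consumes come from an inventory file the plugin writes before invoking ansible-playbook (see `prepare_inventory` in the plugin below). A minimal sketch of that generation step, with placeholder IPs and the sample URLs from the config:

# Mirrors the plugin's prepare_inventory(): a [servers] group listing
# every guest plus the jumphost, and group vars read by the pbench roles.
server_ips = ["192.0.2.10", "192.0.2.11"]  # placeholder IPs
pbench_key_url = "http://example.com/some/path/to/agent/production/ssh"
pbench_config_url = "http://example.com/some/path/to/agent/production/config"
pbench_repo_name = "pbench-0.72"

servers = "\n".join(str(ip) for ip in server_ips)
inventory_str = (f"[servers]\n{servers}\n\n"
                 f"[servers:vars]\npbench_key_url = {pbench_key_url}\n"
                 f"pbench_config_url = {pbench_config_url}\n"
                 f"pbench_repo_name = {pbench_repo_name}")
with open("../pbench_inventory.inv", "w") as file:
    file.write(inventory_str)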

View File

@@ -0,0 +1,23 @@
---
- name: Open ports for redis and tool data sink
  hosts: servers
  remote_user: cloud-user
  become: true
  roles:
    - pbench_firewall_open_ports
  tasks:
    - name: enable port 8765
      firewalld:
        port: 8765/tcp
        permanent: true
        state: enabled
    - name: restart firewalld and redis
      systemd:
        name: "{{ item }}"
        state: restarted
      loop:
        - firewalld
        - redis
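After this play runs, redis (6379) and the pbench tool data sink (8765) should accept TCP connections from the other guests; both ports are also opened in the scenario's security group. A quick stdlib-only verification sketch, with a placeholder host:

import socket


def port_open(host, port, timeout=3.0):
    """Return True if a TCP connection to host:port succeeds."""
    try:
        with socket.create_connection((host, port), timeout=timeout):
            return True
    except OSError:
        return False


# 6379 is redis, 8765 the tool data sink; 192.0.2.10 is a placeholder.
for port in (6379, 8765):
    print(port, port_open("192.0.2.10", port))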

View File

@@ -0,0 +1,62 @@
{% set nova_api_version = nova_api_version or 2.74 %}
{% set sla_max_avg_duration = sla_max_avg_duration or 60 %}
{% set sla_max_failure = sla_max_failure or 0 %}
{% set sla_max_seconds = sla_max_seconds or 60 %}
---
BrowbeatPlugin.pbench_fio:
  -
    args:
      flavor:
        name: "{{ flavor_name }}"
      image:
        name: "{{ image_name }}"
      num_vms_per_compute: {{ num_vms_per_compute }}
      public_net_name: "{{ public_net_name }}"
      user: "{{ user }}"
      pbench_key_url: "{{ pbench_key_url }}"
      pbench_config_url: "{{ pbench_config_url }}"
      pbench_repo_dir_path: "{{ pbench_repo_dir_path }}"
      pbench_repo_name: "{{ pbench_repo_name }}"
      volume_size: {{ volume_size }}
      job_input: "{{ job_input }}"
      block_size: "{{ block_size }}"
      io_depth: {{ io_depth }}
      start_delay: {{ start_delay }}
      runtime: {{ runtime }}
      workload_size: "{{ workload_size }}"
      num_jobs: {{ num_jobs }}
      sample: {{ sample }}
      ansible_forks: {{ ansible_forks }}
    runner:
      concurrency: {{ concurrency }}
      times: {{ times }}
      type: "constant"
    context:
      users:
        tenants: 1
        users_per_tenant: 1
      api_versions:
        nova:
          version: {{ nova_api_version }}
      quotas:
        neutron:
          network: -1
          port: -1
          router: -1
          subnet: -1
          floatingip: -1
          security_group: -1
          security_group_rule: -1
          trunk: -1
        nova:
          instances: -1
          cores: -1
          ram: -1
        cinder:
          gigabytes: -1
          volumes: -1
    sla:
      max_avg_duration: {{ sla_max_avg_duration }}
      max_seconds_per_iteration: {{ sla_max_seconds }}
      failure_rate:
        max: {{ sla_max_failure }}
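The `{% set x = x or default %}` lines at the top of this template rely on Jinja2's `Undefined` being falsy: when Browbeat passes no value, the variable is undefined and the default wins, while an explicit value overrides it. A small demonstration:

from jinja2 import Template

# An undefined variable is falsy in Jinja2, so `x or default` falls back.
t = Template("{% set nova_api_version = nova_api_version or 2.74 %}"
             "nova: {{ nova_api_version }}")
print(t.render())                       # nova: 2.74 (default applied)
print(t.render(nova_api_version=2.79))  # nova: 2.79 (override wins)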

View File

@@ -0,0 +1,362 @@
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os

from jinja2 import Environment
from jinja2 import FileSystemLoader
from rally.common import logging
from rally import exceptions
from rally.task import atomic
from rally.task import scenario
from rally.task import types
from rally.task import validation
from rally.utils import sshutils
from rally_openstack.common import consts
from rally_openstack.scenarios.cinder import utils as cinder_utils
from rally_openstack.task.scenarios.neutron import utils as neutron_utils
from rally_openstack.task.scenarios.vm import utils as vm_utils

LOG = logging.getLogger(__name__)


@types.convert(image={"type": "glance_image"}, flavor={"type": "nova_flavor"})
@validation.add("image_valid_on_flavor", flavor_param="flavor", image_param="image")
@validation.add(
    "required_services", services=[consts.Service.NEUTRON,
                                   consts.Service.NOVA]
)
@validation.add("required_platform", platform="openstack", users=True)
@scenario.configure(
    context={
        "cleanup@openstack": ["neutron", "nova", "cinder"],
        "keypair@openstack": {},
        "allow_ssh@openstack": None,
    },
    name="BrowbeatPlugin.pbench_fio",
    platform="openstack"
)
class PbenchFio(vm_utils.VMScenario, neutron_utils.NeutronScenario,
                cinder_utils.CinderBasic):

    def run(self, image, flavor, num_vms_per_compute, public_net_name, user,
            pbench_key_url, pbench_config_url, pbench_repo_name, pbench_repo_dir_path,
            volume_size, job_input, block_size, io_depth, start_delay, runtime,
            workload_size, num_jobs, sample, ansible_forks, **kwargs):
        # create log and result directories
        with open('../rally_result_dir_path') as f:
            rally_result_dir_path = f.readline()
        pbench_result_dir = rally_result_dir_path + "/pbench/results"
        pbench_log_dir = rally_result_dir_path + "/pbench/logs"
        os.makedirs(pbench_result_dir)
        os.makedirs(pbench_log_dir)
        # create a network shared by the guests and the jumphost
        network = self._create_network({})
        subnet = self._create_subnet(network, {})
        # attach the external network to the subnet via a router
        router = self._create_router({}, external_gw=public_net_name)
        self._add_interface_router(subnet['subnet'], router['router'])
        # create a security group allowing ICMP and the required TCP ports
        sg = self.create_custom_security_group()
        # build jumphost
        LOG.info("Creating Jump Host...")
        jumphost, jumphost_ip, jump_ssh = self.build_jumphost(image, flavor, network,
                                                              public_net_name, user,
                                                              sg, volume_size)
        LOG.info("Jump Host has been successfully provisioned.")
        # create client/guest vms with a cinder volume attached
        LOG.info("Building Guest VMs...")
        servers, server_ips = self.create_guests(image, flavor, network, num_vms_per_compute,
                                                 sg, volume_size)
        LOG.info("Guest VMs have been successfully created.")
        # render the fio job files and copy them to the jumphost
        env = Environment(loader=FileSystemLoader(os.getcwd()))
        template_path = './rally/rally-plugins/pbench-fio/templates/read.job.j2'
        template = env.get_template(template_path)
        output_file_path = '../read.job'
        self.render_template(env, template, runtime, io_depth, workload_size,
                             num_jobs, start_delay, output_file_path)
        template_path = './rally/rally-plugins/pbench-fio/templates/write.job.j2'
        template = env.get_template(template_path)
        output_file_path = '../write.job'
        self.render_template(env, template, runtime, io_depth, workload_size,
                             num_jobs, start_delay, output_file_path)
        self.copy_over_ssh("../read.job", "~/read.job", jump_ssh)
        self.copy_over_ssh("../write.job", "~/write.job", jump_ssh)
        # prepare and copy the client file (one guest IP per line)
        client_file_str = "\n".join(str(ip) for ip in server_ips) + "\n"
        with open("../client_file", 'w') as file:
            file.write(client_file_str)
        self.copy_over_ssh("../client_file", "~/client_file", jump_ssh)
        # copy pbench repos to the jumphost
        self.exec_command_over_ssh("mkdir ~/pbench_repos", jump_ssh)
        repo_names = os.listdir(pbench_repo_dir_path)
        for repo_name in repo_names:
            local_path = pbench_repo_dir_path + "/" + repo_name
            remote_path = "~/pbench_repos/" + repo_name
            self.copy_over_ssh(local_path, remote_path, jump_ssh)
        # prepare and copy the inventory and playbooks to the jumphost
        server_ips.append(list(jumphost.addresses.values())[0][0]['addr'])
        self.prepare_inventory(server_ips, pbench_key_url, pbench_config_url, pbench_repo_name)
        self.copy_over_ssh("/etc/resolv.conf", "~/resolv.conf", jump_ssh)
        self.copy_over_ssh("../pbench_inventory.inv", "~/pbench_inventory.inv", jump_ssh)
        local_path = "./rally/rally-plugins/pbench-fio/ansible/bootstrap.yaml"
        self.copy_over_ssh(local_path, "~/bootstrap.yaml", jump_ssh)
        local_path = ("./rally/rally-plugins/pbench-fio/ansible/"
                      "pbench_agent_install.yaml")
        self.copy_over_ssh(local_path, "~/pbench_agent_install.yaml", jump_ssh)
        local_path = ("./rally/rally-plugins/pbench-fio/"
                      "ansible/pbench_agent_tool_meister_firewall.yml")
        remote_path = "~/pbench_agent_tool_meister_firewall.yml"
        self.copy_over_ssh(local_path, remote_path, jump_ssh)
        # install pbench
        LOG.info("Installing Pbench...")
        exit_code = self.install_pbench(jump_ssh, ansible_forks)
        if exit_code != 0:
            self.copy_pbench_logs(jumphost_ip, user, pbench_log_dir)
            raise exceptions.RallyException("Pbench installation failed. "
                                            "Check logs for more details.")
        LOG.info("Pbench installation has been successful on both jumphost and guests.")
        # run jobs
        LOG.info("Starting FIO jobs...")
        jump_ssh_root = sshutils.SSH("root", jumphost_ip, port=22,
                                     pkey=self.context["user"]["keypair"]["private"])
        exit_code = self.handle_jobs(jump_ssh_root, job_input, block_size, sample)
        if exit_code != 0:
            raise exceptions.RallyException("Fio jobs failed. Check logs for more details.")
        LOG.info("FIO jobs have been executed successfully. "
                 "Find results at {}".format(pbench_result_dir))
        # copy logs and results
        self.copy_pbench_results(jumphost_ip, pbench_result_dir)
        self.copy_pbench_logs(jumphost_ip, user, pbench_log_dir)

    def copy_pbench_results(self, jumphost_ip, pbench_result_dir):
        cmd = f"scp -r -i ../pbench_fio_jumphost_pkey root@{jumphost_ip}:" \
              f"/var/lib/pbench-agent/* {pbench_result_dir}/"
        os.system(cmd)

    def copy_pbench_logs(self, jumphost_ip, user, pbench_log_dir):
        cmd = f"scp -i ../pbench_fio_jumphost_pkey {user}@{jumphost_ip}:" \
              f"~/*.log {pbench_log_dir}/"
        os.system(cmd)

    def render_template(self, env, template, runtime, io_depth, workload_size,
                        num_jobs, start_delay, output_file_path):
        rendered_template = template.render(
            runtime=runtime,
            io_depth=io_depth,
            workload_size=workload_size,
            num_jobs=num_jobs,
            start_delay=start_delay
        )
        with open(output_file_path, 'w') as file:
            file.write(rendered_template)

    def copy_over_ssh(self, local_path, remote_path, jump_ssh):
        command = {
            "local_path": local_path,
            "remote_path": remote_path
        }
        self._run_command_over_ssh(jump_ssh, command)

    def exec_command_over_ssh(self, script_inline, jump_ssh):
        command = {
            "script_inline": script_inline,
            "interpreter": "/bin/sh"
        }
        exit_code, _, _ = self._run_command_over_ssh(jump_ssh, command)
        return exit_code

    @atomic.action_timer("pbench_fio.install_pbench")
    def install_pbench(self, jump_ssh, ansible_forks):
        cmd_str = ("sudo cp ~/pbench_repos/* /etc/yum.repos.d && "
                   "sudo cp ~/resolv.conf /etc/resolv.conf && "
                   "export LANG=C.UTF-8 && "
                   "sudo yum install ansible-core -y &> /dev/null && "
                   "ansible-galaxy collection install pbench.agent &> /dev/null && "
                   "ansible-galaxy collection install ansible.posix &> /dev/null")
        self.exec_command_over_ssh(cmd_str, jump_ssh)
        cmd_str = ("export LANG=C.UTF-8 && "
                   "ansible-playbook -i ~/pbench_inventory.inv -vv -f {} bootstrap.yaml "
                   "&> ~/bootstrap.log".format(ansible_forks))
        exit_code = self.exec_command_over_ssh(cmd_str, jump_ssh)
        if exit_code != 0:
            return exit_code
        cmd_str = ("export LANG=C.UTF-8 && "
                   "export ANSIBLE_ROLES_PATH=$HOME/.ansible/collections/ansible_collections/"
                   "pbench/agent/roles:$ANSIBLE_ROLES_PATH && "
                   "ansible-playbook -i ~/pbench_inventory.inv -vv -f {} "
                   "~/pbench_agent_install.yaml &> "
                   "~/pbench_agent_install.log".format(ansible_forks))
        exit_code = self.exec_command_over_ssh(cmd_str, jump_ssh)
        if exit_code != 0:
            return exit_code
        cmd_str = ("export LANG=C.UTF-8 && "
                   "export ANSIBLE_ROLES_PATH=$HOME/.ansible/collections/ansible_collections/"
                   "pbench/agent/roles:$ANSIBLE_ROLES_PATH && "
                   "ansible-playbook -i ~/pbench_inventory.inv -vv -f {} "
                   "~/pbench_agent_tool_meister_firewall.yml &> "
                   "~/pbench_agent_tool_meister_firewall.log".format(ansible_forks))
        exit_code = self.exec_command_over_ssh(cmd_str, jump_ssh)
        return exit_code

    def handle_jobs(self, jump_ssh_root, job_input, block_size, sample):
        job_input = job_input.lower()
        if len(job_input) == 0:
            raise exceptions.RallyException("Job input required")
        # a leading read has no data to read back, so run a priming write first
        if job_input[0] == 'r':
            exit_code = self.write(jump_ssh_root, block_size, sample)
            if exit_code != 0:
                return exit_code
        for job in job_input:
            if job == 'r':
                exit_code = self.read(jump_ssh_root, block_size, sample)
            else:
                exit_code = self.write(jump_ssh_root, block_size, sample)
            if exit_code != 0:
                return exit_code
        return 0

    @atomic.action_timer("pbench_fio.write_job")
    def write(self, jump_ssh_root, block_size, sample):
        cmd_str = f"export LANG=C.UTF-8 && " \
                  f"source /etc/profile.d/pbench-agent.sh && " \
                  f"pbench-fio -t write -b {block_size} --client-file /root/client_file " \
                  f"--pre-iteration-script=/root/drop-cache.sh --job-file=/root/write.job " \
                  f"--sample={sample}"
        return self.exec_command_over_ssh(cmd_str, jump_ssh_root)

    @atomic.action_timer("pbench_fio.read_job")
    def read(self, jump_ssh_root, block_size, sample):
        cmd_str = f"export LANG=C.UTF-8 && " \
                  f"source /etc/profile.d/pbench-agent.sh && " \
                  f"pbench-fio -t read -b {block_size} --client-file /root/client_file " \
                  f"--pre-iteration-script=/root/drop-cache.sh --job-file=/root/read.job " \
                  f"--sample={sample}"
        return self.exec_command_over_ssh(cmd_str, jump_ssh_root)

    def build_jumphost(self, image, flavor, tenant_network, public_net_name,
                       user, sg, volume_size):
        kwargs = {}
        kwargs["nics"] = [{"net-id": tenant_network["network"]["id"]}]
        kwargs["security_groups"] = [sg["security_group"]["name"]]
        # build jumphost and attach a floating ip (preparing it for ssh access)
        jumphost, jumphost_ip = self._boot_server_with_fip(
            image, flavor, use_floating_ip=True,
            floating_network=public_net_name,
            key_name=self.context["user"]["keypair"]["name"],
            **kwargs)
        self._wait_for_ping(jumphost_ip["ip"])
        pkey = self.context["user"]["keypair"]["private"]
        with open("../pbench_fio_jumphost_pkey", 'w') as file:
            file.write(pkey)
        os.chmod('../pbench_fio_jumphost_pkey', 0o600)
        # open an SSH connection
        jump_ssh = sshutils.SSH(user, jumphost_ip["ip"], port=22, pkey=pkey)
        # check for connectivity and copy the private key
        self._wait_for_ssh(jump_ssh)
        jump_ssh.run("cat > ~/.ssh/id_rsa", stdin=pkey)
        jump_ssh.execute("chmod 0600 ~/.ssh/id_rsa")
        # attach volume
        volume = self.cinder.create_volume(volume_size)
        self._attach_volume(jumphost, volume)
        return jumphost, jumphost_ip["ip"], jump_ssh

    def create_guests(self, image, flavor, network, num_vms_per_compute, sg, volume_size):
        hypervisors = self._list_hypervisors()
        num_computes = len(hypervisors)
        server_ips = []
        servers = []
        kwargs = {}
        kwargs["nics"] = [{"net-id": network["network"]["id"]}]
        kwargs["security_groups"] = [sg["security_group"]["name"]]
        kwargs["key_name"] = self.context["user"]["keypair"]["name"]
        for i in range(num_computes):
            availability_zone = f"nova:{hypervisors[i].hypervisor_hostname}"
            kwargs["availability_zone"] = availability_zone
            servers_per_compute = self._boot_servers(image, flavor, 1,
                                                     instances_amount=num_vms_per_compute,
                                                     **kwargs)
            servers.extend(servers_per_compute)
        for server in servers:
            server_ips.append(list(server.addresses.values())[0][0]['addr'])
            # attach volume
            volume = self.cinder.create_volume(volume_size)
            self._attach_volume(server, volume)
        return servers, server_ips

    def create_custom_security_group(self):
        security_group = self._create_security_group()
        msg = "security_group isn't created"
        self.assertTrue(security_group, err_msg=msg)
        # icmp
        security_group_rule_args = {}
        security_group_rule_args["protocol"] = "icmp"
        security_group_rule_args["remote_ip_prefix"] = "0.0.0.0/0"
        security_group_rule = self._create_security_group_rule(
            security_group["security_group"]["id"],
            **security_group_rule_args)
        msg = "security_group_rule isn't created"
        self.assertTrue(security_group_rule, err_msg=msg)
        # tcp
        for port in [22, 6379, 17001, 8080, 8765]:
            security_group_rule_args["protocol"] = "tcp"
            security_group_rule_args["port_range_min"] = port
            security_group_rule_args["port_range_max"] = port
            security_group_rule = self._create_security_group_rule(
                security_group["security_group"]["id"],
                **security_group_rule_args)
            msg = "security_group_rule isn't created"
            self.assertTrue(security_group_rule, err_msg=msg)
        return security_group

    def prepare_inventory(self, server_ips, pbench_key_url, pbench_config_url, pbench_repo_name):
        servers = "\n".join([str(i) for i in server_ips])
        inventory_str = f"[servers]\n{servers}\n\n" \
                        f"[servers:vars]\npbench_key_url = {pbench_key_url}\n" \
                        f"pbench_config_url = {pbench_config_url}\n" \
                        f"pbench_repo_name = {pbench_repo_name}"
        with open("../pbench_inventory.inv", 'w') as file:
            file.write(inventory_str)

View File

@@ -0,0 +1,19 @@
[global]
bs = $@
runtime = {{ runtime }}
ioengine = libaio
iodepth = {{ io_depth }}
direct = 1
startdelay = {{ start_delay }}
clocksource = clock_gettime
directory = /mnt/fio/
write_bw_log = fio
write_iops_log = fio
write_lat_log = fio
log_avg_msec = 10000
write_hist_log = fio
[rhcs-read-test]
rw = $@
size = {{ workload_size }}
numjobs = {{ num_jobs }}

View File

@@ -0,0 +1,21 @@
[global]
bs = $@
runtime = {{ runtime }}
ioengine = libaio
iodepth = {{ io_depth }}
direct = 1
startdelay = {{ start_delay }}
clocksource = clock_gettime
directory = /mnt/fio/
write_bw_log = fio
write_iops_log = fio
write_lat_log = fio
log_avg_msec = 10000
write_hist_log = fio
create_on_open = 1
create_serialize = 0
[rhcs-write-test]
rw = $@
size = {{ workload_size }}
numjobs = {{ num_jobs }}