diff --git a/nova_powervm/tests/virt/powervm/test_host.py b/nova_powervm/tests/virt/powervm/test_host.py index 5f2fa6aa..71269917 100644 --- a/nova_powervm/tests/virt/powervm/test_host.py +++ b/nova_powervm/tests/virt/powervm/test_host.py @@ -80,34 +80,36 @@ class TestHostCPUStats(test.TestCase): return lpar return None + @mock.patch('nova_powervm.virt.powervm.host.HostCPUStats.' + '_get_fw_cycles_delta') @mock.patch('nova_powervm.virt.powervm.host.HostCPUStats._get_cpu_freq') @mock.patch('nova_powervm.virt.powervm.host.HostCPUStats.' - '_get_total_cycles') + '_get_total_cycles_delta') @mock.patch('nova_powervm.virt.powervm.host.HostCPUStats.' - '_gather_user_cycles') + '_gather_user_cycles_delta') @mock.patch('pypowervm.tasks.monitor.util.MetricCache._refresh_if_needed') @mock.patch('pypowervm.tasks.monitor.util.ensure_ltm_monitors') - def test_update_internal_metric(self, mock_ensure_ltm, mock_refresh, - mock_user_cycles, mock_total_cycles, - mock_cpu_freq): + def test_update_internal_metric( + self, mock_ensure_ltm, mock_refresh, mock_user_cycles, + mock_total_cycles, mock_cpu_freq, mock_fw_cycles): + host_stats = pvm_host.HostCPUStats(self.adpt, 'host_uuid') mock_cpu_freq.return_value = 4116 # Make sure None is returned if there is no data. host_stats.cur_phyp = None host_stats._update_internal_metric() - self.assertIsNone(host_stats.cur_data) + expect = {'iowait': 0, 'idle': 0, 'kernel': 0, 'user': 0, + 'frequency': 0} + self.assertEqual(expect, host_stats.tot_data) # Create mock phyp objects to test with mock_phyp = mock.MagicMock() - mock_phyp.sample.system_firmware.utilized_proc_cycles = 58599310268 - mock_prev_phyp = mock.MagicMock( - sample=mock.MagicMock( - system_firmware=mock.MagicMock( - utilized_proc_cycles=58599310268))) + mock_fw_cycles.return_value = 58599310268 + mock_prev_phyp = mock.MagicMock() # Mock methods not currently under test - mock_user_cycles.return_value = 0 + mock_user_cycles.return_value = 50 mock_total_cycles.return_value = 1.6125945178663e+16 # Make the 'prev' the current...for the first pass @@ -117,10 +119,10 @@ class TestHostCPUStats(test.TestCase): # Validate the dictionary... No user cycles because all of the # previous data is empty. - expect = {'iowait': 0, 'idle': 1.6125886579352732e+16, - 'kernel': 58599310268, 'user': 0, + expect = {'iowait': 0, 'idle': 1.6125886579352682e+16, + 'kernel': 58599310268, 'user': 50, 'frequency': 4116} - self.assertEqual(expect, host_stats.cur_data) + self.assertEqual(expect, host_stats.tot_data) # Mock methods not currently under test mock_user_cycles.return_value = 30010090000 @@ -129,29 +131,34 @@ class TestHostCPUStats(test.TestCase): # Now 'increment' it with a new current/previous host_stats.cur_phyp = mock_phyp host_stats.prev_phyp = mock_prev_phyp + mock_user_cycles.return_value = 100000 host_stats._update_internal_metric() - # Validate this dictionary. Note that the values are still higher - # overall, even though we add the 'deltas' from each VM. - expect = {'iowait': 0, 'idle': 1.6125856569262732e+16, - 'kernel': 58599310268, 'user': 30010090000, + # Validate this dictionary. Note that these values are 'higher' + # because this is a running total. + new_kern = 58599310268 * 2 + expect = {'iowait': 0, 'idle': 3.2251773158605416e+16, + 'kernel': new_kern, 'user': 100050, 'frequency': 4116} - self.assertEqual(expect, host_stats.cur_data) + self.assertEqual(expect, host_stats.tot_data) @mock.patch('nova_powervm.virt.powervm.host.HostCPUStats.' - '_get_total_cycles') + '_get_fw_cycles_delta') @mock.patch('nova_powervm.virt.powervm.host.HostCPUStats.' - '_gather_user_cycles') + '_get_total_cycles_delta') + @mock.patch('nova_powervm.virt.powervm.host.HostCPUStats.' + '_gather_user_cycles_delta') @mock.patch('nova_powervm.virt.powervm.host.HostCPUStats._get_cpu_freq') @mock.patch('pypowervm.tasks.monitor.util.MetricCache._refresh_if_needed') @mock.patch('pypowervm.tasks.monitor.util.ensure_ltm_monitors') def test_update_internal_metric_bad_total( self, mock_ensure_ltm, mock_refresh, mock_cpu_freq, - mock_user_cycles, mock_tot_cycles): + mock_user_cycles, mock_tot_cycles, mock_fw_cycles): """Validates that if the total cycles are off, we handle.""" host_stats = pvm_host.HostCPUStats(self.adpt, 'host_uuid') mock_cpu_freq.return_value = 4116 mock_user_cycles.return_value = 30010090000 + mock_fw_cycles.return_value = 58599310268 # Mock the total cycles to some really low number. mock_tot_cycles.return_value = 5 @@ -170,7 +177,7 @@ class TestHostCPUStats(test.TestCase): # negative number. expect = {'iowait': 0, 'idle': 0, 'kernel': 58599310268, 'user': 30010090000, 'frequency': 4116} - self.assertEqual(expect, host_stats.cur_data) + self.assertEqual(expect, host_stats.tot_data) @mock.patch('subprocess.check_output') @mock.patch('pypowervm.tasks.monitor.util.MetricCache._refresh_if_needed') @@ -185,8 +192,8 @@ class TestHostCPUStats(test.TestCase): '_delta_proc_cycles') @mock.patch('pypowervm.tasks.monitor.util.MetricCache._refresh_if_needed') @mock.patch('pypowervm.tasks.monitor.util.ensure_ltm_monitors') - def test_gather_user_cycles(self, mock_ensure_ltm, mock_refresh, - mock_cycles): + def test_gather_user_cycles_delta(self, mock_ensure_ltm, mock_refresh, + mock_cycles): # Crete objects to test with host_stats = pvm_host.HostCPUStats(self.adpt, 'host_uuid') mock_phyp = mock.MagicMock() @@ -198,21 +205,15 @@ class TestHostCPUStats(test.TestCase): # Test that we can run with previous samples and then without. host_stats.cur_phyp = mock_phyp host_stats.prev_phyp = mock_prev_phyp - resp = host_stats._gather_user_cycles() + resp = host_stats._gather_user_cycles_delta() self.assertEqual(30010090000, resp) - # Last, test to make sure the previous data is used. - host_stats.prev_data = {'user': 1000000} - resp = host_stats._gather_user_cycles() - self.assertEqual(30011090000, resp) - # Now test if there is no previous sample. Since there are no previous - # samples, it will be 0 (except it WILL default up to the previous - # min cycles, which we just set to 1000000). + # samples, it will be 0. host_stats.prev_phyp = None mock_cycles.return_value = 0 - resp = host_stats._gather_user_cycles() - self.assertEqual(1000000, resp) + resp = host_stats._gather_user_cycles_delta() + self.assertEqual(0, resp) @mock.patch('pypowervm.tasks.monitor.util.MetricCache._refresh_if_needed') @mock.patch('pypowervm.tasks.monitor.util.ensure_ltm_monitors') @@ -226,7 +227,7 @@ class TestHostCPUStats(test.TestCase): # is deleted and a new one takes its place (LPAR ID 6) delta = host_stats._delta_proc_cycles(mock_phyp.sample.lpars, mock_prev_phyp.sample.lpars) - self.assertEqual(10010090000, delta) + self.assertEqual(10010000000, delta) # Now test as if there is no previous data. This results in 0 as they # could have all been LPMs with months of cycles (rather than 30 @@ -235,6 +236,16 @@ class TestHostCPUStats(test.TestCase): self.assertEqual(0, delta2) self.assertNotEqual(delta2, delta) + # Test that if previous sample had 0 values, the sample is not + # considered for evaluation, and resultant delta cycles is 0. + prev_lpar_sample = mock_prev_phyp.sample.lpars[0].processor + prev_lpar_sample.util_cap_proc_cycles = 0 + prev_lpar_sample.util_uncap_proc_cycles = 0 + prev_lpar_sample.donated_proc_cycles = 0 + delta3 = host_stats._delta_proc_cycles(mock_phyp.sample.lpars, + mock_prev_phyp.sample.lpars) + self.assertEqual(0, delta3) + @mock.patch('pypowervm.tasks.monitor.util.MetricCache._refresh_if_needed') @mock.patch('pypowervm.tasks.monitor.util.ensure_ltm_monitors') def test_delta_user_cycles(self, mock_ensure_ltm, mock_refresh): @@ -243,15 +254,17 @@ class TestHostCPUStats(test.TestCase): mock_phyp, mock_prev_phyp = self._get_mock_phyps() mock_phyp.sample.lpars[0].processor.util_cap_proc_cycles = 250000 mock_phyp.sample.lpars[0].processor.util_uncap_proc_cycles = 250000 + mock_phyp.sample.lpars[0].processor.donated_proc_cycles = 500 mock_prev_phyp.sample.lpars[0].processor.util_cap_proc_cycles = 0 num = 455000 mock_prev_phyp.sample.lpars[0].processor.util_uncap_proc_cycles = num + mock_prev_phyp.sample.lpars[0].processor.donated_proc_cycles = 1000 # Test that a previous sample allows us to gather just the delta. new_elem = self._get_sample(4, mock_phyp.sample) old_elem = self._get_sample(4, mock_prev_phyp.sample) delta = host_stats._delta_user_cycles(new_elem, old_elem) - self.assertEqual(45000, delta) + self.assertEqual(45500, delta) # Validate the scenario where we don't have a previous. Should default # to 0, given no context of why the previous sample did not have the @@ -302,13 +315,54 @@ class TestHostCPUStats(test.TestCase): host_stats = pvm_host.HostCPUStats(self.adpt, 'host_uuid') mock_phyp = mock.MagicMock() mock_phyp.sample = mock.MagicMock() - mock_phyp.sample.processor.configurable_proc_units = 1 - mock_phyp.sample.time_based_cycles = 1.6125945178663e+16 + mock_phyp.sample.processor.configurable_proc_units = 5 + mock_phyp.sample.time_based_cycles = 500 host_stats.cur_phyp = mock_phyp # Make sure we get the full system cycles. - max_cycles = host_stats._get_total_cycles() - self.assertEqual(1.6125945178663e+16, max_cycles) + max_cycles = host_stats._get_total_cycles_delta() + self.assertEqual(2500, max_cycles) + + @mock.patch('pypowervm.tasks.monitor.util.MetricCache._refresh_if_needed') + @mock.patch('pypowervm.tasks.monitor.util.ensure_ltm_monitors') + def test_get_total_cycles_diff_cores(self, mock_ensure_ltm, mock_refresh): + # Mock objects to test with + host_stats = pvm_host.HostCPUStats(self.adpt, 'host_uuid') + + # Latest Sample + mock_phyp = mock.MagicMock(sample=mock.MagicMock()) + mock_phyp.sample.processor.configurable_proc_units = 48 + mock_phyp.sample.time_based_cycles = 1000 + host_stats.cur_phyp = mock_phyp + + # Earlier sample. Use a higher proc unit sample + mock_phyp = mock.MagicMock(sample=mock.MagicMock()) + mock_phyp.sample.processor.configurable_proc_units = 1 + mock_phyp.sample.time_based_cycles = 500 + host_stats.prev_phyp = mock_phyp + + # Make sure we get the full system cycles. + max_cycles = host_stats._get_total_cycles_delta() + self.assertEqual(24000, max_cycles) + + @mock.patch('pypowervm.tasks.monitor.util.MetricCache._refresh_if_needed') + @mock.patch('pypowervm.tasks.monitor.util.ensure_ltm_monitors') + def test_get_firmware_cycles(self, mock_ensure_ltm, mock_refresh): + # Mock objects to test with + host_stats = pvm_host.HostCPUStats(self.adpt, 'host_uuid') + + # Latest Sample + mock_phyp = mock.MagicMock(sample=mock.MagicMock()) + mock_phyp.sample.system_firmware.utilized_proc_cycles = 2000 + # Previous Sample + prev_phyp = mock.MagicMock(sample=mock.MagicMock()) + prev_phyp.sample.system_firmware.utilized_proc_cycles = 1000 + + host_stats.cur_phyp = mock_phyp + host_stats.prev_phyp = prev_phyp + # Get delta + delta_firmware_cycles = host_stats._get_fw_cycles_delta() + self.assertEqual(1000, delta_firmware_cycles) def _get_mock_phyps(self): """Helper method to return cur_phyp and prev_phyp.""" @@ -316,12 +370,14 @@ class TestHostCPUStats(test.TestCase): mock_lpar_4A.configure_mock(id=4, name='A') mock_lpar_4A.processor = mock.MagicMock( util_cap_proc_cycles=5005045000, - util_uncap_proc_cycles=5005045000) + util_uncap_proc_cycles=5005045000, + donated_proc_cycles=10000) mock_lpar_4A_prev = mock.Mock() mock_lpar_4A_prev.configure_mock(id=4, name='A') mock_lpar_4A_prev.processor = mock.MagicMock( - util_cap_proc_cycles=0, - util_uncap_proc_cycles=0) + util_cap_proc_cycles=40000, + util_uncap_proc_cycles=40000, + donated_proc_cycles=0) mock_phyp = mock.MagicMock(sample=mock.MagicMock(lpars=[mock_lpar_4A])) mock_prev_phyp = mock.MagicMock( sample=mock.MagicMock(lpars=[mock_lpar_4A_prev])) diff --git a/nova_powervm/virt/powervm/host.py b/nova_powervm/virt/powervm/host.py index 9b76437f..c96b93a1 100644 --- a/nova_powervm/virt/powervm/host.py +++ b/nova_powervm/virt/powervm/host.py @@ -112,9 +112,18 @@ class HostCPUStats(pcm_util.MetricCache): :param host_uuid: The UUID of the host CEC to maintain a metrics cache for. """ - # A dictionary to store the number of cycles spent. This is defined - # in the _update_internal_metric method. - self.cur_data, self.prev_data = None, None + # This represents the current state of cycles spent on the system. + # These are used to figure out usage statistics. As such, they are + # tied to the start of the nova compute process. + # + # - idle: Total idle cycles on the compute host. + # - kernel: How many cycles the hypervisor has consumed. Not a direct + # analogy to KVM + # - user: The amount of time spent by the VM's themselves. + # - iowait: Not used in PowerVM, but needed for nova. + # - frequency: The CPU frequency + self.tot_data = {'idle': 0, 'kernel': 0, 'user': 0, 'iowait': 0, + 'frequency': 0} # Invoke the parent to seed the metrics. Don't include VIO - will # result in quicker calls. @@ -136,7 +145,7 @@ class HostCPUStats(pcm_util.MetricCache): # Return the dictionary format of the cycles as derived by the # _update_internal_metric method. If there is no data yet, None would # be the result. - return self.cur_data + return self.tot_data def _update_internal_metric(self): """Uses the latest stats from the cache, and parses to Nova format. @@ -147,53 +156,53 @@ class HostCPUStats(pcm_util.MetricCache): # If there is no 'new' data (perhaps sampling is not turned on) then # return no data. if self.cur_phyp is None: - self.cur_data = None return - # Move the current data to the previous. The previous data is used - # for some internal calculations. Blank out the current data just - # in case of error. Don't want to persist two copies of same. - self.prev_data, self.cur_data = self.cur_data, None + # Compute the cycles spent in FW since last collection. + fw_cycles_delta = self._get_fw_cycles_delta() - # Now we need the firmware cycles. - fw_cycles = self.cur_phyp.sample.system_firmware.utilized_proc_cycles + # Compute the cycles the system spent since last run. + tot_cycles_delta = self._get_total_cycles_delta() - # Compute the max cycles. - tot_cycles = self._get_total_cycles() - - # Get the total user cycles. - user_cycles = self._gather_user_cycles() + # Get the user cycles since last run + user_cycles_delta = self._gather_user_cycles_delta() # Make sure that the total cycles is higher than the user/fw cycles. # Should not happen, but just in case there is any precision loss from # CPU data back to system. - if user_cycles + fw_cycles > tot_cycles: - LOG.warning(_LW("Host CPU Metrics determined that the total " - "cycles reported was less than the used cycles. " - "This indicates an issue with the PCM data. " - "Please investigate the results.")) - tot_cycles = user_cycles + fw_cycles + if user_cycles_delta + fw_cycles_delta > tot_cycles_delta: + LOG.warning(_LW( + "Host CPU Metrics determined that the total cycles reported " + "was less than the used cycles. This indicates an issue with " + "the PCM data. Please investigate the results.\n" + "Total Delta Cycles: %(tot_cycles)d\n" + "User Delta Cycles: %(user_cycles)d\n" + "Firmware Delta Cycles: %(fw_cycles)d"), + {'tot_cycles': tot_cycles_delta, 'fw_cycles': fw_cycles_delta, + 'user_cycles': user_cycles_delta}) + tot_cycles_delta = user_cycles_delta + fw_cycles_delta # Idle is the subtraction of all. - idle_cycles = tot_cycles - user_cycles - fw_cycles + idle_delta_cycles = (tot_cycles_delta - user_cycles_delta - + fw_cycles_delta) - # Get the processor frequency. - freq = self._get_cpu_freq() + # The only moving cycles are idle, kernel and user. + self.tot_data['idle'] += idle_delta_cycles + self.tot_data['kernel'] += fw_cycles_delta + self.tot_data['user'] += user_cycles_delta - # Now save these cycles to the internal data structure. - self.cur_data = {'idle': idle_cycles, 'kernel': fw_cycles, - 'user': user_cycles, 'iowait': 0, 'frequency': freq} + # Frequency doesn't accumulate like the others. So this stays static. + self.tot_data['frequency'] = self._get_cpu_freq() - def _gather_user_cycles(self): - """The estimated total user cycles. + def _gather_user_cycles_delta(self): + """The estimated user cycles of all VMs/VIOSes since last run. The sample data includes information about how much CPU has been used by workloads and the Virtual I/O Servers. There is not one global counter that can be used to obtain the CPU spent cycles. This method will calculate the delta of workload (and I/O Server) - cycles between the previous sample and the current sample, and then - add it to the previous 'user cycles'. + cycles between the previous sample and the current sample. There are edge cases for this however. If a VM is deleted or migrated its cycles will no longer be taken into account. The algorithm takes @@ -220,11 +229,7 @@ class HostCPUStats(pcm_util.MetricCache): vios_delta_cycles = self._delta_proc_cycles(vios_cur_samples, vios_prev_samples) - # The used cycles is the total of used cycles from before along with - # the new delta cycles. - prev_user_cycles = (0 if self.prev_data is None - else self.prev_data['user']) - return prev_user_cycles + vm_delta_cycles + vios_delta_cycles + return vm_delta_cycles + vios_delta_cycles @staticmethod def _get_cpu_freq(): @@ -270,11 +275,20 @@ class HostCPUStats(pcm_util.MetricCache): # included. if prev_sample is None: return 0 + # If the previous sample values are all 0 (happens when VM is just + # migrated, phyp creates entry for VM with 0 values), then ignore the + # sample. + if (prev_sample.processor.util_cap_proc_cycles == + prev_sample.processor.util_uncap_proc_cycles == + prev_sample.processor.donated_proc_cycles == 0): + return 0 prev_amount = (prev_sample.processor.util_cap_proc_cycles + - prev_sample.processor.util_uncap_proc_cycles) + prev_sample.processor.util_uncap_proc_cycles - + prev_sample.processor.donated_proc_cycles) cur_amount = (cur_sample.processor.util_cap_proc_cycles + - cur_sample.processor.util_uncap_proc_cycles) + cur_sample.processor.util_uncap_proc_cycles - + cur_sample.processor.donated_proc_cycles) return cur_amount - prev_amount @staticmethod @@ -293,16 +307,30 @@ class HostCPUStats(pcm_util.MetricCache): return prev_sample return None - def _get_total_cycles(self): - """Returns the 'total cycles' on the system. + def _get_total_cycles_delta(self): + """Returns the 'total cycles' on the system since last sample. - :return: The estimated total cycles spent + :return: The total delta cycles since the last run. """ sample = self.cur_phyp.sample + cur_cores = sample.processor.configurable_proc_units + cur_cycles_per_core = sample.time_based_cycles - # Gather the estimated cycle count - total_procs = sample.processor.configurable_proc_units - cycles_per_sec = sample.time_based_cycles - est_total_cycles_per_sec = total_procs * cycles_per_sec + if self.prev_phyp: + prev_cycles_per_core = self.prev_phyp.sample.time_based_cycles + else: + prev_cycles_per_core = 0 - return est_total_cycles_per_sec + # Get the delta cycles between the cores. + delta_cycles_per_core = cur_cycles_per_core - prev_cycles_per_core + + # Total cycles since last sample is the 'per cpu' cycles spent + # times the number of active cores. + return delta_cycles_per_core * cur_cores + + def _get_fw_cycles_delta(self): + """Returns the number of cycles spent on firmware since last sample.""" + cur_fw = self.cur_phyp.sample.system_firmware.utilized_proc_cycles + prev_fw = (self.prev_phyp.sample.system_firmware.utilized_proc_cycles + if self.prev_phyp else 0) + return cur_fw - prev_fw