Count log messages only in the current interval

The logs_counter filter needs to discard the log messages that are not in the
current interval; otherwise, the aggregated metrics can be incorrect.

Change-Id: I40e08b9a0bba6d9b8c5042a0fd16a6818a42b9c9
Closes-Bug: #1549873
This commit is contained in:
Simon Pasquier 2016-04-15 16:47:53 +02:00
parent cf496654eb
commit 8c6e4e54ef
2 changed files with 24 additions and 5 deletions

View File

@ -12,11 +12,17 @@
-- See the License for the specific language governing permissions and
-- limitations under the License.
require 'math'
require 'os'
require 'string'
local utils = require 'lma_utils'
-- 'hostname' and 'interval' are mandatory settings: error() is raised at load
-- time when read_config() returns nothing for them.
local hostname = read_config('hostname') or error('hostname must be specified')
-- NOTE(review): 'interval' is declared twice below; the two lines differ only
-- in the error message (looks like the before/after lines of a diff) -- confirm
-- which one the real file keeps.
-- The trailing "+ 0" forces the configured value to a number.
local interval = (read_config('interval') or error('interval must be specified!')) + 0
local interval = (read_config('interval') or error('interval must be specified')) + 0
-- Heka cannot guarantee that logs are processed in real-time, so the
-- grace_interval parameter makes it possible to take into account log messages
-- that are received in the current interval but were emitted before it.
-- Optional setting: defaults to 0 when it is not configured.
local grace_interval = (read_config('grace_interval') or 0) + 0
-- Names of the services seen so far, appended in the order they are discovered.
local discovered_services = {}
-- Per-service tables of counters keyed by severity label (initialised to 0
-- when the service is discovered).
local logs_counters = {}
@ -24,12 +30,17 @@ local last_timer_events = {}
-- Index into discovered_services of the service handled by timer_event().
local current_service = 1
-- Compared against the 'ns' argument of timer_event(); assigned outside the
-- lines visible here -- presumably the time at which the current reporting
-- cycle started (TODO confirm).
local enter_at
-- The configured interval (given in seconds) expressed in nanoseconds.
local interval_in_ns = interval * 1e9
-- Load time of this script in seconds (os.time()); process_message() uses it
-- as a lower bound when deciding whether a log message is too old.
local start_time = os.time()
-- Message table declared once at file scope; Timestamp starts as nil and is
-- presumably filled in before each injection (TODO confirm).
local msg = {
Type = "metric",
Timestamp = nil,
Severity = 6,
}
-- Convert a duration or timestamp expressed in nanoseconds into whole
-- seconds, rounding down to the nearest integer value.
function convert_to_sec(ns)
    local seconds = ns / 1e9
    return math.floor(seconds)
end
function process_message ()
local severity = read_message("Fields[severity_label]")
local logger = read_message("Logger")
@ -39,11 +50,17 @@ function process_message ()
return -1, "Cannot match any services from " .. logger
end
-- timestamp values should be converted to seconds because log timestamps
-- have a precision of one second (or millisecond sometimes)
if convert_to_sec(read_message('Timestamp')) + grace_interval < math.max(convert_to_sec(last_timer_events[service] or 0), start_time) then
-- skip the log message if it doesn't fall into the current interval
return 0
end
if not logs_counters[service] then
-- a new service has been discovered
discovered_services[#discovered_services + 1] = service
logs_counters[service] = {}
last_timer_events[service] = 0
for _, label in pairs(utils.severity_to_label_map) do
logs_counters[service][label] = 0
end
@ -74,7 +91,7 @@ function timer_event(ns)
-- all metrics.
if ns - enter_at < interval_in_ns and current_service <= #discovered_services then
local service_name = discovered_services[current_service]
local last_timer_event = last_timer_events[service_name]
local last_timer_event = last_timer_events[service_name] or 0
local delta_sec = (ns - last_timer_event) / 1e9
for level, val in pairs(logs_counters[service_name]) do

View File

@ -15,6 +15,7 @@
class lma_collector::logs::counter (
$hostname,
$interval = 60,
$grace_interval = 30,
) {
include lma_collector::params
include lma_collector::service::log
@ -28,8 +29,9 @@ class lma_collector::logs::counter (
ticker_interval => 1,
preserve_data => true,
config => {
interval => $interval,
hostname => $hostname,
interval => $interval,
hostname => $hostname,
grace_interval => $grace_interval,
},
module_directory => $lua_modules_dir,
notify => Class['lma_collector::service::log'],