monasca-transform/monasca_transform/component/usage/fetch_quantity_util.py

# Copyright 2016 Hewlett Packard Enterprise Development Company LP
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import json

from pyspark.sql import SQLContext
from pyspark.sql.functions import col
from pyspark.sql.functions import when

from monasca_transform.component import Component
from monasca_transform.component.component_utils import ComponentUtils
from monasca_transform.component.usage.fetch_quantity import FetchQuantity
from monasca_transform.component.usage import UsageComponent
from monasca_transform.transform.transform_utils import InstanceUsageUtils


class FetchQuantityUtilException(Exception):
    """Exception thrown when fetching quantity

    Attributes:
        value: string representing the error
    """

    def __init__(self, value):
        self.value = value

    def __str__(self):
        return repr(self.value)


class FetchQuantityUtil(UsageComponent):
@staticmethod
def _supported_fetch_quantity_util_operations():
        # The results of "sum", "max", and "min" don't make sense and/or
        # may be misleading (the latter two because the metrics used as
        # input to the utilization calculation may not be from the same
        # point in time, e.g., one from the beginning of the streaming
        # interval and the other from the end).
return ["avg", "latest", "oldest"]

    @staticmethod
    def _is_valid_fetch_quantity_util_operation(operation):
        """Return True if it is a valid fetch operation."""
        return operation in FetchQuantityUtil.\
            _supported_fetch_quantity_util_operations()

    @staticmethod
    def _format_quantity_util(row):
        """Convert the calculated utilized quantity to instance usage format.

        The calculation is based on idle percentage.
        """
tenant_id = getattr(row, "tenant_id", "all")
resource_uuid = getattr(row, "resource_uuid",
Component.DEFAULT_UNAVAILABLE_VALUE)
user_id = getattr(row, "user_id",
Component.DEFAULT_UNAVAILABLE_VALUE)
geolocation = getattr(row, "geolocation",
Component.DEFAULT_UNAVAILABLE_VALUE)
region = getattr(row, "region", Component.DEFAULT_UNAVAILABLE_VALUE)
zone = getattr(row, "zone", Component.DEFAULT_UNAVAILABLE_VALUE)
host = getattr(row, "host", "all")
usage_date = getattr(row, "usage_date",
Component.DEFAULT_UNAVAILABLE_VALUE)
usage_hour = getattr(row, "usage_hour",
Component.DEFAULT_UNAVAILABLE_VALUE)
usage_minute = getattr(row, "usage_minute",
Component.DEFAULT_UNAVAILABLE_VALUE)
aggregated_metric_name = getattr(row, "aggregated_metric_name",
Component.DEFAULT_UNAVAILABLE_VALUE)
# get utilized quantity
quantity = row.utilized_quantity
firstrecord_timestamp_unix = \
getattr(row, "firstrecord_timestamp_unix",
Component.DEFAULT_UNAVAILABLE_VALUE)
firstrecord_timestamp_string = \
getattr(row, "firstrecord_timestamp_string",
Component.DEFAULT_UNAVAILABLE_VALUE)
lastrecord_timestamp_unix = \
getattr(row, "lastrecord_timestamp_unix",
Component.DEFAULT_UNAVAILABLE_VALUE)
lastrecord_timestamp_string = \
getattr(row, "lastrecord_timestamp_string",
Component.DEFAULT_UNAVAILABLE_VALUE)
record_count = getattr(row, "record_count",
Component.DEFAULT_UNAVAILABLE_VALUE)
        # service group and service id
        service_group = Component.DEFAULT_UNAVAILABLE_VALUE
        service_id = Component.DEFAULT_UNAVAILABLE_VALUE
# aggregation period
aggregation_period = Component.DEFAULT_UNAVAILABLE_VALUE
instance_usage_dict = {"tenant_id": tenant_id, "user_id": user_id,
"resource_uuid": resource_uuid,
"geolocation": geolocation, "region": region,
"zone": zone, "host": host,
"aggregated_metric_name":
aggregated_metric_name,
"quantity": quantity,
"firstrecord_timestamp_unix":
firstrecord_timestamp_unix,
"firstrecord_timestamp_string":
firstrecord_timestamp_string,
"lastrecord_timestamp_unix":
lastrecord_timestamp_unix,
"lastrecord_timestamp_string":
lastrecord_timestamp_string,
"record_count": record_count,
"service_group": service_group,
"service_id": service_id,
"usage_date": usage_date,
"usage_hour": usage_hour,
"usage_minute": usage_minute,
"aggregation_period": aggregation_period}
instance_usage_data_json = json.dumps(instance_usage_dict)
return instance_usage_data_json

    @staticmethod
    def usage(transform_context, record_store_df):
        """Return an instance usage dataframe.

        Groups record store records by the provided group-by columns
        list, sorts within each group by the event timestamp field,
        applies the group stats udf, and returns the latest quantity
        as an instance usage dataframe.

        This component groups records by event_type (a.k.a. metric
        name) and expects two kinds of records in the record_store
        data:

            total quantity records - the total available quantity,
                e.g. cpu.total_logical_cores
            idle perc records - the percentage that is idle,
                e.g. cpu.idle_perc

        To calculate the utilized quantity this component uses the
        following formula:

            utilized quantity = (100 - idle_perc) * total_quantity / 100
        """
sql_context = SQLContext.getOrCreate(record_store_df.rdd.context)
transform_spec_df = transform_context.transform_spec_df_info
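        # each driver parameter below is read from the transform spec
        # dataframe's aggregation_params_map; collect()[0] assumes the
        # spec dataframe holds a single row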
        # get rollup operation (avg, latest, oldest)
agg_params = transform_spec_df.select(
"aggregation_params_map.usage_fetch_operation"). \
collect()[0].asDict()
usage_fetch_operation = agg_params["usage_fetch_operation"]
# check if operation is valid
if not FetchQuantityUtil. \
_is_valid_fetch_quantity_util_operation(usage_fetch_operation):
raise FetchQuantityUtilException(
"Operation %s is not supported" % usage_fetch_operation)
        # get the quantities for both the total quantity and
        # idle perc records
instance_usage_df = FetchQuantity().usage(
transform_context, record_store_df)
# get aggregation period for instance usage dataframe
agg_params = transform_spec_df.select(
"aggregation_params_map.aggregation_period").collect()[0].asDict()
aggregation_period = agg_params["aggregation_period"]
group_by_period_list = ComponentUtils.\
_get_instance_group_by_period_list(aggregation_period)
# get what we want to group by
agg_params = transform_spec_df.select(
"aggregation_params_map.aggregation_group_by_list").\
collect()[0].asDict()
aggregation_group_by_list = agg_params["aggregation_group_by_list"]
# group by columns list
group_by_columns_list = group_by_period_list + \
aggregation_group_by_list
# get quantity event type
agg_params = transform_spec_df.select(
"aggregation_params_map.usage_fetch_util_quantity_event_type").\
collect()[0].asDict()
usage_fetch_util_quantity_event_type = \
agg_params["usage_fetch_util_quantity_event_type"]
# check if driver parameter is provided
if usage_fetch_util_quantity_event_type is None or \
usage_fetch_util_quantity_event_type == "":
raise FetchQuantityUtilException(
"Driver parameter '%s' is missing"
% "usage_fetch_util_quantity_event_type")
# get idle perc event type
agg_params = transform_spec_df.select(
"aggregation_params_map.usage_fetch_util_idle_perc_event_type").\
collect()[0].asDict()
usage_fetch_util_idle_perc_event_type = \
agg_params["usage_fetch_util_idle_perc_event_type"]
# check if driver parameter is provided
if usage_fetch_util_idle_perc_event_type is None or \
usage_fetch_util_idle_perc_event_type == "":
raise FetchQuantityUtilException(
"Driver parameter '%s' is missing"
% "usage_fetch_util_idle_perc_event_type")
# get quantity records dataframe
event_type_quantity_clause = "processing_meta.event_type='%s'" \
% usage_fetch_util_quantity_event_type
quantity_df = instance_usage_df.select('*').where(
event_type_quantity_clause).alias("quantity_df_alias")
# get idle perc records dataframe
event_type_idle_perc_clause = "processing_meta.event_type='%s'" \
% usage_fetch_util_idle_perc_event_type
idle_perc_df = instance_usage_df.select('*').where(
event_type_idle_perc_clause).alias("idle_perc_df_alias")
# join quantity records with idle perc records
# create a join condition without the event_type
cond = [item for item in group_by_columns_list
if item != 'event_type']
quant_idle_perc_df = quantity_df.join(idle_perc_df, cond, 'left')
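        # the left join keeps quantity rows that have no matching idle
        # perc row; their idle_perc comes through as null and falls to
        # the otherwise() branch below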
#
# Find utilized quantity based on idle percentage
#
# utilized quantity = (100 - idle_perc) * total_quantity / 100
#
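        # a zero idle_perc is folded into the otherwise() branch, since
        # (100 - 0) * quantity / 100 == quantity; a null idle_perc from
        # the left join takes the same branch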
quant_idle_perc_calc_df = quant_idle_perc_df.select(
col("quantity_df_alias.*"),
when(col("idle_perc_df_alias.quantity") != 0.0,
(100.0 - col(
"idle_perc_df_alias.quantity")) * col(
"quantity_df_alias.quantity") / 100.0)
.otherwise(col("quantity_df_alias.quantity"))
.alias("utilized_quantity"),
col("quantity_df_alias.quantity")
.alias("total_quantity"),
col("idle_perc_df_alias.quantity")
.alias("idle_perc"))
instance_usage_json_rdd = \
quant_idle_perc_calc_df.rdd.map(
FetchQuantityUtil._format_quantity_util)
instance_usage_df = \
InstanceUsageUtils.create_df_from_json_rdd(sql_context,
instance_usage_json_rdd)
return instance_usage_df
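
# A minimal invocation sketch (hypothetical wiring; in a real pipeline
# the driver builds transform_context from a transform spec and
# record_store_df from pre-validated metrics):
#
#     instance_usage_df = FetchQuantityUtil.usage(transform_context,
#                                                 record_store_df)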