monasca-transform/monasca_transform/component/usage/calculate_rate.py

# Copyright 2016 Hewlett Packard Enterprise Development Company LP
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import json

from pyspark.sql import SQLContext

from monasca_transform.component import Component
from monasca_transform.component.setter.rollup_quantity import RollupQuantity
from monasca_transform.component.usage.fetch_quantity import FetchQuantity
from monasca_transform.component.usage import UsageComponent
from monasca_transform.transform.transform_utils import InstanceUsageUtils


class CalculateRateException(Exception):
    """Exception thrown when calculating rate.

    Attributes:
        value: string representing the error
    """

    def __init__(self, value):
        self.value = value

    def __str__(self):
        return repr(self.value)


class CalculateRate(UsageComponent):

    @staticmethod
    def usage(transform_context, record_store_df):
"""Method to return instance usage dataframe:
It groups together record store records by
provided group by columns list,sorts within the group by event
timestamp field, calculates the rate of change between the
oldest and latest values, and returns the resultant value as an
instance usage dataframe
"""
        instance_usage_data_json_list = []

        transform_spec_df = transform_context.transform_spec_df_info
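
        # transform_spec_df is assumed to hold a single transform spec row,
        # so the collect()[0] calls below read the aggregation parameters
        # from that one row on the driver.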
        # get aggregated metric name
        agg_params = transform_spec_df.select(
            "aggregation_params_map.aggregated_metric_name").\
            collect()[0].asDict()
        aggregated_metric_name = agg_params["aggregated_metric_name"]

        # get aggregation period
        agg_params = transform_spec_df.select(
            "aggregation_params_map.aggregation_period").collect()[0].asDict()
        aggregation_period = agg_params["aggregation_period"]
        # Fetch the latest quantities
        latest_instance_usage_df = \
            FetchQuantity().usage_by_operation(transform_context,
                                               record_store_df,
                                               "avg")

        # Roll up the latest quantities
        latest_rolled_up_instance_usage_df = \
            RollupQuantity().setter_by_operation(transform_context,
                                                 latest_instance_usage_df,
                                                 "sum")

        # Fetch the oldest quantities
        oldest_instance_usage_df = \
            FetchQuantity().usage_by_operation(transform_context,
                                               record_store_df,
                                               "oldest")

        # Roll up the oldest quantities
        oldest_rolled_up_instance_usage_df = \
            RollupQuantity().setter_by_operation(transform_context,
                                                 oldest_instance_usage_df,
                                                 "sum")
        # Calculate the rate of change as a percentage
        oldest_dict = oldest_rolled_up_instance_usage_df.collect()[0].asDict()
        oldest_quantity = float(oldest_dict['quantity'])

        latest_dict = latest_rolled_up_instance_usage_df.collect()[0].asDict()
        latest_quantity = float(latest_dict['quantity'])

        # guard against a zero baseline, which would make the rate undefined
        if oldest_quantity == 0.0:
            raise CalculateRateException(
                "quantity for oldest record is zero, cannot calculate "
                "rate for %s" % aggregated_metric_name)

        rate_percentage = \
            ((oldest_quantity - latest_quantity) / oldest_quantity) * 100
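
        # Worked example: oldest_quantity=100.0 and latest_quantity=75.0
        # give ((100.0 - 75.0) / 100.0) * 100 = 25.0, a 25 percent drop from
        # the oldest to the latest value; an increase yields a negative rate.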
        # create a new instance usage dict
        instance_usage_dict = {
            "tenant_id": latest_dict.get("tenant_id", "all"),
            "user_id": latest_dict.get("user_id", "all"),
            "resource_uuid": latest_dict.get("resource_uuid", "all"),
            "geolocation": latest_dict.get("geolocation", "all"),
            "region": latest_dict.get("region", "all"),
            "zone": latest_dict.get("zone", "all"),
            "host": latest_dict.get("host", "all"),
            "project_id": latest_dict.get("project_id", "all"),
            "aggregated_metric_name": aggregated_metric_name,
            "quantity": rate_percentage,
            "firstrecord_timestamp_unix":
                oldest_dict["firstrecord_timestamp_unix"],
            "firstrecord_timestamp_string":
                oldest_dict["firstrecord_timestamp_string"],
            "lastrecord_timestamp_unix":
                latest_dict["lastrecord_timestamp_unix"],
            "lastrecord_timestamp_string":
                latest_dict["lastrecord_timestamp_string"],
            "record_count":
                oldest_dict["record_count"] + latest_dict["record_count"],
            "usage_date": latest_dict["usage_date"],
            "usage_hour": latest_dict["usage_hour"],
            "usage_minute": latest_dict["usage_minute"],
            "aggregation_period": aggregation_period,
            "processing_meta": {
                "event_type": latest_dict.get(
                    "event_type", Component.DEFAULT_UNAVAILABLE_VALUE),
                "oldest_timestamp_string":
                    oldest_dict["firstrecord_timestamp_string"],
                "oldest_quantity": oldest_quantity,
                "latest_timestamp_string":
                    latest_dict["lastrecord_timestamp_string"],
                "latest_quantity": latest_quantity,
            },
        }
        instance_usage_data_json = json.dumps(instance_usage_dict)
        instance_usage_data_json_list.append(instance_usage_data_json)
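
        # The single JSON document is parallelized into a one-element RDD so
        # that InstanceUsageUtils can rebuild a one-row instance usage
        # dataframe from it.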
        spark_context = record_store_df.rdd.context
        instance_usage_rdd = \
            spark_context.parallelize(instance_usage_data_json_list)
        sql_context = SQLContext.getOrCreate(spark_context)

        instance_usage_df = InstanceUsageUtils.create_df_from_json_rdd(
            sql_context,
            instance_usage_rdd)

        return instance_usage_df
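

# A minimal usage sketch, assuming a transform_context prepared by the
# transform service (with transform_spec_df_info set) and a record store
# dataframe of pre-processed metric records:
#
#     instance_usage_df = CalculateRate.usage(transform_context,
#                                             record_store_df)
#
# The returned dataframe contains a single row whose "quantity" column holds
# the percentage rate of change between the oldest and latest rolled-up
# quantities.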