165 lines
7.2 KiB
Python
165 lines
7.2 KiB
Python
# Copyright 2016 Hewlett Packard Enterprise Development Company LP
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
|
# not use this file except in compliance with the License. You may obtain
|
|
# a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
|
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
|
# License for the specific language governing permissions and limitations
|
|
# under the License.
|
|
|
|
from pyspark.sql import SQLContext
|
|
|
|
from monasca_transform.component import Component
|
|
from monasca_transform.component.setter.rollup_quantity import RollupQuantity
|
|
from monasca_transform.component.usage.fetch_quantity import FetchQuantity
|
|
from monasca_transform.component.usage import UsageComponent
|
|
from monasca_transform.transform.transform_utils import InstanceUsageUtils
|
|
|
|
import json
|
|
|
|
|
|
class CalculateRateException(Exception):
|
|
"""Exception thrown when calculating rate
|
|
|
|
Attributes:
|
|
value: string representing the error
|
|
"""
|
|
|
|
def __init__(self, value):
|
|
self.value = value
|
|
|
|
def __str__(self):
|
|
return repr(self.value)
|
|
|
|
|
|
class CalculateRate(UsageComponent):
|
|
|
|
@staticmethod
|
|
def usage(transform_context, record_store_df):
|
|
"""Method to return instance usage dataframe:
|
|
|
|
It groups together record store records by
|
|
provided group by columns list,sorts within the group by event
|
|
timestamp field, calculates the rate of change between the
|
|
oldest and latest values, and returns the resultant value as an
|
|
instance usage dataframe
|
|
"""
|
|
instance_usage_data_json_list = []
|
|
|
|
transform_spec_df = transform_context.transform_spec_df_info
|
|
|
|
# get aggregated metric name
|
|
agg_params = transform_spec_df.select(
|
|
"aggregation_params_map.aggregated_metric_name"). \
|
|
collect()[0].asDict()
|
|
aggregated_metric_name = agg_params["aggregated_metric_name"]
|
|
|
|
# get aggregation period
|
|
agg_params = transform_spec_df.select(
|
|
"aggregation_params_map.aggregation_period").collect()[0].asDict()
|
|
aggregation_period = agg_params["aggregation_period"]
|
|
|
|
# Fetch the oldest quantities
|
|
latest_instance_usage_df = \
|
|
FetchQuantity().usage_by_operation(transform_context,
|
|
record_store_df,
|
|
"avg")
|
|
|
|
# Roll up the latest quantities
|
|
latest_rolled_up_instance_usage_df = \
|
|
RollupQuantity().setter_by_operation(transform_context,
|
|
latest_instance_usage_df,
|
|
"sum")
|
|
|
|
# Fetch the oldest quantities
|
|
oldest_instance_usage_df = \
|
|
FetchQuantity().usage_by_operation(transform_context,
|
|
record_store_df,
|
|
"oldest")
|
|
|
|
# Roll up the oldest quantities
|
|
oldest_rolled_up_instance_usage_df = \
|
|
RollupQuantity().setter_by_operation(transform_context,
|
|
oldest_instance_usage_df,
|
|
"sum")
|
|
|
|
# Calculate the rate change by percentage
|
|
oldest_dict = oldest_rolled_up_instance_usage_df.collect()[0].asDict()
|
|
oldest_quantity = float(oldest_dict['quantity'])
|
|
|
|
latest_dict = latest_rolled_up_instance_usage_df.collect()[0].asDict()
|
|
latest_quantity = float(latest_dict['quantity'])
|
|
|
|
rate_percentage = \
|
|
((oldest_quantity - latest_quantity) / oldest_quantity) * 100
|
|
|
|
# create a new instance usage dict
|
|
instance_usage_dict = {"tenant_id":
|
|
latest_dict.get("tenant_id", "all"),
|
|
"user_id":
|
|
latest_dict.get("user_id", "all"),
|
|
"resource_uuid":
|
|
latest_dict.get("resource_uuid", "all"),
|
|
"geolocation":
|
|
latest_dict.get("geolocation", "all"),
|
|
"region":
|
|
latest_dict.get("region", "all"),
|
|
"zone":
|
|
latest_dict.get("zone", "all"),
|
|
"host":
|
|
latest_dict.get("host", "all"),
|
|
"project_id":
|
|
latest_dict.get("project_id", "all"),
|
|
"aggregated_metric_name":
|
|
aggregated_metric_name,
|
|
"quantity": rate_percentage,
|
|
"firstrecord_timestamp_unix":
|
|
oldest_dict["firstrecord_timestamp_unix"],
|
|
"firstrecord_timestamp_string":
|
|
oldest_dict["firstrecord_timestamp_string"],
|
|
"lastrecord_timestamp_unix":
|
|
latest_dict["lastrecord_timestamp_unix"],
|
|
"lastrecord_timestamp_string":
|
|
latest_dict["lastrecord_timestamp_string"],
|
|
"record_count": oldest_dict["record_count"] +
|
|
latest_dict["record_count"],
|
|
"usage_date": latest_dict["usage_date"],
|
|
"usage_hour": latest_dict["usage_hour"],
|
|
"usage_minute": latest_dict["usage_minute"],
|
|
"aggregation_period": aggregation_period,
|
|
"processing_meta":
|
|
{"event_type":
|
|
latest_dict.get("event_type",
|
|
Component.
|
|
DEFAULT_UNAVAILABLE_VALUE),
|
|
"oldest_timestamp_string":
|
|
oldest_dict[
|
|
"firstrecord_timestamp_string"],
|
|
"oldest_quantity": oldest_quantity,
|
|
"latest_timestamp_string":
|
|
latest_dict[
|
|
"lastrecord_timestamp_string"],
|
|
"latest_quantity": latest_quantity
|
|
}
|
|
}
|
|
|
|
instance_usage_data_json = json.dumps(instance_usage_dict)
|
|
instance_usage_data_json_list.append(instance_usage_data_json)
|
|
spark_context = record_store_df.rdd.context
|
|
|
|
instance_usage_rdd = \
|
|
spark_context.parallelize(instance_usage_data_json_list)
|
|
|
|
sql_context = SQLContext\
|
|
.getOrCreate(record_store_df.rdd.context)
|
|
instance_usage_df = InstanceUsageUtils.create_df_from_json_rdd(
|
|
sql_context,
|
|
instance_usage_rdd)
|
|
|
|
return instance_usage_df
|