# monasca-transform/tests/functional/component/insert/dummy_insert.py

# Copyright 2016 Hewlett Packard Enterprise Development Company LP
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
from oslo_config import cfg

from monasca_transform.component.insert import InsertComponent
from tests.functional.messaging.adapter import DummyAdapter


class DummyInsert(InsertComponent):
"""Insert component that writes metric data to kafka queue"""
@staticmethod
def insert(transform_context, instance_usage_df):
"""write instance usage data to kafka"""
transform_spec_df = transform_context.transform_spec_df_info
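
        # fetch the dimension_list entry nested under
        # aggregation_params_map in the transform spec dataframe;
        # collect()[0] takes the first (and only expected) spec row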
        agg_params = transform_spec_df.select(
            "aggregation_params_map.dimension_list").collect()[0].asDict()
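
        # route metrics through the test DummyAdapter instead of the
        # real messaging adapter, so nothing is sent to an actual kafka
        # broker during the test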
        cfg.CONF.set_override(
            'adapter',
            'tests.functional.messaging.adapter:DummyAdapter',
            group='messaging')
        # Approach 1
        #
        # Using foreachPartition to iterate through the elements of an
        # RDD is the recommended approach, since it avoids overwhelming
        # kafka with a huge number of connections (although in our case
        # the MessageAdapter stores the adapter_impl, so we should not
        # create many producers anyway). However, using foreachPartition
        # caused serialization (cPickle) problems: a few libraries, such
        # as kafka.SimpleProducer and oslo_config.cfg, could not be
        # serialized.
        #
        # _write_metrics_from_partition has been removed for now in
        # favor of Approach 2.
        #
        # instance_usage_df_agg_params = instance_usage_df.rdd.map(
        #     lambda x: InstanceUsageDataAggParams(x, agg_params))
        # instance_usage_df_agg_params.foreachPartition(
        #     DummyInsert._write_metrics_from_partition)
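        #
        # For reference, a minimal sketch of what a partition-level
        # writer for Approach 1 could look like (hypothetical; this is
        # not the removed helper, and the InstanceUsageDataAggParams
        # attribute names used below are assumed):
        #
        # @staticmethod
        # def _write_metrics_from_partition(usage_agg_params_iter):
        #     """write metrics for one partition of the RDD to kafka"""
        #     for usage_agg_params in usage_agg_params_iter:
        #         metric = InsertComponent._get_metric(
        #             usage_agg_params.instance_usage_data,
        #             usage_agg_params.agg_params)
        #         if InsertComponent._validate_metric(metric):
        #             DummyAdapter.send_metric(metric)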
        #
        # Approach 2
        #
        # Using collect() to fetch all the elements of the RDD and
        # write them to kafka from the driver.
        #
        for instance_usage_row in instance_usage_df.collect():
            metric = InsertComponent._get_metric(instance_usage_row,
                                                 agg_params)
            # validate the metric before sending it
            if InsertComponent._validate_metric(metric):
                DummyAdapter.send_metric(metric)
        return instance_usage_df