
Update pep8 checks

* set the maximum line length to 100
* cleaned up the code for pep8

Change-Id: Iab260a4e77584aae31c0596f39146dd5092b807a
Signed-off-by: Amir Mofakhar <amofakhar@op5.com>
Amir Mofakhar 1 year ago
parent · commit 37d4f09057
31 changed files with 154 additions and 151 deletions
  1. monasca_transform/component/__init__.py (+1, -2)
  2. monasca_transform/component/insert/__init__.py (+4, -11)
  3. monasca_transform/component/insert/kafka_insert.py (+1, -3)
  4. monasca_transform/component/insert/kafka_insert_pre_hourly.py (+1, -3)
  5. monasca_transform/component/insert/prepare_data.py (+1, -3)
  6. monasca_transform/component/setter/pre_hourly_calculate_rate.py (+1, -0)
  7. monasca_transform/component/setter/rollup_quantity.py (+1, -0)
  8. monasca_transform/component/setter/set_aggregated_metric_name.py (+2, -3)
  9. monasca_transform/component/setter/set_aggregated_period.py (+2, -3)
  10. monasca_transform/component/usage/calculate_rate.py (+4, -1)
  11. monasca_transform/component/usage/fetch_quantity.py (+14, -8)
  12. monasca_transform/component/usage/fetch_quantity_util.py (+7, -3)
  13. monasca_transform/driver/mon_metrics_kafka.py (+3, -2)
  14. monasca_transform/offset_specs.py (+1, -0)
  15. monasca_transform/processor/__init__.py (+1, -3)
  16. monasca_transform/processor/pre_hourly_processor.py (+25, -26)
  17. monasca_transform/processor/processor_util.py (+1, -0)
  18. monasca_transform/service/transform_service.py (+5, -10)
  19. monasca_transform/transform/__init__.py (+1, -3)
  20. monasca_transform/transform/builder/generic_transform_builder.py (+10, -8)
  21. monasca_transform/transform/grouping/__init__.py (+5, -5)
  22. monasca_transform/transform/grouping/group_sort_by_timestamp.py (+9, -3)
  23. monasca_transform/transform/grouping/group_sort_by_timestamp_partition.py (+23, -22)
  24. monasca_transform/transform/grouping/group_sortby_timestamp.py (+9, -3)
  25. monasca_transform/transform/grouping/group_sortby_timestamp_partition.py (+15, -10)
  26. monasca_transform/transform/storage_utils.py (+2, -3)
  27. tests/functional/component/insert/dummy_insert.py (+1, -3)
  28. tests/functional/component/insert/dummy_insert_pre_hourly.py (+1, -3)
  29. tests/functional/data_driven_specs/test_data_driven_specs.py (+1, -0)
  30. tests/functional/json_offset_specs.py (+1, -2)
  31. tox.ini (+1, -5)
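
Most of the per-file diffs below make the same kind of change: a docstring summary that ran over two lines is collapsed onto a single line (now allowed by the 100-character limit), or a blank line is added after the summary so the hacking docstring checks pass once H405 is no longer ignored in tox.ini. A minimal before/after sketch of the pattern, using an illustrative class name rather than one from this commit:

# Before: two-line docstring summary with no blank line after it
# (flagged once the pep8 ignore list in tox.ini is removed).
class KafkaInsertExample(object):
    """Insert component that writes instance usage data
    to kafka queue
    """

# After: single-line summary within 100 characters, and a blank line
# before any additional detail.
class KafkaInsertExample(object):
    """Insert component that writes instance usage data to kafka queue.

    Any additional description goes here.
    """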

monasca_transform/component/__init__.py (+1, -2)

@@ -31,8 +31,7 @@ InstanceUsageDataAggParamsBase = namedtuple('InstanceUsageDataAggParams',
31 31
 
32 32
 
33 33
 class InstanceUsageDataAggParams(InstanceUsageDataAggParamsBase):
34
-    """A tuple which is a wrapper containing the instance usage data
35
-    and aggregation params
34
+    """A tuple which is a wrapper containing the instance usage data and aggregation params
36 35
 
37 36
     namdetuple contains:
38 37
 

monasca_transform/component/insert/__init__.py (+4, -11)

@@ -41,8 +41,7 @@ class InsertComponent(Component):
41 41
 
42 42
     @staticmethod
43 43
     def _validate_metric(metric):
44
-        """validate monasca metric.
45
-        """
44
+        """validate monasca metric."""
46 45
         try:
47 46
             # validate metric part, without the wrapper
48 47
             metric_validator.validate(metric["metric"])
@@ -124,9 +123,7 @@ class InsertComponent(Component):
124 123
 
125 124
     @staticmethod
126 125
     def _get_metric(row, agg_params):
127
-        """write data to kafka. extracts and formats
128
-        metric data and write s the data to kafka
129
-        """
126
+        """write data to kafka. extracts and formats metric data and write s the data to kafka"""
130 127
         instance_usage_dict = {"tenant_id": row.tenant_id,
131 128
                                "user_id": row.user_id,
132 129
                                "resource_uuid": row.resource_uuid,
@@ -171,9 +168,7 @@ class InsertComponent(Component):
171 168
     @staticmethod
172 169
     def _get_instance_usage_pre_hourly(row,
173 170
                                        metric_id):
174
-        """write data to kafka. extracts and formats
175
-        metric data and writes the data to kafka
176
-        """
171
+        """write data to kafka. extracts and formats metric data and writes the data to kafka"""
177 172
         # retrieve the processing meta from the row
178 173
         processing_meta = row.processing_meta
179 174
         # add transform spec metric id to the processing meta
@@ -228,9 +223,7 @@ class InsertComponent(Component):
228 223
 
229 224
     @staticmethod
230 225
     def _write_metrics_from_partition(partlistiter):
231
-        """iterate through all rdd elements in partition
232
-           and write metrics to kafka
233
-           """
226
+        """iterate through all rdd elements in partition and write metrics to kafka"""
234 227
         for part in partlistiter:
235 228
             agg_params = part.agg_params
236 229
             row = part.instance_usage_data

monasca_transform/component/insert/kafka_insert.py (+1, -3)

@@ -18,9 +18,7 @@ from monasca_transform.messaging.adapter import KafkaMessageAdapter
18 18
 
19 19
 
20 20
 class KafkaInsert(InsertComponent):
21
-    """Insert component that writes instance usage data
22
-    to kafka queue
23
-    """
21
+    """Insert component that writes instance usage data to kafka queue"""
24 22
 
25 23
     @staticmethod
26 24
     def insert(transform_context, instance_usage_df):

monasca_transform/component/insert/kafka_insert_pre_hourly.py (+1, -3)

@@ -18,9 +18,7 @@ from monasca_transform.messaging.adapter import KafkaMessageAdapterPreHourly
18 18
 
19 19
 
20 20
 class KafkaInsertPreHourly(InsertComponent):
21
-    """Insert component that writes instance usage data
22
-    to kafka queue
23
-    """
21
+    """Insert component that writes instance usage data to kafka queue"""
24 22
 
25 23
     @staticmethod
26 24
     def insert(transform_context, instance_usage_df):

monasca_transform/component/insert/prepare_data.py (+1, -3)

@@ -16,9 +16,7 @@ from monasca_transform.component.insert import InsertComponent
16 16
 
17 17
 
18 18
 class PrepareData(InsertComponent):
19
-    """prepare for insert component validates instance usage
20
-    data before calling Insert component
21
-    """
19
+    """prepare for insert component validates instance usage data before calling Insert component"""
22 20
     @staticmethod
23 21
     def insert(transform_context, instance_usage_df):
24 22
         """write instance usage data to kafka"""

monasca_transform/component/setter/pre_hourly_calculate_rate.py (+1, -0)

@@ -24,6 +24,7 @@ import json
24 24
 
25 25
 class PreHourlyCalculateRateException(Exception):
26 26
     """Exception thrown when doing pre-hourly rate calculations
27
+
27 28
     Attributes:
28 29
     value: string representing the error
29 30
     """

monasca_transform/component/setter/rollup_quantity.py (+1, -0)

@@ -26,6 +26,7 @@ import json
26 26
 
27 27
 class RollupQuantityException(Exception):
28 28
     """Exception thrown when doing quantity rollup
29
+
29 30
     Attributes:
30 31
     value: string representing the error
31 32
     """

monasca_transform/component/setter/set_aggregated_metric_name.py (+2, -3)

@@ -23,6 +23,7 @@ import json
23 23
 
24 24
 class SetAggregatedMetricName(SetterComponent):
25 25
     """setter component that sets final aggregated metric name.
26
+
26 27
     aggregated metric name is available as a parameter 'aggregated_metric_name'
27 28
     in aggregation_params in metric processing driver table.
28 29
     """
@@ -79,9 +80,7 @@ class SetAggregatedMetricName(SetterComponent):
79 80
 
80 81
     @staticmethod
81 82
     def setter(transform_context, instance_usage_df):
82
-        """set the aggregated metric name field for elements in instance usage
83
-        rdd
84
-        """
83
+        """set the aggregated metric name field for elements in instance usage rdd"""
85 84
 
86 85
         transform_spec_df = transform_context.transform_spec_df_info
87 86
 

monasca_transform/component/setter/set_aggregated_period.py (+2, -3)

@@ -23,6 +23,7 @@ import json
23 23
 
24 24
 class SetAggregatedPeriod(SetterComponent):
25 25
     """setter component that sets final aggregated metric name.
26
+
26 27
     aggregated metric name is available as a parameter 'aggregated_metric_name'
27 28
     in aggregation_params in metric processing driver table.
28 29
     """
@@ -80,9 +81,7 @@ class SetAggregatedPeriod(SetterComponent):
80 81
 
81 82
     @staticmethod
82 83
     def setter(transform_context, instance_usage_df):
83
-        """set the aggregated metric name field for elements in instance usage
84
-        rdd
85
-        """
84
+        """set the aggregated metric name field for elements in instance usage rdd"""
86 85
 
87 86
         transform_spec_df = transform_context.transform_spec_df_info
88 87
 

monasca_transform/component/usage/calculate_rate.py (+4, -1)

@@ -25,6 +25,7 @@ import json
25 25
 
26 26
 class CalculateRateException(Exception):
27 27
     """Exception thrown when calculating rate
28
+
28 29
     Attributes:
29 30
     value: string representing the error
30 31
     """
@@ -40,7 +41,9 @@ class CalculateRate(UsageComponent):
40 41
 
41 42
     @staticmethod
42 43
     def usage(transform_context, record_store_df):
43
-        """component which groups together record store records by
44
+        """Method to return instance usage dataframe:
45
+
46
+        It groups together record store records by
44 47
         provided group by columns list,sorts within the group by event
45 48
         timestamp field, calculates the rate of change between the
46 49
         oldest and latest values, and returns the resultant value as an

monasca_transform/component/usage/fetch_quantity.py (+14, -8)

@@ -33,6 +33,7 @@ import json
33 33
 
34 34
 class FetchQuantityException(Exception):
35 35
     """Exception thrown when fetching quantity
36
+
36 37
     Attributes:
37 38
     value: string representing the error
38 39
     """
@@ -50,8 +51,7 @@ GroupedDataWithOperation = namedtuple("GroupedDataWithOperation",
50 51
 
51 52
 
52 53
 class GroupedDataWithOperation(GroupedDataWithOperation):
53
-    """A tuple which is a wrapper containing record store data
54
-    and the usage operation
54
+    """A tuple which is a wrapper containing record store data and the usage operation
55 55
 
56 56
     namdetuple contains:
57 57
 
@@ -76,8 +76,10 @@ class FetchQuantity(UsageComponent):
76 76
 
77 77
     @staticmethod
78 78
     def _get_latest_oldest_quantity(grouping_results_with_operation):
79
-        """get quantity for each group by performing the requested
80
-        usage operation and return a instance usage data.
79
+        """Method to return an instance usage data
80
+
81
+        Get quantity for each group by performing the requested
82
+        usage operation and return an instance usage data.
81 83
         """
82 84
 
83 85
         # row
@@ -337,10 +339,12 @@ class FetchQuantity(UsageComponent):
337 339
 
338 340
     @staticmethod
339 341
     def usage(transform_context, record_store_df):
340
-        """component which groups together record store records by
342
+        """Method to return the latest quantity as an instance usage dataframe:
343
+
344
+        It groups together record store records by
341 345
         provided group by columns list , sorts within the group by event
342 346
         timestamp field, applies group stats udf and returns the latest
343
-        quantity as a instance usage dataframe
347
+        quantity as an instance usage dataframe
344 348
         """
345 349
         transform_spec_df = transform_context.transform_spec_df_info
346 350
 
@@ -358,10 +362,12 @@ class FetchQuantity(UsageComponent):
358 362
     @staticmethod
359 363
     def usage_by_operation(transform_context, record_store_df,
360 364
                            usage_fetch_operation):
361
-        """component which groups together record store records by
365
+        """Returns the latest quantity as a instance usage dataframe
366
+
367
+        It groups together record store records by
362 368
         provided group by columns list , sorts within the group by event
363 369
         timestamp field, applies group stats udf and returns the latest
364
-        quantity as a instance usage dataframe
370
+        quantity as an instance usage dataframe
365 371
         """
366 372
         transform_spec_df = transform_context.transform_spec_df_info
367 373
 

monasca_transform/component/usage/fetch_quantity_util.py (+7, -3)

@@ -28,6 +28,7 @@ import json
28 28
 
29 29
 class FetchQuantityUtilException(Exception):
30 30
     """Exception thrown when fetching quantity
31
+
31 32
     Attributes:
32 33
     value: string representing the error
33 34
     """
@@ -62,8 +63,9 @@ class FetchQuantityUtil(UsageComponent):
62 63
 
63 64
     @staticmethod
64 65
     def _format_quantity_util(row):
65
-        """calculate the utilized quantity based on idle percentage
66
-        quantity and convert to instance usage format
66
+        """Converts calculated utilized quantity to an instance usage format
67
+
68
+        Calculation based on idle percentage
67 69
         """
68 70
         #
69 71
         tenant_id = getattr(row, "tenant_id", "all")
@@ -141,7 +143,9 @@ class FetchQuantityUtil(UsageComponent):
141 143
 
142 144
     @staticmethod
143 145
     def usage(transform_context, record_store_df):
144
-        """component which groups together record store records by
146
+        """Method to return instance usage dataframe:
147
+
148
+        It groups together record store records by
145 149
         provided group by columns list, sorts within the group by event
146 150
         timestamp field, applies group stats udf and returns the latest
147 151
         quantity as a instance usage dataframe

monasca_transform/driver/mon_metrics_kafka.py (+3, -2)

@@ -221,6 +221,7 @@ class MonMetricsKafkaProcessor(object):
221 221
     @staticmethod
222 222
     def process_metric(transform_context, record_store_df):
223 223
         """process (aggregate) metric data from record_store data
224
+
224 225
         All the parameters to drive processing should be available
225 226
         in transform_spec_df dataframe.
226 227
         """
@@ -231,8 +232,7 @@ class MonMetricsKafkaProcessor(object):
231 232
 
232 233
     @staticmethod
233 234
     def process_metrics(transform_context, record_store_df):
234
-        """start processing (aggregating) metrics
235
-        """
235
+        """start processing (aggregating) metrics"""
236 236
         #
237 237
         # look in record_store_df for list of metrics to be processed
238 238
         #
@@ -536,6 +536,7 @@ class MonMetricsKafkaProcessor(object):
536 536
     @staticmethod
537 537
     def transform_to_recordstore(kvs):
538 538
         """Transform metrics data from kafka to record store format.
539
+
539 540
         extracts, validates, filters, generates data from kakfa to only keep
540 541
         data that has to be aggregated. Generate data generates multiple
541 542
         records for for the same incoming metric if the metric has multiple

monasca_transform/offset_specs.py (+1, -0)

@@ -60,6 +60,7 @@ class OffsetSpec(object):
60 60
 @six.add_metaclass(abc.ABCMeta)
61 61
 class OffsetSpecs(object):
62 62
     """Class representing offset specs to help recover.
63
+
63 64
     From where processing should pick up in case of failure
64 65
     """
65 66
 

monasca_transform/processor/__init__.py (+1, -3)

@@ -19,9 +19,7 @@ class Processor(object):
19 19
 
20 20
     @abc.abstractmethod
21 21
     def get_app_name(self):
22
-        """get name of this application. Will be used to
23
-        store offsets in database
24
-        """
22
+        """get name of this application. Will be used to store offsets in database"""
25 23
         raise NotImplementedError(
26 24
             "Class %s doesn't implement get_app_name()"
27 25
             % self.__class__.__name__)

monasca_transform/processor/pre_hourly_processor.py (+25, -26)

@@ -59,7 +59,9 @@ class PreHourlyProcessorDataProvider(ProcessUtilDataProvider):
59 59
 
60 60
 
61 61
 class PreHourlyProcessor(Processor):
62
-    """Processor to process usage data published to metrics_pre_hourly topic a
62
+    """Publish metrics in kafka
63
+
64
+    Processor to process usage data published to metrics_pre_hourly topic a
63 65
     and publish final rolled up metrics to metrics topic in kafka.
64 66
     """
65 67
 
@@ -95,9 +97,7 @@ class PreHourlyProcessor(Processor):
95 97
 
96 98
     @staticmethod
97 99
     def get_app_name():
98
-        """get name of this application. Will be used to
99
-        store offsets in database
100
-        """
100
+        """get name of this application. Will be used to store offsets in database"""
101 101
         return "mon_metrics_kafka_pre_hourly"
102 102
 
103 103
     @staticmethod
@@ -113,9 +113,7 @@ class PreHourlyProcessor(Processor):
113 113
     def _get_offsets_from_kafka(brokers,
114 114
                                 topic,
115 115
                                 offset_time):
116
-        """get dict representing kafka
117
-        offsets.
118
-        """
116
+        """get dict representing kafka offsets."""
119 117
         # get client
120 118
         client = KafkaClient(brokers)
121 119
 
@@ -144,9 +142,7 @@ class PreHourlyProcessor(Processor):
144 142
 
145 143
     @staticmethod
146 144
     def _parse_saved_offsets(app_name, topic, saved_offset_spec):
147
-        """get dict representing saved
148
-        offsets.
149
-        """
145
+        """get dict representing saved offsets."""
150 146
         offset_dict = {}
151 147
         for key, value in saved_offset_spec.items():
152 148
             if key.startswith("%s_%s" % (app_name, topic)):
@@ -197,8 +193,7 @@ class PreHourlyProcessor(Processor):
197 193
                                topic,
198 194
                                app_name,
199 195
                                saved_offset_spec):
200
-        """get offset range from saved offset to latest.
201
-        """
196
+        """get offset range from saved offset to latest."""
202 197
         offset_range_list = []
203 198
 
204 199
         # https://cwiki.apache.org/confluence/display/KAFKA/
@@ -243,8 +238,9 @@ class PreHourlyProcessor(Processor):
243 238
 
244 239
     @staticmethod
245 240
     def get_processing_offset_range_list(processing_time):
246
-        """get offset range to fetch data from. The
247
-        range will last from the last saved offsets to current offsets
241
+        """Get offset range to fetch data from.
242
+
243
+        The range will last from the last saved offsets to current offsets
248 244
         available. If there are no last saved offsets available in the
249 245
         database the starting offsets will be set to the earliest
250 246
         available in kafka.
@@ -284,13 +280,13 @@ class PreHourlyProcessor(Processor):
284 280
 
285 281
     @staticmethod
286 282
     def get_offset_specs():
287
-        """get offset specifications.
288
-        """
283
+        """get offset specifications."""
289 284
         return simport.load(cfg.CONF.repositories.offsets)()
290 285
 
291 286
     @staticmethod
292 287
     def get_effective_offset_range_list(offset_range_list):
293
-        """get effective batch offset range.
288
+        """Get effective batch offset range.
289
+
294 290
         Effective batch offset range covers offsets starting
295 291
         from effective batch revision (defined by effective_batch_revision
296 292
         config property). By default this method will set the
@@ -432,8 +428,9 @@ class PreHourlyProcessor(Processor):
432 428
 
433 429
     @staticmethod
434 430
     def filter_out_records_not_in_current_batch(instance_usage_df):
435
-        """Filter out any records which don't pertain to the
436
-        current batch (i.e., records before or after the
431
+        """Filter out any records which don't pertain to the current batch
432
+
433
+        (i.e., records before or after the
437 434
         batch currently being processed).
438 435
         """
439 436
         # get the most recent batch time from the stored offsets
@@ -476,7 +473,9 @@ class PreHourlyProcessor(Processor):
476 473
 
477 474
     @staticmethod
478 475
     def process_instance_usage(transform_context, instance_usage_df):
479
-        """second stage aggregation. Aggregate instance usage rdd
476
+        """Second stage aggregation.
477
+
478
+        Aggregate instance usage rdd
480 479
         data and write results to metrics topic in kafka.
481 480
         """
482 481
 
@@ -525,8 +524,7 @@ class PreHourlyProcessor(Processor):
525 524
 
526 525
     @staticmethod
527 526
     def do_transform(instance_usage_df):
528
-        """start processing (aggregating) metrics
529
-        """
527
+        """start processing (aggregating) metrics"""
530 528
         #
531 529
         # look in instance_usage_df for list of metrics to be processed
532 530
         #
@@ -571,10 +569,11 @@ class PreHourlyProcessor(Processor):
571 569
 
572 570
     @staticmethod
573 571
     def run_processor(spark_context, processing_time):
574
-        """process data in metrics_pre_hourly queue, starting
575
-           from the last saved offsets, else start from earliest
576
-           offsets available
577
-           """
572
+        """Process data in metrics_pre_hourly queue
573
+
574
+        Starting from the last saved offsets, else start from earliest
575
+        offsets available
576
+        """
578 577
 
579 578
         offset_range_list = (
580 579
             PreHourlyProcessor.get_processing_offset_range_list(

monasca_transform/processor/processor_util.py (+1, -0)

@@ -40,6 +40,7 @@ class PreHourlyProcessorUtil(object):
40 40
     @staticmethod
41 41
     def is_time_to_run(check_date_time):
42 42
         """return True if its time to run this processor.
43
+
43 44
         It is time to run the processor if:
44 45
             The processor has no previous recorded run time.
45 46
             It is more than the configured 'late_metric_slack_time' (to allow

monasca_transform/service/transform_service.py (+5, -10)

@@ -43,6 +43,7 @@ def main():
43 43
 
44 44
 def shutdown_all_threads_and_die():
45 45
     """Shut down all threads and exit process.
46
+
46 47
     Hit it with a hammer to kill all threads and die.
47 48
     """
48 49
     LOG = log.getLogger(__name__)
@@ -51,9 +52,7 @@ def shutdown_all_threads_and_die():
51 52
 
52 53
 
53 54
 def get_process(proc_name):
54
-    """Get process given  string in
55
-    process cmd line.
56
-    """
55
+    """Get process given  string in process cmd line."""
57 56
     LOG = log.getLogger(__name__)
58 57
     proc = None
59 58
     try:
@@ -91,8 +90,7 @@ def stop_spark_submit_process():
91 90
 
92 91
 
93 92
 class Transform(os_service.Service):
94
-    """Class used with Openstack service.
95
-    """
93
+    """Class used with Openstack service."""
96 94
 
97 95
     LOG = log.getLogger(__name__)
98 96
 
@@ -143,9 +141,7 @@ class TransformService(threading.Thread):
143 141
             CONF.service.election_polling_frequency))
144 142
 
145 143
     def check_if_still_leader(self):
146
-        """Return true if the this host is the
147
-        leader
148
-        """
144
+        """Return true if the this host is the leader"""
149 145
         leader = None
150 146
         try:
151 147
             leader = self.coordinator.get_leader(self.group).get()
@@ -289,8 +285,7 @@ class TransformService(threading.Thread):
289 285
 
290 286
 
291 287
 def main_service():
292
-    """Method to use with Openstack service.
293
-    """
288
+    """Method to use with Openstack service."""
294 289
     ConfigInitializer.basic_config()
295 290
     LogUtils.init_logger(__name__)
296 291
     launcher = os_service.ServiceLauncher(cfg.CONF)

monasca_transform/transform/__init__.py (+1, -3)

@@ -23,9 +23,7 @@ TransformContextBase = namedtuple("TransformContext",
23 23
 
24 24
 
25 25
 class TransformContext(TransformContextBase):
26
-    """A tuple which contains all the configuration information
27
-    to drive processing
28
-
26
+    """A tuple which contains all the configuration information to drive processing
29 27
 
30 28
     namedtuple contains:
31 29
 

monasca_transform/transform/builder/generic_transform_builder.py (+10, -8)

@@ -16,9 +16,10 @@ from monasca_transform.log_utils import LogUtils
16 16
 from stevedore import extension
17 17
 
18 18
 
19
-class GenericTransformBuilder (object):
20
-    """Build transformation pipeline based on
21
-    aggregation_pipeline spec in metric processing
19
+class GenericTransformBuilder(object):
20
+    """Build transformation pipeline
21
+
22
+    Based on aggregation_pipeline spec in metric processing
22 23
     configuration
23 24
     """
24 25
 
@@ -67,9 +68,8 @@ class GenericTransformBuilder (object):
67 68
 
68 69
     @staticmethod
69 70
     def _parse_transform_pipeline(transform_spec_df):
70
-        """parse aggregation pipeline from metric
71
-        processing configuration
72
-        """
71
+        """Parse aggregation pipeline from metric processing configuration"""
72
+
73 73
         # get aggregation pipeline df
74 74
         aggregation_pipeline_df = transform_spec_df\
75 75
             .select("aggregation_params_map.aggregation_pipeline")
@@ -95,8 +95,10 @@ class GenericTransformBuilder (object):
95 95
     @staticmethod
96 96
     def do_transform(transform_context,
97 97
                      record_store_df):
98
-        """Build a dynamic aggregation pipeline and call components to
99
-        process record store dataframe
98
+        """Method to return instance usage dataframe
99
+
100
+        Build a dynamic aggregation pipeline
101
+        and call components to process record store dataframe
100 102
         """
101 103
         transform_spec_df = transform_context.transform_spec_df_info
102 104
         (source,

monasca_transform/transform/grouping/__init__.py (+5, -5)

@@ -20,8 +20,7 @@ RecordStoreWithGroupByBase = namedtuple("RecordStoreWithGroupBy",
20 20
 
21 21
 
22 22
 class RecordStoreWithGroupBy(RecordStoreWithGroupByBase):
23
-    """A tuple which is a wrapper containing record store data
24
-    and the group by columns
23
+    """A tuple which is a wrapper containing record store data and the group by columns
25 24
 
26 25
     namdetuple contains:
27 26
 
@@ -36,8 +35,7 @@ GroupingResultsBase = namedtuple("GroupingResults",
36 35
 
37 36
 
38 37
 class GroupingResults(GroupingResultsBase):
39
-    """A tuple which is a wrapper containing grouping key
40
-    and grouped result set
38
+    """A tuple which is a wrapper containing grouping key and grouped result set
41 39
 
42 40
     namdetuple contains:
43 41
 
@@ -52,7 +50,9 @@ class Grouping(object):
52 50
 
53 51
     @staticmethod
54 52
     def _parse_grouping_key(grouping_str):
55
-        """parse grouping key which in "^key1=value1^key2=value2..." format
53
+        """parse grouping key
54
+
55
+        which in "^key1=value1^key2=value2..." format
56 56
         into a dictionary of key value pairs
57 57
         """
58 58
         group_by_dict = {}

monasca_transform/transform/grouping/group_sort_by_timestamp.py (+9, -3)

@@ -42,7 +42,9 @@ class GroupSortbyTimestamp(Grouping):
42 42
 
43 43
     @staticmethod
44 44
     def _prepare_for_group_by(record_store_with_group_by_rdd):
45
-        """creates a new rdd where the first element of each row
45
+        """creates a new rdd where:
46
+
47
+        the first element of each row
46 48
         contains array of grouping key and event timestamp fields.
47 49
         Grouping key and event timestamp fields are used by
48 50
         partitioning and sorting function to partition the data
@@ -100,7 +102,9 @@ class GroupSortbyTimestamp(Grouping):
100 102
 
101 103
     @staticmethod
102 104
     def _get_group_first_last_quantity_udf(grouplistiter):
103
-        """Return stats that include first row key, first_event_timestamp,
105
+        """Return stats that include:
106
+
107
+        first row key, first_event_timestamp,
104 108
         first event quantity, last_event_timestamp and last event quantity
105 109
         """
106 110
         first_row = None
@@ -159,7 +163,9 @@ class GroupSortbyTimestamp(Grouping):
159 163
     def fetch_group_latest_oldest_quantity(record_store_df,
160 164
                                            transform_spec_df,
161 165
                                            group_by_columns_list):
162
-        """function to group record store data, sort by timestamp within group
166
+        """Function to group record store data
167
+
168
+        Sort by timestamp within group
163 169
         and get first and last timestamp along with quantity within each group
164 170
 
165 171
         This function uses key-value pair rdd's groupBy function to do group_by

monasca_transform/transform/grouping/group_sort_by_timestamp_partition.py (+23, -22)

@@ -26,9 +26,11 @@ class GroupSortbyTimestampPartition(Grouping):
26 26
 
27 27
     @staticmethod
28 28
     def _get_group_first_last_quantity_udf(partition_list_iter):
29
-        """user defined function to to through a list of partitions. Each
30
-        partition contains elements for a group. All the elements are sorted by
29
+        """User defined function to go through a list of partitions.
30
+
31
+        Each partition contains elements for a group. All the elements are sorted by
31 32
         timestamp.
33
+
32 34
         The stats include first row key, first_event_timestamp,
33 35
         fist event quantity, last_event_timestamp and last event quantity
34 36
         """
@@ -87,8 +89,11 @@ class GroupSortbyTimestampPartition(Grouping):
87 89
 
88 90
     @staticmethod
89 91
     def _prepare_for_group_by(record_store_with_group_by_rdd):
90
-        """creates a new rdd where the first element of each row
91
-        contains array of grouping key and event timestamp fields.
92
+        """Creates a new rdd where:
93
+
94
+        The first element of each row contains array of grouping
95
+        key and event timestamp fields.
96
+
92 97
         Grouping key and event timestamp fields are used by
93 98
         partitioning and sorting function to partition the data
94 99
         by grouping key and then sort the elements in a group by the
@@ -118,7 +123,9 @@ class GroupSortbyTimestampPartition(Grouping):
118 123
 
119 124
     @staticmethod
120 125
     def _get_partition_by_group(group_composite):
121
-        """get a hash of the grouping key, which is then used by partitioning
126
+        """Get a hash of the grouping key,
127
+
128
+        which is then used by partitioning
122 129
         function to get partition where the groups data should end up in.
123 130
         It uses hash % num_partitions to get partition
124 131
         """
@@ -133,8 +140,7 @@ class GroupSortbyTimestampPartition(Grouping):
133 140
 
134 141
     @staticmethod
135 142
     def _sort_by_timestamp(group_composite):
136
-        """get timestamp which will be used to sort grouped data
137
-        """
143
+        """get timestamp which will be used to sort grouped data"""
138 144
         event_timestamp_string = group_composite[1]
139 145
         return event_timestamp_string
140 146
 
@@ -142,9 +148,7 @@ class GroupSortbyTimestampPartition(Grouping):
142 148
     def _group_sort_by_timestamp_partition(record_store_df,
143 149
                                            group_by_columns_list,
144 150
                                            num_of_groups):
145
-        """component that does a group by and then sorts all
146
-        the items within the group by event timestamp.
147
-        """
151
+        """It does a group by and then sorts all the items within the group by event timestamp."""
148 152
         # convert the dataframe rdd to normal rdd and add the group by
149 153
         # column list
150 154
         record_store_with_group_by_rdd = record_store_df.rdd.\
@@ -174,6 +178,7 @@ class GroupSortbyTimestampPartition(Grouping):
174 178
     @staticmethod
175 179
     def _remove_none_filter(row):
176 180
         """remove any rows which have None as grouping key
181
+
177 182
         [GroupingResults(grouping_key="key1", results={})] rows get created
178 183
         when partition does not get any grouped data assigned to it
179 184
         """
@@ -185,22 +190,18 @@ class GroupSortbyTimestampPartition(Grouping):
185 190
                                         transform_spec_df,
186 191
                                         group_by_columns_list,
187 192
                                         num_of_groups):
188
-        """function to group record store data, sort by timestamp within group
189
-        and get first and last timestamp along with quantity within each group
193
+        """Function to group record store data
190 194
 
195
+        Sort by timestamp within group
196
+        and get first and last timestamp along with quantity within each group
191 197
         To do group by it uses custom partitioning function which creates a new
192
-        partition
193
-        for each group and uses RDD's repartitionAndSortWithinPartitions
198
+        partition for each group and uses RDD's repartitionAndSortWithinPartitions
194 199
         function to do the grouping and sorting within the group.
195
-
196 200
         This is more scalable than just using RDD's group_by as using this
197
-        technique
198
-        group is not materialized into a list and stored in memory, but rather
199
-        it uses RDD's in built partitioning capability to do the sort
200
-
201
-        num_of_groups should be more than expected groups, otherwise the same
202
-        partition can get used for two groups which will cause incorrect
203
-        results.
201
+        technique group is not materialized into a list and stored in memory, but rather
202
+        it uses RDD's in built partitioning capability to do the sort num_of_groups should
203
+        be more than expected groups, otherwise the same
204
+        partition can get used for two groups which will cause incorrect results.
204 205
         """
205 206
 
206 207
         # group and order elements in group using repartition

monasca_transform/transform/grouping/group_sortby_timestamp.py (+9, -3)

@@ -42,7 +42,9 @@ class GroupSortbyTimestamp(Grouping):
42 42
 
43 43
     @staticmethod
44 44
     def _prepare_for_groupby(record_store_with_groupby_rdd):
45
-        """creates a new rdd where the first element of each row
45
+        """creates a new rdd where:
46
+
47
+        the first element of each row
46 48
         contains array of grouping key and event timestamp fields.
47 49
         Grouping key and event timestamp fields are used by
48 50
         partitioning and sorting function to partition the data
@@ -99,7 +101,9 @@ class GroupSortbyTimestamp(Grouping):
99 101
 
100 102
     @staticmethod
101 103
     def _get_group_first_last_quantity_udf(grouplistiter):
102
-        """Return stats that include first row key, first_event_timestamp,
104
+        """Return stats that include:
105
+
106
+        first row key, first_event_timestamp,
103 107
         first event quantity, last_event_timestamp and last event quantity
104 108
         """
105 109
         first_row = None
@@ -158,7 +162,9 @@ class GroupSortbyTimestamp(Grouping):
158 162
     def fetch_group_latest_oldest_quantity(record_store_df,
159 163
                                            transform_spec_df,
160 164
                                            groupby_columns_list):
161
-        """function to group record store data, sort by timestamp within group
165
+        """To group record store data
166
+
167
+        sort by timestamp within group
162 168
         and get first and last timestamp along with quantity within each group
163 169
 
164 170
         This function uses key-value pair rdd's groupBy function to do groupby

monasca_transform/transform/grouping/group_sortby_timestamp_partition.py (+15, -10)

@@ -26,8 +26,9 @@ class GroupSortbyTimestampPartition(Grouping):
26 26
 
27 27
     @staticmethod
28 28
     def _get_group_first_last_quantity_udf(partitionlistiter):
29
-        """user defined function to to through a list of partitions. Each
30
-        partition contains elements for a group. All the elements are sorted by
29
+        """user defined function to to through a list of partitions.
30
+
31
+        Each partition contains elements for a group. All the elements are sorted by
31 32
         timestamp.
32 33
         The stats include first row key, first_event_timestamp,
33 34
         fist event quantity, last_event_timestamp and last event quantity
@@ -87,7 +88,9 @@ class GroupSortbyTimestampPartition(Grouping):
87 88
 
88 89
     @staticmethod
89 90
     def _prepare_for_groupby(record_store_with_groupby_rdd):
90
-        """creates a new rdd where the first element of each row
91
+        """creates a new rdd where:
92
+
93
+        the first element of each row
91 94
         contains array of grouping key and event timestamp fields.
92 95
         Grouping key and event timestamp fields are used by
93 96
         partitioning and sorting function to partition the data
@@ -118,7 +121,9 @@ class GroupSortbyTimestampPartition(Grouping):
118 121
 
119 122
     @staticmethod
120 123
     def _get_partition_by_group(group_composite):
121
-        """get a hash of the grouping key, which is then used by partitioning
124
+        """get a hash of the grouping key,
125
+
126
+        which is then used by partitioning
122 127
         function to get partition where the groups data should end up in.
123 128
         It uses hash % num_partitions to get partition
124 129
         """
@@ -133,8 +138,7 @@ class GroupSortbyTimestampPartition(Grouping):
133 138
 
134 139
     @staticmethod
135 140
     def _sortby_timestamp(group_composite):
136
-        """get timestamp which will be used to sort grouped data
137
-        """
141
+        """get timestamp which will be used to sort grouped data"""
138 142
         event_timestamp_string = group_composite[1]
139 143
         return event_timestamp_string
140 144
 
@@ -142,9 +146,7 @@ class GroupSortbyTimestampPartition(Grouping):
142 146
     def _group_sortby_timestamp_partition(record_store_df,
143 147
                                           groupby_columns_list,
144 148
                                           num_of_groups):
145
-        """component that does a group by and then sorts all
146
-        the items within the group by event timestamp.
147
-        """
149
+        """It does a group by and then sorts all the items within the group by event timestamp."""
148 150
         # convert the dataframe rdd to normal rdd and add the group by
149 151
         # column list
150 152
         record_store_with_groupby_rdd = record_store_df.rdd.\
@@ -174,6 +176,7 @@ class GroupSortbyTimestampPartition(Grouping):
174 176
     @staticmethod
175 177
     def _remove_none_filter(row):
176 178
         """remove any rows which have None as grouping key
179
+
177 180
         [GroupingResults(grouping_key="key1", results={})] rows get created
178 181
         when partition does not get any grouped data assigned to it
179 182
         """
@@ -185,7 +188,9 @@ class GroupSortbyTimestampPartition(Grouping):
185 188
                                         transform_spec_df,
186 189
                                         groupby_columns_list,
187 190
                                         num_of_groups):
188
-        """function to group record store data, sort by timestamp within group
191
+        """Function to group record store data
192
+
193
+        Sort by timestamp within group
189 194
         and get first and last timestamp along with quantity within each group
190 195
 
191 196
         To do group by it uses custom partitioning function which creates a new

monasca_transform/transform/storage_utils.py (+2, -3)

@@ -17,6 +17,7 @@ from pyspark import StorageLevel
17 17
 
18 18
 class InvalidCacheStorageLevelException(Exception):
19 19
     """Exception thrown when an invalid cache storage level is encountered
20
+
20 21
     Attributes:
21 22
     value: string representing the error
22 23
     """
@@ -33,9 +34,7 @@ class StorageUtils(object):
33 34
 
34 35
     @staticmethod
35 36
     def get_storage_level(storage_level_str):
36
-        """get pyspark storage level from storage level
37
-        string
38
-        """
37
+        """get pyspark storage level from storage level string"""
39 38
         if (storage_level_str == "DISK_ONLY"):
40 39
             return StorageLevel.DISK_ONLY
41 40
         elif (storage_level_str == "DISK_ONLY_2"):

tests/functional/component/insert/dummy_insert.py (+1, -3)

@@ -19,9 +19,7 @@ from tests.functional.messaging.adapter import DummyAdapter
19 19
 
20 20
 
21 21
 class DummyInsert(InsertComponent):
22
-    """Insert component that writes metric data to
23
-    to kafka queue
24
-    """
22
+    """Insert component that writes metric data to kafka queue"""
25 23
 
26 24
     @staticmethod
27 25
     def insert(transform_context, instance_usage_df):

tests/functional/component/insert/dummy_insert_pre_hourly.py (+1, -3)

@@ -19,9 +19,7 @@ from tests.functional.messaging.adapter import DummyAdapter
19 19
 
20 20
 
21 21
 class DummyInsertPreHourly(InsertComponent):
22
-    """Insert component that writes metric data to
23
-    to kafka queue
24
-    """
22
+    """Insert component that writes metric data to kafka queue"""
25 23
 
26 24
     @staticmethod
27 25
     def insert(transform_context, instance_usage_df):

tests/functional/data_driven_specs/test_data_driven_specs.py (+1, -0)

@@ -542,6 +542,7 @@ class TestDataDrivenSpecsRepo(SparkContextTest):
542 542
                                event_type=None,
543 543
                                pre_transform_specs_data_frame=None):
544 544
         """get row for event type
545
+
545 546
         :rtype: Row
546 547
         """
547 548
         rows = pre_transform_specs_data_frame.filter(

tests/functional/json_offset_specs.py (+1, -2)

@@ -53,8 +53,7 @@ class JSONOffsetSpecs(OffsetSpecs):
53 53
             log.info('No kafka offsets found at startup')
54 54
 
55 55
     def _save(self):
56
-        """get the specs of last run time of offset
57
-        """
56
+        """get the specs of last run time of offset"""
58 57
         log.info("Saving json offsets: %s", self._kafka_offsets)
59 58
 
60 59
         with open(self.kafka_offset_spec_file, 'w') as offset_file:

tox.ini (+1, -5)

@@ -83,11 +83,7 @@ commands =
83 83
 
84 84
 [flake8]
85 85
 max-complexity = 30
86
-# TODO: ignored checks should be enabled in the future
87
-# H904 Wrap long lines in parentheses instead of a backslash (DEPRECATED)
88
-# H405 Multiline docstring separated by empty line
89
-# E402 module level import not at top of file FIXME remove this
90
-ignore = H904,H405,E402
86
+max-line-length = 100
91 87
 # H106 Don’t put vim configuration in source files
92 88
 # H203 Use assertIs(Not)None to check for None
93 89
 enable-extensions=H106,H203
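
For reference, the 100-character limit configured above is the same limit pycodestyle enforces when given max_line_length=100. A small, assumed local check (the file path is only an example, not part of the commit) might look like:

import pycodestyle

# Run the pep8/pycodestyle checks with the 100-character line limit from tox.ini.
# The path below is a placeholder; point it at any module in the repository.
style = pycodestyle.StyleGuide(max_line_length=100)
report = style.check_files(['monasca_transform/offset_specs.py'])
print('pep8 violations found: %d' % report.total_errors)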
