From dd7c6cef4e90aebf4b861ce5137a1965beea6599 Mon Sep 17 00:00:00 2001
From: Steve Baker
Date: Mon, 24 Jun 2013 14:46:47 +1200
Subject: [PATCH] Heat autoscaling scenario test

This test starts with a single server and scales up to three servers,
triggered by a script that consumes memory. Seven minutes after stack
creation, the memory consumption script will quit and the scale-down
alarms will scale back down to a single server.

Due to the nature of this test, it takes about 10 minutes to run
locally.

The scenario test has been put in the orchestration package for the
following reasons:
- this will be the first of many heat scenario tests
- this will allow a tox filter to run this test for the slow heat
  gating job

Change-Id: I53ed12369d12b902108b9b8fa7885df34f6ab51f
---
 requirements.txt                            |   1 +
 tempest/scenario/manager.py                 |  80 +++++++-
 tempest/scenario/orchestration/__init__.py  |   0
 .../orchestration/test_autoscaling.py       | 108 +++++++++++
 .../orchestration/test_autoscaling.yaml     | 182 ++++++++++++++++++
 5 files changed, 364 insertions(+), 7 deletions(-)
 create mode 100644 tempest/scenario/orchestration/__init__.py
 create mode 100644 tempest/scenario/orchestration/test_autoscaling.py
 create mode 100644 tempest/scenario/orchestration/test_autoscaling.yaml

diff --git a/requirements.txt b/requirements.txt
index cc61b01b92..06db0e6431 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -13,6 +13,7 @@ python-keystoneclient>=0.2.0
 python-novaclient>=2.10.0
 python-neutronclient>=2.2.3,<3.0.0
 python-cinderclient>=1.0.4
+python-heatclient>=0.2.3
 testresources
 keyring
 testrepository
diff --git a/tempest/scenario/manager.py b/tempest/scenario/manager.py
index 277eae4cc4..65c5d9a5fc 100644
--- a/tempest/scenario/manager.py
+++ b/tempest/scenario/manager.py
@@ -16,11 +16,13 @@
 # License for the specific language governing permissions and limitations
 # under the License.
 
+import os
 import subprocess
 
 # Default client libs
 import cinderclient.client
 import glanceclient
+import heatclient.client
 import keystoneclient.v2_0.client
 import netaddr
 from neutronclient.common import exceptions as exc
@@ -48,6 +50,7 @@ class OfficialClientManager(tempest.manager.Manager):
     NOVACLIENT_VERSION = '2'
     CINDERCLIENT_VERSION = '1'
+    HEATCLIENT_VERSION = '1'
 
     def __init__(self, username, password, tenant_name):
         super(OfficialClientManager, self).__init__()
@@ -62,6 +65,10 @@ class OfficialClientManager(tempest.manager.Manager):
         self.volume_client = self._get_volume_client(username,
                                                      password,
                                                      tenant_name)
+        self.orchestration_client = self._get_orchestration_client(
+            username,
+            password,
+            tenant_name)
 
     def _get_compute_client(self, username, password, tenant_name):
         # Novaclient will not execute operations for anyone but the
@@ -98,6 +105,32 @@ class OfficialClientManager(tempest.manager.Manager):
                                             tenant_name,
                                             auth_url)
 
+    def _get_orchestration_client(self, username=None, password=None,
+                                  tenant_name=None):
+        if not username:
+            username = self.config.identity.admin_username
+        if not password:
+            password = self.config.identity.admin_password
+        if not tenant_name:
+            tenant_name = self.config.identity.tenant_name
+
+        self._validate_credentials(username, password, tenant_name)
+
+        keystone = self._get_identity_client(username, password, tenant_name)
+        token = keystone.auth_token
+        try:
+            endpoint = keystone.service_catalog.url_for(
+                service_type='orchestration',
+                endpoint_type='publicURL')
+        except keystoneclient.exceptions.EndpointNotFound:
+            return None
+        else:
+            return heatclient.client.Client(self.HEATCLIENT_VERSION,
+                                            endpoint,
+                                            token=token,
+                                            username=username,
+                                            password=password)
+
     def _get_identity_client(self, username, password, tenant_name):
         # This identity client is not intended to check the security
         # of the identity service, so use admin credentials by default.
@@ -153,13 +186,8 @@ class OfficialClientTest(tempest.test.BaseTestCase):
         super(OfficialClientTest, cls).setUpClass()
         cls.isolated_creds = isolated_creds.IsolatedCreds(
             __name__, tempest_client=False)
-        if cls.config.compute.allow_tenant_isolation:
-            creds = cls.isolated_creds.get_primary_creds()
-            username, tenant_name, password = creds
-        else:
-            username = cls.config.identity.username
-            password = cls.config.identity.password
-            tenant_name = cls.config.identity.tenant_name
+
+        username, tenant_name, password = cls.credentials()
 
         cls.manager = OfficialClientManager(username, password, tenant_name)
         cls.compute_client = cls.manager.compute_client
@@ -167,9 +195,20 @@ class OfficialClientTest(tempest.test.BaseTestCase):
         cls.identity_client = cls.manager.identity_client
         cls.network_client = cls.manager.network_client
         cls.volume_client = cls.manager.volume_client
+        cls.orchestration_client = cls.manager.orchestration_client
         cls.resource_keys = {}
         cls.os_resources = []
 
+    @classmethod
+    def credentials(cls):
+        if cls.config.compute.allow_tenant_isolation:
+            return cls.isolated_creds.get_primary_creds()
+
+        username = cls.config.identity.username
+        password = cls.config.identity.password
+        tenant_name = cls.config.identity.tenant_name
+        return username, tenant_name, password
+
     @classmethod
     def tearDownClass(cls):
         # NOTE(jaypipes): Because scenario tests are typically run in a
@@ -498,3 +537,30 @@ class NetworkScenarioTest(OfficialClientTest):
             timeout=self.config.compute.ssh_timeout),
             'Auth failure in connecting to %s@%s via ssh' %
             (username, ip_address))
+
+
+class OrchestrationScenarioTest(OfficialClientTest):
+    """
+    Base class for orchestration scenario tests
+    """
+
+    @classmethod
+    def credentials(cls):
+        username = cls.config.identity.admin_username
+        password = cls.config.identity.admin_password
+        tenant_name = cls.config.identity.tenant_name
+        return username, tenant_name, password
+
+    def _load_template(self, base_file, file_name):
+        filepath = os.path.join(os.path.dirname(os.path.realpath(base_file)),
+                                file_name)
+        with open(filepath) as f:
+            return f.read()
+
+    @classmethod
+    def _stack_rand_name(cls):
+        return rand_name(cls.__name__ + '-')
+
+    def _create_keypair(self):
+        kp_name = rand_name('keypair-smoke')
+        return self.compute_client.keypairs.create(kp_name)
diff --git a/tempest/scenario/orchestration/__init__.py b/tempest/scenario/orchestration/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/tempest/scenario/orchestration/test_autoscaling.py b/tempest/scenario/orchestration/test_autoscaling.py
new file mode 100644
index 0000000000..cd959a8ea5
--- /dev/null
+++ b/tempest/scenario/orchestration/test_autoscaling.py
@@ -0,0 +1,108 @@
+# vim: tabstop=4 shiftwidth=4 softtabstop=4
+
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+from tempest.openstack.common import log as logging
+from tempest.scenario import manager
+from tempest.test import attr
+from tempest.test import call_until_true
+import time
+
+
+LOG = logging.getLogger(__name__)
+
+
+class AutoScalingTest(manager.OrchestrationScenarioTest):
+
+    def setUp(self):
+        super(AutoScalingTest, self).setUp()
+        if not self.config.orchestration.image_ref:
+            raise self.skipException("No image available to test")
+        self.client = self.orchestration_client
+
+    def assign_keypair(self):
+        self.stack_name = self._stack_rand_name()
+        if self.config.orchestration.keypair_name:
+            self.keypair_name = self.config.orchestration.keypair_name
+        else:
+            self.keypair = self._create_keypair()
+            self.keypair_name = self.keypair.id
+            self.set_resource('keypair', self.keypair)
+
+    def launch_stack(self):
+        self.parameters = {
+            'KeyName': self.keypair_name,
+            'InstanceType': self.config.orchestration.instance_type,
+            'ImageId': self.config.orchestration.image_ref,
+            'StackStart': str(time.time())
+        }
+
+        # create the stack
+        self.template = self._load_template(__file__, 'test_autoscaling.yaml')
+        self.client.stacks.create(
+            stack_name=self.stack_name,
+            template=self.template,
+            parameters=self.parameters)
+
+        self.stack = self.client.stacks.get(self.stack_name)
+        self.stack_identifier = '%s/%s' % (self.stack_name, self.stack.id)
+
+        # if a keypair was set, do not delete the stack on exit to allow
+        # for manual post-mortems
+        if not self.config.orchestration.keypair_name:
+            self.set_resource('stack', self.stack)
+
+    @attr(type='slow')
+    def test_scale_up_then_down(self):
+
+        self.assign_keypair()
+        self.launch_stack()
+
+        sid = self.stack_identifier
+        timeout = self.config.orchestration.build_timeout
+        interval = 10
+
+        self.assertEqual('CREATE', self.stack.action)
+        # wait for create to complete
+        self.status_timeout(self.client.stacks, sid, 'COMPLETE')
+
+        self.stack.get()
+        self.assertEqual('CREATE_COMPLETE', self.stack.stack_status)
+
+        # the resource SmokeServerGroup is implemented as a nested
+        # stack, so servers can be counted by counting the resources
+        # inside that nested stack
+        resource = self.client.resources.get(sid, 'SmokeServerGroup')
+        nested_stack_id = resource.physical_resource_id
+
+        def server_count():
+            # the number of servers is the number of resources
+            # in the nested stack
+            self.server_count = len(
+                self.client.resources.list(nested_stack_id))
+            return self.server_count
+
+        def assertScale(from_servers, to_servers):
+            call_until_true(lambda: server_count() == to_servers,
+                            timeout, interval)
+            self.assertEqual(to_servers, self.server_count,
+                             'Failed scaling from %d to %d servers' % (
+                                 from_servers, to_servers))
+
+        # he marched them up to the top of the hill
+        assertScale(1, 2)
+        assertScale(2, 3)
+
+        # and he marched them down again
+        assertScale(3, 2)
+        assertScale(2, 1)
diff --git a/tempest/scenario/orchestration/test_autoscaling.yaml b/tempest/scenario/orchestration/test_autoscaling.yaml
new file mode 100644
index 0000000000..045b3bc16d
--- /dev/null
+++ b/tempest/scenario/orchestration/test_autoscaling.yaml
@@ -0,0 +1,182 @@
+HeatTemplateFormatVersion: '2012-12-12'
+Description: |
+  Template which tests autoscaling and load balancing
+Parameters:
+  KeyName:
+    Type: String
+  InstanceType:
+    Type: String
+  ImageId:
+    Type: String
+  StackStart:
+    Description: Epoch seconds when the stack was launched
+    Type: Number
+  ConsumeStartSeconds:
+    Description: Seconds after invocation when memory should be consumed
+    Type: Number
+    Default: '60'
+  ConsumeStopSeconds:
+    Description: Seconds after StackStart when memory should be released
+    Type: Number
+    Default: '420'
+  ScaleUpThreshold:
+    Description: Memory percentage threshold to scale up on
+    Type: Number
+    Default: '70'
+  ScaleDownThreshold:
+    Description: Memory percentage threshold to scale down on
+    Type: Number
+    Default: '60'
+  ConsumeMemoryLimit:
+    Description: Memory percentage threshold to consume
+    Type: Number
+    Default: '71'
+Resources:
+  SmokeServerGroup:
+    Type: AWS::AutoScaling::AutoScalingGroup
+    Properties:
+      AvailabilityZones: {'Fn::GetAZs': ''}
+      LaunchConfigurationName: {Ref: LaunchConfig}
+      MinSize: '1'
+      MaxSize: '3'
+  SmokeServerScaleUpPolicy:
+    Type: AWS::AutoScaling::ScalingPolicy
+    Properties:
+      AdjustmentType: ChangeInCapacity
+      AutoScalingGroupName: {Ref: SmokeServerGroup}
+      Cooldown: '60'
+      ScalingAdjustment: '1'
+  SmokeServerScaleDownPolicy:
+    Type: AWS::AutoScaling::ScalingPolicy
+    Properties:
+      AdjustmentType: ChangeInCapacity
+      AutoScalingGroupName: {Ref: SmokeServerGroup}
+      Cooldown: '60'
+      ScalingAdjustment: '-1'
+  MEMAlarmHigh:
+    Type: AWS::CloudWatch::Alarm
+    Properties:
+      AlarmDescription: Scale-up if MEM > ScaleUpThreshold% for 10 seconds
+      MetricName: MemoryUtilization
+      Namespace: system/linux
+      Statistic: Average
+      Period: '10'
+      EvaluationPeriods: '1'
+      Threshold: {Ref: ScaleUpThreshold}
+      AlarmActions: [{Ref: SmokeServerScaleUpPolicy}]
+      Dimensions:
+      - Name: AutoScalingGroupName
+        Value: {Ref: SmokeServerGroup}
+      ComparisonOperator: GreaterThanThreshold
+  MEMAlarmLow:
+    Type: AWS::CloudWatch::Alarm
+    Properties:
+      AlarmDescription: Scale-down if MEM < ScaleDownThreshold% for 10 seconds
+      MetricName: MemoryUtilization
+      Namespace: system/linux
+      Statistic: Average
+      Period: '10'
+      EvaluationPeriods: '1'
+      Threshold: {Ref: ScaleDownThreshold}
+      AlarmActions: [{Ref: SmokeServerScaleDownPolicy}]
+      Dimensions:
+      - Name: AutoScalingGroupName
+        Value: {Ref: SmokeServerGroup}
+      ComparisonOperator: LessThanThreshold
+  CfnUser:
+    Type: AWS::IAM::User
+  SmokeKeys:
+    Type: AWS::IAM::AccessKey
+    Properties:
+      UserName: {Ref: CfnUser}
+  SmokeSecurityGroup:
+    Type: AWS::EC2::SecurityGroup
+    Properties:
+      GroupDescription: Standard firewall rules
+      SecurityGroupIngress:
+      - {IpProtocol: tcp, FromPort: '22', ToPort: '22', CidrIp: 0.0.0.0/0}
+      - {IpProtocol: tcp, FromPort: '80', ToPort: '80', CidrIp: 0.0.0.0/0}
+  LaunchConfig:
+    Type: AWS::AutoScaling::LaunchConfiguration
+    Metadata:
+      AWS::CloudFormation::Init:
+        config:
+          files:
+            /etc/cfn/cfn-credentials:
+              content:
+                Fn::Replace:
+                - $AWSAccessKeyId: {Ref: SmokeKeys}
+                  $AWSSecretKey: {'Fn::GetAtt': [SmokeKeys, SecretAccessKey]}
+                - |
+                  AWSAccessKeyId=$AWSAccessKeyId
+                  AWSSecretKey=$AWSSecretKey
+              mode: '000400'
+              owner: root
+              group: root
+            /root/watch_loop:
+              content:
+                Fn::Replace:
+                - _hi_: {Ref: MEMAlarmHigh}
+                  _lo_: {Ref: MEMAlarmLow}
+                - |
+                  #!/bin/bash
+                  while :
+                  do
+                    /opt/aws/bin/cfn-push-stats --watch _hi_ --mem-util
+                    /opt/aws/bin/cfn-push-stats --watch _lo_ --mem-util
+                    sleep 4
+                  done
+              mode: '000700'
+              owner: root
+              group: root
+            /root/consume_memory:
+              content:
+                Fn::Replace:
+                - StackStart: {Ref: StackStart}
+                  ConsumeStopSeconds: {Ref: ConsumeStopSeconds}
+                  ConsumeStartSeconds: {Ref: ConsumeStartSeconds}
+                  ConsumeMemoryLimit: {Ref: ConsumeMemoryLimit}
+                - |
+                  #!/usr/bin/env python
+                  import psutil
+                  import time
+                  import datetime
+                  import sys
+                  a = []
+                  sleep_until_consume = ConsumeStartSeconds
+                  stack_start = StackStart
+                  consume_stop_time = stack_start + ConsumeStopSeconds
+                  memory_limit = ConsumeMemoryLimit
+                  if sleep_until_consume > 0:
+                      sys.stdout.flush()
+                      time.sleep(sleep_until_consume)
+                  while psutil.virtual_memory().percent < memory_limit:
+                      sys.stdout.flush()
+                      a.append(' ' * 10**5)
+                      time.sleep(0.1)
+                  sleep_until_exit = consume_stop_time - time.time()
+                  if sleep_until_exit > 0:
+                      time.sleep(sleep_until_exit)
+              mode: '000700'
+              owner: root
+              group: root
+    Properties:
+      ImageId: {Ref: ImageId}
+      InstanceType: {Ref: InstanceType}
+      KeyName: {Ref: KeyName}
+      SecurityGroups: [{Ref: SmokeSecurityGroup}]
+      UserData:
+        Fn::Base64:
+          Fn::Replace:
+          - ConsumeStopSeconds: {Ref: ConsumeStopSeconds}
+            ConsumeStartSeconds: {Ref: ConsumeStartSeconds}
+            ConsumeMemoryLimit: {Ref: ConsumeMemoryLimit}
+          - |
+            #!/bin/bash -v
+            /opt/aws/bin/cfn-init
+            # report on memory consumption every 4 seconds
+            /root/watch_loop &
+            # wait ConsumeStartSeconds then ramp up memory consumption
+            # until it is over ConsumeMemoryLimit%
+            # then exits ConsumeStopSeconds seconds after stack launch
+            /root/consume_memory > /root/consume_memory.log &
\ No newline at end of file
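
Note on the polling pattern: assertScale in the test relies on tempest's
call_until_true helper (imported above as tempest.test.call_until_true), which
repeatedly evaluates a predicate until it returns True or a timeout elapses.
The snippet below is only an illustrative sketch of that (func, duration,
sleep_for) convention as used by the test, not the tempest implementation
itself; the names in the usage comment (server_count, build_timeout) are
borrowed from the test above.

    import time

    def call_until_true(func, duration, sleep_for):
        # Poll func() every sleep_for seconds for at most duration seconds.
        # Return True as soon as func() is truthy, False once the time is up.
        deadline = time.time() + duration
        while time.time() < deadline:
            if func():
                return True
            time.sleep(sleep_for)
        return False

    # Usage mirroring the test: wait up to build_timeout seconds for the
    # nested stack to report the expected server count, checking every
    # 10 seconds:
    # call_until_true(lambda: server_count() == to_servers, build_timeout, 10)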