heat-cfntools/bin/cfn-push-stats

#!/usr/bin/env python
#
#    Licensed under the Apache License, Version 2.0 (the "License"); you may
#    not use this file except in compliance with the License. You may obtain
#    a copy of the License at
#
#         http://www.apache.org/licenses/LICENSE-2.0
#
#    Unless required by applicable law or agreed to in writing, software
#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
#    License for the specific language governing permissions and limitations
#    under the License.

"""
Implements cfn-push-stats CloudFormation functionality
"""
import argparse
import logging
import os
import subprocess

# Override BOTO_CONFIG, which makes boto look only at the specified
# config file, instead of the default locations
os.environ['BOTO_CONFIG'] = '/var/lib/heat-cfntools/cfn-boto-cfg'
from boto.ec2 import cloudwatch  # noqa


log_format = '%(levelname)s [%(asctime)s] %(message)s'
log_file_name = "/var/log/cfn-push-stats.log"
logging.basicConfig(filename=log_file_name,
                    format=log_format)
LOG = logging.getLogger('cfntools')

try:
    import psutil
except ImportError:
    LOG.warning("psutil not available. If you want process and memory "
                "statistics, you need to install it.")

from heat_cfntools.cfntools import cfn_helper  # noqa

KILO = 1024
MEGA = 1048576
GIGA = 1073741824
unit_map = {'bytes': 1,
            'kilobytes': KILO,
            'megabytes': MEGA,
            'gigabytes': GIGA}

description = " "
parser = argparse.ArgumentParser(description=description)
parser.add_argument('-v', '--verbose', action="store_true",
                    help="Verbose logging", required=False)
parser.add_argument('--credential-file', dest="credential_file",
                    help="credential-file", required=False,
                    default='/etc/cfn/cfn-credentials')
parser.add_argument('--service-failure', required=False, action="store_true",
                    help='Reports a service failure.')
parser.add_argument('--mem-util', required=False, action="store_true",
                    help='Reports memory utilization in percentages.')
parser.add_argument('--mem-used', required=False, action="store_true",
                    help='Reports memory used (excluding cache/buffers) '
                         'in megabytes.')
parser.add_argument('--mem-avail', required=False, action="store_true",
                    help='Reports available memory (including cache/buffers) '
                         'in megabytes.')
parser.add_argument('--swap-util', required=False, action="store_true",
                    help='Reports swap utilization in percentages.')
parser.add_argument('--swap-used', required=False, action="store_true",
                    help='Reports allocated swap space in megabytes.')
parser.add_argument('--disk-space-util', required=False, action="store_true",
                    help='Reports disk space utilization in percentages.')
parser.add_argument('--disk-space-used', required=False, action="store_true",
                    help='Reports allocated disk space in gigabytes.')
parser.add_argument('--disk-space-avail', required=False, action="store_true",
                    help='Reports available disk space in gigabytes.')
parser.add_argument('--memory-units', required=False, default='megabytes',
                    help='Specifies units for memory metrics.')
parser.add_argument('--disk-units', required=False, default='megabytes',
                    help='Specifies units for disk metrics.')
parser.add_argument('--disk-path', required=False, default='/',
                    help='Selects the disk by the path on which to report.')
parser.add_argument('--cpu-util', required=False, action="store_true",
                    help='Reports cpu utilization in percentages.')
parser.add_argument('--haproxy', required=False, action='store_true',
                    help='Reports HAProxy loadbalancer usage.')
parser.add_argument('--haproxy-latency', required=False, action='store_true',
                    help='Reports HAProxy latency')
parser.add_argument('--heartbeat', required=False, action='store_true',
                    help='Sends a Heartbeat.')
parser.add_argument('--watch', required=False,
                    help='the name of the watch to post to.')
parser.add_argument('--metric', required=False,
                    help='name of the metric to post to.')
parser.add_argument('--units', required=False,
                    help='name of the units to be used for the specified'
                    'metric')
parser.add_argument('--value', required=False,
                    help='value to post to the specified metric')
args = parser.parse_args()

LOG.debug('cfn-push-stats called %s ' % (str(args)))

credentials = cfn_helper.parse_creds_file(args.credential_file)

namespace = 'system/linux'
data = {}

# Logging
# =======
if args.verbose:
    LOG.setLevel(logging.DEBUG)

# Generic user-specified metric
# =============================
if args.metric and args.units and args.value:
    data[args.metric] = {
        'Value': args.value,
        'Units': args.units}

# service failure
# ===============
if args.service_failure:
    data['ServiceFailure'] = {
        'Value': 1,
        'Units': 'Counter'}

# heartbeat
# ========
if args.heartbeat:
    data['Heartbeat'] = {
        'Value': 1,
        'Units': 'Counter'}

# memory space
# ============
if args.mem_util or args.mem_used or args.mem_avail:
    mem = psutil.phymem_usage()
if args.mem_util:
    data['MemoryUtilization'] = {
        'Value': mem.percent,
        'Units': 'Percent'}
if args.mem_used:
    data['MemoryUsed'] = {
        'Value': mem.used / unit_map[args.memory_units],
        'Units': args.memory_units}
if args.mem_avail:
    data['MemoryAvailable'] = {
        'Value': mem.free / unit_map[args.memory_units],
        'Units': args.memory_units}

# swap space
# ==========
if args.swap_util or args.swap_used:
    swap = psutil.virtmem_usage()
if args.swap_util:
    data['SwapUtilization'] = {
        'Value': swap.percent,
        'Units': 'Percent'}
if args.swap_used:
    data['SwapUsed'] = {
        'Value': swap.used / unit_map[args.memory_units],
        'Units': args.memory_units}

# disk space
# ==========
if args.disk_space_util or args.disk_space_used or args.disk_space_avail:
    disk = psutil.disk_usage(args.disk_path)
if args.disk_space_util:
    data['DiskSpaceUtilization'] = {
        'Value': disk.percent,
        'Units': 'Percent'}
if args.disk_space_used:
    data['DiskSpaceUsed'] = {
        'Value': disk.used / unit_map[args.disk_units],
        'Units': args.disk_units}
if args.disk_space_avail:
    data['DiskSpaceAvailable'] = {
        'Value': disk.free / unit_map[args.disk_units],
        'Units': args.disk_units}

# cpu utilization
# ===============
if args.cpu_util:
    # blocks for 1 second.
    cpu_percent = psutil.cpu_percent(interval=1)
    data['CPUUtilization'] = {
        'Value': cpu_percent,
        'Units': 'Percent'}


# HAProxy
# =======
def parse_haproxy_unix_socket(res, latency_only=False):
    # http://docs.amazonwebservices.com/ElasticLoadBalancing/latest
    # /DeveloperGuide/US_MonitoringLoadBalancerWithCW.html

    type_map = {'FRONTEND': '0', 'BACKEND': '1', 'SERVER': '2', 'SOCKET': '3'}
    num_map = {'status': 17, 'svname': 1, 'check_duration': 38, 'type': 32,
               'req_tot': 48, 'hrsp_2xx': 40, 'hrsp_3xx': 41, 'hrsp_4xx': 42,
               'hrsp_5xx': 43}

    def add_stat(key, value, unit='Counter'):
        res[key] = {'Value': value,
                    'Units': unit}

    echo = subprocess.Popen(['echo', 'show stat'],
                            stdout=subprocess.PIPE)
    socat = subprocess.Popen(['socat', 'stdio', '/tmp/.haproxy-stats'],
                             stdin=echo.stdout,
                             stdout=subprocess.PIPE)
    end_pipe = socat.stdout
    raw = [l.strip('\n').split(',')
           for l in end_pipe if l[0] != '#' and len(l) > 2]
    latency = 0
    up_count = 0
    down_count = 0
    for f in raw:
        if latency_only is False:
            if f[num_map['type']] == type_map['FRONTEND']:
                add_stat('RequestCount', f[num_map['req_tot']])
                add_stat('HTTPCode_ELB_4XX', f[num_map['hrsp_4xx']])
                add_stat('HTTPCode_ELB_5XX', f[num_map['hrsp_5xx']])
            elif f[num_map['type']] == type_map['BACKEND']:
                add_stat('HTTPCode_Backend_2XX', f[num_map['hrsp_2xx']])
                add_stat('HTTPCode_Backend_3XX', f[num_map['hrsp_3xx']])
                add_stat('HTTPCode_Backend_4XX', f[num_map['hrsp_4xx']])
                add_stat('HTTPCode_Backend_5XX', f[num_map['hrsp_5xx']])
            else:
                if f[num_map['status']] == 'UP':
                    up_count = up_count + 1
                else:
                    down_count = down_count + 1
        if f[num_map['check_duration']] != '':
            latency = max(float(f[num_map['check_duration']]), latency)

    # note: haproxy's check_duration is in ms, but Latency is in seconds
    add_stat('Latency', str(latency / 1000), unit='Seconds')
    if latency_only is False:
        add_stat('HealthyHostCount', str(up_count))
        add_stat('UnHealthyHostCount', str(down_count))


def send_stats(info):

    # Create boto connection, need the hard-coded port/path as boto
    # can't read these from config values in BOTO_CONFIG
    # FIXME : currently only http due to is_secure=False
    client = cloudwatch.CloudWatchConnection(
        aws_access_key_id=credentials['AWSAccessKeyId'],
        aws_secret_access_key=credentials['AWSSecretKey'],
        is_secure=False, port=8003, path="/v1", debug=0)

    # Then we send the metric datapoints passed in "info", note this could
    # contain multiple keys as the options parsed above are not exclusive
    # The alarm name is passed as a dimension so the metric datapoint can
    # be associated with the alarm/watch in the engine
    metadata = cfn_helper.Metadata('not-used', None)
    metric_dims = metadata.get_tags()
    if args.watch:
        metric_dims['AlarmName'] = args.watch
    for key in info:
        LOG.info("Sending metric %s, Units %s, Value %s" %
                 (key, info[key]['Units'], info[key]['Value']))
        client.put_metric_data(namespace=namespace,
                               name=key,
                               value=info[key]['Value'],
                               timestamp=None,  # means use "now" in the engine
                               unit=info[key]['Units'],
                               dimensions=metric_dims,
                               statistics=None)


if args.haproxy:
    namespace = 'AWS/ELB'
    lb_data = {}
    parse_haproxy_unix_socket(lb_data)
    send_stats(lb_data)
elif args.haproxy_latency:
    namespace = 'AWS/ELB'
    lb_data = {}
    parse_haproxy_unix_socket(lb_data, latency_only=True)
    send_stats(lb_data)
else:
    send_stats(data)