#!/usr/bin/env python
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

"""
Implements cfn-push-stats CloudFormation functionality

The script collects the metrics selected on the command line (a generic
user-supplied metric, service failure / heartbeat counters, memory, swap,
disk and CPU figures from psutil, or HAProxy statistics read through socat)
and posts each of them as a metric datapoint through a boto CloudWatch
connection.
"""
import argparse
import logging
import os
import subprocess

# Override BOTO_CONFIG, which makes boto look only at the specified
# config file, instead of the default locations
os.environ['BOTO_CONFIG'] = '/var/lib/heat-cfntools/cfn-boto-cfg'
from boto.ec2 import cloudwatch  # noqa


log_format = '%(levelname)s [%(asctime)s] %(message)s'
log_file_name = "/var/log/cfn-push-stats.log"
logging.basicConfig(filename=log_file_name, format=log_format)
LOG = logging.getLogger('cfntools')

try:
    import psutil
except ImportError:
    # Keep the name bound so that later code can test for availability
    # instead of failing with a NameError.
    psutil = None
    LOG.warning("psutil not available. If you want process and memory "
                "statistics, you need to install it.")

from heat_cfntools.cfntools import cfn_helper  # noqa

KILO = 1024
MEGA = 1048576
GIGA = 1073741824
# Divisor applied to a byte count for each supported --memory-units /
# --disk-units value.
unit_map = {'bytes': 1,
            'kilobytes': KILO,
            'megabytes': MEGA,
            'gigabytes': GIGA}

description = " "
parser = argparse.ArgumentParser(description=description)
parser.add_argument('-v', '--verbose', action="store_true",
                    help="Verbose logging", required=False)
parser.add_argument('--credential-file', dest="credential_file",
                    help="credential-file", required=False,
                    default='/etc/cfn/cfn-credentials')
parser.add_argument('--service-failure', required=False, action="store_true",
                    help='Reports a service failure.')
parser.add_argument('--mem-util', required=False, action="store_true",
                    help='Reports memory utilization in percentages.')
parser.add_argument('--mem-used', required=False, action="store_true",
                    help='Reports memory used (excluding cache/buffers) '
                         'in megabytes.')
parser.add_argument('--mem-avail', required=False, action="store_true",
                    help='Reports available memory (including cache/buffers) '
                         'in megabytes.')
parser.add_argument('--swap-util', required=False, action="store_true",
                    help='Reports swap utilization in percentages.')
parser.add_argument('--swap-used', required=False, action="store_true",
                    help='Reports allocated swap space in megabytes.')
parser.add_argument('--disk-space-util', required=False, action="store_true",
                    help='Reports disk space utilization in percentages.')
parser.add_argument('--disk-space-used', required=False, action="store_true",
                    help='Reports allocated disk space in gigabytes.')
parser.add_argument('--disk-space-avail', required=False, action="store_true",
                    help='Reports available disk space in gigabytes.')
parser.add_argument('--memory-units', required=False, default='megabytes',
                    help='Specifies units for memory metrics.')
parser.add_argument('--disk-units', required=False, default='megabytes',
                    help='Specifies units for disk metrics.')
parser.add_argument('--disk-path', required=False, default='/',
                    help='Selects the disk by the path on which to report.')
parser.add_argument('--cpu-util', required=False, action="store_true",
                    help='Reports cpu utilization in percentages.')
parser.add_argument('--haproxy', required=False, action='store_true',
                    help='Reports HAProxy loadbalancer usage.')
parser.add_argument('--haproxy-latency', required=False, action='store_true',
                    help='Reports HAProxy latency')
parser.add_argument('--heartbeat', required=False, action='store_true',
                    help='Sends a Heartbeat.')
parser.add_argument('--watch', required=False,
                    help='the name of the watch to post to.')
parser.add_argument('--metric', required=False,
                    help='name of the metric to post to.')
parser.add_argument('--units', required=False,
                    help='name of the units to be used for the specified '
                         'metric')
parser.add_argument('--value', required=False,
                    help='value to post to the specified metric')
args = parser.parse_args()

# Logging
# =======
# The level must be raised before the first debug message, otherwise the
# invocation record below is filtered out even with --verbose.
if args.verbose:
    LOG.setLevel(logging.DEBUG)

LOG.debug('cfn-push-stats called %s ', args)

credentials = cfn_helper.parse_creds_file(args.credential_file)

namespace = 'system/linux'
data = {}


def _require_psutil():
    """Exit with status 1 (after logging) when psutil could not be imported.

    Called before every metric that is read from psutil so that a missing
    module produces a clear log entry rather than a NameError traceback.
    """
    if psutil is None:
        LOG.error("psutil is required for the requested statistics "
                  "but is not installed.")
        raise SystemExit(1)


def _memory_usage():
    """Return psutil's physical memory statistics.

    Prefers ``psutil.virtual_memory()`` and falls back to the legacy
    ``psutil.phymem_usage()`` name when the newer call is absent.
    """
    if hasattr(psutil, 'virtual_memory'):
        return psutil.virtual_memory()
    return psutil.phymem_usage()


def _swap_usage():
    """Return psutil's swap statistics.

    Prefers ``psutil.swap_memory()`` and falls back to the legacy
    ``psutil.virtmem_usage()`` name when the newer call is absent.
    """
    if hasattr(psutil, 'swap_memory'):
        return psutil.swap_memory()
    return psutil.virtmem_usage()


def _to_units(num_bytes, units):
    """Divide *num_bytes* by the ``unit_map`` divisor for *units*.

    An unknown unit name is reported through ``parser.error`` (usage
    message, exit status 2) instead of surfacing as a bare KeyError.
    """
    if units not in unit_map:
        parser.error('unsupported units %r (expected one of: %s)'
                     % (units, ', '.join(sorted(unit_map))))
    return num_bytes / unit_map[units]


# Generic user-specified metric
# =============================
if args.metric and args.units and args.value:
    data[args.metric] = {
        'Value': args.value,
        'Units': args.units}

# service failure
# ===============
if args.service_failure:
    data['ServiceFailure'] = {
        'Value': 1,
        'Units': 'Counter'}

# heartbeat
# ========
if args.heartbeat:
    data['Heartbeat'] = {
        'Value': 1,
        'Units': 'Counter'}

# memory space
# ============
if args.mem_util or args.mem_used or args.mem_avail:
    _require_psutil()
    mem = _memory_usage()
if args.mem_util:
    data['MemoryUtilization'] = {
        'Value': mem.percent,
        'Units': 'Percent'}
if args.mem_used:
    data['MemoryUsed'] = {
        'Value': _to_units(mem.used, args.memory_units),
        'Units': args.memory_units}
if args.mem_avail:
    data['MemoryAvailable'] = {
        'Value': _to_units(mem.free, args.memory_units),
        'Units': args.memory_units}

# swap space
# ==========
if args.swap_util or args.swap_used:
    _require_psutil()
    swap = _swap_usage()
if args.swap_util:
    data['SwapUtilization'] = {
        'Value': swap.percent,
        'Units': 'Percent'}
if args.swap_used:
    data['SwapUsed'] = {
        'Value': _to_units(swap.used, args.memory_units),
        'Units': args.memory_units}

# disk space
# ==========
if args.disk_space_util or args.disk_space_used or args.disk_space_avail:
    _require_psutil()
    disk = psutil.disk_usage(args.disk_path)
if args.disk_space_util:
    data['DiskSpaceUtilization'] = {
        'Value': disk.percent,
        'Units': 'Percent'}
if args.disk_space_used:
    data['DiskSpaceUsed'] = {
        'Value': _to_units(disk.used, args.disk_units),
        'Units': args.disk_units}
if args.disk_space_avail:
    data['DiskSpaceAvailable'] = {
        'Value': _to_units(disk.free, args.disk_units),
        'Units': args.disk_units}

# cpu utilization
# ===============
if args.cpu_util:
    _require_psutil()
    # blocks for 1 second.
    cpu_percent = psutil.cpu_percent(interval=1)
    data['CPUUtilization'] = {
        'Value': cpu_percent,
        'Units': 'Percent'}


# HAProxy
# =======
def parse_haproxy_unix_socket(res, latency_only=False):
    """Fill *res* with ELB-style metrics read from the HAProxy stats socket.

    Sends ``show stat`` to ``/tmp/.haproxy-stats`` through ``socat`` and
    parses the comma-separated reply.

    :param res: dict updated in place; each entry maps a metric name to
                ``{'Value': ..., 'Units': ...}``.
    :param latency_only: when True only the ``Latency`` metric is stored;
                         otherwise request/response counters and the
                         healthy/unhealthy host counts are stored as well.
    :raises OSError: if ``socat`` cannot be started.
    """
    # http://docs.amazonwebservices.com/ElasticLoadBalancing/latest
    # /DeveloperGuide/US_MonitoringLoadBalancerWithCW.html

    type_map = {'FRONTEND': '0', 'BACKEND': '1', 'SERVER': '2', 'SOCKET': '3'}
    # Column index of each field of interest within a CSV row.
    num_map = {'status': 17, 'svname': 1, 'check_duration': 38, 'type': 32,
               'req_tot': 48, 'hrsp_2xx': 40, 'hrsp_3xx': 41, 'hrsp_4xx': 42,
               'hrsp_5xx': 43}

    def add_stat(key, value, unit='Counter'):
        res[key] = {'Value': value,
                    'Units': unit}

    # A single socat process fed through communicate() replaces the former
    # "echo | socat" pair: the child is waited for and its pipes are closed.
    # universal_newlines=True makes the output text on Python 2 and 3, which
    # the '#' comparison and the split on ',' below rely on.
    socat = subprocess.Popen(['socat', 'stdio', '/tmp/.haproxy-stats'],
                             stdin=subprocess.PIPE,
                             stdout=subprocess.PIPE,
                             universal_newlines=True)
    output, _unused = socat.communicate('show stat\n')
    if socat.returncode:
        LOG.warning('socat exited with status %s while reading HAProxy '
                    'statistics', socat.returncode)

    # Drop the '#'-prefixed header line and blank/one-character lines.
    raw = [line.split(',') for line in output.split('\n')
           if len(line) > 1 and not line.startswith('#')]

    latency = 0
    up_count = 0
    down_count = 0
    for f in raw:
        if latency_only is False:
            if f[num_map['type']] == type_map['FRONTEND']:
                add_stat('RequestCount', f[num_map['req_tot']])
                add_stat('HTTPCode_ELB_4XX', f[num_map['hrsp_4xx']])
                add_stat('HTTPCode_ELB_5XX', f[num_map['hrsp_5xx']])
            elif f[num_map['type']] == type_map['BACKEND']:
                add_stat('HTTPCode_Backend_2XX', f[num_map['hrsp_2xx']])
                add_stat('HTTPCode_Backend_3XX', f[num_map['hrsp_3xx']])
                add_stat('HTTPCode_Backend_4XX', f[num_map['hrsp_4xx']])
                add_stat('HTTPCode_Backend_5XX', f[num_map['hrsp_5xx']])
            else:
                if f[num_map['status']] == 'UP':
                    up_count = up_count + 1
                else:
                    down_count = down_count + 1
        # Track the largest check duration seen across all rows.
        if f[num_map['check_duration']] != '':
            latency = max(float(f[num_map['check_duration']]), latency)

    # note: haproxy's check_duration is in ms, but Latency is in seconds
    add_stat('Latency', str(latency / 1000), unit='Seconds')
    if latency_only is False:
        add_stat('HealthyHostCount', str(up_count))
        add_stat('UnHealthyHostCount', str(down_count))


def send_stats(info):
    """Post every metric in *info* through a boto CloudWatch connection.

    :param info: dict mapping a metric name to a dict with ``'Value'`` and
                 ``'Units'`` keys.

    The datapoints are sent under the module-level ``namespace`` as it is
    set at call time, with the tags returned by ``cfn_helper.Metadata`` as
    dimensions plus ``AlarmName`` when ``--watch`` was given.
    """
    # Create boto connection, need the hard-coded port/path as boto
    # can't read these from config values in BOTO_CONFIG
    # FIXME : currently only http due to is_secure=False
    client = cloudwatch.CloudWatchConnection(
        aws_access_key_id=credentials['AWSAccessKeyId'],
        aws_secret_access_key=credentials['AWSSecretKey'],
        is_secure=False, port=8003, path="/v1", debug=0)

    # Then we send the metric datapoints passed in "info", note this could
    # contain multiple keys as the options parsed above are not exclusive
    # The alarm name is passed as a dimension so the metric datapoint can
    # be associated with the alarm/watch in the engine
    metadata = cfn_helper.Metadata('not-used', None)
    metric_dims = metadata.get_tags()
    if args.watch:
        metric_dims['AlarmName'] = args.watch
    for key in info:
        LOG.info("Sending metric %s, Units %s, Value %s",
                 key, info[key]['Units'], info[key]['Value'])
        client.put_metric_data(namespace=namespace,
                               name=key,
                               value=info[key]['Value'],
                               timestamp=None,  # means use "now" in the engine
                               unit=info[key]['Units'],
                               dimensions=metric_dims,
                               statistics=None)


# The HAProxy modes post only load-balancer statistics (under the AWS/ELB
# namespace); anything gathered into "data" above is sent only otherwise.
if args.haproxy:
    namespace = 'AWS/ELB'
    lb_data = {}
    parse_haproxy_unix_socket(lb_data)
    send_stats(lb_data)
elif args.haproxy_latency:
    namespace = 'AWS/ELB'
    lb_data = {}
    parse_haproxy_unix_socket(lb_data, latency_only=True)
    send_stats(lb_data)
else:
    send_stats(data)