diff --git a/.gitignore b/.gitignore
index 3c2e78d..ea3ade2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,3 +5,5 @@ local_settings.py
 .coverage
 worker.log
 worker.log.*
+etc/stacktach_config.sh
+etc/stacktach_worker_config.json
diff --git a/etc/pip-requires.txt b/etc/pip-requires.txt
index 3dcf02d..2676d61 100644
--- a/etc/pip-requires.txt
+++ b/etc/pip-requires.txt
@@ -1,3 +1,5 @@
 Django>=1.4.2
 MySQL-python>=1.2.3
 eventlet>=0.9.17
+kombu>=2.4.7
+librabbitmq>=1.0.0
diff --git a/migrations/001_base.sql b/migrations/001_base.sql
new file mode 100644
index 0000000..5f81c41
--- /dev/null
+++ b/migrations/001_base.sql
@@ -0,0 +1,61 @@
+BEGIN;
+CREATE TABLE `stacktach_deployment` (
+    `id` integer AUTO_INCREMENT NOT NULL PRIMARY KEY,
+    `name` varchar(50) NOT NULL
+)
+;
+CREATE TABLE `stacktach_rawdata` (
+    `id` integer AUTO_INCREMENT NOT NULL PRIMARY KEY,
+    `deployment_id` integer NOT NULL,
+    `tenant` varchar(50),
+    `json` longtext NOT NULL,
+    `routing_key` varchar(50),
+    `state` varchar(20),
+    `old_state` varchar(20),
+    `old_task` varchar(30),
+    `when` numeric(20, 6) NOT NULL,
+    `publisher` varchar(100),
+    `event` varchar(50),
+    `service` varchar(50),
+    `host` varchar(100),
+    `instance` varchar(50),
+    `request_id` varchar(50)
+)
+;
+ALTER TABLE `stacktach_rawdata` ADD CONSTRAINT `deployment_id_refs_id_362370d` FOREIGN KEY (`deployment_id`) REFERENCES `stacktach_deployment` (`id`);
+CREATE TABLE `stacktach_lifecycle` (
+    `id` integer AUTO_INCREMENT NOT NULL PRIMARY KEY,
+    `instance` varchar(50),
+    `last_state` varchar(50),
+    `last_task_state` varchar(50),
+    `last_raw_id` integer
+)
+;
+ALTER TABLE `stacktach_lifecycle` ADD CONSTRAINT `last_raw_id_refs_id_d5fb17d3` FOREIGN KEY (`last_raw_id`) REFERENCES `stacktach_rawdata` (`id`);
+CREATE TABLE `stacktach_timing` (
+    `id` integer AUTO_INCREMENT NOT NULL PRIMARY KEY,
+    `name` varchar(50) NOT NULL,
+    `lifecycle_id` integer NOT NULL,
+    `start_raw_id` integer,
+    `end_raw_id` integer,
+    `start_when` numeric(20, 6),
+    `end_when` numeric(20, 6),
+    `diff` numeric(20, 6)
+)
+;
+ALTER TABLE `stacktach_timing` ADD CONSTRAINT `lifecycle_id_refs_id_4255ead8` FOREIGN KEY (`lifecycle_id`) REFERENCES `stacktach_lifecycle` (`id`);
+ALTER TABLE `stacktach_timing` ADD CONSTRAINT `start_raw_id_refs_id_c32dfe04` FOREIGN KEY (`start_raw_id`) REFERENCES `stacktach_rawdata` (`id`);
+ALTER TABLE `stacktach_timing` ADD CONSTRAINT `end_raw_id_refs_id_c32dfe04` FOREIGN KEY (`end_raw_id`) REFERENCES `stacktach_rawdata` (`id`);
+CREATE TABLE `stacktach_requesttracker` (
+    `id` integer AUTO_INCREMENT NOT NULL PRIMARY KEY,
+    `request_id` varchar(50) NOT NULL,
+    `lifecycle_id` integer NOT NULL,
+    `last_timing_id` integer,
+    `start` numeric(20, 6) NOT NULL,
+    `duration` numeric(20, 6) NOT NULL,
+    `completed` bool NOT NULL
+)
+;
+ALTER TABLE `stacktach_requesttracker` ADD CONSTRAINT `lifecycle_id_refs_id_e457729` FOREIGN KEY (`lifecycle_id`) REFERENCES `stacktach_lifecycle` (`id`);
+ALTER TABLE `stacktach_requesttracker` ADD CONSTRAINT `last_timing_id_refs_id_f0827cca` FOREIGN KEY (`last_timing_id`) REFERENCES `stacktach_timing` (`id`);
+COMMIT;
diff --git a/migrations/002_delta.sql b/migrations/002_delta.sql
new file mode 100644
index 0000000..eb89393
--- /dev/null
+++ b/migrations/002_delta.sql
@@ -0,0 +1,2 @@
+ALTER TABLE stacktach_rawdata ADD task VARCHAR(30);
+CREATE INDEX `stacktach_rawdata_1c149b74` ON `stacktach_rawdata` (`task`);
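The migrations follow a two-file convention: each NNN_delta.sql carries only the statements that move an existing schema forward one step, while the matching full-schema file (001_base.sql above, 002_task.sql below) recreates every table from scratch for fresh installs. A minimal runner for the delta files might look like the sketch below; the applied_migrations bookkeeping table, the connection settings, and the helper names are illustrative assumptions, not part of this change.

    # Hypothetical runner: applies migrations/NNN_delta.sql files in order.
    import glob
    import os

    import MySQLdb  # provided by the MySQL-python requirement above


    def pending_migrations(cursor, migration_dir="migrations"):
        cursor.execute("CREATE TABLE IF NOT EXISTS applied_migrations "
                       "(name VARCHAR(100) PRIMARY KEY)")
        cursor.execute("SELECT name FROM applied_migrations")
        done = set(row[0] for row in cursor.fetchall())
        paths = sorted(glob.glob(os.path.join(migration_dir, "*_delta.sql")))
        return [p for p in paths if os.path.basename(p) not in done]


    def apply_migrations(db):
        cursor = db.cursor()
        for path in pending_migrations(cursor):
            with open(path) as f:
                # Naive split on ';' is fine for these simple statements.
                for statement in f.read().split(";"):
                    if statement.strip():
                        cursor.execute(statement)
            cursor.execute("INSERT INTO applied_migrations (name) VALUES (%s)",
                           (os.path.basename(path),))
            db.commit()


    if __name__ == "__main__":
        apply_migrations(MySQLdb.connect(db="stacktach"))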
diff --git a/migrations/002_task.sql b/migrations/002_task.sql
new file mode 100644
index 0000000..be9a43b
--- /dev/null
+++ b/migrations/002_task.sql
@@ -0,0 +1,62 @@
+BEGIN;
+CREATE TABLE `stacktach_deployment` (
+    `id` integer AUTO_INCREMENT NOT NULL PRIMARY KEY,
+    `name` varchar(50) NOT NULL
+)
+;
+CREATE TABLE `stacktach_rawdata` (
+    `id` integer AUTO_INCREMENT NOT NULL PRIMARY KEY,
+    `deployment_id` integer NOT NULL,
+    `tenant` varchar(50),
+    `json` longtext NOT NULL,
+    `routing_key` varchar(50),
+    `state` varchar(20),
+    `old_state` varchar(20),
+    `old_task` varchar(30),
+    `task` varchar(30),
+    `when` numeric(20, 6) NOT NULL,
+    `publisher` varchar(100),
+    `event` varchar(50),
+    `service` varchar(50),
+    `host` varchar(100),
+    `instance` varchar(50),
+    `request_id` varchar(50)
+)
+;
+ALTER TABLE `stacktach_rawdata` ADD CONSTRAINT `deployment_id_refs_id_362370d` FOREIGN KEY (`deployment_id`) REFERENCES `stacktach_deployment` (`id`);
+CREATE TABLE `stacktach_lifecycle` (
+    `id` integer AUTO_INCREMENT NOT NULL PRIMARY KEY,
+    `instance` varchar(50),
+    `last_state` varchar(50),
+    `last_task_state` varchar(50),
+    `last_raw_id` integer
+)
+;
+ALTER TABLE `stacktach_lifecycle` ADD CONSTRAINT `last_raw_id_refs_id_d5fb17d3` FOREIGN KEY (`last_raw_id`) REFERENCES `stacktach_rawdata` (`id`);
+CREATE TABLE `stacktach_timing` (
+    `id` integer AUTO_INCREMENT NOT NULL PRIMARY KEY,
+    `name` varchar(50) NOT NULL,
+    `lifecycle_id` integer NOT NULL,
+    `start_raw_id` integer,
+    `end_raw_id` integer,
+    `start_when` numeric(20, 6),
+    `end_when` numeric(20, 6),
+    `diff` numeric(20, 6)
+)
+;
+ALTER TABLE `stacktach_timing` ADD CONSTRAINT `lifecycle_id_refs_id_4255ead8` FOREIGN KEY (`lifecycle_id`) REFERENCES `stacktach_lifecycle` (`id`);
+ALTER TABLE `stacktach_timing` ADD CONSTRAINT `start_raw_id_refs_id_c32dfe04` FOREIGN KEY (`start_raw_id`) REFERENCES `stacktach_rawdata` (`id`);
+ALTER TABLE `stacktach_timing` ADD CONSTRAINT `end_raw_id_refs_id_c32dfe04` FOREIGN KEY (`end_raw_id`) REFERENCES `stacktach_rawdata` (`id`);
+CREATE TABLE `stacktach_requesttracker` (
+    `id` integer AUTO_INCREMENT NOT NULL PRIMARY KEY,
+    `request_id` varchar(50) NOT NULL,
+    `lifecycle_id` integer NOT NULL,
+    `last_timing_id` integer,
+    `start` numeric(20, 6) NOT NULL,
+    `duration` numeric(20, 6) NOT NULL,
+    `completed` bool NOT NULL
+)
+;
+ALTER TABLE `stacktach_requesttracker` ADD CONSTRAINT `lifecycle_id_refs_id_e457729` FOREIGN KEY (`lifecycle_id`) REFERENCES `stacktach_lifecycle` (`id`);
+ALTER TABLE `stacktach_requesttracker` ADD CONSTRAINT `last_timing_id_refs_id_f0827cca` FOREIGN KEY (`last_timing_id`) REFERENCES `stacktach_timing` (`id`);
+COMMIT;
diff --git a/migrations/002_task_indexes.sql b/migrations/002_task_indexes.sql
new file mode 100644
index 0000000..69a7c0f
--- /dev/null
+++ b/migrations/002_task_indexes.sql
@@ -0,0 +1,31 @@
+BEGIN;
+CREATE INDEX `stacktach_rawdata_4ac6801` ON `stacktach_rawdata` (`deployment_id`);
+CREATE INDEX `stacktach_rawdata_2207f86d` ON `stacktach_rawdata` (`tenant`);
+CREATE INDEX `stacktach_rawdata_2192f43a` ON `stacktach_rawdata` (`routing_key`);
+CREATE INDEX `stacktach_rawdata_355bfc27` ON `stacktach_rawdata` (`state`);
+CREATE INDEX `stacktach_rawdata_b716e0bb` ON `stacktach_rawdata` (`old_state`);
+CREATE INDEX `stacktach_rawdata_8182be12` ON `stacktach_rawdata` (`old_task`);
+CREATE INDEX `stacktach_rawdata_1c149b74` ON `stacktach_rawdata` (`task`);
+CREATE INDEX `stacktach_rawdata_feaed089` ON `stacktach_rawdata` (`when`);
+CREATE INDEX `stacktach_rawdata_878a2906` ON `stacktach_rawdata` (`publisher`);
+CREATE INDEX `stacktach_rawdata_a90f9116` ON `stacktach_rawdata` (`event`);
+CREATE INDEX `stacktach_rawdata_52c5ef6b` ON `stacktach_rawdata` (`service`);
+CREATE INDEX `stacktach_rawdata_38dbea87` ON `stacktach_rawdata` (`host`);
+CREATE INDEX `stacktach_rawdata_888b756a` ON `stacktach_rawdata` (`instance`);
+CREATE INDEX `stacktach_rawdata_792812e8` ON `stacktach_rawdata` (`request_id`);
+CREATE INDEX `stacktach_lifecycle_888b756a` ON `stacktach_lifecycle` (`instance`);
+CREATE INDEX `stacktach_lifecycle_9b2555fd` ON `stacktach_lifecycle` (`last_state`);
+CREATE INDEX `stacktach_lifecycle_67421a0e` ON `stacktach_lifecycle` (`last_task_state`);
+CREATE INDEX `stacktach_lifecycle_dcf9e5f3` ON `stacktach_lifecycle` (`last_raw_id`);
+CREATE INDEX `stacktach_timing_52094d6e` ON `stacktach_timing` (`name`);
+CREATE INDEX `stacktach_timing_9f222e6b` ON `stacktach_timing` (`lifecycle_id`);
+CREATE INDEX `stacktach_timing_efab905a` ON `stacktach_timing` (`start_raw_id`);
+CREATE INDEX `stacktach_timing_c8bb8daf` ON `stacktach_timing` (`end_raw_id`);
+CREATE INDEX `stacktach_timing_4401d15e` ON `stacktach_timing` (`diff`);
+CREATE INDEX `stacktach_requesttracker_792812e8` ON `stacktach_requesttracker` (`request_id`);
+CREATE INDEX `stacktach_requesttracker_9f222e6b` ON `stacktach_requesttracker` (`lifecycle_id`);
+CREATE INDEX `stacktach_requesttracker_ce616a96` ON `stacktach_requesttracker` (`last_timing_id`);
+CREATE INDEX `stacktach_requesttracker_29f4f2ea` ON `stacktach_requesttracker` (`start`);
+CREATE INDEX `stacktach_requesttracker_8eb45f9b` ON `stacktach_requesttracker` (`duration`);
+CREATE INDEX `stacktach_requesttracker_e490d511` ON `stacktach_requesttracker` (`completed`);
+COMMIT;
diff --git a/migrations/003_delta.sql b/migrations/003_delta.sql
new file mode 100644
index 0000000..8c949bf
--- /dev/null
+++ b/migrations/003_delta.sql
@@ -0,0 +1,2 @@
+ALTER TABLE stacktach_rawdata ADD image_type integer;
+CREATE INDEX `stacktach_rawdata_cfde77eb` ON `stacktach_rawdata` (`image_type`);
diff --git a/migrations/003_image_type.sql b/migrations/003_image_type.sql
new file mode 100644
index 0000000..133d383
--- /dev/null
+++ b/migrations/003_image_type.sql
@@ -0,0 +1,63 @@
+BEGIN;
+CREATE TABLE `stacktach_deployment` (
+    `id` integer AUTO_INCREMENT NOT NULL PRIMARY KEY,
+    `name` varchar(50) NOT NULL
+)
+;
+CREATE TABLE `stacktach_rawdata` (
+    `id` integer AUTO_INCREMENT NOT NULL PRIMARY KEY,
+    `deployment_id` integer NOT NULL,
+    `tenant` varchar(50),
+    `json` longtext NOT NULL,
+    `routing_key` varchar(50),
+    `state` varchar(20),
+    `old_state` varchar(20),
+    `old_task` varchar(30),
+    `task` varchar(30),
+    `image_type` integer,
+    `when` numeric(20, 6) NOT NULL,
+    `publisher` varchar(100),
+    `event` varchar(50),
+    `service` varchar(50),
+    `host` varchar(100),
+    `instance` varchar(50),
+    `request_id` varchar(50)
+)
+;
+ALTER TABLE `stacktach_rawdata` ADD CONSTRAINT `deployment_id_refs_id_362370d` FOREIGN KEY (`deployment_id`) REFERENCES `stacktach_deployment` (`id`);
+CREATE TABLE `stacktach_lifecycle` (
+    `id` integer AUTO_INCREMENT NOT NULL PRIMARY KEY,
+    `instance` varchar(50),
+    `last_state` varchar(50),
+    `last_task_state` varchar(50),
+    `last_raw_id` integer
+)
+;
+ALTER TABLE `stacktach_lifecycle` ADD CONSTRAINT `last_raw_id_refs_id_d5fb17d3` FOREIGN KEY (`last_raw_id`) REFERENCES `stacktach_rawdata` (`id`);
+CREATE TABLE `stacktach_timing` (
+    `id` integer AUTO_INCREMENT NOT NULL PRIMARY KEY,
+    `name` varchar(50) NOT NULL,
+    `lifecycle_id` integer NOT NULL,
+    `start_raw_id` integer,
+    `end_raw_id` integer,
+    `start_when` numeric(20, 6),
+    `end_when` numeric(20, 6),
+    `diff` numeric(20, 6)
+)
+;
+ALTER TABLE `stacktach_timing` ADD CONSTRAINT `lifecycle_id_refs_id_4255ead8` FOREIGN KEY (`lifecycle_id`) REFERENCES `stacktach_lifecycle` (`id`);
+ALTER TABLE `stacktach_timing` ADD CONSTRAINT `start_raw_id_refs_id_c32dfe04` FOREIGN KEY (`start_raw_id`) REFERENCES `stacktach_rawdata` (`id`);
+ALTER TABLE `stacktach_timing` ADD CONSTRAINT `end_raw_id_refs_id_c32dfe04` FOREIGN KEY (`end_raw_id`) REFERENCES `stacktach_rawdata` (`id`);
+CREATE TABLE `stacktach_requesttracker` (
+    `id` integer AUTO_INCREMENT NOT NULL PRIMARY KEY,
+    `request_id` varchar(50) NOT NULL,
+    `lifecycle_id` integer NOT NULL,
+    `last_timing_id` integer,
+    `start` numeric(20, 6) NOT NULL,
+    `duration` numeric(20, 6) NOT NULL,
+    `completed` bool NOT NULL
+)
+;
+ALTER TABLE `stacktach_requesttracker` ADD CONSTRAINT `lifecycle_id_refs_id_e457729` FOREIGN KEY (`lifecycle_id`) REFERENCES `stacktach_lifecycle` (`id`);
+ALTER TABLE `stacktach_requesttracker` ADD CONSTRAINT `last_timing_id_refs_id_f0827cca` FOREIGN KEY (`last_timing_id`) REFERENCES `stacktach_timing` (`id`);
+COMMIT;
diff --git a/migrations/003_image_type_indexes.sql b/migrations/003_image_type_indexes.sql
new file mode 100644
index 0000000..ada6b63
--- /dev/null
+++ b/migrations/003_image_type_indexes.sql
@@ -0,0 +1,32 @@
+BEGIN;
+CREATE INDEX `stacktach_rawdata_4ac6801` ON `stacktach_rawdata` (`deployment_id`);
+CREATE INDEX `stacktach_rawdata_2207f86d` ON `stacktach_rawdata` (`tenant`);
+CREATE INDEX `stacktach_rawdata_2192f43a` ON `stacktach_rawdata` (`routing_key`);
+CREATE INDEX `stacktach_rawdata_355bfc27` ON `stacktach_rawdata` (`state`);
+CREATE INDEX `stacktach_rawdata_b716e0bb` ON `stacktach_rawdata` (`old_state`);
+CREATE INDEX `stacktach_rawdata_8182be12` ON `stacktach_rawdata` (`old_task`);
+CREATE INDEX `stacktach_rawdata_1c149b74` ON `stacktach_rawdata` (`task`);
+CREATE INDEX `stacktach_rawdata_cfde77eb` ON `stacktach_rawdata` (`image_type`);
+CREATE INDEX `stacktach_rawdata_feaed089` ON `stacktach_rawdata` (`when`);
+CREATE INDEX `stacktach_rawdata_878a2906` ON `stacktach_rawdata` (`publisher`);
+CREATE INDEX `stacktach_rawdata_a90f9116` ON `stacktach_rawdata` (`event`);
+CREATE INDEX `stacktach_rawdata_52c5ef6b` ON `stacktach_rawdata` (`service`);
+CREATE INDEX `stacktach_rawdata_38dbea87` ON `stacktach_rawdata` (`host`);
+CREATE INDEX `stacktach_rawdata_888b756a` ON `stacktach_rawdata` (`instance`);
+CREATE INDEX `stacktach_rawdata_792812e8` ON `stacktach_rawdata` (`request_id`);
+CREATE INDEX `stacktach_lifecycle_888b756a` ON `stacktach_lifecycle` (`instance`);
+CREATE INDEX `stacktach_lifecycle_9b2555fd` ON `stacktach_lifecycle` (`last_state`);
+CREATE INDEX `stacktach_lifecycle_67421a0e` ON `stacktach_lifecycle` (`last_task_state`);
+CREATE INDEX `stacktach_lifecycle_dcf9e5f3` ON `stacktach_lifecycle` (`last_raw_id`);
+CREATE INDEX `stacktach_timing_52094d6e` ON `stacktach_timing` (`name`);
+CREATE INDEX `stacktach_timing_9f222e6b` ON `stacktach_timing` (`lifecycle_id`);
+CREATE INDEX `stacktach_timing_efab905a` ON `stacktach_timing` (`start_raw_id`);
+CREATE INDEX `stacktach_timing_c8bb8daf` ON `stacktach_timing` (`end_raw_id`);
+CREATE INDEX `stacktach_timing_4401d15e` ON `stacktach_timing` (`diff`);
+CREATE INDEX `stacktach_requesttracker_792812e8` ON `stacktach_requesttracker` (`request_id`);
+CREATE INDEX `stacktach_requesttracker_9f222e6b` ON `stacktach_requesttracker` (`lifecycle_id`);
+CREATE INDEX `stacktach_requesttracker_ce616a96` ON `stacktach_requesttracker` (`last_timing_id`);
+CREATE INDEX `stacktach_requesttracker_29f4f2ea` ON `stacktach_requesttracker` (`start`);
+CREATE INDEX `stacktach_requesttracker_8eb45f9b` ON `stacktach_requesttracker` (`duration`);
+CREATE INDEX `stacktach_requesttracker_e490d511` ON `stacktach_requesttracker` (`completed`);
+COMMIT;
diff --git a/migrations/003_populate_task_and_image.py b/migrations/003_populate_task_and_image.py
new file mode 100644
index 0000000..dd09d79
--- /dev/null
+++ b/migrations/003_populate_task_and_image.py
@@ -0,0 +1,63 @@
+import datetime
+import json
+import sys
+
+sys.path.append("/stacktach")
+
+from stacktach import datetime_to_decimal as dt
+from stacktach import image_type
+from stacktach import models
+
+
+if __name__ != '__main__':
+    sys.exit(1)
+
+states = {}
+
+
+def fix_chunk(hours, length):
+    now = datetime.datetime.utcnow()
+    start = now - datetime.timedelta(hours=hours+length)
+    end = now - datetime.timedelta(hours=hours)
+    dstart = dt.dt_to_decimal(start)
+    dend = dt.dt_to_decimal(end)
+
+    done = 0
+    updated = 0
+    block = 0
+    print "Hours ago (%d to %d) %d - %d" % (hours + length, hours, dstart, dend)
+    updates = models.RawData.objects.filter(event='compute.instance.update',
+                                            when__gt=dstart, when__lte=dend)\
+                                    .only('task', 'image_type', 'json')
+
+    for raw in updates:
+        queue, body = json.loads(raw.json)
+        payload = body.get('payload', {})
+        task = payload.get('new_task_state', None)
+
+        if task is not None and task != 'None':
+            states[task] = states.get(task, 0) + 1
+            raw.task = task
+
+        raw.image_type = image_type.get_numeric_code(payload, raw.image_type)
+        updated += 1
+        raw.save()
+
+        done += 1
+        if done >= 10000:
+            block += 1
+            done = 0
+            print "# 10k blocks processed: %d (events %d)" % \
+                  (block, updated)
+            updated = 0
+
+    for kv in states.iteritems():
+        print "%s = %d" % kv
+
+for day in xrange(0, 90):
+    hours = day * 24
+    steps = 12
+    chunk = 24 / steps
+    for x in xrange(steps):
+        fix_chunk(hours, chunk)
+        hours += chunk
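The backfill script walks 90 days of RawData in two-hour windows (12 chunks per day), so no single queryset has to hold more than a couple of hours of events; .only('task', 'image_type', 'json') further trims each row to the three columns being rewritten. The window arithmetic in isolation, a sketch of the loop at the bottom of the script:

    # Sketch of the same 2-hour windowing, separated out for clarity.
    def windows(days=90, steps_per_day=12):
        chunk = 24 / steps_per_day  # hours per window
        for day in xrange(days):
            hours = day * 24
            for _ in xrange(steps_per_day):
                yield hours, chunk  # maps to one fix_chunk(hours, chunk) call
                hours += chunk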
diff --git a/reports/pretty.py b/reports/pretty.py
new file mode 100644
index 0000000..d177867
--- /dev/null
+++ b/reports/pretty.py
@@ -0,0 +1,166 @@
+import datetime
+import json
+import sys
+import time
+
+import prettytable
+
+sys.path.append("/stacktach")
+
+from stacktach import datetime_to_decimal as dt
+from stacktach import image_type
+from stacktach import models
+
+
+if __name__ != '__main__':
+    sys.exit(1)
+
+yesterday = datetime.datetime.utcnow().date() - datetime.timedelta(days=1)
+if len(sys.argv) == 2:
+    try:
+        t = time.strptime(sys.argv[1], "%Y-%m-%d")
+        yesterday = datetime.datetime(*t[:6])
+    except Exception, e:
+        print e
+        print "Usage: python pretty.py YYYY-MM-DD (the end date)"
+        sys.exit(1)
+
+percentile = 90
+hours = 24
+
+start = datetime.datetime(year=yesterday.year, month=yesterday.month,
+                          day=yesterday.day)
+end = start + datetime.timedelta(hours=hours-1, minutes=59, seconds=59)
+
+print "Generating report for %s to %s" % (start, end)
+
+dstart = dt.dt_to_decimal(start)
+dend = dt.dt_to_decimal(end)
+
+codes = {}
+
+# Get all the instances that have changed in the last N hours ...
+updates = models.RawData.objects.filter(event='compute.instance.update',
+                                        when__gt=dstart, when__lte=dend)\
+                                .values('instance').distinct()
+
+expiry = 60 * 60  # 1 hour
+cmds = ['create', 'rebuild', 'rescue', 'resize', 'snapshot']
+
+failures = {}
+durations = {}
+attempts = {}
+
+for uuid_dict in updates:
+    uuid = uuid_dict['instance']
+
+    # All the unique Request ID's for this instance during that timespan.
+    reqs = models.RawData.objects.filter(instance=uuid,
+                                         when__gt=dstart, when__lte=dend) \
+                                 .values('request_id').distinct()
+
+
+    for req_dict in reqs:
+        report = False
+        req = req_dict['request_id']
+        raws = models.RawData.objects.filter(request_id=req)\
+                                     .exclude(event='compute.instance.exists')\
+                                     .order_by('when')
+
+        start = None
+        err = None
+
+        operation = "aux"
+        image_type_num = 0
+
+        for raw in raws:
+            if not start:
+                start = raw.when
+            if 'error' in raw.routing_key:
+                err = raw
+                report = True
+
+            for cmd in cmds:
+                if cmd in raw.event:
+                    operation = cmd
+                    break
+
+            if raw.image_type:
+                image_type_num |= raw.image_type
+
+        image = "?"
+        if image_type.isset(image_type_num, image_type.BASE_IMAGE):
+            image = "base"
+        if image_type.isset(image_type_num, image_type.SNAPSHOT_IMAGE):
+            image = "snap"
+
+        if not start:
+            continue
+
+        end = raw.when
+        diff = end - start
+
+        if diff > 3600:
+            report = True
+
+        key = (operation, image)
+
+        # Track durations for all attempts, good and bad ...
+        _durations = durations.get(key, [])
+        _durations.append(diff)
+        durations[key] = _durations
+
+        attempts[key] = attempts.get(key, 0) + 1
+
+        if report:
+            failures[key] = failures.get(key, 0) + 1
+
+# Print the results ...
+cols = ["Operation", "Image", "Min*", "Max*", "Avg*",
+        "Requests", "# Fail", "Fail %"]
+p = prettytable.PrettyTable(cols)
+for c in cols[2:]:
+    p.align[c] = 'r'
+p.sortby = cols[0]
+
+pct = (float(100 - percentile) / 2.0) / 100.0
+print "* Using %d-th percentile for results (+/-%.1f%% cut)" % \
+      (percentile, pct * 100.0)
+total = 0
+failure_total = 0
+for key, count in attempts.iteritems():
+    total += count
+    operation, image = key
+
+    failure_count = failures.get(key, 0)
+    failure_total += failure_count
+    failure_percentage = float(failure_count) / float(count)
+    _failure_percentage = "%.1f%%" % (failure_percentage * 100.0)
+
+    # N-th % of durations ...
+    _values = durations[key]
+    _values.sort()
+    _outliers = int(float(len(_values)) * pct)
+    if _outliers > 0:
+        before = len(_values)
+        _values = _values[_outliers:-_outliers]
+        print "culling %d -> %d" % (before, len(_values))
+    _min = 99999999
+    _max = 0
+    _total = 0.0
+    for value in _values:
+        _min = min(_min, value)
+        _max = max(_max, value)
+        _total += float(value)
+    _avg = float(_total) / float(len(_values))
+    _fmin = dt.sec_to_str(_min)
+    _fmax = dt.sec_to_str(_max)
+    _favg = dt.sec_to_str(_avg)
+
+    p.add_row([operation, image, _fmin, _fmax, _favg, count,
+               failure_count, _failure_percentage])
+print p
+
+print "Total: %d, Failures: %d, Failure Rate: %.1f%%" % \
+      (total, failure_total,
+       (float(failure_total)/float(total)) * 100.0)
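pretty.py trims the top and bottom (100 - percentile) / 2 percent of each sorted duration list before taking min/max/avg, so one stuck request cannot dominate the report. The same trim as a standalone helper (the function name is illustrative, not from the codebase):

    # Min/max/avg after culling the outer (100-percentile)/2% of each tail.
    def trimmed_stats(values, percentile=90):
        values = sorted(values)
        cut = int(len(values) * ((100 - percentile) / 2.0) / 100.0)
        if cut > 0:
            values = values[cut:-cut]
        avg = sum(values) / float(len(values))
        return min(values), max(values), avg

    # trimmed_stats([1, 2, 3, 4, 100], percentile=60) -> (2, 4, 3.0);
    # the 100-second outlier is culled before averaging.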
diff --git a/reports/requests.py b/reports/requests.py
new file mode 100644
index 0000000..942fed8
--- /dev/null
+++ b/reports/requests.py
@@ -0,0 +1,258 @@
+import datetime
+import json
+import sys
+import time
+
+import prettytable
+
+sys.path.append("/stacktach")
+
+from stacktach import datetime_to_decimal as dt
+from stacktach import image_type
+from stacktach import models
+
+
+if __name__ != '__main__':
+    sys.exit(1)
+
+yesterday = datetime.datetime.utcnow().date() - datetime.timedelta(days=1)
+if len(sys.argv) == 2:
+    try:
+        t = time.strptime(sys.argv[1], "%Y-%m-%d")
+        yesterday = datetime.datetime(*t[:6])
+    except Exception, e:
+        print e
+        print "Usage: python requests.py YYYY-MM-DD (the end date)"
+        sys.exit(1)
+
+hours = 0
+length = 6
+
+start = datetime.datetime(year=yesterday.year, month=yesterday.month,
+                          day=yesterday.day)
+end = start + datetime.timedelta(hours=length-1, minutes=59, seconds=59)
+
+print "Generating report for %s to %s" % (start, end)
+
+dstart = dt.dt_to_decimal(start)
+dend = dt.dt_to_decimal(end)
+
+codes = {}
+
+# Get all the instances that have changed in the last N hours ...
+updates = models.RawData.objects.filter(event='compute.instance.update',
+                                        when__gt=dstart, when__lte=dend)\
+                                .values('instance').distinct()
+
+expiry = 60 * 60  # 1 hour
+cmds = ['create', 'rebuild', 'rescue', 'resize', 'snapshot']
+
+failures = {}
+causes = {}
+durations = {}
+error_messages = {}
+successes = {}
+tenant_issues = {}
+
+for uuid_dict in updates:
+    uuid = uuid_dict['instance']
+
+    # All the unique Request ID's for this instance during that timespan.
+    reqs = models.RawData.objects.filter(instance=uuid,
+                                         when__gt=dstart, when__lte=dend) \
+                                 .values('request_id').distinct()
+
+
+    for req_dict in reqs:
+        report = False
+        req = req_dict['request_id']
+        raws = models.RawData.objects.filter(request_id=req)\
+                                     .exclude(event='compute.instance.exists')\
+                                     .order_by('when')
+
+        start = None
+        err = None
+
+        operation = "aux"
+        platform = 0
+        tenant = 0
+        cell = "unk"
+
+        for raw in raws:
+            if not start:
+                start = raw.when
+            if 'error' in raw.routing_key:
+                err = raw
+                report = True
+
+            if raw.tenant:
+                if tenant > 0 and raw.tenant != tenant:
+                    print "Conflicting tenant ID", raw.tenant, tenant
+                tenant = raw.tenant
+
+            for cmd in cmds:
+                if cmd in raw.event:
+                    operation = cmd
+                    cell = raw.deployment.name
+                    break
+
+            if raw.image_type > 0:
+                platform = raw.image_type
+
+        if not start:
+            continue
+
+        end = raw.when
+        diff = end - start
+
+        if diff > 3600:
+            report = True
+
+        key = (operation, platform, cell)
+
+        # Track durations for all attempts, good and bad ...
+        duration_min, duration_max, duration_count, duration_total = \
+            durations.get(key, (9999999, 0, 0, 0))
+        duration_min = min(duration_min, diff)
+        duration_max = max(duration_max, diff)
+        duration_count += 1
+        duration_total += diff
+        durations[key] = (duration_min, duration_max, duration_count,
+                          duration_total)
+
+        if not report:
+            successes[key] = successes.get(key, 0) + 1
+        else:
+            print "------", uuid, "----------"
+            print "    Req:", req
+            print "    Duration: %.2f minutes" % (diff / 60)
+            print "    Operation:", operation
+            print "    Platform:", image_type.readable(platform)
+            cause = "> %d min" % (expiry / 60)
+            failures[key] = failures.get(key, 0) + 1
+            tenant_issues[tenant] = tenant_issues.get(tenant, 0) + 1
+
+            if err:
+                queue, body = json.loads(err.json)
+                payload = body['payload']
+                print "Error. EventID: %s, Tenant %s, Service %s, Host %s, "\
+                      "Deployment %s, Event %s, When %s"\
+                    % (err.id, err.tenant, err.service, err.host,
+                       err.deployment.name,
+                       err.event, dt.dt_from_decimal(err.when))
+                exc = payload.get('exception')
+                if exc:
+                    # group the messages ...
+                    exc_str = str(exc)
+                    print exc_str
+                    error_messages[exc_str] = \
+                        error_messages.get(exc_str, 0) + 1
+
+                    # extract the code, if any ...
+                    code = exc.get('kwargs', {}).get('code')
+                    if code:
+                        codes[code] = codes.get(code, 0) + 1
+                        cause = code
+            cause_key = (key, cause)
+            causes[cause_key] = causes.get(cause_key, 0) + 1
+
+
+def dump_breakdown(totals, label):
+    p = prettytable.PrettyTable(["Category", "Count"])
+    for k, v in totals.iteritems():
+        p.add_row([k, v])
+    print label
+    p.sortby = 'Count'
+    print p
+
+
+def dump_summary(info, label):
+    print "-- %s by operation by cell by platform --" % (label,)
+    p = prettytable.PrettyTable(["Operation", "Cell", "Platform", "Count",
+                                 "Min", "Max", "Avg"])
+    for c in ["Count", "Min", "Max", "Avg"]:
+        p.align[c] = 'r'
+
+    total = 0
+    op_totals = {}
+    cell_totals = {}
+    platform_totals = {}
+    for key, count in info.iteritems():
+        operation, platform, cell = key
+        readable = image_type.readable(platform)
+        text = "n/a"
+        if readable:
+            text = ", ".join(readable)
+
+        _min, _max, _count, _total = durations[key]
+        _avg = float(_total) / float(_count)
+        _fmin = dt.sec_to_str(_min)
+        _fmax = dt.sec_to_str(_max)
+        _favg = dt.sec_to_str(_avg)
+
+        op_totals[operation] = op_totals.get(operation, 0) + count
+        cell_totals[cell] = cell_totals.get(cell, 0) + count
+        platform_totals[text] = platform_totals.get(text, 0) + count
+
+        p.add_row([operation, cell, text, count, _fmin, _fmax, _favg])
+        total += count
+    p.sortby = 'Count'
+    print p
+
+    dump_breakdown(op_totals, "Total %s by Operation" % label)
+    dump_breakdown(cell_totals, "Total %s by Cell" % label)
+    dump_breakdown(platform_totals, "Total %s by Platform" % label)
+
+    print
+
+    return total
+
+
+print
+print "SUMMARY"
+print
+good = dump_summary(successes, "Success")
+bad = dump_summary(failures, "Failures")
+print "====================================================="
+print "Total Success: %d Total Failure: %d" % (good, bad)
+print
+
+print "-- Errors by Tenant --"
+p = prettytable.PrettyTable(["Tenant", "Count"])
+for tenant, count in tenant_issues.iteritems():
+    p.add_row([tenant, count])
+p.sortby = 'Count'
+print p
+
+print
+print "-- Return code counts --"
+p = prettytable.PrettyTable(["Return Code", "Count"])
+for k, v in codes.iteritems():
+    p.add_row([k, v])
+p.sortby = 'Count'
+print p
+
+print
+print "-- Cause breakdown --"
+p = prettytable.PrettyTable(["Cause", "Operation", "Cell", "Platform", "Count"])
+for cause_key, count in causes.iteritems():
+    key, cause = cause_key
+    operation, platform, cell = key
+    readable = image_type.readable(platform)
+    text = "n/a"
+    if readable:
+        text = ", ".join(readable)
+    p.add_row([cause, operation, cell, text, count])
+p.sortby = 'Count'
+print p
+
+print
+print "-- Error Message Counts --"
+p = prettytable.PrettyTable(["Count", "Message"])
+for k, v in error_messages.iteritems():
+    p.add_row([v, k[:80]])
+p.sortby = 'Count'
+print p
+
diff --git a/stacktach/datetime_to_decimal.py b/stacktach/datetime_to_decimal.py
index 225863e..8053fad 100644
--- a/stacktach/datetime_to_decimal.py
+++ b/stacktach/datetime_to_decimal.py
@@ -12,8 +12,23 @@ def dt_to_decimal(utc):
 
 
 def dt_from_decimal(dec):
+    if dec is None:
+        return "n/a"
     integer = int(dec)
     micro = (dec - decimal.Decimal(integer)) * decimal.Decimal(1000000)
     daittyme = datetime.datetime.utcfromtimestamp(integer)
     return daittyme.replace(microsecond=micro)
+
+
+def sec_to_str(sec):
+    sec = int(sec)
+    if sec < 60:
+        return "%ds" % sec
+    minutes = sec / 60
+    sec = sec % 60
+    if minutes < 60:
+        return "%d:%02ds" % (minutes, sec)
+    hours = minutes / 60
+    minutes = minutes % 60
+    return "%02d:%02d:%02d" % (hours, minutes, sec)
diff --git a/stacktach/db.py b/stacktach/db.py
index 23db4fb..a55b4df 100644
--- a/stacktach/db.py
+++ b/stacktach/db.py
@@ -1,37 +1,71 @@
 import models
 
+
 def get_or_create_deployment(name):
     return models.Deployment.objects.get_or_create(name=name)
 
+
 def create_rawdata(**kwargs):
     return models.RawData(**kwargs)
 
+
 def create_lifecycle(**kwargs):
     return models.Lifecycle(**kwargs)
 
+
 def find_lifecycles(**kwargs):
     return models.Lifecycle.objects.select_related().filter(**kwargs)
 
+
 def create_timing(**kwargs):
     return models.Timing(**kwargs)
 
+
 def find_timings(**kwargs):
     return models.Timing.objects.select_related().filter(**kwargs)
 
+
 def create_request_tracker(**kwargs):
     return models.RequestTracker(**kwargs)
 
+
 def find_request_trackers(**kwargs):
     return models.RequestTracker.objects.filter(**kwargs)
 
+
 def create_instance_usage(**kwargs):
     return models.InstanceUsage(**kwargs)
 
+
+def get_or_create_instance_usage(**kwargs):
+    return models.InstanceUsage.objects.get_or_create(**kwargs)
+
+
 def get_instance_usage(**kwargs):
-    return models.InstanceUsage.objects.get(**kwargs)
+    usage = None
+    try:
+        usage = models.InstanceUsage.objects.get(**kwargs)
+    except models.InstanceUsage.DoesNotExist:
+        pass
+    return usage
+
+
+def create_instance_delete(**kwargs):
+    return models.InstanceDeletes(**kwargs)
+
+
+def get_instance_delete(**kwargs):
+    delete = None
+    try:
+        delete = models.InstanceDeletes.objects.get(**kwargs)
+    except models.InstanceDeletes.DoesNotExist:
+        pass
+    return delete
+
 
 def create_instance_exists(**kwargs):
     return models.InstanceExists(**kwargs)
 
+
 def save(obj):
     obj.save()
\ No newline at end of file
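db.py's new get_instance_usage and get_instance_delete wrap Django's .get() so a missing row comes back as None instead of raising DoesNotExist, which lets callers treat "no usage recorded yet" as a normal case. The same idiom written generically (the helper name is illustrative, not from this change):

    def get_or_none(model, **kwargs):
        try:
            return model.objects.get(**kwargs)
        except model.DoesNotExist:
            return None

    # e.g.: usage = get_or_none(models.InstanceUsage, instance=instance_id)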
diff --git a/stacktach/dbapi.py b/stacktach/dbapi.py
new file mode 100644
index 0000000..6f01a1a
--- /dev/null
+++ b/stacktach/dbapi.py
@@ -0,0 +1,65 @@
+import decimal
+import json
+
+from django.forms.models import model_to_dict
+from django.http import HttpResponse
+
+import datetime_to_decimal
+import models
+
+
+def rsp(data):
+    return HttpResponse(json.dumps(data), content_type="application/json")
+
+
+def list_usage_launches(request):
+    filter_args = {}
+    if 'instance' in request.GET:
+        filter_args['instance'] = request.GET['instance']
+
+    if len(filter_args) > 0:
+        objects = models.InstanceUsage.objects.filter(**filter_args)
+    else:
+        objects = models.InstanceUsage.objects.all()
+
+    dicts = _convert_model_list(objects.order_by("launched_at"))
+    return rsp({'launches': dicts})
+
+
+def list_usage_deletes(request):
+    filter_args = {}
+    if 'instance' in request.GET:
+        filter_args['instance'] = request.GET['instance']
+
+    if len(filter_args) > 0:
+        objects = models.InstanceDeletes.objects.filter(**filter_args)
+    else:
+        objects = models.InstanceDeletes.objects.all()
+
+    dicts = _convert_model_list(objects.order_by("launched_at"))
+    return rsp({'deletes': dicts})
+
+
+def list_usage_exists(request):
+    filter_args = {}
+    if 'instance' in request.GET:
+        filter_args['instance'] = request.GET['instance']
+
+    if len(filter_args) > 0:
+        objects = models.InstanceExists.objects.filter(**filter_args)
+    else:
+        objects = models.InstanceExists.objects.all()
+
+    dicts = _convert_model_list(objects.order_by("id"))
+    return rsp({'exists': dicts})
+
+
+def _convert_model_list(model_list):
+    converted = []
+    for item in model_list:
+        item_dict = model_to_dict(item)
+        for key in item_dict:
+            if isinstance(item_dict[key], decimal.Decimal):
+                item_dict[key] = str(
+                    datetime_to_decimal.dt_from_decimal(item_dict[key]))
+        converted.append(item_dict)
+    return converted
diff --git a/stacktach/image_type.py b/stacktach/image_type.py
new file mode 100644
index 0000000..83c4eb0
--- /dev/null
+++ b/stacktach/image_type.py
@@ -0,0 +1,60 @@
+BASE_IMAGE = 0x1
+SNAPSHOT_IMAGE = 0x2
+LINUX_IMAGE = 0x10
+
+OS_UBUNTU = 0x100
+OS_DEBIAN = 0x200
+OS_CENTOS = 0x400
+OS_RHEL = 0x800
+
+
+def isset(num, flag):
+    if not num:
+        return False
+    return num & flag > 0
+
+
+flags = {'base': BASE_IMAGE,
+         'snapshot': SNAPSHOT_IMAGE,
+         'linux': LINUX_IMAGE,
+         'ubuntu': OS_UBUNTU,
+         'debian': OS_DEBIAN,
+         'centos': OS_CENTOS,
+         'rhel': OS_RHEL}
+
+
+def readable(num):
+    result = []
+    for k, v in flags.iteritems():
+        if isset(num, v):
+            result.append(k)
+    return result
+
+
+def get_numeric_code(payload, default=0):
+    meta = payload.get('image_meta', {})
+    if default is None:
+        default = 0
+    num = default
+
+    image_type = meta.get('image_type', '')
+    if image_type == 'base':
+        num |= BASE_IMAGE
+    if image_type == 'snapshot':
+        num |= SNAPSHOT_IMAGE
+
+    os_type = meta.get('os_type', payload.get('os_type', ''))
+    if os_type == 'linux':
+        num |= LINUX_IMAGE
+
+    os_distro = meta.get('os_distro', '')
+    if os_distro == 'ubuntu':
+        num |= OS_UBUNTU
+    if os_distro == 'debian':
+        num |= OS_DEBIAN
+    if os_distro == 'centos':
+        num |= OS_CENTOS
+    if os_distro == 'rhel':
+        num |= OS_RHEL
+
+    return num
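image_type packs what-kind-of-image and which-OS facts into a single integer column using bit flags, so one indexed int can answer queries like "base ubuntu images". How the codes compose and decompose, using the flags defined above:

    from stacktach import image_type

    num = 0
    num |= image_type.BASE_IMAGE   # 0x1
    num |= image_type.LINUX_IMAGE  # 0x10
    num |= image_type.OS_UBUNTU    # 0x100

    assert image_type.isset(num, image_type.BASE_IMAGE)
    assert not image_type.isset(num, image_type.SNAPSHOT_IMAGE)
    print image_type.readable(num)  # ['base', 'linux', 'ubuntu'], order may vary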
diff --git a/stacktach/models.py b/stacktach/models.py
index ca2ea94..9a09059 100644
--- a/stacktach/models.py
+++ b/stacktach/models.py
@@ -34,6 +34,9 @@ class RawData(models.Model):
                                  blank=True, db_index=True)
     old_task = models.CharField(max_length=30, null=True,
                                 blank=True, db_index=True)
+    task = models.CharField(max_length=30, null=True,
+                            blank=True, db_index=True)
+    image_type = models.IntegerField(null=True, default=0, db_index=True)
     when = models.DecimalField(max_digits=20, decimal_places=6,
                                db_index=True)
     publisher = models.CharField(max_length=100, null=True,
@@ -72,18 +75,26 @@ class Lifecycle(models.Model):
 class InstanceUsage(models.Model):
     instance = models.CharField(max_length=50, null=True,
                                 blank=True, db_index=True)
-    #launched_at = models.IntegerField(null=True, db_index=True)
     launched_at = models.DecimalField(null=True, max_digits=20,
                                       decimal_places=6)
-    #deleted_at = models.IntegerField(null=True, db_index=True)
-    deleted_at = models.DecimalField(null=True, max_digits=20,
-                                     decimal_places=6)
     request_id = models.CharField(max_length=50, null=True,
                                   blank=True, db_index=True)
     instance_type_id = models.CharField(max_length=50, null=True,
                                         blank=True, db_index=True)
+
+
+class InstanceDeletes(models.Model):
+    instance = models.CharField(max_length=50, null=True,
+                                blank=True, db_index=True)
+    launched_at = models.DecimalField(null=True, max_digits=20,
+                                      decimal_places=6)
+    deleted_at = models.DecimalField(null=True, max_digits=20,
+                                     decimal_places=6)
+    raw = models.ForeignKey(RawData, null=True)
+
+
 class InstanceExists(models.Model):
     PENDING = 'pending'
     VERIFIED = 'verified'
@@ -95,10 +106,8 @@ class InstanceExists(models.Model):
     ]
     instance = models.CharField(max_length=50, null=True,
                                 blank=True, db_index=True)
-    #launched_at = models.IntegerField(null=True, db_index=True)
     launched_at = models.DecimalField(null=True, max_digits=20,
                                       decimal_places=6)
-    #deleted_at = models.IntegerField(null=True, db_index=True)
     deleted_at = models.DecimalField(null=True, max_digits=20,
                                      decimal_places=6)
     message_id = models.CharField(max_length=50, null=True,
@@ -112,6 +121,7 @@ class InstanceExists(models.Model):
                               default=PENDING)
     raw = models.ForeignKey(RawData, related_name='+', null=True)
     usage = models.ForeignKey(InstanceUsage, related_name='+', null=True)
+    delete = models.ForeignKey(InstanceDeletes, related_name='+', null=True)
 
 
 class Timing(models.Model):
diff --git a/stacktach/stacky_server.py b/stacktach/stacky_server.py
index 327bb25..994729c 100644
--- a/stacktach/stacky_server.py
+++ b/stacktach/stacky_server.py
@@ -72,7 +72,7 @@ def sec_to_time(diff):
 
 
 def rsp(data):
-    return HttpResponse(json.dumps(data))
+    return HttpResponse(json.dumps(data), content_type="application/json")
 
 
 def do_deployments(request):
@@ -297,3 +297,81 @@ def do_kpi(request, tenant_id=None):
         results.append([event, sec_to_time(track.duration), uuid,
                         end_event.deployment.name])
     return rsp(results)
+
+
+def do_list_usage_launches(request):
+
+    filter_args = {}
+    if 'instance' in request.GET:
+        filter_args['instance'] = request.GET['instance']
+
+    if len(filter_args) > 0:
+        launches = models.InstanceUsage.objects.filter(**filter_args)
+    else:
+        launches = models.InstanceUsage.objects.all()
+
+    results = []
+    results.append(["UUID", "Launched At", "Instance Type Id"])
+
+    for launch in launches:
+        launched = None
+        if launch.launched_at:
+            launched = str(dt.dt_from_decimal(launch.launched_at))
+        results.append([launch.instance, launched, launch.instance_type_id])
+
+    return rsp(results)
+
+
+def do_list_usage_deletes(request):
+
+    filter_args = {}
+    if 'instance' in request.GET:
+        filter_args['instance'] = request.GET['instance']
+
+    if len(filter_args) > 0:
+        deletes = models.InstanceDeletes.objects.filter(**filter_args)
+    else:
+        deletes = models.InstanceDeletes.objects.all()
+
+    results = []
+    results.append(["UUID", "Launched At", "Deleted At"])
+
+    for delete in deletes:
+        launched = None
+        if delete.launched_at:
+            launched = str(dt.dt_from_decimal(delete.launched_at))
+        deleted = None
+        if delete.deleted_at:
+            deleted = str(dt.dt_from_decimal(delete.deleted_at))
+        results.append([delete.instance, launched, deleted])
+
+    return rsp(results)
+
+
+def do_list_usage_exists(request):
+
+    filter_args = {}
+    if 'instance' in request.GET:
+        filter_args['instance'] = request.GET['instance']
+
+    if len(filter_args) > 0:
+        exists = models.InstanceExists.objects.filter(**filter_args)
+    else:
+        exists = models.InstanceExists.objects.all()
+
+    results = []
+    results.append(["UUID", "Launched At", "Deleted At", "Instance Type Id",
+                    "Message ID", "Status"])
+
+    for exist in exists:
+        launched = None
+        if exist.launched_at:
+            launched = str(dt.dt_from_decimal(exist.launched_at))
+        deleted = None
+        if exist.deleted_at:
+            deleted = str(dt.dt_from_decimal(exist.deleted_at))
+        results.append([exist.instance, launched, deleted,
+                        exist.instance_type_id, exist.message_id,
+                        exist.status])
+
+    return rsp(results)
diff --git a/stacktach/test_utils.py b/stacktach/test_utils.py
index 9f14069..7625b91 100644
--- a/stacktach/test_utils.py
+++ b/stacktach/test_utils.py
@@ -160,10 +160,11 @@ def make_resize_revert_end_json(launched_at, instance_type_id='1',
 
 def create_raw(deployment, when, event, instance=INSTANCE_ID_1,
                request_id=REQUEST_ID_1, state='active', old_task='',
-               host='compute', json=''):
+               host='compute', service='compute', json=''):
     raw_values = {
         'deployment': deployment,
        'host': host,
+        'service': service,
         'state': state,
         'old_task': old_task,
         'when': when,
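Both the stacky/ and db/ variants of the new usage endpoints (wired up in urls.py below) are plain GETs that take an optional instance filter and return JSON. A quick smoke test against a local development server might look like this; the host, port, and UUID are assumptions:

    import json
    import urllib2  # Python 2, matching the codebase

    BASE = "http://localhost:8000"   # assumed local dev server
    INSTANCE = "some-instance-uuid"  # placeholder

    for path in ("/stacky/usage/launches/", "/db/usage/launches/",
                 "/db/usage/deletes/", "/db/usage/exists/"):
        url = "%s%s?instance=%s" % (BASE, path, INSTANCE)
        print path, json.loads(urllib2.urlopen(url).read())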
diff --git a/stacktach/tests.py b/stacktach/tests.py
index 7024c8e..9930695 100644
--- a/stacktach/tests.py
+++ b/stacktach/tests.py
@@ -35,7 +35,7 @@ class ViewsLifecycleWorkflowTestCase(unittest.TestCase):
         when3 = views.str_time_to_unix('2012-12-21 12:36:56.124')
         self.update_raw = create_raw(self.deployment, when1,
                                      'compute.instance.update',
-                                     host='api')
+                                     host='api', service='api')
         self.start_raw = create_raw(self.deployment, when2,
                                     'compute.instance.reboot.start')
         self.end_raw = create_raw(self.deployment, when3,
@@ -105,7 +105,7 @@ class ViewsLifecycleWorkflowTestCase(unittest.TestCase):
                                  'compute.instance.update',
                                  instance=INSTANCE_ID_2,
                                  request_id=REQUEST_ID_2,
-                                 host='api')
+                                 host='api', service='api')
         start_raw2 = create_raw(self.deployment, when2,
                                 'compute.instance.resize.start',
                                 instance=INSTANCE_ID_2,
@@ -157,7 +157,7 @@ class ViewsLifecycleWorkflowTestCase(unittest.TestCase):
         update_raw2 = create_raw(self.deployment, when1,
                                  'compute.instance.update',
                                  request_id=REQUEST_ID_2,
-                                 host='api')
+                                 host='api', service='api')
         start_raw2 = create_raw(self.deployment, when2,
                                 'compute.instance.resize.start',
                                 request_id=REQUEST_ID_2)
@@ -227,7 +227,7 @@ class ViewsLifecycleWorkflowTestCase(unittest.TestCase):
                                  'compute.instance.update',
                                  instance=INSTANCE_ID_2,
                                  request_id=REQUEST_ID_2,
-                                 host='api')
+                                 host='api', service='api')
         start_raw2 = create_raw(self.deployment, when2,
                                 'compute.instance.resize.start',
                                 instance=INSTANCE_ID_2,
@@ -268,9 +268,9 @@ class ViewsLifecycleWorkflowTestCase(unittest.TestCase):
         when2 = views.str_time_to_unix('2012-12-21 13:34:50.123')
         when3 = views.str_time_to_unix('2012-12-21 13:37:50.124')
         update_raw2 = create_raw(self.deployment, when1,
-                                'compute.instance.update',
-                                request_id=REQUEST_ID_2,
-                                host='api')
+                                 'compute.instance.update',
+                                 request_id=REQUEST_ID_2,
+                                 host='api', service='api')
         start_raw2 = create_raw(self.deployment, when2,
                                 'compute.instance.resize.start',
                                 request_id=REQUEST_ID_2)
@@ -309,7 +309,7 @@ class ViewsLifecycleWorkflowTestCase(unittest.TestCase):
         update_raw2 = create_raw(self.deployment, when1,
                                  'compute.instance.update',
                                  request_id=REQUEST_ID_2,
-                                 host='api')
+                                 host='api', service='api')
         start_raw2 = create_raw(self.deployment, when2,
                                 'compute.instance.resize.start',
                                 request_id=REQUEST_ID_2)
@@ -492,14 +492,6 @@ class ViewsUsageTestCase(unittest.TestCase):
     def test_process_delete(self):
         launched_str = '2012-12-21 06:34:50.123'
         launched = views.str_time_to_unix(launched_str)
-        values = {
-            'instance': INSTANCE_ID_1,
-            'request_id': REQUEST_ID_1,
-            'instance_type_id': '1',
-            'launched_at': launched,
-        }
-        InstanceUsage(**values).save()
-
         deleted_str = '2012-12-21 12:34:50.123'
         deleted = views.str_time_to_unix(deleted_str)
         json = test_utils.make_delete_end_json(launched_str, deleted_str)
@@ -508,10 +500,13 @@
         views._process_delete(raw)
 
-        usages = InstanceUsage.objects.all()
-        self.assertEqual(len(usages), 1)
-        usage = usages[0]
-        self.assertEqual(usage.deleted_at, deleted)
+        delete = InstanceDeletes.objects.all()
+        self.assertEqual(len(delete), 1)
+        delete = delete[0]
+        self.assertEqual(delete.instance, INSTANCE_ID_1)
+        self.assertEqual(delete.launched_at, launched)
+        self.assertEqual(delete.deleted_at, deleted)
+        self.assertEqual(delete.raw.id, raw.id)
 
     def test_process_exists(self):
         launched_str = '2012-12-21 06:34:50.123'
@@ -556,7 +551,6 @@
             'request_id': REQUEST_ID_1,
             'instance_type_id': '1',
             'launched_at': launched,
-            'deleted_at': deleted,
         }
         InstanceUsage(**values).save()
@@ -621,7 +615,6 @@
         usage = usages[0]
         self.assertOnUsage(usage, INSTANCE_ID_1, '1', launched, REQUEST_ID_1)
 
-    @unittest.skip("can't handle late starts yet")
     def test_create_workflow_start_late(self):
         created_str = '2012-12-21 06:30:50.123'
         created = views.str_time_to_unix(created_str)
@@ -749,7 +742,6 @@
         self.assertOnUsage(usage_after, INSTANCE_ID_1, '2', finish_time,
                            REQUEST_ID_2)
 
-    @unittest.skip("can't handle late starts yet")
     def test_resize_workflow_start_late(self):
         launched_str = '2012-12-21 06:34:50.123'
         launched = views.str_time_to_unix(launched_str)
@@ -852,7 +844,6 @@
         self.assertOnUsage(usage_after_revert, INSTANCE_ID_1, '1', end_time,
                            REQUEST_ID_3)
 
-    @unittest.skip("can't handle late starts yet")
     def test_resize_revert_workflow_start_late(self):
         launched_str = '2012-12-21 06:34:50.123'
         launched = views.str_time_to_unix(launched_str)
diff --git a/stacktach/urls.py b/stacktach/urls.py
index 212cdea..59c6098 100644
--- a/stacktach/urls.py
+++ b/stacktach/urls.py
@@ -18,6 +18,18 @@ urlpatterns = patterns('',
         'stacktach.stacky_server.do_watch'),
     url(r'stacky/kpi/$', 'stacktach.stacky_server.do_kpi'),
     url(r'stacky/kpi/(?P<tenant_id>\d+)/$', 'stacktach.stacky_server.do_kpi'),
+    url(r'stacky/usage/launches/$',
+        'stacktach.stacky_server.do_list_usage_launches'),
+    url(r'stacky/usage/deletes/$',
+        'stacktach.stacky_server.do_list_usage_deletes'),
+    url(r'stacky/usage/exists/$',
+        'stacktach.stacky_server.do_list_usage_exists'),
+
+    url(r'db/usage/launches/$',
+        'stacktach.dbapi.list_usage_launches'),
+    url(r'db/usage/deletes/$',
+        'stacktach.dbapi.list_usage_deletes'),
+    url(r'db/usage/exists/$', 'stacktach.dbapi.list_usage_exists'),
 
     url(r'^(?P<deployment_id>\d+)/$', 'stacktach.views.home', name='home'),
     url(r'^(?P<deployment_id>\d+)/details/(?P<column>\w+)/(?P<row_id>\d+)/$',
diff --git a/stacktach/views.py b/stacktach/views.py
index 47f9807..d770645 100644
--- a/stacktach/views.py
+++ b/stacktach/views.py
@@ -1,17 +1,18 @@
 # Copyright 2012 - Dark Secret Software Inc.
 
+import datetime
+import json
+import pprint
+
 from django import db
 from django import http
 from django.shortcuts import render_to_response
 from django import template
 
+from stacktach import datetime_to_decimal as dt
 from stacktach import db as stackdb
 from stacktach import models
-from stacktach import datetime_to_decimal as dt
-
-import datetime
-import json
-import pprint
+from stacktach import image_type
 
 STACKDB = stackdb
 
@@ -21,7 +22,9 @@ def _extract_states(payload):
     return {
         'state' : payload.get('state', ""),
         'old_state' : payload.get('old_state', ""),
-        'old_task' : payload.get('old_task_state', "")
+        'old_task' : payload.get('old_task_state', ""),
+        'task' : payload.get('new_task_state', ""),
+        'image_type' : image_type.get_numeric_code(payload)
     }
 
 
@@ -221,10 +224,11 @@ def _process_usage_for_new_launch(raw):
     values['instance'] = payload['instance_id']
     values['request_id'] = notif[1]['_context_request_id']
 
-    if raw.event == INSTANCE_EVENT['create_start']:
-        values['instance_type_id'] = payload['instance_type_id']
+    (usage, new) = STACKDB.get_or_create_instance_usage(**values)
+
+    if raw.event == INSTANCE_EVENT['create_start']:
+        usage.instance_type_id = payload['instance_type_id']
 
-    usage = STACKDB.create_instance_usage(**values)
     STACKDB.save(usage)
 
 
@@ -233,8 +237,8 @@ def _process_usage_for_updates(raw):
     payload = notif[1]['payload']
     instance_id = payload['instance_id']
     request_id = notif[1]['_context_request_id']
-    usage = STACKDB.get_instance_usage(instance=instance_id,
-                                       request_id=request_id)
+    (usage, new) = STACKDB.get_or_create_instance_usage(instance=instance_id,
+                                                        request_id=request_id)
 
     if raw.event in [INSTANCE_EVENT['create_end'],
                      INSTANCE_EVENT['resize_finish_end'],
@@ -253,28 +257,37 @@ def _process_delete(raw):
     notif = json.loads(raw.json)
     payload = notif[1]['payload']
     instance_id = payload['instance_id']
-    launched_at = payload['launched_at']
-    launched_at = str_time_to_unix(launched_at)
-    instance = STACKDB.get_instance_usage(instance=instance_id,
-                                          launched_at=launched_at)
-    instance.deleted_at = str_time_to_unix(payload['deleted_at'])
-    STACKDB.save(instance)
+    launched_at = str_time_to_unix(payload['launched_at'])
+    deleted_at = str_time_to_unix(payload['deleted_at'])
+    values = {
+        'instance': instance_id,
+        'launched_at': launched_at,
+        'deleted_at': deleted_at,
+        'raw': raw
+    }
+    delete = STACKDB.create_instance_delete(**values)
+    STACKDB.save(delete)
 
 
 def _process_exists(raw):
     notif = json.loads(raw.json)
     payload = notif[1]['payload']
     instance_id = payload['instance_id']
-    launched_at = payload['launched_at']
-    launched_at = str_time_to_unix(launched_at)
+    launched_at = str_time_to_unix(payload['launched_at'])
+    launched_range = (launched_at, launched_at+1)
     usage = STACKDB.get_instance_usage(instance=instance_id,
-                                       launched_at=launched_at)
+                                       launched_at__range=launched_range)
+    delete = STACKDB.get_instance_delete(instance=instance_id,
+                                         launched_at__range=launched_range)
     values = {}
     values['message_id'] = notif[1]['message_id']
     values['instance'] = instance_id
     values['launched_at'] = launched_at
     values['instance_type_id'] = payload['instance_type_id']
-    values['usage'] = usage
+    if usage:
+        values['usage'] = usage
+    if delete:
+        values['delete'] = delete
     values['raw'] = raw
 
     deleted_at = payload.get('deleted_at')
@@ -309,13 +322,16 @@ def aggregate_usage(raw):
 
 def str_time_to_unix(when):
     try:
+        when = datetime.datetime.strptime(when, "%Y-%m-%d %H:%M:%S")
+    except ValueError:
         try:
             when = datetime.datetime.strptime(when, "%Y-%m-%d %H:%M:%S.%f")
         except ValueError:
-            # Old way of doing it
-            when = datetime.datetime.strptime(when, "%Y-%m-%dT%H:%M:%S.%f")
-    except Exception, e:
-        pass
+            try:
+                # Old way of doing it
+                when = datetime.datetime.strptime(when, "%Y-%m-%dT%H:%M:%S.%f")
+            except Exception, e:
+                print "BAD DATE: ", e
 
     return dt.dt_to_decimal(when)
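_process_exists matches usage and delete records with a one-second launched_at__range rather than exact equality, because decimal timestamps parsed from different notifications for the same launch can disagree in the sub-second digits. A small illustration of why the window is needed (the timestamps are invented):

    import datetime
    from stacktach import datetime_to_decimal as dt

    launched_at = dt.dt_to_decimal(
        datetime.datetime(2012, 12, 21, 6, 34, 50, 123000))
    launched_range = (launched_at, launched_at + 1)

    # Same launch reported elsewhere with different microseconds:
    reported = dt.dt_to_decimal(
        datetime.datetime(2012, 12, 21, 6, 34, 50, 999000))
    assert launched_range[0] <= reported <= launched_range[1]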
diff --git a/tests/unit/test_stacktach.py b/tests/unit/test_stacktach.py
index 576e306..34293ab 100644
--- a/tests/unit/test_stacktach.py
+++ b/tests/unit/test_stacktach.py
@@ -410,13 +410,13 @@ class StacktackUsageParsingTestCase(unittest.TestCase):
         event = 'compute.instance.create.start'
         raw = utils.create_raw(self.mox, when, event=event, json_str=json_str)
         usage = self.mox.CreateMockAnything()
-        views.STACKDB.create_instance_usage(instance=INSTANCE_ID_1,
-                                            request_id=REQUEST_ID_1,
-                                            instance_type_id = '1')\
-                     .AndReturn(usage)
+        views.STACKDB.get_or_create_instance_usage(instance=INSTANCE_ID_1,
+                                                   request_id=REQUEST_ID_1)\
+                     .AndReturn((usage, True))
         views.STACKDB.save(usage)
         self.mox.ReplayAll()
         views._process_usage_for_new_launch(raw)
+        self.assertEquals(usage.instance_type_id, '1')
         self.mox.VerifyAll()
 
     def test_process_usage_for_updates_create_end(self):
@@ -433,9 +433,9 @@ class StacktackUsageParsingTestCase(unittest.TestCase):
         usage.instance = INSTANCE_ID_1
         usage.request_id = REQUEST_ID_1
         usage.instance_type_id = '1'
-        views.STACKDB.get_instance_usage(instance=INSTANCE_ID_1,
-                                         request_id=REQUEST_ID_1)\
-                     .AndReturn(usage)
+        views.STACKDB.get_or_create_instance_usage(instance=INSTANCE_ID_1,
+                                                   request_id=REQUEST_ID_1)\
+                     .AndReturn((usage, True))
         views.STACKDB.save(usage)
 
         self.mox.ReplayAll()
@@ -459,9 +459,9 @@ class StacktackUsageParsingTestCase(unittest.TestCase):
         usage.instance = INSTANCE_ID_1
         usage.request_id = REQUEST_ID_1
         usage.instance_type_id = '1'
-        views.STACKDB.get_instance_usage(instance=INSTANCE_ID_1,
-                                         request_id=REQUEST_ID_1)\
-                     .AndReturn(usage)
+        views.STACKDB.get_or_create_instance_usage(instance=INSTANCE_ID_1,
+                                                   request_id=REQUEST_ID_1)\
+                     .AndReturn((usage, True))
         views.STACKDB.save(usage)
 
         self.mox.ReplayAll()
@@ -484,9 +484,9 @@ class StacktackUsageParsingTestCase(unittest.TestCase):
         usage = self.mox.CreateMockAnything()
         usage.instance = INSTANCE_ID_1
         usage.request_id = REQUEST_ID_1
-        views.STACKDB.get_instance_usage(instance=INSTANCE_ID_1,
-                                         request_id=REQUEST_ID_1)\
-                     .AndReturn(usage)
+        views.STACKDB.get_or_create_instance_usage(instance=INSTANCE_ID_1,
+                                                   request_id=REQUEST_ID_1)\
+                     .AndReturn((usage, True))
         views.STACKDB.save(usage)
 
         self.mox.ReplayAll()
@@ -508,23 +508,22 @@ class StacktackUsageParsingTestCase(unittest.TestCase):
         event = 'compute.instance.delete.end'
         raw = utils.create_raw(self.mox, delete_decimal, event=event,
                                json_str=json_str)
-        usage = self.mox.CreateMockAnything()
-        usage.instance = INSTANCE_ID_1
-        usage.request_id = REQUEST_ID_1
-        usage.instance_type_id = '1'
-        usage.launched_at = launch_decimal
-        views.STACKDB.get_instance_usage(instance=INSTANCE_ID_1,
-                                         launched_at=launch_decimal)\
-                     .AndReturn(usage)
-        views.STACKDB.save(usage)
+        delete = self.mox.CreateMockAnything()
+        delete.instance = INSTANCE_ID_1
+        delete.launched_at = launch_decimal
+        delete.deleted_at = delete_decimal
+        views.STACKDB.create_instance_delete(instance=INSTANCE_ID_1,
+                                             launched_at=launch_decimal,
+                                             deleted_at=delete_decimal,
+                                             raw=raw)\
+                     .AndReturn(delete)
+        views.STACKDB.save(delete)
 
         self.mox.ReplayAll()
         views._process_delete(raw)
-        self.assertEqual(usage.instance, INSTANCE_ID_1)
-        self.assertEqual(usage.request_id, REQUEST_ID_1)
-        self.assertEqual(usage.instance_type_id, '1')
-        self.assertEqual(usage.launched_at, launch_decimal)
-        self.assertEqual(usage.deleted_at, delete_decimal)
+        self.assertEqual(delete.instance, INSTANCE_ID_1)
+        self.assertEqual(delete.launched_at, launch_decimal)
+        self.assertEqual(delete.deleted_at, delete_decimal)
         self.mox.VerifyAll()
 
     def test_process_exists(self):
@@ -538,9 +537,13 @@ class StacktackUsageParsingTestCase(unittest.TestCase):
         raw = utils.create_raw(self.mox, current_decimal, event=event,
                                json_str=json_str)
         usage = self.mox.CreateMockAnything()
+        launched_range = (launch_decimal, launch_decimal+1)
         views.STACKDB.get_instance_usage(instance=INSTANCE_ID_1,
-                                         launched_at=launch_decimal)\
+                                         launched_at__range=launched_range)\
                      .AndReturn(usage)
+        views.STACKDB.get_instance_delete(instance=INSTANCE_ID_1,
+                                          launched_at__range=launched_range)\
+                     .AndReturn(None)
         exists_values = {
             'message_id': MESSAGE_ID_1,
             'instance': INSTANCE_ID_1,
@@ -570,9 +573,14 @@ class StacktackUsageParsingTestCase(unittest.TestCase):
         raw = utils.create_raw(self.mox, current_decimal, event=event,
                                json_str=json_str)
         usage = self.mox.CreateMockAnything()
+        launched_range = (launch_decimal, launch_decimal+1)
         views.STACKDB.get_instance_usage(instance=INSTANCE_ID_1,
-                                         launched_at=launch_decimal)\
+                                         launched_at__range=launched_range)\
                      .AndReturn(usage)
+        delete = self.mox.CreateMockAnything()
+        views.STACKDB.get_instance_delete(instance=INSTANCE_ID_1,
+                                          launched_at__range=launched_range)\
+                     .AndReturn(delete)
         exists_values = {
             'message_id': MESSAGE_ID_1,
             'instance': INSTANCE_ID_1,
@@ -580,6 +588,7 @@ class StacktackUsageParsingTestCase(unittest.TestCase):
             'deleted_at': deleted_decimal,
             'instance_type_id': '1',
             'usage': usage,
+            'delete': delete,
             'raw': raw,
         }
         exists = self.mox.CreateMockAnything()
diff --git a/worker/worker.py b/worker/worker.py
index db7a22b..7e10f4e 100644
--- a/worker/worker.py
+++ b/worker/worker.py
@@ -22,6 +22,7 @@ import kombu
 import kombu.entity
 import kombu.mixins
 import logging
+import sys
 import time
 
 from pympler.process import ProcessMemoryInfo
@@ -33,6 +34,8 @@ LOG = logging.getLogger(__name__)
 LOG.setLevel(logging.DEBUG)
 handler = logging.handlers.TimedRotatingFileHandler('worker.log',
                                            when='h', interval=6, backupCount=4)
+formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+handler.setFormatter(formatter)
 LOG.addHandler(handler)
 
 
@@ -144,16 +147,21 @@ def run(deployment_config):
                   transport="librabbitmq",
                   virtual_host=virtual_host)
 
-    while continue_running():
-        LOG.debug("Processing on '%s'" % name)
-        with kombu.connection.BrokerConnection(**params) as conn:
-            try:
-                consumer = NovaConsumer(name, conn, deployment, durable,
-                                        queue_arguments)
-                consumer.run()
-            except Exception as e:
-                LOG.exception("name=%s, exception=%s. Reconnecting in 5s" %
-                              (name, e))
-                time.sleep(5)
-        LOG.debug("Completed processing on '%s'" % name)
-
+    while True:
+        try:
+            LOG.debug("Processing on '%s'" % name)
+            with kombu.connection.BrokerConnection(**params) as conn:
+                try:
+                    consumer = NovaConsumer(name, conn, deployment, durable,
+                                            queue_arguments)
+                    consumer.run()
+                except Exception as e:
+                    LOG.exception("name=%s, exception=%s. Reconnecting in 5s" %
+                                  (name, e))
+                    time.sleep(5)
+            LOG.debug("Completed processing on '%s'" % name)
+        except:
+            e = sys.exc_info()[0]
+            msg = "Uncaught exception: deployment=%s, exception=%s. Retrying in 5s"
+            LOG.exception(msg % (name, e))
+            time.sleep(5)
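The reworked run() wraps the whole consume/reconnect loop in a second, catch-all try so that exceptions raised outside the inner consumer block (while building the connection, for instance) only cost a five-second pause instead of killing the worker. The shape of that pattern, reduced to a skeleton with illustrative names:

    import logging
    import time

    LOG = logging.getLogger(__name__)


    def run_forever(make_consumer):
        while True:
            try:
                consumer = make_consumer()  # may itself raise (bad config, DNS, ...)
                try:
                    consumer.run()          # blocks; raises on broker failure
                except Exception, e:
                    LOG.exception("consumer died (%s), reconnecting in 5s" % e)
                    time.sleep(5)
            except Exception:
                LOG.exception("uncaught error, retrying in 5s")
                time.sleep(5)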