Merge branch 'master' of git://github.com/rackspace/stacktach

This commit is contained in:
Andrew Melton
2013-02-14 14:45:12 -05:00
24 changed files with 1143 additions and 100 deletions

2
.gitignore vendored
View File

@@ -5,3 +5,5 @@ local_settings.py
.coverage
worker.log
worker.log.*
etc/stacktach_config.sh
etc/stacktach_worker_config.json

View File

@@ -1,3 +1,5 @@
Django>=1.4.2
MySQL-python>=1.2.3
eventlet>=0.9.17
kombu>=2.4.7
librabbitmq>=1.0.0

61
migrations/001_base.sql Normal file
View File

@@ -0,0 +1,61 @@
BEGIN;
CREATE TABLE `stacktach_deployment` (
`id` integer AUTO_INCREMENT NOT NULL PRIMARY KEY,
`name` varchar(50) NOT NULL
)
;
CREATE TABLE `stacktach_rawdata` (
`id` integer AUTO_INCREMENT NOT NULL PRIMARY KEY,
`deployment_id` integer NOT NULL,
`tenant` varchar(50),
`json` longtext NOT NULL,
`routing_key` varchar(50),
`state` varchar(20),
`old_state` varchar(20),
`old_task` varchar(30),
`when` numeric(20, 6) NOT NULL,
`publisher` varchar(100),
`event` varchar(50),
`service` varchar(50),
`host` varchar(100),
`instance` varchar(50),
`request_id` varchar(50)
)
;
ALTER TABLE `stacktach_rawdata` ADD CONSTRAINT `deployment_id_refs_id_362370d` FOREIGN KEY (`deployment_id`) REFERENCES `stacktach_deployment` (`id`);
CREATE TABLE `stacktach_lifecycle` (
`id` integer AUTO_INCREMENT NOT NULL PRIMARY KEY,
`instance` varchar(50),
`last_state` varchar(50),
`last_task_state` varchar(50),
`last_raw_id` integer
)
;
ALTER TABLE `stacktach_lifecycle` ADD CONSTRAINT `last_raw_id_refs_id_d5fb17d3` FOREIGN KEY (`last_raw_id`) REFERENCES `stacktach_rawdata` (`id`);
CREATE TABLE `stacktach_timing` (
`id` integer AUTO_INCREMENT NOT NULL PRIMARY KEY,
`name` varchar(50) NOT NULL,
`lifecycle_id` integer NOT NULL,
`start_raw_id` integer,
`end_raw_id` integer,
`start_when` numeric(20, 6),
`end_when` numeric(20, 6),
`diff` numeric(20, 6)
)
;
ALTER TABLE `stacktach_timing` ADD CONSTRAINT `lifecycle_id_refs_id_4255ead8` FOREIGN KEY (`lifecycle_id`) REFERENCES `stacktach_lifecycle` (`id`);
ALTER TABLE `stacktach_timing` ADD CONSTRAINT `start_raw_id_refs_id_c32dfe04` FOREIGN KEY (`start_raw_id`) REFERENCES `stacktach_rawdata` (`id`);
ALTER TABLE `stacktach_timing` ADD CONSTRAINT `end_raw_id_refs_id_c32dfe04` FOREIGN KEY (`end_raw_id`) REFERENCES `stacktach_rawdata` (`id`);
CREATE TABLE `stacktach_requesttracker` (
`id` integer AUTO_INCREMENT NOT NULL PRIMARY KEY,
`request_id` varchar(50) NOT NULL,
`lifecycle_id` integer NOT NULL,
`last_timing_id` integer,
`start` numeric(20, 6) NOT NULL,
`duration` numeric(20, 6) NOT NULL,
`completed` bool NOT NULL
)
;
ALTER TABLE `stacktach_requesttracker` ADD CONSTRAINT `lifecycle_id_refs_id_e457729` FOREIGN KEY (`lifecycle_id`) REFERENCES `stacktach_lifecycle` (`id`);
ALTER TABLE `stacktach_requesttracker` ADD CONSTRAINT `last_timing_id_refs_id_f0827cca` FOREIGN KEY (`last_timing_id`) REFERENCES `stacktach_timing` (`id`);
COMMIT;

2
migrations/002_delta.sql Normal file
View File

@@ -0,0 +1,2 @@
ALTER TABLE stacktach_rawdata ADD task VARCHAR(30);
CREATE INDEX `stacktach_rawdata_1c149b74` ON `stacktach_rawdata` (`task`);

62
migrations/002_task.sql Normal file
View File

@@ -0,0 +1,62 @@
BEGIN;
CREATE TABLE `stacktach_deployment` (
`id` integer AUTO_INCREMENT NOT NULL PRIMARY KEY,
`name` varchar(50) NOT NULL
)
;
CREATE TABLE `stacktach_rawdata` (
`id` integer AUTO_INCREMENT NOT NULL PRIMARY KEY,
`deployment_id` integer NOT NULL,
`tenant` varchar(50),
`json` longtext NOT NULL,
`routing_key` varchar(50),
`state` varchar(20),
`old_state` varchar(20),
`old_task` varchar(30),
`task` varchar(30),
`when` numeric(20, 6) NOT NULL,
`publisher` varchar(100),
`event` varchar(50),
`service` varchar(50),
`host` varchar(100),
`instance` varchar(50),
`request_id` varchar(50)
)
;
ALTER TABLE `stacktach_rawdata` ADD CONSTRAINT `deployment_id_refs_id_362370d` FOREIGN KEY (`deployment_id`) REFERENCES `stacktach_deployment` (`id`);
CREATE TABLE `stacktach_lifecycle` (
`id` integer AUTO_INCREMENT NOT NULL PRIMARY KEY,
`instance` varchar(50),
`last_state` varchar(50),
`last_task_state` varchar(50),
`last_raw_id` integer
)
;
ALTER TABLE `stacktach_lifecycle` ADD CONSTRAINT `last_raw_id_refs_id_d5fb17d3` FOREIGN KEY (`last_raw_id`) REFERENCES `stacktach_rawdata` (`id`);
CREATE TABLE `stacktach_timing` (
`id` integer AUTO_INCREMENT NOT NULL PRIMARY KEY,
`name` varchar(50) NOT NULL,
`lifecycle_id` integer NOT NULL,
`start_raw_id` integer,
`end_raw_id` integer,
`start_when` numeric(20, 6),
`end_when` numeric(20, 6),
`diff` numeric(20, 6)
)
;
ALTER TABLE `stacktach_timing` ADD CONSTRAINT `lifecycle_id_refs_id_4255ead8` FOREIGN KEY (`lifecycle_id`) REFERENCES `stacktach_lifecycle` (`id`);
ALTER TABLE `stacktach_timing` ADD CONSTRAINT `start_raw_id_refs_id_c32dfe04` FOREIGN KEY (`start_raw_id`) REFERENCES `stacktach_rawdata` (`id`);
ALTER TABLE `stacktach_timing` ADD CONSTRAINT `end_raw_id_refs_id_c32dfe04` FOREIGN KEY (`end_raw_id`) REFERENCES `stacktach_rawdata` (`id`);
CREATE TABLE `stacktach_requesttracker` (
`id` integer AUTO_INCREMENT NOT NULL PRIMARY KEY,
`request_id` varchar(50) NOT NULL,
`lifecycle_id` integer NOT NULL,
`last_timing_id` integer,
`start` numeric(20, 6) NOT NULL,
`duration` numeric(20, 6) NOT NULL,
`completed` bool NOT NULL
)
;
ALTER TABLE `stacktach_requesttracker` ADD CONSTRAINT `lifecycle_id_refs_id_e457729` FOREIGN KEY (`lifecycle_id`) REFERENCES `stacktach_lifecycle` (`id`);
ALTER TABLE `stacktach_requesttracker` ADD CONSTRAINT `last_timing_id_refs_id_f0827cca` FOREIGN KEY (`last_timing_id`) REFERENCES `stacktach_timing` (`id`);
COMMIT;

View File

@@ -0,0 +1,31 @@
BEGIN;
CREATE INDEX `stacktach_rawdata_4ac6801` ON `stacktach_rawdata` (`deployment_id`);
CREATE INDEX `stacktach_rawdata_2207f86d` ON `stacktach_rawdata` (`tenant`);
CREATE INDEX `stacktach_rawdata_2192f43a` ON `stacktach_rawdata` (`routing_key`);
CREATE INDEX `stacktach_rawdata_355bfc27` ON `stacktach_rawdata` (`state`);
CREATE INDEX `stacktach_rawdata_b716e0bb` ON `stacktach_rawdata` (`old_state`);
CREATE INDEX `stacktach_rawdata_8182be12` ON `stacktach_rawdata` (`old_task`);
CREATE INDEX `stacktach_rawdata_1c149b74` ON `stacktach_rawdata` (`task`);
CREATE INDEX `stacktach_rawdata_feaed089` ON `stacktach_rawdata` (`when`);
CREATE INDEX `stacktach_rawdata_878a2906` ON `stacktach_rawdata` (`publisher`);
CREATE INDEX `stacktach_rawdata_a90f9116` ON `stacktach_rawdata` (`event`);
CREATE INDEX `stacktach_rawdata_52c5ef6b` ON `stacktach_rawdata` (`service`);
CREATE INDEX `stacktach_rawdata_38dbea87` ON `stacktach_rawdata` (`host`);
CREATE INDEX `stacktach_rawdata_888b756a` ON `stacktach_rawdata` (`instance`);
CREATE INDEX `stacktach_rawdata_792812e8` ON `stacktach_rawdata` (`request_id`);
CREATE INDEX `stacktach_lifecycle_888b756a` ON `stacktach_lifecycle` (`instance`);
CREATE INDEX `stacktach_lifecycle_9b2555fd` ON `stacktach_lifecycle` (`last_state`);
CREATE INDEX `stacktach_lifecycle_67421a0e` ON `stacktach_lifecycle` (`last_task_state`);
CREATE INDEX `stacktach_lifecycle_dcf9e5f3` ON `stacktach_lifecycle` (`last_raw_id`);
CREATE INDEX `stacktach_timing_52094d6e` ON `stacktach_timing` (`name`);
CREATE INDEX `stacktach_timing_9f222e6b` ON `stacktach_timing` (`lifecycle_id`);
CREATE INDEX `stacktach_timing_efab905a` ON `stacktach_timing` (`start_raw_id`);
CREATE INDEX `stacktach_timing_c8bb8daf` ON `stacktach_timing` (`end_raw_id`);
CREATE INDEX `stacktach_timing_4401d15e` ON `stacktach_timing` (`diff`);
CREATE INDEX `stacktach_requesttracker_792812e8` ON `stacktach_requesttracker` (`request_id`);
CREATE INDEX `stacktach_requesttracker_9f222e6b` ON `stacktach_requesttracker` (`lifecycle_id`);
CREATE INDEX `stacktach_requesttracker_ce616a96` ON `stacktach_requesttracker` (`last_timing_id`);
CREATE INDEX `stacktach_requesttracker_29f4f2ea` ON `stacktach_requesttracker` (`start`);
CREATE INDEX `stacktach_requesttracker_8eb45f9b` ON `stacktach_requesttracker` (`duration`);
CREATE INDEX `stacktach_requesttracker_e490d511` ON `stacktach_requesttracker` (`completed`);
COMMIT;

2
migrations/003_delta.sql Normal file
View File

@@ -0,0 +1,2 @@
ALTER TABLE stacktach_rawdata ADD image_type integer;
CREATE INDEX `stacktach_rawdata_cfde77eb` ON `stacktach_rawdata` (`image_type`);

View File

@@ -0,0 +1,63 @@
BEGIN;
CREATE TABLE `stacktach_deployment` (
`id` integer AUTO_INCREMENT NOT NULL PRIMARY KEY,
`name` varchar(50) NOT NULL
)
;
CREATE TABLE `stacktach_rawdata` (
`id` integer AUTO_INCREMENT NOT NULL PRIMARY KEY,
`deployment_id` integer NOT NULL,
`tenant` varchar(50),
`json` longtext NOT NULL,
`routing_key` varchar(50),
`state` varchar(20),
`old_state` varchar(20),
`old_task` varchar(30),
`task` varchar(30),
`image_type` integer,
`when` numeric(20, 6) NOT NULL,
`publisher` varchar(100),
`event` varchar(50),
`service` varchar(50),
`host` varchar(100),
`instance` varchar(50),
`request_id` varchar(50)
)
;
ALTER TABLE `stacktach_rawdata` ADD CONSTRAINT `deployment_id_refs_id_362370d` FOREIGN KEY (`deployment_id`) REFERENCES `stacktach_deployment` (`id`);
CREATE TABLE `stacktach_lifecycle` (
`id` integer AUTO_INCREMENT NOT NULL PRIMARY KEY,
`instance` varchar(50),
`last_state` varchar(50),
`last_task_state` varchar(50),
`last_raw_id` integer
)
;
ALTER TABLE `stacktach_lifecycle` ADD CONSTRAINT `last_raw_id_refs_id_d5fb17d3` FOREIGN KEY (`last_raw_id`) REFERENCES `stacktach_rawdata` (`id`);
CREATE TABLE `stacktach_timing` (
`id` integer AUTO_INCREMENT NOT NULL PRIMARY KEY,
`name` varchar(50) NOT NULL,
`lifecycle_id` integer NOT NULL,
`start_raw_id` integer,
`end_raw_id` integer,
`start_when` numeric(20, 6),
`end_when` numeric(20, 6),
`diff` numeric(20, 6)
)
;
ALTER TABLE `stacktach_timing` ADD CONSTRAINT `lifecycle_id_refs_id_4255ead8` FOREIGN KEY (`lifecycle_id`) REFERENCES `stacktach_lifecycle` (`id`);
ALTER TABLE `stacktach_timing` ADD CONSTRAINT `start_raw_id_refs_id_c32dfe04` FOREIGN KEY (`start_raw_id`) REFERENCES `stacktach_rawdata` (`id`);
ALTER TABLE `stacktach_timing` ADD CONSTRAINT `end_raw_id_refs_id_c32dfe04` FOREIGN KEY (`end_raw_id`) REFERENCES `stacktach_rawdata` (`id`);
CREATE TABLE `stacktach_requesttracker` (
`id` integer AUTO_INCREMENT NOT NULL PRIMARY KEY,
`request_id` varchar(50) NOT NULL,
`lifecycle_id` integer NOT NULL,
`last_timing_id` integer,
`start` numeric(20, 6) NOT NULL,
`duration` numeric(20, 6) NOT NULL,
`completed` bool NOT NULL
)
;
ALTER TABLE `stacktach_requesttracker` ADD CONSTRAINT `lifecycle_id_refs_id_e457729` FOREIGN KEY (`lifecycle_id`) REFERENCES `stacktach_lifecycle` (`id`);
ALTER TABLE `stacktach_requesttracker` ADD CONSTRAINT `last_timing_id_refs_id_f0827cca` FOREIGN KEY (`last_timing_id`) REFERENCES `stacktach_timing` (`id`);
COMMIT;

View File

@@ -0,0 +1,32 @@
BEGIN;
CREATE INDEX `stacktach_rawdata_4ac6801` ON `stacktach_rawdata` (`deployment_id`);
CREATE INDEX `stacktach_rawdata_2207f86d` ON `stacktach_rawdata` (`tenant`);
CREATE INDEX `stacktach_rawdata_2192f43a` ON `stacktach_rawdata` (`routing_key`);
CREATE INDEX `stacktach_rawdata_355bfc27` ON `stacktach_rawdata` (`state`);
CREATE INDEX `stacktach_rawdata_b716e0bb` ON `stacktach_rawdata` (`old_state`);
CREATE INDEX `stacktach_rawdata_8182be12` ON `stacktach_rawdata` (`old_task`);
CREATE INDEX `stacktach_rawdata_1c149b74` ON `stacktach_rawdata` (`task`);
CREATE INDEX `stacktach_rawdata_cfde77eb` ON `stacktach_rawdata` (`image_type`);
CREATE INDEX `stacktach_rawdata_feaed089` ON `stacktach_rawdata` (`when`);
CREATE INDEX `stacktach_rawdata_878a2906` ON `stacktach_rawdata` (`publisher`);
CREATE INDEX `stacktach_rawdata_a90f9116` ON `stacktach_rawdata` (`event`);
CREATE INDEX `stacktach_rawdata_52c5ef6b` ON `stacktach_rawdata` (`service`);
CREATE INDEX `stacktach_rawdata_38dbea87` ON `stacktach_rawdata` (`host`);
CREATE INDEX `stacktach_rawdata_888b756a` ON `stacktach_rawdata` (`instance`);
CREATE INDEX `stacktach_rawdata_792812e8` ON `stacktach_rawdata` (`request_id`);
CREATE INDEX `stacktach_lifecycle_888b756a` ON `stacktach_lifecycle` (`instance`);
CREATE INDEX `stacktach_lifecycle_9b2555fd` ON `stacktach_lifecycle` (`last_state`);
CREATE INDEX `stacktach_lifecycle_67421a0e` ON `stacktach_lifecycle` (`last_task_state`);
CREATE INDEX `stacktach_lifecycle_dcf9e5f3` ON `stacktach_lifecycle` (`last_raw_id`);
CREATE INDEX `stacktach_timing_52094d6e` ON `stacktach_timing` (`name`);
CREATE INDEX `stacktach_timing_9f222e6b` ON `stacktach_timing` (`lifecycle_id`);
CREATE INDEX `stacktach_timing_efab905a` ON `stacktach_timing` (`start_raw_id`);
CREATE INDEX `stacktach_timing_c8bb8daf` ON `stacktach_timing` (`end_raw_id`);
CREATE INDEX `stacktach_timing_4401d15e` ON `stacktach_timing` (`diff`);
CREATE INDEX `stacktach_requesttracker_792812e8` ON `stacktach_requesttracker` (`request_id`);
CREATE INDEX `stacktach_requesttracker_9f222e6b` ON `stacktach_requesttracker` (`lifecycle_id`);
CREATE INDEX `stacktach_requesttracker_ce616a96` ON `stacktach_requesttracker` (`last_timing_id`);
CREATE INDEX `stacktach_requesttracker_29f4f2ea` ON `stacktach_requesttracker` (`start`);
CREATE INDEX `stacktach_requesttracker_8eb45f9b` ON `stacktach_requesttracker` (`duration`);
CREATE INDEX `stacktach_requesttracker_e490d511` ON `stacktach_requesttracker` (`completed`);
COMMIT;

View File

@@ -0,0 +1,63 @@
import datetime
import json
import sys
sys.path.append("/stacktach")
from stacktach import datetime_to_decimal as dt
from stacktach import image_type
from stacktach import models
if __name__ != '__main__':
sys.exit(1)
states = {}
def fix_chunk(hours, length):
now = datetime.datetime.utcnow()
start = now - datetime.timedelta(hours=hours+length)
end = now - datetime.timedelta(hours=hours)
dstart = dt.dt_to_decimal(start)
dend = dt.dt_to_decimal(end)
done = 0
updated = 0
block = 0
print "Hours ago (%d to %d) %d - %d" % (hours + length, hours, dstart, dend)
updates = models.RawData.objects.filter(event='compute.instance.update',
when__gt=dstart, when__lte=dend)\
.only('task', 'image_type', 'json')
for raw in updates:
queue, body = json.loads(raw.json)
payload = body.get('payload', {})
task = payload.get('new_task_state', None)
if task != None and task != 'None':
states[task] = states.get(task, 0) + 1
raw.task = task
raw.image_type = image_type.get_numeric_code(payload, raw.image_type)
updated += 1
raw.save()
done += 1
if done >= 10000:
block += 1
done = 0
print "# 10k blocks processed: %d (events %d)" % \
(block, updated)
updated = 0
for kv in states.iteritems():
print "%s = %d" % kv
for day in xrange(0, 90):
hours = day * 24
steps = 12
chunk = 24 / steps
for x in xrange(steps):
fix_chunk(hours, chunk)
hours += chunk

166
reports/pretty.py Normal file
View File

@@ -0,0 +1,166 @@
import datetime
import json
import sys
import time
import prettytable
sys.path.append("/stacktach")
from stacktach import datetime_to_decimal as dt
from stacktach import image_type
from stacktach import models
if __name__ != '__main__':
sys.exit(1)
yesterday = datetime.datetime.utcnow().date() - datetime.timedelta(days=1)
if len(sys.argv) == 2:
try:
t = time.strptime(sys.argv[1], "%Y-%m-%d")
yesterday = datetime.datetime(*t[:6])
except Exception, e:
print e
print "Usage: python requests.py YYYY-MM-DD (the end date)"
sys.exit(1)
percentile = 90
hours = 24
start = datetime.datetime(year=yesterday.year, month=yesterday.month,
day=yesterday.day)
end = start + datetime.timedelta(hours=hours-1, minutes=59, seconds=59)
print "Generating report for %s to %s" % (start, end)
dstart = dt.dt_to_decimal(start)
dend = dt.dt_to_decimal(end)
codes = {}
# Get all the instances that have changed in the last N hours ...
updates = models.RawData.objects.filter(event='compute.instance.update',
when__gt=dstart, when__lte=dend)\
.values('instance').distinct()
expiry = 60 * 60 # 1 hour
cmds = ['create', 'rebuild', 'rescue', 'resize', 'snapshot']
failures = {}
durations = {}
attempts = {}
for uuid_dict in updates:
uuid = uuid_dict['instance']
# All the unique Request ID's for this instance during that timespan.
reqs = models.RawData.objects.filter(instance=uuid,
when__gt=dstart, when__lte=dend) \
.values('request_id').distinct()
for req_dict in reqs:
report = False
req = req_dict['request_id']
raws = models.RawData.objects.filter(request_id=req)\
.exclude(event='compute.instance.exists')\
.order_by('when')
start = None
err = None
operation = "aux"
image_type_num = 0
for raw in raws:
if not start:
start = raw.when
if 'error' in raw.routing_key:
err = raw
report = True
for cmd in cmds:
if cmd in raw.event:
operation = cmd
break
if raw.image_type:
image_type_num |= raw.image_type
image = "?"
if image_type.isset(image_type_num, image_type.BASE_IMAGE):
image = "base"
if image_type.isset(image_type_num, image_type.SNAPSHOT_IMAGE):
image = "snap"
if not start:
continue
end = raw.when
diff = end - start
if diff > 3600:
report = True
key = (operation, image)
# Track durations for all attempts, good and bad ...
_durations = durations.get(key, [])
_durations.append(diff)
durations[key] = _durations
attempts[key] = attempts.get(key, 0) + 1
if report:
failures[key] = failures.get(key, 0) + 1
# Print the results ...
cols = ["Operation", "Image", "Min*", "Max*", "Avg*",
"Requests", "# Fail", "Fail %"]
p = prettytable.PrettyTable(cols)
for c in cols[2:]:
p.align[c] = 'r'
p.sortby = cols[0]
pct = (float(100 - percentile) / 2.0) / 100.0
print "* Using %d-th percentile for results (+/-%.1f%% cut)" % \
(percentile, pct * 100.0)
total = 0
failure_total = 0
for key, count in attempts.iteritems():
total += count
operation, image = key
failure_count = failures.get(key, 0)
failure_total += failure_count
failure_percentage = float(failure_count) / float(count)
_failure_percentage = "%.1f%%" % (failure_percentage * 100.0)
# N-th % of durations ...
_values = durations[key]
_values.sort()
_outliers = int(float(len(_values)) * pct)
if _outliers > 0:
before = len(_values)
_values = _values[_outliers:-_outliers]
print "culling %d -> %d" % (before, len(_values))
_min = 99999999
_max = 0
_total = 0.0
for value in _values:
_min = min(_min, value)
_max = max(_max, value)
_total += float(value)
_avg = float(_total) / float(len(_values))
_fmin = dt.sec_to_str(_min)
_fmax = dt.sec_to_str(_max)
_favg = dt.sec_to_str(_avg)
p.add_row([operation, image, _fmin, _fmax, _favg, count,
failure_count, _failure_percentage])
print p
print "Total: %d, Failures: %d, Failure Rate: %.1f%%" % \
(total, failure_total,
(float(failure_total)/float(total)) * 100.0)

258
reports/requests.py Normal file
View File

@@ -0,0 +1,258 @@
import datetime
import json
import sys
import time
import prettytable
sys.path.append("/stacktach")
from stacktach import datetime_to_decimal as dt
from stacktach import image_type
from stacktach import models
if __name__ != '__main__':
sys.exit(1)
yesterday = datetime.datetime.utcnow().date() - datetime.timedelta(days=1)
if len(sys.argv) == 2:
try:
t = time.strptime(sys.argv[1], "%Y-%m-%d")
yesterday = datetime.datetime(*t[:6])
except Exception, e:
print e
print "Usage: python requests.py YYYY-MM-DD (the end date)"
sys.exit(1)
hours = 0
length = 6
start = datetime.datetime(year=yesterday.year, month=yesterday.month,
day=yesterday.day)
end = start + datetime.timedelta(hours=length-1, minutes=59, seconds=59)
print "Generating report for %s to %s" % (start, end)
dstart = dt.dt_to_decimal(start)
dend = dt.dt_to_decimal(end)
codes = {}
# Get all the instances that have changed in the last N hours ...
updates = models.RawData.objects.filter(event='compute.instance.update',
when__gt=dstart, when__lte=dend)\
.values('instance').distinct()
expiry = 60 * 60 # 1 hour
cmds = ['create', 'rebuild', 'rescue', 'resize', 'snapshot']
failures = {}
causes = {}
durations = {}
error_messages = {}
successes = {}
tenant_issues = {}
for uuid_dict in updates:
uuid = uuid_dict['instance']
# All the unique Request ID's for this instance during that timespan.
reqs = models.RawData.objects.filter(instance=uuid,
when__gt=dstart, when__lte=dend) \
.values('request_id').distinct()
for req_dict in reqs:
report = False
req = req_dict['request_id']
raws = models.RawData.objects.filter(request_id=req)\
.exclude(event='compute.instance.exists')\
.order_by('when')
start = None
err = None
operation = "aux"
platform = 0
tenant = 0
cell = "unk"
for raw in raws:
if not start:
start = raw.when
if 'error' in raw.routing_key:
err = raw
report = True
if raw.tenant:
if tenant > 0 and raw.tenant != tenant:
print "Conflicting tenant ID", raw.tenant, tenant
tenant = raw.tenant
for cmd in cmds:
if cmd in raw.event:
operation = cmd
cell = raw.deployment.name
break
if raw.image_type > 0:
platform = raw.image_type
if not start:
continue
end = raw.when
diff = end - start
if diff > 3600:
report = True
key = (operation, platform, cell)
# Track durations for all attempts, good and bad ...
duration_min, duration_max, duration_count, duration_total = \
durations.get(key, (9999999, 0, 0, 0))
duration_min = min(duration_min, diff)
duration_max = max(duration_max, diff)
duration_count += 1
duration_total += diff
durations[key] = (duration_min, duration_max, duration_count,
duration_total)
if not report:
successes[key] = successes.get(key, 0) + 1
else:
print "------", uuid, "----------"
print " Req:", req
print " Duration: %.2f minutes" % (diff / 60)
print " Operation:", operation
print " Platform:", image_type.readable(platform)
cause = "> %d min" % (expiry / 60)
failures[key] = failures.get(key, 0) + 1
tenant_issues[tenant] = tenant_issues.get(tenant, 0) + 1
if err:
queue, body = json.loads(err.json)
payload = body['payload']
print "Error. EventID: %s, Tenant %s, Service %s, Host %s, "\
"Deployment %s, Event %s, When %s"\
% (err.id, err.tenant, err.service, err.host,
err.deployment.name,
err.event, dt.dt_from_decimal(err.when))
exc = payload.get('exception')
if exc:
# group the messages ...
exc_str = str(exc)
print exc_str
error_messages[exc_str] = \
error_messages.get(exc_str, 0) + 1
# extract the code, if any ...
code = exc.get('kwargs', {}).get('code')
if code:
codes[code] = codes.get(code, 0) + 1
cause = code
cause_key = (key, cause)
causes[cause_key] = causes.get(cause_key, 0) + 1
def dump_breakdown(totals, label):
p = prettytable.PrettyTable(["Category", "Count"])
for k, v in totals.iteritems():
p.add_row([k, v])
print label
p.sortby = 'Count'
print p
def dump_summary(info, label):
print "-- %s by operation by cell by platform --" % (label,)
p = prettytable.PrettyTable(["Operation", "Cell", "Platform", "Count",
"Min", "Max", "Avg"])
for c in ["Count", "Min", "Max", "Avg"]:
p.align[c] = 'r'
total = 0
op_totals = {}
cell_totals = {}
platform_totals = {}
for key, count in info.iteritems():
operation, platform, cell = key
readable = image_type.readable(platform)
text = "n/a"
if readable:
text = ", ".join(readable)
_min, _max, _count, _total = durations[key]
_avg = float(_total) / float(_count)
_fmin = dt.sec_to_str(_min)
_fmax = dt.sec_to_str(_max)
_favg = dt.sec_to_str(_avg * 100.0)
op_totals[operation] = op_totals.get(operation, 0) + count
cell_totals[cell] = cell_totals.get(cell, 0) + count
platform_totals[text] = platform_totals.get(text, 0) + count
p.add_row([operation, cell, text, count, _fmin, _fmax, _favg])
total += count
p.sortby = 'Count'
print p
dump_breakdown(op_totals, "Total %s by Operation" % label)
dump_breakdown(cell_totals, "Total %s by Cell" % label)
dump_breakdown(platform_totals, "Total %s by Platform" % label)
print
return total
print
print "SUMMARY"
print
good = dump_summary(successes, "Success")
bad = dump_summary(failures, "Failures")
print "====================================================="
print "Total Success: %d Total Failure: %d" % (good, bad)
print
print "-- Errors by Tenant --"
p = prettytable.PrettyTable(["Tenant", "Count"])
for tenant, count in tenant_issues.iteritems():
p.add_row([tenant, count])
p.sortby = 'Count'
print p
print
print "-- Return code counts --"
p = prettytable.PrettyTable(["Return Code", "Count"])
for k, v in codes.iteritems():
p.add_row([k, v])
p.sortby = 'Count'
print p
print
print "-- Cause breakdown --"
p = prettytable.PrettyTable(["Cause", "Operation", "Cell", "Platform", "Count"])
for cause_key, count in causes.iteritems():
key, cause = cause_key
operation, platform, cell = key
readable = image_type.readable(platform)
text = "n/a"
if readable:
text = ", ".join(readable)
p.add_row([cause, operation, cell, text, count])
p.sortby = 'Count'
print p
print
print "-- Error Message Counts --"
p = prettytable.PrettyTable(["Count", "Message"])
for k, v in error_messages.iteritems():
p.add_row([v, k[:80]])
p.sortby = 'Count'
print p

View File

@@ -12,8 +12,23 @@ def dt_to_decimal(utc):
def dt_from_decimal(dec):
if dec == None:
return "n/a"
integer = int(dec)
micro = (dec - decimal.Decimal(integer)) * decimal.Decimal(1000000)
daittyme = datetime.datetime.utcfromtimestamp(integer)
return daittyme.replace(microsecond=micro)
def sec_to_str(sec):
sec = int(sec)
if sec < 60:
return "%ds" % sec
minutes = sec / 60
sec = sec % 60
if minutes < 60:
return "%d:%02ds" % (minutes, sec)
hours = minutes / 60
minutes = minutes % 60
return "%02d:%02d:%02d" % (hours, minutes, sec)

View File

@@ -1,37 +1,71 @@
import models
def get_or_create_deployment(name):
return models.Deployment.objects.get_or_create(name=name)
def create_rawdata(**kwargs):
return models.RawData(**kwargs)
def create_lifecycle(**kwargs):
return models.Lifecycle(**kwargs)
def find_lifecycles(**kwargs):
return models.Lifecycle.objects.select_related().filter(**kwargs)
def create_timing(**kwargs):
return models.Timing(**kwargs)
def find_timings(**kwargs):
return models.Timing.objects.select_related().filter(**kwargs)
def create_request_tracker(**kwargs):
return models.RequestTracker(**kwargs)
def find_request_trackers(**kwargs):
return models.RequestTracker.objects.filter(**kwargs)
def create_instance_usage(**kwargs):
return models.InstanceUsage(**kwargs)
def get_or_create_instance_usage(**kwargs):
return models.InstanceUsage.objects.get_or_create(**kwargs)
def get_instance_usage(**kwargs):
return models.InstanceUsage.objects.get(**kwargs)
usage = None
try:
usage = models.InstanceUsage.objects.get(**kwargs)
except models.InstanceUsage.DoesNotExist:
pass
return usage
def create_instance_delete(**kwargs):
return models.InstanceDeletes(**kwargs)
def get_instance_delete(**kwargs):
delete = None
try:
delete = models.InstanceDeletes.objects.get(**kwargs)
except models.InstanceDeletes.DoesNotExist:
pass
return delete
def create_instance_exists(**kwargs):
return models.InstanceExists(**kwargs)
def save(obj):
obj.save()

65
stacktach/dbapi.py Normal file
View File

@@ -0,0 +1,65 @@
import decimal
import json
from django.forms.models import model_to_dict
from django.http import HttpResponse
import datetime_to_decimal
import models
def rsp(data):
return HttpResponse(json.dumps(data), content_type="application/json")
def list_usage_launches(request):
filter_args = {}
if 'instance' in request.GET:
filter_args['instance'] = request.GET['instance']
if len(filter_args) > 0:
objects = models.InstanceUsage.objects.filter(**filter_args)
else:
objects = models.InstanceUsage.objects.all()
dicts = _convert_model_list(objects.order_by("launched_at"))
return rsp({'launches': dicts})
def list_usage_deletes(request):
filter_args = {}
if 'instance' in request.GET:
filter_args['instance'] = request.GET['instance']
if len(filter_args) > 0:
objects = models.InstanceDeletes.objects.filter(**filter_args)
else:
objects = models.InstanceDeletes.objects.all()
dicts = _convert_model_list(objects.order_by("launched_at"))
return rsp({'deletes': dicts})
def list_usage_exists(request):
filter_args = {}
if 'instance' in request.GET:
filter_args['instance'] = request.GET['instance']
if len(filter_args) > 0:
objects = models.InstanceExists.objects.filter(**filter_args)
else:
objects = models.InstanceExists.objects.all()
dicts = _convert_model_list(objects.order_by("id"))
return rsp({'exists': dicts})
def _convert_model_list(list):
converted = []
for item in list:
dict = model_to_dict(item)
for key in dict:
if isinstance(dict[key], decimal.Decimal):
dict[key] = str(datetime_to_decimal.dt_from_decimal(dict[key]))
converted.append(dict)
return converted

60
stacktach/image_type.py Normal file
View File

@@ -0,0 +1,60 @@
BASE_IMAGE = 0x1
SNAPSHOT_IMAGE = 0x2
LINUX_IMAGE = 0x10
OS_UBUNTU = 0x100
OS_DEBIAN = 0x200
OS_CENTOS = 0x400
OS_RHEL = 0x800
def isset(num, flag):
if not num:
return False
return num & flag > 0
flags = {'base' : BASE_IMAGE,
'snapshot' : SNAPSHOT_IMAGE,
'linux' : LINUX_IMAGE,
'ubuntu' : OS_UBUNTU,
'debian' : OS_DEBIAN,
'centos' : OS_CENTOS,
'rhel' : OS_RHEL}
def readable(num):
result = []
for k, v in flags.iteritems():
if isset(num, v):
result.append(k)
return result
def get_numeric_code(payload, default=0):
meta = payload.get('image_meta', {})
if default == None:
default = 0
num = default
image_type = meta.get('image_type', '')
if image_type == 'base':
num |= BASE_IMAGE
if image_type == 'snapshot':
num |= SNAPSHOT_IMAGE
os_type = meta.get('os_type', payload.get('os_type', ''))
if os_type == 'linux':
num |= LINUX_IMAGE
os_distro = meta.get('os_distro', '')
if os_distro == 'ubuntu':
num |= OS_UBUNTU
if os_distro == 'debian':
num |= OS_DEBIAN
if os_distro == 'centos':
num |= OS_CENTOS
if os_distro == 'rhel':
num |= OS_RHEL
return num

View File

@@ -34,6 +34,9 @@ class RawData(models.Model):
blank=True, db_index=True)
old_task = models.CharField(max_length=30, null=True,
blank=True, db_index=True)
task = models.CharField(max_length=30, null=True,
blank=True, db_index=True)
image_type = models.IntegerField(null=True, default=0, db_index=True)
when = models.DecimalField(max_digits=20, decimal_places=6,
db_index=True)
publisher = models.CharField(max_length=100, null=True,
@@ -72,18 +75,26 @@ class Lifecycle(models.Model):
class InstanceUsage(models.Model):
instance = models.CharField(max_length=50, null=True,
blank=True, db_index=True)
#launched_at = models.IntegerField(null=True, db_index=True)
launched_at = models.DecimalField(null=True, max_digits=20,
decimal_places=6)
#deleted_at = models.IntegerField(null=True, db_index=True)
deleted_at = models.DecimalField(null=True, max_digits=20,
decimal_places=6)
request_id = models.CharField(max_length=50, null=True,
blank=True, db_index=True)
instance_type_id = models.CharField(max_length=50,
null=True,
blank=True,
db_index=True)
class InstanceDeletes(models.Model):
instance = models.CharField(max_length=50, null=True,
blank=True, db_index=True)
launched_at = models.DecimalField(null=True, max_digits=20,
decimal_places=6)
deleted_at = models.DecimalField(null=True, max_digits=20,
decimal_places=6)
raw = models.ForeignKey(RawData, null=True)
class InstanceExists(models.Model):
PENDING = 'pending'
VERIFIED = 'verified'
@@ -95,10 +106,8 @@ class InstanceExists(models.Model):
]
instance = models.CharField(max_length=50, null=True,
blank=True, db_index=True)
#launched_at = models.IntegerField(null=True, db_index=True)
launched_at = models.DecimalField(null=True, max_digits=20,
decimal_places=6)
#deleted_at = models.IntegerField(null=True, db_index=True)
deleted_at = models.DecimalField(null=True, max_digits=20,
decimal_places=6)
message_id = models.CharField(max_length=50, null=True,
@@ -112,6 +121,7 @@ class InstanceExists(models.Model):
default=PENDING)
raw = models.ForeignKey(RawData, related_name='+', null=True)
usage = models.ForeignKey(InstanceUsage, related_name='+', null=True)
delete = models.ForeignKey(InstanceDeletes, related_name='+', null=True)
class Timing(models.Model):

View File

@@ -72,7 +72,7 @@ def sec_to_time(diff):
def rsp(data):
return HttpResponse(json.dumps(data))
return HttpResponse(json.dumps(data), content_type="application/json")
def do_deployments(request):
@@ -297,3 +297,81 @@ def do_kpi(request, tenant_id=None):
results.append([event, sec_to_time(track.duration),
uuid, end_event.deployment.name])
return rsp(results)
def do_list_usage_launches(request):
filter_args = {}
if 'instance' in request.GET:
filter_args['instance'] = request.GET['instance']
if len(filter_args) > 0:
launches = models.InstanceUsage.objects.filter(**filter_args)
else:
launches = models.InstanceUsage.objects.all()
results = []
results.append(["UUID", "Launched At", "Instance Type Id"])
for launch in launches:
launched = None
if launch.launched_at:
launched = str(dt.dt_from_decimal(launch.launched_at))
results.append([launch.instance, launched, launch.instance_type_id])
return rsp(results)
def do_list_usage_deletes(request):
filter_args = {}
if 'instance' in request.GET:
filter_args['instance'] = request.GET['instance']
if len(filter_args) > 0:
deletes = models.InstanceDeletes.objects.filter(**filter_args)
else:
deletes = models.InstanceDeletes.objects.all()
results = []
results.append(["UUID", "Launched At", "Deleted At"])
for delete in deletes:
launched = None
if delete.launched_at:
launched = str(dt.dt_from_decimal(delete.launched_at))
deleted = None
if delete.deleted_at:
deleted = str(dt.dt_from_decimal(delete.deleted_at))
results.append([delete.instance, launched, deleted])
return rsp(results)
def do_list_usage_exists(request):
filter_args = {}
if 'instance' in request.GET:
filter_args['instance'] = request.GET['instance']
if len(filter_args) > 0:
exists = models.InstanceExists.objects.filter(**filter_args)
else:
exists = models.InstanceExists.objects.all()
results = []
results.append(["UUID", "Launched At", "Deleted At", "Instance Type Id",
"Message ID", "Status"])
for exist in exists:
launched = None
if exist.launched_at:
launched = str(dt.dt_from_decimal(exist.launched_at))
deleted = None
if exist.deleted_at:
deleted = str(dt.dt_from_decimal(exist.deleted_at))
results.append([exist.instance, launched, deleted,
exist.instance_type_id, exist.message_id,
exist.status])
return rsp(results)

View File

@@ -160,10 +160,11 @@ def make_resize_revert_end_json(launched_at, instance_type_id='1',
def create_raw(deployment, when, event, instance=INSTANCE_ID_1,
request_id=REQUEST_ID_1, state='active', old_task='',
host='compute', json=''):
host='compute', service='compute', json=''):
raw_values = {
'deployment': deployment,
'host': host,
'service': service,
'state': state,
'old_task': old_task,
'when': when,

View File

@@ -35,7 +35,7 @@ class ViewsLifecycleWorkflowTestCase(unittest.TestCase):
when3 = views.str_time_to_unix('2012-12-21 12:36:56.124')
self.update_raw = create_raw(self.deployment, when1,
'compute.instance.update',
host='api')
host='api', service='api')
self.start_raw = create_raw(self.deployment, when2,
'compute.instance.reboot.start')
self.end_raw = create_raw(self.deployment, when3,
@@ -105,7 +105,7 @@ class ViewsLifecycleWorkflowTestCase(unittest.TestCase):
'compute.instance.update',
instance=INSTANCE_ID_2,
request_id=REQUEST_ID_2,
host='api')
host='api', service='api')
start_raw2 = create_raw(self.deployment, when2,
'compute.instance.resize.start',
instance=INSTANCE_ID_2,
@@ -157,7 +157,7 @@ class ViewsLifecycleWorkflowTestCase(unittest.TestCase):
update_raw2 = create_raw(self.deployment, when1,
'compute.instance.update',
request_id=REQUEST_ID_2,
host='api')
host='api', service='api')
start_raw2 = create_raw(self.deployment, when2,
'compute.instance.resize.start',
request_id=REQUEST_ID_2)
@@ -227,7 +227,7 @@ class ViewsLifecycleWorkflowTestCase(unittest.TestCase):
'compute.instance.update',
instance=INSTANCE_ID_2,
request_id=REQUEST_ID_2,
host='api')
host='api', service='api')
start_raw2 = create_raw(self.deployment, when2,
'compute.instance.resize.start',
instance=INSTANCE_ID_2,
@@ -268,9 +268,9 @@ class ViewsLifecycleWorkflowTestCase(unittest.TestCase):
when2 = views.str_time_to_unix('2012-12-21 13:34:50.123')
when3 = views.str_time_to_unix('2012-12-21 13:37:50.124')
update_raw2 = create_raw(self.deployment, when1,
'compute.instance.update',
request_id=REQUEST_ID_2,
host='api')
'compute.instance.update',
request_id=REQUEST_ID_2,
host='api', service='api')
start_raw2 = create_raw(self.deployment, when2,
'compute.instance.resize.start',
request_id=REQUEST_ID_2)
@@ -309,7 +309,7 @@ class ViewsLifecycleWorkflowTestCase(unittest.TestCase):
update_raw2 = create_raw(self.deployment, when1,
'compute.instance.update',
request_id=REQUEST_ID_2,
host='api')
host='api', service='api')
start_raw2 = create_raw(self.deployment, when2,
'compute.instance.resize.start',
request_id=REQUEST_ID_2)
@@ -492,14 +492,6 @@ class ViewsUsageTestCase(unittest.TestCase):
def test_process_delete(self):
launched_str = '2012-12-21 06:34:50.123'
launched = views.str_time_to_unix(launched_str)
values = {
'instance': INSTANCE_ID_1,
'request_id': REQUEST_ID_1,
'instance_type_id': '1',
'launched_at': launched,
}
InstanceUsage(**values).save()
deleted_str = '2012-12-21 12:34:50.123'
deleted = views.str_time_to_unix(deleted_str)
json = test_utils.make_delete_end_json(launched_str, deleted_str)
@@ -508,10 +500,13 @@ class ViewsUsageTestCase(unittest.TestCase):
views._process_delete(raw)
usages = InstanceUsage.objects.all()
self.assertEqual(len(usages), 1)
usage = usages[0]
self.assertEqual(usage.deleted_at, deleted)
delete = InstanceDeletes.objects.all()
self.assertEqual(len(delete), 1)
delete = delete[0]
self.assertEqual(delete.instance, INSTANCE_ID_1)
self.assertEqual(delete.launched_at, launched)
self.assertEqual(delete.deleted_at, deleted)
self.assertEqual(delete.raw.id, raw.id)
def test_process_exists(self):
launched_str = '2012-12-21 06:34:50.123'
@@ -556,7 +551,6 @@ class ViewsUsageTestCase(unittest.TestCase):
'request_id': REQUEST_ID_1,
'instance_type_id': '1',
'launched_at': launched,
'deleted_at': deleted,
}
InstanceUsage(**values).save()
@@ -621,7 +615,6 @@ class ViewsUsageWorkflowTestCase(unittest.TestCase):
usage = usages[0]
self.assertOnUsage(usage, INSTANCE_ID_1, '1', launched, REQUEST_ID_1)
@unittest.skip("can't handle late starts yet")
def test_create_workflow_start_late(self):
created_str = '2012-12-21 06:30:50.123'
created = views.str_time_to_unix(created_str)
@@ -749,7 +742,6 @@ class ViewsUsageWorkflowTestCase(unittest.TestCase):
self.assertOnUsage(usage_after, INSTANCE_ID_1, '2', finish_time,
REQUEST_ID_2)
@unittest.skip("can't handle late starts yet")
def test_resize_workflow_start_late(self):
launched_str = '2012-12-21 06:34:50.123'
launched = views.str_time_to_unix(launched_str)
@@ -852,7 +844,6 @@ class ViewsUsageWorkflowTestCase(unittest.TestCase):
self.assertOnUsage(usage_after_revert, INSTANCE_ID_1, '1', end_time,
REQUEST_ID_3)
@unittest.skip("can't handle late starts yet")
def test_resize_revert_workflow_start_late(self):
launched_str = '2012-12-21 06:34:50.123'
launched = views.str_time_to_unix(launched_str)

View File

@@ -18,6 +18,18 @@ urlpatterns = patterns('',
'stacktach.stacky_server.do_watch'),
url(r'stacky/kpi/$', 'stacktach.stacky_server.do_kpi'),
url(r'stacky/kpi/(?P<tenant_id>\d+)/$', 'stacktach.stacky_server.do_kpi'),
url(r'stacky/usage/launches/$',
'stacktach.stacky_server.do_list_usage_launches'),
url(r'stacky/usage/deletes/$',
'stacktach.stacky_server.do_list_usage_deletes'),
url(r'stacky/usage/exists/$',
'stacktach.stacky_server.do_list_usage_exists'),
url(r'db/usage/launches/$',
'stacktach.dbapi.list_usage_launches'),
url(r'db/usage/deletes/$',
'stacktach.dbapi.list_usage_deletes'),
url(r'db/usage/exists/$', 'stacktach.dbapi.list_usage_exists'),
url(r'^(?P<deployment_id>\d+)/$', 'stacktach.views.home', name='home'),
url(r'^(?P<deployment_id>\d+)/details/(?P<column>\w+)/(?P<row_id>\d+)/$',

View File

@@ -1,17 +1,18 @@
# Copyright 2012 - Dark Secret Software Inc.
import datetime
import json
import pprint
from django import db
from django import http
from django.shortcuts import render_to_response
from django import template
from stacktach import datetime_to_decimal as dt
from stacktach import db as stackdb
from stacktach import models
from stacktach import datetime_to_decimal as dt
import datetime
import json
import pprint
from stacktach import image_type
STACKDB = stackdb
@@ -21,7 +22,9 @@ def _extract_states(payload):
return {
'state' : payload.get('state', ""),
'old_state' : payload.get('old_state', ""),
'old_task' : payload.get('old_task_state', "")
'old_task' : payload.get('old_task_state', ""),
'task' : payload.get('new_task_state', ""),
'image_type' : image_type.get_numeric_code(payload)
}
@@ -221,10 +224,11 @@ def _process_usage_for_new_launch(raw):
values['instance'] = payload['instance_id']
values['request_id'] = notif[1]['_context_request_id']
if raw.event == INSTANCE_EVENT['create_start']:
values['instance_type_id'] = payload['instance_type_id']
(usage, new) = STACKDB.get_or_create_instance_usage(**values)
if raw.event == INSTANCE_EVENT['create_start']:
usage.instance_type_id = payload['instance_type_id']
usage = STACKDB.create_instance_usage(**values)
STACKDB.save(usage)
@@ -233,8 +237,8 @@ def _process_usage_for_updates(raw):
payload = notif[1]['payload']
instance_id = payload['instance_id']
request_id = notif[1]['_context_request_id']
usage = STACKDB.get_instance_usage(instance=instance_id,
request_id=request_id)
(usage, new) = STACKDB.get_or_create_instance_usage(instance=instance_id,
request_id=request_id)
if raw.event in [INSTANCE_EVENT['create_end'],
INSTANCE_EVENT['resize_finish_end'],
@@ -253,28 +257,37 @@ def _process_delete(raw):
notif = json.loads(raw.json)
payload = notif[1]['payload']
instance_id = payload['instance_id']
launched_at = payload['launched_at']
launched_at = str_time_to_unix(launched_at)
instance = STACKDB.get_instance_usage(instance=instance_id,
launched_at=launched_at)
instance.deleted_at = str_time_to_unix(payload['deleted_at'])
STACKDB.save(instance)
launched_at = str_time_to_unix(payload['launched_at'])
deleted_at = str_time_to_unix(payload['deleted_at'])
values = {
'instance': instance_id,
'launched_at': launched_at,
'deleted_at': deleted_at,
'raw': raw
}
delete = STACKDB.create_instance_delete(**values)
STACKDB.save(delete)
def _process_exists(raw):
notif = json.loads(raw.json)
payload = notif[1]['payload']
instance_id = payload['instance_id']
launched_at = payload['launched_at']
launched_at = str_time_to_unix(launched_at)
launched_at = str_time_to_unix(payload['launched_at'])
launched_range = (launched_at, launched_at+1)
usage = STACKDB.get_instance_usage(instance=instance_id,
launched_at=launched_at)
launched_at__range=launched_range)
delete = STACKDB.get_instance_delete(instance=instance_id,
launched_at__range=launched_range)
values = {}
values['message_id'] = notif[1]['message_id']
values['instance'] = instance_id
values['launched_at'] = launched_at
values['instance_type_id'] = payload['instance_type_id']
values['usage'] = usage
if usage:
values['usage'] = usage
if delete:
values['delete'] = delete
values['raw'] = raw
deleted_at = payload.get('deleted_at')
@@ -309,13 +322,16 @@ def aggregate_usage(raw):
def str_time_to_unix(when):
try:
when = datetime.datetime.strptime(when, "%Y-%m-%d %H:%M:%S")
except ValueError:
try:
when = datetime.datetime.strptime(when, "%Y-%m-%d %H:%M:%S.%f")
except ValueError:
# Old way of doing it
when = datetime.datetime.strptime(when, "%Y-%m-%dT%H:%M:%S.%f")
except Exception, e:
pass
try:
# Old way of doing it
when = datetime.datetime.strptime(when, "%Y-%m-%dT%H:%M:%S.%f")
except Exception, e:
print "BAD DATE: ", e
return dt.dt_to_decimal(when)

View File

@@ -410,13 +410,13 @@ class StacktackUsageParsingTestCase(unittest.TestCase):
event = 'compute.instance.create.start'
raw = utils.create_raw(self.mox, when, event=event, json_str=json_str)
usage = self.mox.CreateMockAnything()
views.STACKDB.create_instance_usage(instance=INSTANCE_ID_1,
request_id=REQUEST_ID_1,
instance_type_id = '1')\
.AndReturn(usage)
views.STACKDB.get_or_create_instance_usage(instance=INSTANCE_ID_1,
request_id=REQUEST_ID_1)\
.AndReturn((usage, True))
views.STACKDB.save(usage)
self.mox.ReplayAll()
views._process_usage_for_new_launch(raw)
self.assertEquals(usage.instance_type_id, '1')
self.mox.VerifyAll()
def test_process_usage_for_updates_create_end(self):
@@ -433,9 +433,9 @@ class StacktackUsageParsingTestCase(unittest.TestCase):
usage.instance = INSTANCE_ID_1
usage.request_id = REQUEST_ID_1
usage.instance_type_id = '1'
views.STACKDB.get_instance_usage(instance=INSTANCE_ID_1,
request_id=REQUEST_ID_1)\
.AndReturn(usage)
views.STACKDB.get_or_create_instance_usage(instance=INSTANCE_ID_1,
request_id=REQUEST_ID_1)\
.AndReturn((usage, True))
views.STACKDB.save(usage)
self.mox.ReplayAll()
@@ -459,9 +459,9 @@ class StacktackUsageParsingTestCase(unittest.TestCase):
usage.instance = INSTANCE_ID_1
usage.request_id = REQUEST_ID_1
usage.instance_type_id = '1'
views.STACKDB.get_instance_usage(instance=INSTANCE_ID_1,
request_id=REQUEST_ID_1)\
.AndReturn(usage)
views.STACKDB.get_or_create_instance_usage(instance=INSTANCE_ID_1,
request_id=REQUEST_ID_1)\
.AndReturn((usage, True))
views.STACKDB.save(usage)
self.mox.ReplayAll()
@@ -484,9 +484,9 @@ class StacktackUsageParsingTestCase(unittest.TestCase):
usage = self.mox.CreateMockAnything()
usage.instance = INSTANCE_ID_1
usage.request_id = REQUEST_ID_1
views.STACKDB.get_instance_usage(instance=INSTANCE_ID_1,
request_id=REQUEST_ID_1)\
.AndReturn(usage)
views.STACKDB.get_or_create_instance_usage(instance=INSTANCE_ID_1,
request_id=REQUEST_ID_1)\
.AndReturn((usage, True))
views.STACKDB.save(usage)
self.mox.ReplayAll()
@@ -508,23 +508,22 @@ class StacktackUsageParsingTestCase(unittest.TestCase):
event = 'compute.instance.delete.end'
raw = utils.create_raw(self.mox, delete_decimal, event=event,
json_str=json_str)
usage = self.mox.CreateMockAnything()
usage.instance = INSTANCE_ID_1
usage.request_id = REQUEST_ID_1
usage.instance_type_id = '1'
usage.launched_at = launch_decimal
views.STACKDB.get_instance_usage(instance=INSTANCE_ID_1,
launched_at=launch_decimal)\
.AndReturn(usage)
views.STACKDB.save(usage)
delete = self.mox.CreateMockAnything()
delete.instance = INSTANCE_ID_1
delete.launched_at = launch_decimal
delete.deleted_at = delete_decimal
views.STACKDB.create_instance_delete(instance=INSTANCE_ID_1,
launched_at=launch_decimal,
deleted_at=delete_decimal,
raw=raw)\
.AndReturn(delete)
views.STACKDB.save(delete)
self.mox.ReplayAll()
views._process_delete(raw)
self.assertEqual(usage.instance, INSTANCE_ID_1)
self.assertEqual(usage.request_id, REQUEST_ID_1)
self.assertEqual(usage.instance_type_id, '1')
self.assertEqual(usage.launched_at, launch_decimal)
self.assertEqual(usage.deleted_at, delete_decimal)
self.assertEqual(delete.instance, INSTANCE_ID_1)
self.assertEqual(delete.launched_at, launch_decimal)
self.assertEqual(delete.deleted_at, delete_decimal)
self.mox.VerifyAll()
def test_process_exists(self):
@@ -538,9 +537,13 @@ class StacktackUsageParsingTestCase(unittest.TestCase):
raw = utils.create_raw(self.mox, current_decimal, event=event,
json_str=json_str)
usage = self.mox.CreateMockAnything()
launched_range = (launch_decimal, launch_decimal+1)
views.STACKDB.get_instance_usage(instance=INSTANCE_ID_1,
launched_at=launch_decimal)\
launched_at__range=launched_range)\
.AndReturn(usage)
views.STACKDB.get_instance_delete(instance=INSTANCE_ID_1,
launched_at__range=launched_range)\
.AndReturn(None)
exists_values = {
'message_id': MESSAGE_ID_1,
'instance': INSTANCE_ID_1,
@@ -570,9 +573,14 @@ class StacktackUsageParsingTestCase(unittest.TestCase):
raw = utils.create_raw(self.mox, current_decimal, event=event,
json_str=json_str)
usage = self.mox.CreateMockAnything()
launched_range = (launch_decimal, launch_decimal+1)
views.STACKDB.get_instance_usage(instance=INSTANCE_ID_1,
launched_at=launch_decimal)\
launched_at__range=launched_range)\
.AndReturn(usage)
delete = self.mox.CreateMockAnything()
views.STACKDB.get_instance_delete(instance=INSTANCE_ID_1,
launched_at__range=launched_range)\
.AndReturn(delete)
exists_values = {
'message_id': MESSAGE_ID_1,
'instance': INSTANCE_ID_1,
@@ -580,6 +588,7 @@ class StacktackUsageParsingTestCase(unittest.TestCase):
'deleted_at': deleted_decimal,
'instance_type_id': '1',
'usage': usage,
'delete': delete,
'raw': raw,
}
exists = self.mox.CreateMockAnything()

View File

@@ -22,6 +22,7 @@ import kombu
import kombu.entity
import kombu.mixins
import logging
import sys
import time
from pympler.process import ProcessMemoryInfo
@@ -33,6 +34,8 @@ LOG = logging.getLogger(__name__)
LOG.setLevel(logging.DEBUG)
handler = logging.handlers.TimedRotatingFileHandler('worker.log',
when='h', interval=6, backupCount=4)
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
handler.setFormatter(formatter)
LOG.addHandler(handler)
@@ -144,16 +147,21 @@ def run(deployment_config):
transport="librabbitmq",
virtual_host=virtual_host)
while continue_running():
LOG.debug("Processing on '%s'" % name)
with kombu.connection.BrokerConnection(**params) as conn:
try:
consumer = NovaConsumer(name, conn, deployment, durable,
queue_arguments)
consumer.run()
except Exception as e:
LOG.exception("name=%s, exception=%s. Reconnecting in 5s" %
(name, e))
time.sleep(5)
LOG.debug("Completed processing on '%s'" % name)
while True:
try:
LOG.debug("Processing on '%s'" % name)
with kombu.connection.BrokerConnection(**params) as conn:
try:
consumer = NovaConsumer(name, conn, deployment, durable,
queue_arguments)
consumer.run()
except Exception as e:
LOG.exception("name=%s, exception=%s. Reconnecting in 5s" %
(name, e))
time.sleep(5)
LOG.debug("Completed processing on '%s'" % name)
except:
e = sys.exc_info()[0]
msg = "Uncaught exception: deployment=%s, exception=%s. Retrying in 5s"
LOG.exception(msg % (name, e))
time.sleep(5)