Add postgresql retention cronjob

Add a cronjob that purges the Drydock DB based on a configurable retention-days value. Also add a Drydock API endpoint that purges the tasks and result_message tables and runs VACUUM FULL on the Drydock DB.

Change-Id: Ibcce61ecdafa637ca3ffec654152060aae26d4b8
SPEARS, DUSTIN (ds443n) 2022-04-04 10:55:33 -04:00
parent e3f984d92b
commit a171f3c7a5
15 changed files with 332 additions and 1 deletions
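For illustration, a minimal sketch of how the new retention endpoint might be exercised directly over HTTP once this change is deployed; the host, port, and token below are placeholders, and the X-Auth-Token header assumes Drydock's usual keystone token authentication:

import requests

# Placeholder values; substitute a real Drydock URL and keystone token.
DRYDOCK_URL = "http://drydock-api.ucp.svc.cluster.local:9000/api/v1.0"
TOKEN = "<keystone-token>"

# Purge tasks older than 90 days; the handler falls back to 90 days
# when the 'days' query parameter is omitted or not an integer.
resp = requests.delete(
    DRYDOCK_URL + "/tasks",
    params={"days": "90"},
    headers={"X-Auth-Token": TOKEN},
    timeout=30)
resp.raise_for_status()
print(resp.text)  # "Tables purged successfully." on success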

View File

@ -15,7 +15,7 @@
apiVersion: v1
description: A Helm chart for Drydock
name: drydock
version: 0.1.0
version: 0.1.1
keywords:
- drydock
home: https://github.com/openstack/airship-drydock

View File

@ -0,0 +1,43 @@
#!/bin/bash
{{/*
Copyright (c) 2017 AT&T Intellectual Property. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/}}
set -ex
log () {
msg_default="Need some text to log"
level_default="INFO"
component_default="Drydock Retention Cronjob"
msg=${1:-$msg_default}
level=${2:-$level_default}
component=${3:-"$component_default"}
echo "$(date +'%Y-%m-%d %H:%M:%S,%3N') - ${component} - ${level} - ${msg}"
}
delete_tasks () {
drydock task delete --days {{ .Values.endpoints.postgresql.days_to_retain }}
}
if delete_tasks ; then
log "DB table purge completed successfully"
exit 0
else
log "Failed to purge tables!" "ERROR"
exit 1
fi

View File

@ -32,5 +32,7 @@ data:
{{- include "helm-toolkit.scripts.pg_db_init" . | indent 4 }}
db-sync.sh: |+
{{ tuple "bin/_db-sync.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }}
db-pg-purge-table.sh: |+
{{ tuple "bin/_db-pg-purge-table.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }}
...
{{- end -}}

View File

@ -0,0 +1,71 @@
{{/*
Copyright (c) 2017 AT&T Intellectual Property. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/}}
{{- if .Values.manifests.cronjob_drydock_db_cleanup }}
{{- $envAll := . }}
{{- $serviceAccountName := "drydock-db-cleanup" }}
{{ tuple $envAll "db_cleanup" $serviceAccountName | include "helm-toolkit.snippets.kubernetes_pod_rbac_serviceaccount" }}
---
apiVersion: batch/v1
kind: CronJob
metadata:
name: drydock-db-cleanup
labels:
{{ tuple $envAll "drydock" "db-cleanup" | include "helm-toolkit.snippets.kubernetes_metadata_labels" | indent 4 }}
annotations:
{{ tuple $envAll | include "helm-toolkit.snippets.release_uuid" }}
spec:
schedule: {{ .Values.endpoints.postgresql.cleanup_schedule | quote }}
jobTemplate:
spec:
template:
metadata:
labels:
{{ tuple $envAll "drydock" "db-cleanup" | include "helm-toolkit.snippets.kubernetes_metadata_labels" | indent 12 }}
annotations:
{{ tuple $envAll | include "helm-toolkit.snippets.release_uuid" | indent 12 }}
{{ dict "envAll" $envAll "podName" "drydock-db-cleanup" "containerNames" (list "drydock-db-cleanup") | include "helm-toolkit.snippets.kubernetes_mandatory_access_control_annotation" | indent 12 }}
configmap-bin-hash: {{ tuple "configmap-bin.yaml" . | include "helm-toolkit.utils.hash" }}
spec:
{{ dict "envAll" $envAll "application" "db_cleanup" | include "helm-toolkit.snippets.kubernetes_pod_security_context" | indent 10 }}
serviceAccountName: {{ $serviceAccountName }}
restartPolicy: OnFailure
nodeSelector:
{{ .Values.labels.job.node_selector_key }}: {{ .Values.labels.job.node_selector_value }}
containers:
- name: drydock-db-cleanup
image: {{ .Values.images.tags.drydock_db_cleanup | quote }}
imagePullPolicy: {{ .Values.images.pull_policy | quote }}
{{ tuple $envAll $envAll.Values.pod.resources.cronjobs.drydock_db_cleanup | include "helm-toolkit.snippets.kubernetes_resources" | indent 12 }}
{{ dict "envAll" $envAll "application" "db_cleanup" "container" "drydock_db_cleanup" | include "helm-toolkit.snippets.kubernetes_container_security_context" | indent 14 }}
envFrom:
- secretRef:
name: {{ .Values.secrets.identity.drydock }}
command:
- /tmp/db-pg-purge-table.sh
volumeMounts:
- name: drydock-bin
mountPath: /tmp/db-pg-purge-table.sh
subPath: db-pg-purge-table.sh
readOnly: true
volumes:
- name: drydock-bin
configMap:
name: drydock-bin
defaultMode: 0555
...
{{- end }}

View File

@ -34,6 +34,10 @@
# POST /api/v1.0/tasks
#"physical_provisioner:destroy_node": "role:admin"
# Deletes tasks by age
# DELETE /api/v1.0/tasks
#"physical_provisioner:delete_tasks": "role:admin"
# Read loaded design data
# GET /api/v1.0/designs
# GET /api/v1.0/designs/{design_id}

View File

@ -36,6 +36,7 @@ images:
ks_service: docker.io/openstackhelm/heat:newton
ks_endpoints: docker.io/openstackhelm/heat:newton
drydock_db_init: docker.io/postgres:9.5
drydock_db_cleanup: quay.io/airshipit/drydock:master
drydock_db_sync: quay.io/airshipit/drydock:master
pull_policy: "IfNotPresent"
#TODO(mattmceuen): This chart does not yet support local image caching
@ -66,6 +67,9 @@ pod:
drydock-db-init:
init: runtime/default
drydock-db-init: runtime/default
drydock-db-cleanup:
init: runtime/default
drydock-db-cleanup: runtime/default
drydock-db-sync:
init: runtime/default
drydock-db-sync: runtime/default
@ -88,6 +92,13 @@ pod:
drydock_db_init:
readOnlyRootFilesystem: true
allowPrivilegeEscalation: false
db_cleanup:
pod:
runAsUser: 65534
container:
drydock_db_cleanup:
readOnlyRootFilesystem: true
allowPrivilegeEscalation: false
db_sync:
pod:
runAsUser: 65534
@ -175,6 +186,14 @@ pod:
requests:
memory: "128Mi"
cpu: "100m"
cronjobs:
drydock_db_cleanup:
limits:
memory: "128Mi"
cpu: "100m"
requests:
memory: "128Mi"
cpu: "100m"
network_policy:
drydock:
@ -184,6 +203,7 @@ network_policy:
- {}
manifests:
cronjob_drydock_db_cleanup: false
job_ks_service: true
job_ks_user: true
job_ks_endpoints: true
@ -304,6 +324,10 @@ endpoints:
# ca: replace
# key: replace
postgresql:
# frequency to run table purges
cleanup_schedule: "0 1 * * *"
# number of days of tasks to retain in the tasks table
days_to_retain: "90"
name: postgresql
auth:
admin:

View File

@ -38,6 +38,10 @@
# POST /api/v1.0/tasks
#"physical_provisioner:destroy_nodes": "role:admin"
# Deletes tasks by age
# DELETE /api/v1.0/tasks
#"physical_provisioner:delete_tasks": "role:admin"
# Create relabel_nodes task
# POST /api/v1.0/tasks
#"physical_provisioner:relabel_nodes": "role:admin"

View File

@ -38,6 +38,10 @@
# POST /api/v1.0/tasks
#"physical_provisioner:destroy_nodes": "role:admin"
# Deletes tasks by age
# DELETE /api/v1.0/tasks
#"physical_provisioner:delete_tasks": "role:admin"
# Create relabel_nodes task
# POST /api/v1.0/tasks
#"physical_provisioner:relabel_nodes": "role:admin"

View File

@ -156,3 +156,17 @@ class TaskBuildData(CliAction):
def invoke(self):
return self.api_client.get_task_build_data(self.task_id)
class TasksDelete(CliAction):
"""Action to delete tasks in database."""
def __init__(self, api_client, days):
"""
:param DrydockClient api_client: the api client instance used for invocation.
:param str days: Number of days of tasks to keep, based on the created timestamp
"""
super().__init__(api_client)
self.days = days
def invoke(self):
return self.api_client.delete_tasks(days=self.days)

View File

@ -20,6 +20,7 @@ from drydock_provisioner.cli.task.actions import TaskList
from drydock_provisioner.cli.task.actions import TaskShow
from drydock_provisioner.cli.task.actions import TaskCreate
from drydock_provisioner.cli.task.actions import TaskBuildData
from drydock_provisioner.cli.task.actions import TasksDelete
@click.group()
@ -130,3 +131,15 @@ def task_builddata(ctx, task_id=None, output='yaml'):
click.echo(
yaml.safe_dump(
task_bd, allow_unicode=True, default_flow_style=False))
@task.command(name='delete')
@click.option('--days', '-d', help='Number of days of tasks to retain (required)')
@click.pass_context
def task_delete(ctx, days=None):
"""Delete tasks from database"""
if not days:
ctx.fail('The number of days must be specified using --days or -d')
click.echo(
TasksDelete(ctx.obj['CLIENT'], days=days).invoke())

View File

@ -86,6 +86,28 @@ class TasksResource(StatefulResource):
self.return_error(
resp, falcon.HTTP_500, message="Unknown error", retry=False)
@policy.ApiEnforcer('physical_provisioner:delete_tasks')
def on_delete(self, req, resp):
"""Handler resource for /tasks delete endpoint."""
try:
days_to_retain = int(req.params["days"])
except Exception:
days_to_retain = 90
try:
retention_status = self.state_manager.task_retention(
retain_days=str(days_to_retain))
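# task_retention returns False if any of the purge steps failed.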
if not retention_status:
resp.status = falcon.HTTP_404
return
resp.body = "Tables purged successfully."
except Exception as e:
self.error(req.context, "Unknown error: %s" % (str(e)))
resp.body = "Unexpected error."
resp.status = falcon.HTTP_500
return
resp.status = falcon.HTTP_200
@policy.ApiEnforcer('physical_provisioner:validate_design')
def task_validate_design(self, req, resp, json_data):
"""Create async task for validate design."""

View File

@ -129,6 +129,22 @@ class DrydockClient(object):
return resp.json()
def delete_tasks(self, days=None):
"""
Enforce the task retention policy by deleting tasks older than the given number of days.
:param int days: number of days of tasks to retain; the API defaults to 90 days when the parameter is omitted.
"""
endpoint = 'v1.0/tasks'
endpoint = endpoint + '?days=' + str(days)
resp = self.session.delete(endpoint)
self._check_response(resp)
return "Task table purged successfully."
def create_task(self, design_ref, task_action, node_filter=None):
"""
Create a new task in Drydock

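For completeness, a minimal sketch of driving the same purge through the Python client method added above; the module path and the assumption that DrydockClient wraps an already-authenticated DrydockSession follow the existing drydock_client layout and are not part of this change:

from drydock_provisioner.drydock_client.client import DrydockClient

def purge_drydock_tasks(session, days=90):
    # 'session' is an authenticated DrydockSession; building one is
    # deployment-specific (Drydock host, keystone token) and not shown here.
    client = DrydockClient(session)
    # Issues DELETE v1.0/tasks?days=<days> and returns a short status string.
    return client.delete_tasks(days=days)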
View File

@ -144,6 +144,34 @@ class DrydockSession(object):
return resp
def delete(self, endpoint, query=None, timeout=None):
"""
Send a DELETE request to Drydock.
:param string endpoint: The URL string following the hostname and API prefix
:param dict query: A dict of k, v pairs to add to the query string
:param timeout: A single or tuple value for connect, read timeout.
A single value indicates the read timeout only
:return: A requests.Response object
"""
auth_refresh = False
while True:
url = self.base_url + endpoint
self.logger.debug('DELETE ' + url)
self.logger.debug('Query Params: ' + str(query))
resp = self.__session.delete(
url, params=query, timeout=self._timeout(timeout))
if resp.status_code == 401 and not auth_refresh:
self.set_auth()
auth_refresh = True
else:
break
return resp
def _timeout(self, timeout=None):
"""Calculate the default timeouts for this session

View File

@ -95,6 +95,12 @@ class DrydockPolicy(object):
'path': '/api/v1.0/tasks',
'method': 'POST'
}]),
policy.DocumentedRuleDefault('physical_provisioner:delete_tasks',
'role:admin', 'Deletes tasks by age',
[{
'path': '/api/v1.0/tasks',
'method': 'DELETE'
}]),
policy.DocumentedRuleDefault('physical_provisioner:relabel_nodes',
'role:admin', 'Create relabel_nodes task',
[{

View File

@ -241,6 +241,23 @@ class DrydockState(object):
% (str(task_id), str(ex)))
return False
def delete_result_message(self, task_id, msg):
"""Delete a result message to database attached to task task_id.
:param task_id: uuid.UUID ID of the task the msg belongs to
:param msg: instance of objects.TaskStatusMessage
"""
try:
with self.db_engine.connect() as conn:
msg_fields = dict(msg.to_db(), task_id=task_id.bytes)
# Delete supports .where(), not .values(); match the message's stored fields.
query = self.result_message_tbl.delete().where(
sql.and_(*[self.result_message_tbl.c[k] == v
for k, v in msg_fields.items()]))
conn.execute(query)
return True
except Exception as ex:
self.logger.error("Error delete result message for task %s: %s"
% (str(task_id), str(ex)))
return False
def _assemble_tasks(self, task_list=None):
"""Attach all the appropriate result messages to the tasks in the list.
@ -304,6 +321,69 @@ class DrydockState(object):
"Error updating task %s: %s" % (str(task.task_id), str(ex)))
return False
def task_retention(self, retain_days):
"""Delete all tasks in the database older than x days.
:param days: number of days to keep tasks
"""
with self.db_engine.connect() as conn:
try:
query_tasks_text = sql.text(
"DELETE FROM tasks WHERE created < now() - interval '"
+ retain_days
+ " days'").execution_options(autocommit=True)
conn.execute(query_tasks_text)
conn.close()
except Exception as ex:
self.logger.error(
"Error deleting tasks: %s" % str(ex))
return False
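# Second pass: delete subtasks orphaned by the first pass (their parent task no longer exists).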
with self.db_engine.connect() as conn:
try:
query_subtasks_text = (
"DELETE FROM tasks "
"WHERE parent_task_id IS NOT NULL AND "
"parent_task_id NOT IN "
"(SELECT task_id FROM tasks);")
conn.execute(sql.text(query_subtasks_text))
conn.close()
except Exception as ex:
self.logger.error(
"Error deleting subtasks: %s" % str(ex))
return False
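# Third pass: delete result messages that no longer reference an existing task.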
with self.db_engine.connect() as conn:
try:
query_result_message_text = (
"DELETE FROM result_message WHERE ts IN "
"(SELECT result_message.ts FROM result_message "
"LEFT JOIN tasks ON "
"result_message.task_id=tasks.task_id "
"WHERE tasks.task_id IS NULL);")
conn.execute(sql.text(query_result_message_text))
conn.close()
except Exception as ex:
self.logger.error(
"Error deleting result messages: %s" % str(ex))
return False
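# Final pass: reclaim disk space with VACUUM FULL, which cannot run inside a transaction, so the raw DBAPI connection is switched to isolation level 0 (autocommit) first and restored afterwards.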
with self.db_engine.connect() as conn:
try:
real_conn = conn.connection
old_isolation_level = real_conn.isolation_level
real_conn.set_isolation_level(0)
query_vacuum_text = sql.text("VACUUM FULL")
conn.execute(query_vacuum_text)
real_conn.set_isolation_level(old_isolation_level)
conn.close()
except Exception as ex:
self.logger.error(
"Error running vacuum full: %s" % str(ex))
return False
return True
def add_subtask(self, task_id, subtask_id):
"""Add new task to subtask list.