From b734cfefc554434228e3b8f654b6c36b137a29bd Mon Sep 17 00:00:00 2001 From: Clark Boylan Date: Mon, 28 Sep 2015 10:07:31 -0700 Subject: [PATCH] Restart jenkins masters safely once a week Jenkins masters leak threads and fall over. This causes jobs to fail and unfortunately we haven't been able to fix this in Jenkins itself. Work around this by performing weekly Jenkins master rolling restarts. Change-Id: Ib51536c5a2f4f25a17c952ce30fb89992a6ada1a Depends-On: Ieac15a0fe2a47ec3dae51db96ad2fe40992c353a --- manifests/site.pp | 4 ++- .../playbooks/restart_jenkins_masters.yaml | 12 +++++++++ .../manifests/puppetmaster.pp | 26 +++++++++++++++++++ 3 files changed, 41 insertions(+), 1 deletion(-) create mode 100644 modules/openstack_project/files/ansible/playbooks/restart_jenkins_masters.yaml diff --git a/manifests/site.pp b/manifests/site.pp index 2e3423b031..682dde8855 100644 --- a/manifests/site.pp +++ b/manifests/site.pp @@ -217,7 +217,9 @@ node 'puppetmaster.openstack.org' { pin_puppet => '3.6.', } class { 'openstack_project::puppetmaster': - root_rsa_key => hiera('puppetmaster_root_rsa_key', 'XXX'), + root_rsa_key => hiera('puppetmaster_root_rsa_key', 'XXX'), + jenkins_api_user => hiera('jenkins_api_user', 'username'), + jenkins_api_key => hiera('jenkins_api_key', 'XXX'), } } diff --git a/modules/openstack_project/files/ansible/playbooks/restart_jenkins_masters.yaml b/modules/openstack_project/files/ansible/playbooks/restart_jenkins_masters.yaml new file mode 100644 index 0000000000..0f96c6b4ec --- /dev/null +++ b/modules/openstack_project/files/ansible/playbooks/restart_jenkins_masters.yaml @@ -0,0 +1,12 @@ +--- +- hosts: 'jenkins0*.openstack.org' + # Treat any errors as fatal so that we don't stop all the jenkins + # masters. NOTE(review): the cron job runs this play with `ansible-playbook -f 1` and the play sets no `serial:` value; confirm that a forks limit alone restarts one master at a time rather than running each task across every master before moving on.
+ any_errors_fatal: true + tasks: + - shell: '/usr/local/jenkins/bin/safe_jenkins_shutdown --url https://{{ ansible_fqdn }}/ --user {{ user }} --password {{ password }}' + - service: name=jenkins state=stopped + # This is necessary because stopping Jenkins is not reliable. + # We allow return code 1 which means no processes found. + - shell: 'pkill -9 -U jenkins || [ $? -eq "1" ]' + - service: name=jenkins state=started diff --git a/modules/openstack_project/manifests/puppetmaster.pp b/modules/openstack_project/manifests/puppetmaster.pp index c25fecf2e7..bdfbcd1ad1 100644 --- a/modules/openstack_project/manifests/puppetmaster.pp +++ b/modules/openstack_project/manifests/puppetmaster.pp @@ -1,6 +1,8 @@ # == Class: openstack_project::puppetmaster # class openstack_project::puppetmaster ( + $jenkins_api_key, + $jenkins_api_user = 'hudson-openstack', $root_rsa_key = 'xxx', $puppetdb = true, $puppetdb_server = 'puppetdb.openstack.org', @@ -157,6 +159,30 @@ class openstack_project::puppetmaster ( } } +# Jenkins master management + cron { 'restartjenkinsmasters': + user => 'root', + # Run through all masters once a week. + weekday => '6', + hour => '0', + minute => '15', + command => "flock -n /var/run/puppet/restart_jenkins_masters.lock ansible-playbook -f 1 /etc/ansible/playbooks/restart_jenkins_masters.yaml --extra-vars 'user=${jenkins_api_user} password=${jenkins_api_key}' >> /var/log/restart_jenkins_masters.log 2>&1", + } + + logrotate::file { 'restartjenkinsmasters': + ensure => present, + log => '/var/log/restart_jenkins_masters.log', + options => ['compress', + 'copytruncate', + 'delaycompress', + 'missingok', + 'rotate 7', + 'daily', + 'notifempty', + ], + require => Cron['restartjenkinsmasters'], + } + # Playbooks # file { '/etc/ansible/playbooks':