diff --git a/.gitignore b/.gitignore deleted file mode 100644 index ad7f7aba..00000000 --- a/.gitignore +++ /dev/null @@ -1,17 +0,0 @@ -*.swp -*~ -*.qcow2 - -.DS_Store - -*.egg* -*.pyc - -.tox -doc/build -deploy-guide/source/_build -build - -# pbr generates these -AUTHORS -ChangeLog diff --git a/.zuul.yaml b/.zuul.yaml deleted file mode 100644 index 739edefa..00000000 --- a/.zuul.yaml +++ /dev/null @@ -1,4 +0,0 @@ -- project: - templates: - - publish-openstack-docs-pti - - deploy-guide-jobs diff --git a/README.rst b/README.rst index 34779d11..4ee2c5f1 100644 --- a/README.rst +++ b/README.rst @@ -1,51 +1,10 @@ -======================== -Team and repository tags -======================== +This project is no longer maintained. -.. image:: https://governance.openstack.org/tc/badges/tripleo-docs.svg - :target: https://governance.openstack.org/tc/reference/tags/index.html +The contents of this repository are still available in the Git +source code management system. To see the contents of this +repository before it reached its end of life, please check out the +previous commit with "git checkout HEAD^1". -.. Change things from this point on - -TripleO Documentation -===================== - -This is the documentation source for the TripleO project. You can read -the generated documentation at `TripleO -Docs `__. - -You can find out more about TripleO at the `TripleO -Wiki `__. - -Getting Started ---------------- - -Documentation for the TripleO project is hosted on the OpenStack Gerrit -site. You can view all open and resolved issues in the -``openstack/tripleo-docs`` project at `TripleO -Reviews `__. - -General information about contributing to the OpenStack documentation -available at `OpenStack Documentation Contributor -Guide `__ - -Quick Start ------------ - -The following is a quick set of instructions to get you up and running -by building the TripleO documentation locally. The first step is to get -your Python environment configured. Information on configuring is -available at `Python Project -Guide `__ - -Next you can generate the documentation using the following command. Be -sure to run all the commands from within the recently checked out -repository. - -:: - - tox -edocs,pdf-docs,deploy-guide - -Now you have the documentation generated for the various available -formats from the local source. The resulting documentation will be -available within the ``doc/build/`` directory. +For any further questions, please email +openstack-discuss@lists.openstack.org or join #openstack-dev on +OFTC. diff --git a/_custom/admonition_selector.js b/_custom/admonition_selector.js deleted file mode 100644 index 3776e9ff..00000000 --- a/_custom/admonition_selector.js +++ /dev/null @@ -1,58 +0,0 @@ -/* - This function will search for all classes matching all IDs which are under - #admonition_selector element and display/hide their content. - - State is saved in cookies so user doesn't lose his settings after page - reload or changing pages. 
- - To make this feature work, you need to: - - add checkbox to _templates/layout.html file with proper ID - - in admonitions use proper class which matches above mentioned ID -*/ - - - -// after document is loaded -$(document).ready(function() { - - // for each checkbox in #admonition_selector do - $('#admonition_selector :checkbox').each(function() { - - // check value of cookies and set state to the related element - if ($.cookie($(this).attr("id")) == "true") { - $(this).prop("checked", true); - } else { - $(this).prop("checked", false); - } - - // show/hide elements after page loaded - toggle_admonition($(this).attr("id")); - }); - - // when user clicks on the checkbox, react - $('#admonition_selector :checkbox').change(function() { - - // show/hide related elements - toggle_admonition($(this).attr("id")); - - // save the state in the cookies - $.cookie($(this).attr("id"), $(this).is(':checked'), { path: '/' }); - }); -}); - - -// function to show/hide elements based on checkbox state -// checkbox has ID and it toggles elements having class named same way as the ID -function toggle_admonition(admonition) { - - // for each element having class as the checkbox's ID - $(".admonition." + admonition).each(function() { - - // set show/hide - if($("#" + admonition).is(':checked')) { - $(this).show(); - } else { - $(this).hide(); - } - }); -} diff --git a/_custom/cookies.js b/_custom/cookies.js deleted file mode 100644 index a845cd5c..00000000 --- a/_custom/cookies.js +++ /dev/null @@ -1,117 +0,0 @@ -/*! - * jQuery Cookie Plugin v1.4.1 - * https://github.com/carhartl/jquery-cookie - * - * Copyright 2013 Klaus Hartl - * Released under the MIT license - */ -(function (factory) { - if (typeof define === 'function' && define.amd) { - // AMD - define(['jquery'], factory); - } else if (typeof exports === 'object') { - // CommonJS - factory(require('jquery')); - } else { - // Browser globals - factory(jQuery); - } -}(function ($) { - - var pluses = /\+/g; - - function encode(s) { - return config.raw ? s : encodeURIComponent(s); - } - - function decode(s) { - return config.raw ? s : decodeURIComponent(s); - } - - function stringifyCookieValue(value) { - return encode(config.json ? JSON.stringify(value) : String(value)); - } - - function parseCookieValue(s) { - if (s.indexOf('"') === 0) { - // This is a quoted cookie as according to RFC2068, unescape... - s = s.slice(1, -1).replace(/\\"/g, '"').replace(/\\\\/g, '\\'); - } - - try { - // Replace server-side written pluses with spaces. - // If we can't decode the cookie, ignore it, it's unusable. - // If we can't parse the cookie, ignore it, it's unusable. - s = decodeURIComponent(s.replace(pluses, ' ')); - return config.json ? JSON.parse(s) : s; - } catch(e) {} - } - - function read(s, converter) { - var value = config.raw ? s : parseCookieValue(s); - return $.isFunction(converter) ? converter(value) : value; - } - - var config = $.cookie = function (key, value, options) { - - // Write - - if (value !== undefined && !$.isFunction(value)) { - options = $.extend({}, config.defaults, options); - - if (typeof options.expires === 'number') { - var days = options.expires, t = options.expires = new Date(); - t.setTime(+t + days * 864e+5); - } - - return (document.cookie = [ - encode(key), '=', stringifyCookieValue(value), - options.expires ? '; expires=' + options.expires.toUTCString() : '', // use expires attribute, max-age is not supported by IE - options.path ? '; path=' + options.path : '', - options.domain ? 
'; domain=' + options.domain : '', - options.secure ? '; secure' : '' - ].join('')); - } - - // Read - - var result = key ? undefined : {}; - - // To prevent the for loop in the first place assign an empty array - // in case there are no cookies at all. Also prevents odd result when - // calling $.cookie(). - var cookies = document.cookie ? document.cookie.split('; ') : []; - - for (var i = 0, l = cookies.length; i < l; i++) { - var parts = cookies[i].split('='); - var name = decode(parts.shift()); - var cookie = parts.join('='); - - if (key && key === name) { - // If second argument (value) is a function it's a converter... - result = read(cookie, value); - break; - } - - // Prevent storing a cookie that we couldn't decode. - if (!key && (cookie = read(cookie)) !== undefined) { - result[name] = cookie; - } - } - - return result; - }; - - config.defaults = {}; - - $.removeCookie = function (key, options) { - if ($.cookie(key) === undefined) { - return false; - } - - // Must not alter options, thus extending a fresh object... - $.cookie(key, '', $.extend({}, options, { expires: -1 })); - return !$.cookie(key); - }; - -})); diff --git a/_custom/custom.css b/_custom/custom.css deleted file mode 100644 index c84f039a..00000000 --- a/_custom/custom.css +++ /dev/null @@ -1,146 +0,0 @@ -/* CUSTOM CSS OVERRIDES GO HERE */ -/* ============================ */ - -/* remove backgrounds */ -#admonition_selector { - background: none !important; - color: black !important; -} - -/* admonition selector */ -#admonition_selector { - border-top: 0 none !important; -} - -#admonition_selector .title { - color: rgba(0, 0, 0, 0.6) !important; -} - -.trigger { - color: rgba(0, 0, 0, 0.7) !important; - border-top: 1px solid rgba(0, 0, 0, 0.2); - border-bottom: 1px solid rgba(0, 0, 0, 0.2); - background: rgba(0, 0, 0, 0.05); -} - -.trigger:hover { - color: rgba(0, 0, 0, 0.9) !important; -} - -/* NOTES, ADMONITIONS AND TAGS */ -.admonition { - font-size: 85%; /* match code size */ - background: rgb(240, 240, 240); - color: rgba(0, 0, 0, 0.55); - border: 1px solid rgba(0, 0, 0, 0.1); - padding: 0.5em 1em 0.75em 1em; - margin-bottom: 24px; -} - -.admonition .admonition { - /* Don't keep shrinking the font for nested admonitions. 
*/ - font-size: 100%; -} - -.admonition p { - font-size: inherit; -} - -.admonition p.last { - margin-bottom: 0; -} - -.admonition p.first.admonition-title { - display: inline; - background: none; - font-weight: bold; - color: rgba(0, 0, 0, 0.75); -} - -/* notes */ -.rst-content .note { - background: rgb(240, 240, 240); -} - -/* tags */ -.fedora28 {background: #aee;} -.centos7 {background: #cea;} -.centos8 {background: #cae;} -.rhel {background: #fee;} -.portal {background-color: #ded;} -.satellite {background-color: #dee;} -.stable {background: #eed;} -.newton {background: #ede;} -.ocata {background: #edd;} -.pike {background: #dfb;} -.queens {background: #afd;} -.rocky {background: #aee;} -.stein {background: #ade;} -.centos {background: #fef;} -.baremetal {background: #eef;} -.virtual {background: #efe;} -.ceph {background: #eff;} -.mton {background: #ded;} -.ntoo {background: #edd;} -.otop {background: #dfb;} -.ptoq {background: #afd;} -.qtor {background: #aee;} -.rtos {background: #ade;} -.validations {background: #fdd;} -.optional {background: #ffe;} -.tls {background: #ded;} - -/* admonition selector */ -#admonition_selector { - color: white; - font-size: 85%; - line-height: 1.4; - background: #2980b9; - border-top: 1px solid rgba(255, 255, 255, 0.4); -} - -.trigger { - color: rgba(255, 255, 255, 0.75); - line-height: 2.5; - position: relative; - cursor: pointer; - padding: 0 1.618em; -} - -.trigger:after { - content: '▾'; - font-family: FontAwesome; -} - -.trigger:hover { - color: white; -} - -.content { - display: none; - border-top: 1px solid rgba(255, 255, 255, 0.1); - background: rgba(255, 255, 255, 0.1); - padding: 0.5em 1.618em; -} - -.displayed .trigger:after { - content: '▴'; -} - -#admonition_selector .title { - color: rgba(255, 255, 255, 0.45); -} - -#admonition_selector ul { - margin-bottom: 0.75em; -} - -#admonition_selector ul li { - display: block; -} - -#admonition_selector label { - display: inline; - color: inherit; - text-decoration: underline dotted; -} diff --git a/_custom/expandable.js b/_custom/expandable.js deleted file mode 100644 index a11eb9e2..00000000 --- a/_custom/expandable.js +++ /dev/null @@ -1,31 +0,0 @@ -$(document).ready(function() { - - // for each trigger - $('.trigger').each(function() { - - // check if cookie has value on true - if ($.cookie($(this).parent().prop('id')) == "true") { - // add displayed class and show the content - $(this).parent().addClass("displayed"); - $(this).next('.content').show(); - - } else { - // remove displayed class and hide the content - $(this).parent().removeClass("displayed"); - $(this).next('.content').hide(); - } - }); - - // if user clicked trigger element - $('.trigger').click(function() { - - // toggle parent's class and animate the content - $(this).parent().toggleClass('displayed'); - $(this).next('.content').slideToggle("fast"); - - // save the state to cookies - $.cookie($(this).parent().prop('id'), - $(this).parent().hasClass('displayed'), - { path: '/' }); - }); -}); diff --git a/_custom/jquery.nav.js b/_custom/jquery.nav.js deleted file mode 100644 index 665157f8..00000000 --- a/_custom/jquery.nav.js +++ /dev/null @@ -1,223 +0,0 @@ -/* - * jQuery One Page Nav Plugin - * http://github.com/davist11/jQuery-One-Page-Nav - * - * Copyright (c) 2010 Trevor Davis (http://trevordavis.net) - * Dual licensed under the MIT and GPL licenses. 
- * Uses the same license as jQuery, see: - * http://jquery.org/license - * - * @version 3.0.0 - * - * Example usage: - * $('#nav').onePageNav({ - * currentClass: 'current', - * changeHash: false, - * scrollSpeed: 750 - * }); - */ - -;(function($, window, document, undefined){ - - // our plugin constructor - var OnePageNav = function(elem, options){ - this.elem = elem; - this.$elem = $(elem); - this.options = options; - this.metadata = this.$elem.data('plugin-options'); - this.$win = $(window); - this.sections = {}; - this.didScroll = false; - this.$doc = $(document); - this.docHeight = this.$doc.height(); - }; - - // the plugin prototype - OnePageNav.prototype = { - defaults: { - navItems: 'a', - currentClass: 'active', - changeHash: false, - easing: 'swing', - filter: '', - scrollSpeed: 750, - scrollThreshold: 0.2, - begin: false, - end: false, - scrollChange: false - }, - - init: function() { - // Introduce defaults that can be extended either - // globally or using an object literal. - this.config = $.extend({}, this.defaults, this.options, this.metadata); - - this.$nav = this.$elem.find(this.config.navItems); - - //Filter any links out of the nav - if(this.config.filter !== '') { - this.$nav = this.$nav.filter(this.config.filter); - } - - //Handle clicks on the nav - this.$nav.on('click.onePageNav', $.proxy(this.handleClick, this)); - - //Get the section positions - this.getPositions(); - - //Handle scroll changes - this.bindInterval(); - - //Update the positions on resize too - this.$win.on('resize.onePageNav', $.proxy(this.getPositions, this)); - - return this; - }, - - adjustNav: function(self, $parent) { - self.$elem.find('.' + self.config.currentClass).removeClass(self.config.currentClass); - $parent.addClass(self.config.currentClass); - }, - - bindInterval: function() { - var self = this; - var docHeight; - - self.$win.on('scroll.onePageNav', function() { - self.didScroll = true; - }); - - self.t = setInterval(function() { - docHeight = self.$doc.height(); - - //If it was scrolled - if(self.didScroll) { - self.didScroll = false; - self.scrollChange(); - } - - //If the document height changes - if(docHeight !== self.docHeight) { - self.docHeight = docHeight; - self.getPositions(); - } - }, 250); - }, - - getHash: function($link) { - return $link.attr('href').split('#')[1]; - }, - - getPositions: function() { - var self = this; - var linkHref; - var topPos; - var $target; - - self.$nav.each(function() { - linkHref = self.getHash($(this)); - $target = $('#' + linkHref); - - if($target.length) { - topPos = $target.offset().top; - self.sections[linkHref] = Math.round(topPos); - } - }); - }, - - getSection: function(windowPos) { - var returnValue = null; - var windowHeight = Math.round(this.$win.height() * this.config.scrollThreshold); - - for(var section in this.sections) { - if((this.sections[section] - windowHeight) < windowPos) { - returnValue = section; - } - } - - return returnValue; - }, - - handleClick: function(e) { - var self = this; - var $link = $(e.currentTarget); - var $parent = $link.parent(); - var newLoc = '#' + self.getHash($link); - - if(!$parent.hasClass(self.config.currentClass)) { - //Start callback - if(self.config.begin) { - self.config.begin(); - } - - //Change the highlighted nav item - self.adjustNav(self, $parent); - - //Removing the auto-adjust on scroll - self.unbindInterval(); - - //Scroll to the correct position - self.scrollTo(newLoc, function() { - //Do we need to change the hash? 
- if(self.config.changeHash) { - window.location.hash = newLoc; - } - - //Add the auto-adjust on scroll back in - self.bindInterval(); - - //End callback - if(self.config.end) { - self.config.end(); - } - }); - } - - e.preventDefault(); - }, - - scrollChange: function() { - var windowTop = this.$win.scrollTop(); - var position = this.getSection(windowTop); - var $parent; - - //If the position is set - if(position !== null) { - $parent = this.$elem.find('a[href$="#' + position + '"]').parent(); - - //If it's not already the current section - if(!$parent.hasClass(this.config.currentClass)) { - //Change the highlighted nav item - this.adjustNav(this, $parent); - - //If there is a scrollChange callback - if(this.config.scrollChange) { - this.config.scrollChange($parent); - } - } - } - }, - - scrollTo: function(target, callback) { - var offset = $(target).offset().top; - - $('html, body').animate({ - scrollTop: offset - }, this.config.scrollSpeed, this.config.easing, callback); - }, - - unbindInterval: function() { - clearInterval(this.t); - this.$win.unbind('scroll.onePageNav'); - } - }; - - OnePageNav.defaults = OnePageNav.prototype.defaults; - - $.fn.onePageNav = function(options) { - return this.each(function() { - new OnePageNav(this, options).init(); - }); - }; - -})( jQuery, window , document ); diff --git a/_custom/jquery.scrollTo.js b/_custom/jquery.scrollTo.js deleted file mode 100644 index 4fc95659..00000000 --- a/_custom/jquery.scrollTo.js +++ /dev/null @@ -1,208 +0,0 @@ -/*! - * jQuery.scrollTo - * Copyright (c) 2007-2015 Ariel Flesler - afleslergmailcom | http://flesler.blogspot.com - * Licensed under MIT - * http://flesler.blogspot.com/2007/10/jqueryscrollto.html - * @projectDescription Easy element scrolling using jQuery. - * @author Ariel Flesler - * @version 2.1.0 - */ -;(function(define) { - 'use strict'; - - define(['jquery'], function($) { - var $scrollTo = $.scrollTo = function(target, duration, settings) { - return $(window).scrollTo(target, duration, settings); - }; - - $scrollTo.defaults = { - axis:'xy', - duration: 0, - limit:true - }; - - function isWin(elem) { - return !elem.nodeName || - $.inArray(elem.nodeName.toLowerCase(), ['iframe','#document','html','body']) !== -1; - } - - $.fn.scrollTo = function(target, duration, settings) { - if (typeof duration === 'object') { - settings = duration; - duration = 0; - } - if (typeof settings === 'function') { - settings = { onAfter:settings }; - } - if (target === 'max') { - target = 9e9; - } - - settings = $.extend({}, $scrollTo.defaults, settings); - // Speed is still recognized for backwards compatibility - duration = duration || settings.duration; - // Make sure the settings are given right - var queue = settings.queue && settings.axis.length > 1; - if (queue) { - // Let's keep the overall duration - duration /= 2; - } - settings.offset = both(settings.offset); - settings.over = both(settings.over); - - return this.each(function() { - // Null target yields nothing, just like jQuery does - if (target === null) return; - - var win = isWin(this), - elem = win ? this.contentWindow || window : this, - $elem = $(elem), - targ = target, - attr = {}, - toff; - - switch (typeof targ) { - // A number will pass the regex - case 'number': - case 'string': - if (/^([+-]=?)?\d+(\.\d+)?(px|%)?$/.test(targ)) { - targ = both(targ); - // We are done - break; - } - // Relative/Absolute selector - targ = win ? 
$(targ) : $(targ, elem); - if (!targ.length) return; - /* falls through */ - case 'object': - // DOMElement / jQuery - if (targ.is || targ.style) { - // Get the real position of the target - toff = (targ = $(targ)).offset(); - } - } - - var offset = $.isFunction(settings.offset) && settings.offset(elem, targ) || settings.offset; - - $.each(settings.axis.split(''), function(i, axis) { - var Pos = axis === 'x' ? 'Left' : 'Top', - pos = Pos.toLowerCase(), - key = 'scroll' + Pos, - prev = $elem[key](), - max = $scrollTo.max(elem, axis); - - if (toff) {// jQuery / DOMElement - attr[key] = toff[pos] + (win ? 0 : prev - $elem.offset()[pos]); - - // If it's a dom element, reduce the margin - if (settings.margin) { - attr[key] -= parseInt(targ.css('margin'+Pos), 10) || 0; - attr[key] -= parseInt(targ.css('border'+Pos+'Width'), 10) || 0; - } - - attr[key] += offset[pos] || 0; - - if (settings.over[pos]) { - // Scroll to a fraction of its width/height - attr[key] += targ[axis === 'x'?'width':'height']() * settings.over[pos]; - } - } else { - var val = targ[pos]; - // Handle percentage values - attr[key] = val.slice && val.slice(-1) === '%' ? - parseFloat(val) / 100 * max - : val; - } - - // Number or 'number' - if (settings.limit && /^\d+$/.test(attr[key])) { - // Check the limits - attr[key] = attr[key] <= 0 ? 0 : Math.min(attr[key], max); - } - - // Don't waste time animating, if there's no need. - if (!i && settings.axis.length > 1) { - if (prev === attr[key]) { - // No animation needed - attr = {}; - } else if (queue) { - // Intermediate animation - animate(settings.onAfterFirst); - // Don't animate this axis again in the next iteration. - attr = {}; - } - } - }); - - animate(settings.onAfter); - - function animate(callback) { - var opts = $.extend({}, settings, { - // The queue setting conflicts with animate() - // Force it to always be true - queue: true, - duration: duration, - complete: callback && function() { - callback.call(elem, targ, settings); - } - }); - $elem.animate(attr, opts); - } - }); - }; - - // Max scrolling position, works on quirks mode - // It only fails (not too badly) on IE, quirks mode. - $scrollTo.max = function(elem, axis) { - var Dim = axis === 'x' ? 'Width' : 'Height', - scroll = 'scroll'+Dim; - - if (!isWin(elem)) - return elem[scroll] - $(elem)[Dim.toLowerCase()](); - - var size = 'client' + Dim, - doc = elem.ownerDocument || elem.document, - html = doc.documentElement, - body = doc.body; - - return Math.max(html[scroll], body[scroll]) - Math.min(html[size], body[size]); - }; - - function both(val) { - return $.isFunction(val) || $.isPlainObject(val) ? val : { top:val, left:val }; - } - - // Add special hooks so that window scroll properties can be animated - $.Tween.propHooks.scrollLeft = - $.Tween.propHooks.scrollTop = { - get: function(t) { - return $(t.elem)[t.prop](); - }, - set: function(t) { - var curr = this.get(t); - // If interrupt is true and user scrolled, stop animating - if (t.options.interrupt && t._last && t._last !== curr) { - return $(t.elem).stop(); - } - var next = Math.round(t.now); - // Don't waste CPU - // Browsers don't render floating point scroll - if (curr !== next) { - $(t.elem)[t.prop](next); - t._last = this.get(t); - } - } - }; - - // AMD requirement - return $scrollTo; - }); -}(typeof define === 'function' && define.amd ? 
define : function(deps, factory) { - 'use strict'; - if (typeof module !== 'undefined' && module.exports) { - // Node - module.exports = factory(require('jquery')); - } else { - factory(jQuery); - } -})); diff --git a/_custom/menu.js b/_custom/menu.js deleted file mode 100644 index ed63fdbb..00000000 --- a/_custom/menu.js +++ /dev/null @@ -1,3 +0,0 @@ -$(document).ready(function() { - $('.wy-menu').onePageNav(); -}); diff --git a/_templates/layout.html b/_templates/layout.html deleted file mode 100644 index 6912afb5..00000000 --- a/_templates/layout.html +++ /dev/null @@ -1,65 +0,0 @@ -{% extends "!layout.html" %} - -{% set script_files = script_files + ["_static/cookies.js"] %} -{% set script_files = script_files + ["_static/expandable.js"] %} -{% set script_files = script_files + ["_static/admonition_selector.js"] %} -{% set script_files = script_files + ["_static/jquery.scrollTo.js"] %} -{% set script_files = script_files + ["_static/jquery.nav.js"] %} -{% set script_files = script_files + ["_static/menu.js"] %} - -{% set css_files = css_files + ['_static/custom.css'] %} - -{% block otherversions %} -
-    [admonition selector markup: a "Limit Environment Specific Content" trigger with checkbox groups for Operating Systems, Branches, RHEL Registration Types, Environments, Features, Additional Overcloud Roles, and Upgrade Version]
- - {{ super() }} -{% endblock %} diff --git a/bindep.txt b/bindep.txt deleted file mode 100644 index 3991ef06..00000000 --- a/bindep.txt +++ /dev/null @@ -1,2 +0,0 @@ -librsvg2-tools [doc platform:rpm] -librsvg2-bin [doc platform:dpkg] diff --git a/deploy-guide/source/_images/TripleO_Network_Diagram_.jpg b/deploy-guide/source/_images/TripleO_Network_Diagram_.jpg deleted file mode 100644 index 72d9f3a9..00000000 Binary files a/deploy-guide/source/_images/TripleO_Network_Diagram_.jpg and /dev/null differ diff --git a/deploy-guide/source/_images/spine_and_leaf.svg b/deploy-guide/source/_images/spine_and_leaf.svg deleted file mode 100644 index 0f729946..00000000 --- a/deploy-guide/source/_images/spine_and_leaf.svg +++ /dev/null @@ -1,938 +0,0 @@ - -image/svg+xmlL3 -L2 - - - -Rack A -Rack B -Undercloud -Controller-0 -Controller-1 -Controller-2 -Compute-1 -Compute-2 -Compute-3 -Compute-4 -Spine 1 -Spine Switches -Servers -Rack C -Compute-5 -Compute-6 -Compute-7 -Compute-8 -Rack D -Compute-9 -Compute-10 -Compute-11 -Compute-12 -Spine 2 -Leaf Switches -Leaf 1 -Leaf 1 -Leaf 2 -Leaf 2 -Leaf 3 -Leaf 3 -Leaf 4 -Leaf 4 -Spine 3 - \ No newline at end of file diff --git a/deploy-guide/source/_images/tripleo_ansible_arch.png b/deploy-guide/source/_images/tripleo_ansible_arch.png deleted file mode 100644 index adf4a2a1..00000000 Binary files a/deploy-guide/source/_images/tripleo_ansible_arch.png and /dev/null differ diff --git a/deploy-guide/source/conf.py b/deploy-guide/source/conf.py deleted file mode 100644 index 638ecc15..00000000 --- a/deploy-guide/source/conf.py +++ /dev/null @@ -1,131 +0,0 @@ -# instack-undercloud documentation build configuration file, created by -# sphinx-quickstart on Wed Feb 25 10:56:57 2015. -# -# This file is execfile()d with the current directory set to its containing -# dir. -# -# Note that not all possible configuration values are present in this -# autogenerated file. -# -# All configuration values have a default; values that are commented out -# serve to show the default. - -# import os -# import sys - -# If extensions (or modules to document with autodoc) are in another directory, -# add these directories to sys.path here. If the directory is relative to the -# documentation root, use os.path.abspath to make it absolute, like shown here. -# sys.path.insert(0, os.path.abspath('.')) - -# -- General configuration --------------------------------------------------- - -# If your documentation needs a minimal Sphinx version, state it here. -# needs_sphinx = '1.0' - -# Add any Sphinx extension module names here, as strings. They can be -# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones. -extensions = [ - 'sphinx.ext.intersphinx', - 'openstackdocstheme' -] - - -# The suffix of source filenames. -source_suffix = '.rst' - -# The encoding of source files. -# source_encoding = 'utf-8-sig' - -# The master toctree document. -master_doc = 'index' - -# General information about the project. -project = u'TripleO' -copyright = u'2015, OpenStack Foundation' -bug_tracker = u'Launchpad' -bug_tracker_url = u'https://launchpad.net/tripleo' - -# The version info for the project you're documenting, acts as replacement for -# |version| and |release|, also used in various other places throughout the -# built documents. -# -# The short X.Y version. -version = '3.0.0' -# The full version, including alpha/beta/rc tags. -release = '3.0.0' - -# The language for content autogenerated by Sphinx. Refer to documentation -# for a list of supported languages. 
-# language = None - -# There are two options for replacing |today|: either, you set today to some -# non-false value, then it is used: -# today = '' -# Else, today_fmt is used as the format for a strftime call. -# today_fmt = '%B %d, %Y' - -# List of patterns, relative to source directory, that match files and -# directories to ignore when looking for source files. -exclude_patterns = [] - -# The reST default role (used for this markup: `text`) to use for all -# documents. -# default_role = None - -# If true, '()' will be appended to :func: etc. cross-reference text. -# add_function_parentheses = True - -# If true, the current module name will be prepended to all description -# unit titles (such as .. function::). -# add_module_names = True - -# If true, sectionauthor and moduleauthor directives will be shown in the -# output. They are ignored by default. -# show_authors = False - -# The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'native' - -# A list of ignored prefixes for module index sorting. -# modindex_common_prefix = [] - - -# -- Options for HTML output ------------------------------------------------- - -html_static_path = ['../../_custom'] -# html_style = 'custom.css' -templates_path = ['../../_templates'] - -# Output file base name for HTML help builder. -htmlhelp_basename = '%sdoc' % project - -# The theme to use for HTML and HTML Help pages. See the documentation for -# a list of builtin themes. -html_theme = 'openstackdocs' - -# -- Options for LaTeX output ------------------------------------------------ - -latex_elements = { - # The paper size ('letterpaper' or 'a4paper'). - # 'papersize': 'letterpaper', - - # The font size ('10pt', '11pt' or '12pt'). - # 'pointsize': '10pt', - - # Additional stuff for the LaTeX preamble. - # 'preamble': '', - } - -rst_prolog = """ -.. |project| replace:: %s -.. |bug_tracker| replace:: %s -.. |bug_tracker_url| replace:: %s -""" % (project, bug_tracker, bug_tracker_url) - -# openstackdocstheme options -openstackdocs_repo_name = 'openstack/tripleo-docs' -openstackdocs_auto_name = False -openstackdocs_auto_version = False -openstackdocs_bug_project = 'tripleo' -openstackdocs_bug_tag = 'documentation' diff --git a/deploy-guide/source/deployment/3rd_party.rst b/deploy-guide/source/deployment/3rd_party.rst deleted file mode 100644 index eb6194df..00000000 --- a/deploy-guide/source/deployment/3rd_party.rst +++ /dev/null @@ -1,447 +0,0 @@ -Integrating 3rd Party Containers in TripleO -=========================================== - -.. _build_container_images: - -One of the following methods can be used to extend or build from scratch -custom 3rd party containers. - -Extend TripleO Containers -------------------------- - -Any extra RPMs required by 3rd party drivers may need to be post-installed into -our stock TripleO containers. In this case the 3rd party vendor may opt to add -a layer to an existing container in order to deploy their software. - -Adding layers to existing containers using TripleO tooling -.......................................................... - -The example below demonstrates how to extend a container image, where the goal -is to create a layer on top of the cinder-volume image that will be named -"cinder-cooldriver". - -* Make sure python-tripleoclient and the dependencies are installed: - - .. code-block:: shell - - sudo dnf install -y python-tripleoclient - - -* Create a vendor directory (which later can be pushed into a git - repository): - - .. 
code-block:: shell - - mkdir ~/vendor - -* Create a tcib directory under the vendor folder. All container build - yaml needs to live in a tcib folder as a root directory. - - .. code-block:: shell - - mkdir ~/vendor/tcib - -* Create the `~/vendor/containers.yaml` which contains the list - of images that we want to build: - - .. code-block:: yaml - - container_images: - - image_source: tripleo - imagename: localhost/tripleomaster/openstack-cinder-cooldriver:latest - -* Create `~/vendor/tcib/cinder-cooldriver` to hold our container image - configuration. - - .. code-block:: shell - - mkdir ~/vendor/tcib/cinder-cooldriver - -* Optionally, add custom files into the build environment. - - .. code-block:: shell - - mkdir ~/vendor/tcib/cinder-cooldriver/files - cp custom-package.rpm ~/vendor/tcib/cinder-cooldriver/files - - -* Create `~/vendor/tcib/cinder-cooldriver/cinder-cooldriver.yaml` file which - contains the container image configuration: - - .. code-block:: yaml - - --- - # that's the parent layer, here cinder-volume - tcib_from: localhost/tripleomaster/openstack-cinder-volume:latest - tcib_actions: - - user: root - - run: mkdir /tmp/cooldriver/example.py - - run: mkdir -p /rpms - - run: dnf install -y cooldriver_package - tcib_copies: - - '{{lookup(''env'',''HOME'')}}/vendor/tcib/cinder-cooldriver/files/custom-package.rpm /rpms' - tcib_gather_files: > - {{ lookup('fileglob', '~/vendor/tcib/cinder-cooldriver/files/*', wantlist=True) }} - tcib_runs: - - dnf install -y /rpms/*.rpm - tcib_user: cinder - -.. note:: Here `tcib_runs` provides a shortcut to `tcib_actions:run`. See more tcib parameters documented in the `tcib`_ role. - -.. _tcib: https://docs.openstack.org/tripleo-ansible/latest/roles/role-tripleo_container_image_build.html#r-o-l-e-d-e-f-a-u-l-t-s - - -* The result file structure should look something like: - - .. code-block:: shell - - $ tree vendor - vendor - ├── containers.yaml - └── tcib - └── cinder-cooldriver - └── cinder-cooldriver.yaml - └── files - └── custom-package.rpm - -* Build the vendor container image: - - .. code-block:: shell - - openstack tripleo container image build \ - --config-file ~/vendor/containers.yaml \ - --config-path ~/vendor - -* Use `sudo buildah images` command to check if the image was built: - - .. code-block:: shell - - localhost/tripleomaster/openstack-cinder-cooldriver latest 257592a90133 1 minute ago 1.22 GB - -.. note:: If you want to push the image into a Docker Registry, you can use - `--push` with `--registry`. Use - `openstack tripleo container image build --help` for more details. - -* Push the image into the TripleO Container registry: - - .. code-block:: shell - - sudo openstack tripleo container image push \ - --local --registry-url 192.168.24.1:8787 \ - localhost/tripleomaster/openstack-cinder-cooldriver:latest - -* Use `openstack tripleo container image list` to check if the image was pushed: - - .. code-block:: shell - - +--------------------------------------------------------------------------------------------------+ - | Image Name | - +--------------------------------------------------------------------------------------------------+ - | docker://undercloud.ctlplane.localdomain:8787/tripleomaster/openstack-cinder-vendor:latest | - +--------------------------------------------------------------------------------------------------+ - -Adding layers to existing containers using Docker -................................................. - -.. 
note:: Note that this method has been simplified in Victoria and backported - down to train, with the new `openstack tripleo container image build` - command. - -The example below demonstrates how to extend a container on the Undercloud host -machine. It assumes you are running a local docker registry on the undercloud. -We recommend that you create a Dockerfile to extend the existing container. -Here is an example extending the cinder-volume container:: - - FROM 127.0.0.1:8787/tripleo/centos-binary-cinder-volume - MAINTAINER Vendor X - LABEL name="tripleo/centos-binary-cinder-volume-vendorx" vendor="Vendor X" version="2.1" release="1" - - # switch to root and install a custom RPM, etc. - USER root - COPY vendor_x.rpm /tmp - RUN rpm -ivh /tmp/vendor_x.rpm - - # switch the container back to the default user - USER cinder - -Docker build the container above using `docker build` on the command line. This -will output a container image (used below to tag it). Create a docker tag -and push it into the local registry:: - - docker tag 127.0.0.1:8787/tripleo/centos-binary-cinder-volume-vendorx:rev1 - docker push 127.0.0.1:8787/tripleo/centos-binary-cinder-volume-vendorx:rev1 - -Start an overcloud deployment as normal with the extra custom Heat environment -above to obtain the new container. - -.. warning:: Note that the new container will have the complete software stack - built into it as is normal for containers. When other containers - are updated and include security fixes in these lower layers, this - container will NOT be updated as a result and will require rebuilding. - -Building new containers with tripleo container image build ----------------------------------------------------------- - -Usage -..... - -Use the following command to build all of the container images used in TripleO: - - .. code-block:: shell - - openstack tripleo container image build - -Different options are provided for advanced usage. They can be discovered -by using `--help` argument. -Here are some of them: - -* `--config-file` to use a custom YAML config file specifying the images to build. -* `--config-path` to use a custom base configuration path. - This is the base path for all container-image files. If this option is set, - the default path for will be modified. -* `--extra-config` to apply additional options from a given configuration YAML - file. This will apply to all containers built. -* `--exclude` to skip some containers during the build. -* `--registry` to specify a Container Registry where the images will be pushed. -* `--authfile` to specify an authentication file if the Container Registry - requires authentication. -* `--skip-build` if we don't want to build and push images. It will only - generate the configuration files. -* `--push` to push the container images into the Container Registry. -* `--volume` to overrides the default bind mounts needed when the container - images are built. If you use this argument, don't forget that you might need - to include the default ones. -* `--work-dir` to specify the place where the configuration files will be generated. - -Tips and Tricks with tripleo_container_image_build -.................................................. - -Here's a non-exhaustive list of tips and tricks that might make things faster, -especially on a dev env where you need to build multiple times the containers. - -Inject a caching proxy -______________________ - -Using a caching proxy can make things faster when it comes to package fetching. 
- -One of the way is to either expose the dnf.conf/yum.conf using `--volume`. -Since `dnf.conf is edited during the container build`_, you want to expose a -copy of your host config:: - - sudo cp -r /etc/dnf /srv/container-dnf - openstack tripleo container image build --volume /srv/container-dnf:/etc/dnf:z - -Another way is to expose the `http_proxy` and `https_proxy` environment -variable. - -In order to do so, create a simple yaml file, for instance ~/proxy.yaml:: - - --- - tcib_envs: - LANG: en_US.UTF-8 - container: oci - http_proxy: http://PROXY_HOST:PORT - https_proxy: http://PROXY_HOST:PORT - -Then, pass that file using the `--extra-config` parameter:: - - openstack tripleo container image build --extra-config proxy.yaml - -And you're set. - -.. note:: Please ensure you also pass the `default values`_, since ansible - isn't configured to `merge dicts/lists`_ by default. - -.. _dnf.conf is edited during the container build: https://opendev.org/openstack/tripleo-common/src/commit/156b565bdf74c19d3513f9586fa5fcf1181db3a7/container-images/tcib/base/base.yaml#L3-L14 -.. _default values: https://opendev.org/openstack/tripleo-common/src/commit/156b565bdf74c19d3513f9586fa5fcf1181db3a7/container-images/tcib/base/base.yaml#L35-L37 -.. _merge dicts/lists: https://docs.ansible.com/ansible/latest/reference_appendices/config.html#default-hash-behaviour - - -Get a minimal environment to build containers -_____________________________________________ - -As a dev, you might want to get a daily build of your container images. While -you can, of course, run this on an Undercloud, you actually don't need an -undercloud: you can use `this playbook`_ from `tripleo-operator-ansible`_ -project - -With this, you can set a nightly cron that will ensure you're always getting -latest build on your registry. - -.. _this playbook: https://opendev.org/openstack/tripleo-operator-ansible/src/branch/master/playbooks/container-build.yaml -.. _tripleo-operator-ansible: https://docs.openstack.org/tripleo-operator-ansible/latest/ - - -Building new containers with kolla-build -........................................ - -.. note:: Note that this method will be deprecated during the Victoria cycle - and replaced by the new `openstack tripleo container image build` - command. - -To create new containers, or modify existing ones, you can use ``kolla-build`` -from the `Kolla`_ project to build and push the images yourself. The command -to build a new containers is below. Note that this assumes you are on an -undercloud host where the registry IP address is 192.168.24.1. 
- -Configure Kolla to build images for TripleO, in `/etc/kolla/kolla-build.conf`:: - - [DEFAULT] - base=centos - type=binary - namespace=master - registry=192.168.24.1:8787 - tag=latest - template_override=/usr/share/tripleo-common/container-images/tripleo_kolla_template_overrides.j2 - rpm_setup_config=http://trunk.rdoproject.org/centos9/current-tripleo/delorean.repo,http://trunk.rdoproject.org/centos9/delorean-deps.repo - push=True - -Use the following command to build all of the container images used in TripleO:: - - openstack overcloud container image build \ - --config-file /usr/share/tripleo-common/container-images/overcloud_containers.yaml \ - --kolla-config-file /etc/kolla/kolla-build.conf - -Or use `kolla-build` to build the images yourself, which provides more -flexibility and allows you to rebuild selectively just the images matching -a given name, for example to build only the heat images with the TripleO -customization:: - - kolla-build heat - -Notice that TripleO already uses the -``/usr/share/tripleo-common/container-images/tripleo_kolla_template_overrides.j2`` -to add or change specific aspects of the containers using the `kolla template -override mechanism`_. This file can be copied and modified to create custom -containers. The original copy of this file can be found in the -`tripleo-common`_ repository. - -The following template is an example of the template used for building the base -images that are consumed by TripleO. In this case we are adding the `puppet` -RPM to the base image:: - - {% extends parent_template %} - {% set base_centos_binary_packages_append = ['puppet'] %} - -.. _Kolla: https://github.com/openstack/kolla -.. _kolla template override mechanism: https://docs.openstack.org/kolla/latest/admin/image-building.html#dockerfile-customisation -.. _tripleo-common: https://github.com/openstack/tripleo-common/blob/master/container-images/tripleo_kolla_template_overrides.j2 - - -Integrating 3rd party containers with tripleo-heat-templates ------------------------------------------------------------- - -The `TripleO Heat Templates`_ repo is where most of the logic resides in the form -of heat templates. These templates define each service, the containers' -configuration and the initialization or post-execution operations. - -.. _TripleO Heat Templates: https://opendev.org/openstack/tripleo-heat-templates - -The docker templates can be found under the `docker` sub directory in the -`tripleo-heat-templates` root. The services files are under the -`docker/service` directory. - -For more information on how to integrate containers into the TripleO Heat templates, -see the :ref:`Containerized TripleO architecture` document. - -If all you need to do is change out a container for a specific service, you can -create a custom heat environment file that contains your override. To swap out -the cinder container from our previous example we would add:: - - parameter_defaults: -    ContainerCinderVolumeImage: centos-binary-cinder-volume-vendorx:rev1 - -.. note:: Image parameters were named Docker*Image prior to the Train cycle. - - -3rd party kernel modules ------------------------- - -Some applications (like Neutron or Cinder plugins) require specific kernel modules to be installed -and loaded on the system. - -We recommend two different methods to deploy and load these modules. - -kernel module is deployed on the host -..................................... - -The kernel module is deployed on the base Operating System via RPM or DKMS. 
-Deploy the module by using the ``tripleo-mount-image`` tool and create a -``chroot``. - -First you need to create a repository file where the module will be downloaded from, and copy the repo file into the image:: - - temp_dir=$(mktemp -d) - sudo tripleo-mount-image -a /path/to/overcloud-full.qcow2 -m $temp_dir - sudo cp my-repo.repo $temp_dir/etc/yum.repos.d/ - -You can now start a chroot and install the rpm that contains the kernel module:: - - sudo mount -o bind /dev $temp_dir/dev/ - sudo cp /etc/resolv.conf $temp_dir/etc/resolv.conf - sudo chroot $temp_dir /bin/bash - dnf install my-rpm - exit - -Then unmount the image:: - - sudo rm $temp_dir/etc/resolv.conf - sudo umount $temp_dir/dev - sudo tripleo-unmount-image -m $temp_dir - -Now that the rpm is deployed with the kernel module, we need to configure TripleO to load it. -To configure an extra kernel module named "dpdk_module" for a specific role, we would add:: - - parameter_defaults: - ControllerExtraKernelModules: - dpdk_module: {} - -Since our containers don't get their own kernels, we load modules on the host. -Therefore, ExtraKernelModules parameter is used to configure which modules we want to configure. -This parameter will be applied to the Puppet manifest (in the kernel.yaml service). -The container needs the modules mounted from the host, so make sure the plugin template has the -following configuration (at minimum):: - - volumes: - - /lib/modules:/lib/modules:ro - -However, this method might be problematic if RPMs dependencies are too complex to deploy the kernel -module on the host. - - -kernel module is containerized -.............................. - -Kernel modules can be loaded from the container. -The module can be deployed in the same container as the application that will use it, or in a separated -container. - -Either way, if you need to run a privileged container, make sure to set this parameter:: - - privileged: true - -If privilege mode isn't required, it is suggested to set it to false for security reasons. - -Kernel modules will need to be loaded when the container will be started by Docker. To do so, it is -suggested to configure the composable service which deploys the module in the container this way:: - - kolla_config: - /var/lib/kolla/config_files/neutron_ovs_agent.json: - command: /dpdk_module_launcher.sh - docker_config_scripts: - dpdk_module_launcher.sh: - mode: "0755" - content: | - #!/bin/bash - set -xe - modprobe dpdk_module - docker_config: - step_3: - neutron_ovs_bridge: - volumes: - list_concat: - - {get_attr: [ContainersCommon, volumes]} - - - - /var/lib/docker-config-scripts/dpdk_module_launcher.sh:/dpdk_module_launcher.sh:ro - -That way, the container will be configured to load the module at start, so the operator can restart containers without caring about loading the module manually. diff --git a/deploy-guide/source/deployment/ansible_config_download.rst b/deploy-guide/source/deployment/ansible_config_download.rst deleted file mode 100644 index 20b35f0f..00000000 --- a/deploy-guide/source/deployment/ansible_config_download.rst +++ /dev/null @@ -1,657 +0,0 @@ -.. _config_download: - -TripleO config-download User's Guide: Deploying with Ansible -============================================================= - -Introduction ------------- -This documentation details using ``config-download``. - -``config-download`` is the feature that enables deploying the Overcloud software -configuration with Ansible in TripleO. 
- -Summary -------- -Since the Queens release, it has been possible to use Ansible to apply the -overcloud configuration and with the Rocky release it became the default. - -Ansible is used to replace the communication and transport of the software -configuration deployment data between Heat and the Heat agent -(os-collect-config) on the overcloud nodes. - -Instead of os-collect-config running on each overcloud node and polling for -deployment data from Heat, the Ansible control node applies the configuration -by running ``ansible-playbook`` with an Ansible inventory file and a set of -playbooks and tasks. - -The Ansible control node (the node running ``ansible-playbook``) is the -undercloud by default. - -``config-download`` is the feature name that enables using Ansible in this -manner, and will often be used to refer to the method detailed in this -documentation. - -Heat is still used to create the stack, then the ansible playbooks are saved -to the filesystem in a git repository. These playbook are used to deploy the -openstack services and configuration to the Overcloud nodes. -The same parameter values and environment files are passed to Heat as they were -previously. During the stack creation, Heat simply takes the user inputs from the -templates and renders the required playbooks for the deployment. - -The difference with ``config-download`` is that although Heat creates all the -deployment data necessary via SoftwareDeployment resources to perform the -overcloud installation and configuration, it does not apply any of the software -deployments. The data is only made available via the Heat API. Once the stack -is created, deployment data is downloaded from Heat and ansible playbooks are -generated. - -Using the downloaded deployment data and ansible playbooks configuration of -the overcloud using ``ansible-playbook`` are completed. - -This diagram details the overall sequence of how using config-download -completes an overcloud deployment: - -.. image:: ../_images/tripleo_ansible_arch.png - :scale: 40% - - -Deployment with config-download -------------------------------- -Ansible and ``config-download`` are used by default when ``openstack -overcloud deploy`` (tripleoclient) is run. The command is backwards compatible -in terms of functionality, meaning that running ``openstack overcloud deploy`` -will still result in a full overcloud deployment. - -The deployment is done through a series of steps in tripleoclient. All of the -workflow steps are automated by tripleoclient. The workflow steps are summarized -as: - -#. Create deployment plan -#. Create Heat stack -#. Create software configuration within the Heat stack -#. Create tripleo-admin ssh user -#. Download the software configuration from Heat -#. Applying the downloaded software configuration to the overcloud nodes with - ``ansible-playbook``. - -.. _`authorized on the overcloud nodes`: - -Creating the ``tripleo-admin`` user on each overcloud node is necessary since -ansible uses ssh to connect to each node to perform configuration. - -The following steps are done to create the ``tripleo-admin`` user: - -#. Runs a playbook to create ``tripleo-admin`` on each node. Also, gives sudo - permissions to the user, as well as creates and stores a new ssh keypair - for ``tripleo-admin``. - - -The values for these cli arguments must be the same for all nodes in the -overcloud deployment. 
``overcloud-ssh-key`` should be the private key that -corresponds with the public key specified by the Heat parameter ``KeyName`` -when using Ironic deployed nodes. - -config-download related CLI arguments -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -There are some new CLI arguments for ``openstack overcloud deploy`` that can be -used to influence the behavior of the overcloud deployment as it relates to -``config-download``:: - - --overcloud-ssh-user # Initial ssh user used for creating tripleo-admin. - # Defaults to heat-admin - - --overcloud-ssh-key # Initial ssh private key (file path) to be used for - # creating tripleo-admin. - # Defaults to ~/.ssh/id_rsa - - --override-ansible-cfg # path to an ansible config file, to inject any - # arbitrary ansible config to be used when running - # ansible-playbook - - --stack-only # Only update the stack. Skips applying the - # software configuration with ansible-playbook. - - --config-download-only # Only apply the software configuration with - # ansible-playbook. Skips the stack update. - -See ``openstack overcloud deploy --help`` for further help text. - -.. include:: deployment_output.rst - -.. _deployment_status: - -.. include:: deployment_status.rst - -.. include:: deployment_log.rst - -Ansible configuration -^^^^^^^^^^^^^^^^^^^^^ -When ``ansible-playbook`` runs, it will use a configuration file with the -following default values:: - - [defaults] - retry_files_enabled = False - log_path = /ansible.log - forks = 25 - - [ssh_connection] - ssh_args = -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -o ControlMaster=auto -o ControlPersist=60s - control_path_dir = /ansible-ssh - -Any of the above configuration options can be overridden, or any additional -ansible configuration used by passing the path to an ansible configuration file -with ``--override-ansible-cfg`` on the deployment command. - -For example the following command will use the configuration options from -``/home/stack/ansible.cfg``. Any options specified in the override file will -take precedence over the defaults:: - - openstack overcloud deploy \ - ... - --override-ansible-cfg /home/stack/ansible.cfg - - -Ansible project directory -^^^^^^^^^^^^^^^^^^^^^^^^^ -The workflow will create an Ansible project directory with the plan name under -``$HOME/overcloud-deploy//config-download``. For the default plan name of ``overcloud`` the working -directory will be:: - - $HOME/overcloud-deploy/overcloud/config-download/overcloud - -The project directory is where the downloaded software configuration from -Heat will be saved. It also includes other ansible-related files necessary to -run ``ansible-playbook`` to configure the overcloud. - -The contents of the project directory include the following files: - -tripleo-ansible-inventory.yaml - Ansible inventory file containing hosts and vars for all the overcloud nodes. -ansible.log - Log file from the last run of ``ansible-playbook``. -ansible.cfg - Config file used when running ``ansible-playbook``. -ansible-playbook-command.sh - Executable script that can be used to rerun ``ansible-playbook``. -ssh_private_key - Private ssh key used to ssh to the overcloud nodes. 
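To sanity-check the generated inventory and ssh key, an ad-hoc Ansible ping can be run from the project directory. This is an illustrative sketch only, not part of the documented workflow: the path assumes the default ``overcloud`` plan name described above, and the ``all`` host group is used to avoid assuming specific inventory group names::

    cd ~/overcloud-deploy/overcloud/config-download/overcloud
    ansible -i tripleo-ansible-inventory.yaml all \
        --private-key ssh_private_key -m ping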
- -Reproducing ansible-playbook -^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Once in the project directory created, simply run ``ansible-playbook-command.sh`` -to reproduce the deployment:: - - ./ansible-playbook-command.sh - -Any additional arguments passed to this script will be passed unchanged to the -``ansible-playbook`` command:: - - ./ansible-playbook-command.sh --check - -Using this method it is possible to take advantage of various Ansible features, -such as check mode (``--check``), limiting hosts (``--limit``), or overriding -variables (``-e``). - -Git repository -^^^^^^^^^^^^^^ -The ansible project directory is a git repository. Each time config-download -downloads the software configuration data from Heat, the project directory will -be checked for differences. A new commit will be created if there are any -changes from the previous revision. - -From within the ansible project directory, standard git commands can be used to -explore each revision. Commands such as ``git log``, ``git show``, and ``git -diff`` are useful ways to describe how each commit to the software -configuration differs from previous commits. - -Applying earlier versions of configuration -__________________________________________ -Using commands such as ``git revert`` or ``git checkout``, it is possible to -update the ansible project directory to an earlier version of the software -configuration. - -It is possible to then apply this earlier version with ``ansible-playbook``. -However, caution should be exercised as this could lead to a broken overcloud -deployment. Only well understood earlier versions should be attempted to be -applied. - -.. note:: - - Data migration changes will never be undone by applying an earlier version - of the software configuration with config-download. For example, database - schema migrations that had already been applied would never be undone by - only applying an earlier version of the configuration. - - Software changes that were related to hardware changes in the overcloud - (such as scaling up or down) would also not be completely undone by - applying earlier versions of the software configuration. - -.. note:: - - Reverting to earlier revisions of the project directory has no effect on - the configuration stored in the Heat stack. A corresponding change should - be made to the deployment templates, and the stack updated to make the - changes permanent. - -.. _manual-config-download: - -Manual config-download ----------------------- -Prior to running the ansible playbooks generated by config-download, it is necessary -to ensure the baremetal nodes have already been provisioned. See the baremetal deployment -guide first: - -:doc:`configure-nodes-before-deployment <./network_v2>` - -The config-download steps can be skipped when running ``openstack overcloud deploy`` -by passing ``--stack-only``. This will cause tripleoclient to only deploy the Heat -stack. - -When running ``openstack overcloud deploy`` with the ``--stack-only`` option, this -will still download the ansible content to the default directory -``$HOME/overcloud-deploy/overcloud/config-download``. But it will stop before running -the ``ansible-playbook`` command. - -This method is described in the following sections. - - -Run ansible-playbook -^^^^^^^^^^^^^^^^^^^^ -Once the baremetal nodes have been configured, and the configuration has been -downloaded during the ``--stack-only`` run of ``openstack overcloud deploy``. 
-You can then run ``ansible-playbook`` manually to configure the overcloud nodes:: - - ansible-playbook \ - -i /home/stack/config-download/overcloud/tripleo-ansible-inventory.yaml \ - --private-key /path/private/ssh/key \ - --become \ - config-download/deploy_steps_playbook.yaml - -.. note:: - - ``--become`` is required when running ansible-playbook. - -All default ansible configuration values will be used when manually running -``ansible-playbook`` in this manner. These values can be customized through -`ansible configuration -`_. - -The following minimum configuration is recommended:: - - [defaults] - log_path = ansible.log - forks = 25 - timeout = 30 - - [ssh_connection] - ssh_args = -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -o ControlMaster=auto -o ControlPersist=30m - retries = 8 - pipelining = True - - -.. note:: - - When running ``ansible-playbook`` manually, the overcloud status as returned - by ``openstack overcloud status`` won't be automatically updated due to the - configuration being applied outside of the API. - - See :ref:`deployment_status` for setting the status manually. - -Ansible project directory contents ----------------------------------- -This section details the structure of the ``config-download`` generated -Ansible project directory. - -Playbooks -^^^^^^^^^ -deploy_steps_playbook.yaml - Initial deployment or template update (not minor update) - - Further detailed in :ref:`deploy_steps_playbook.yaml` -fast_forward_upgrade_playbook.yaml - Fast forward upgrades -post_upgrade_steps_playbook.yaml - Post upgrade steps for major upgrade -pre_upgrade_rolling_steps_playbook.yaml - Pre upgrade steps for major upgrade -update_steps_playbook.yaml - Minor update steps -upgrade_steps_playbook.yaml - Major upgrade steps - -.. _deploy_steps_playbook.yaml: - -deploy_steps_playbook.yaml -__________________________ -``deploy_steps_playbook.yaml`` is the playbook used for deployment and template -update. It applies all the software configuration necessary to deploy a full -overcloud based on the templates provided as input to the deployment command. - -This section will summarize at high level the different ansible plays used -within this playbook. The play names shown here are the same names used within -the playbook and are what will be shown in the output when ``ansible-playbook`` is -run. - -The ansible tags set on each play are also shown below. - -Gather facts from undercloud - Fact gathering for the undercloud node - - tags: facts -Gather facts from overcloud - Fact gathering for the overcloud nodes - - tags: facts -Load global variables - Loads all variables from `l`global_vars.yaml`` - - tags: always -Common roles for TripleO servers - Applies common ansible roles to all overcloud nodes. Includes - ``tripleo_bootstrap`` for installing bootstrap packages and - ``tripleo_ssh_known_hosts`` for configuring ssh known hosts. - - tags: common_roles -Overcloud deploy step tasks for step 0 - Applies tasks from the ``deploy_steps_tasks`` template interface - - tags: overcloud, deploy_steps -Server deployments - Applies server specific Heat deployments for configuration such as networking - and hieradata. Includes ``NetworkDeployment``, ``Deployment``, - ``AllNodesDeployment``, etc. - - tags: overcloud, pre_deploy_steps -Host prep steps - Applies tasks from the ``host_prep_steps`` template interface - - tags: overcloud, host_prep_steps -External deployment step [1,2,3,4,5] - Applies tasks from the ``external_deploy_steps_tasks`` template interface. 
- These tasks are run against the undercloud node only. - - tags: external, external_deploy_steps -Overcloud deploy step tasks for [1,2,3,4,5] - Applies tasks from the ``deploy_steps_tasks`` template interface - - tags: overcloud, deploy_steps -Overcloud common deploy step tasks [1,2,3,4,5] - Applies the common tasks done at each step to include puppet host - configuration, ``container-puppet.py``, and ``paunch`` or - ``tripleo_container_manage`` Ansible role (container configuration). - - tags: overcloud, deploy_steps -Server Post Deployments - Applies server specific Heat deployments for configuration done after the 5 - step deployment process. - - tags: overcloud, post_deploy_steps -External deployment Post Deploy tasks - Applies tasks from the ``external_post_deploy_steps_tasks`` template interface. - These tasks are run against the undercloud node only. - - tags: external, external_deploy_steps - - -Task files -^^^^^^^^^^ -These task files include tasks specific to their intended function. The task -files are automatically used by specific playbooks from the previous section. - -**boot_param_tasks.yaml** - -**common_deploy_steps_tasks.yaml** - -**docker_puppet_script.yaml** - -**external_deploy_steps_tasks.yaml** - -**external_post_deploy_steps_tasks.yaml** - -**fast_forward_upgrade_bootstrap_role_tasks.yaml** - -**fast_forward_upgrade_bootstrap_tasks.yaml** - -**fast_forward_upgrade_post_role_tasks.yaml** - -**fast_forward_upgrade_prep_role_tasks.yaml** - -**fast_forward_upgrade_prep_tasks.yaml** - -**fast_forward_upgrade_release_tasks.yaml** - -**upgrade_steps_tasks.yaml** - -**update_steps_tasks.yaml** - -**pre_upgrade_rolling_steps_tasks.yaml** - -**post_upgrade_steps_tasks.yaml** - -**post_update_steps_tasks.yaml** - -Heat Role directories -^^^^^^^^^^^^^^^^^^^^^ -Each Heat role from the roles data file used in the deployment (specified with -``-r`` from the ``openstack overcloud deploy`` command), will have a -correspondingly named directory. - -When using the default roles, these directories would be: - -**Controller** - -**Compute** - -**ObjectStorage** - -**BlockStorage** - -**CephStorage** - -A given role directory contains role specific task files and a subdirectory for -each host for that role. For example, when using the default hostnames, the -**Controller** role directory would contain the following host subdirectories: - -**overcloud-controller-0** - -**overcloud-controller-1** - -**overcloud-controller-2** - -Variable and template related files -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -group_vars - Directory which contains variables specific to different ansible inventory - groups. -global_vars.yaml - Global ansible variables applied to all overcloud nodes -templates - Directory containing any templates used during the deployment - -Other files -^^^^^^^^^^^ -Other files in the project directory are: - -ansible-playbook-command.sh - Script to reproduce ansible-playbook command -tripleo-ansible-inventory.yaml - Ansible inventory file -overcloud-config.tar.gz - Tarball of Ansible project directory - -Running specific tasks ----------------------- -Running only specific tasks (or skipping certain tasks) can be done from within -the ansible project directory. - -.. note:: - - Running specific tasks is an advanced use case and only recommended for - specific scenarios where the deployer is aware of the impact of skipping or - only running certain tasks. 
- - This can be useful during troubleshooting and debugging scenarios, but - should be used with caution as it can result in an overcloud that is not - fully configured. - -.. warning:: - - All tasks that are part of the deployment need to be run, and in the order - specified. When skipping tasks with ``--tags``, ``-skip-tags``, - ``--start-at-task``, the deployment could be left in an inoperable state. - - The functionality to skip tasks or only run certain tasks is meant to aid in - troubleshooting and iterating more quickly on failing deployments and - updates. - - All changes to the deployed cloud must still be applied through the Heat - templates and environment files passed to the ``openstack overcloud deploy`` - command. Doing so ensures that the deployed cloud is kept in sync with the - state of the templates and the state of the Heat stack. - -.. warning:: - - When skipping tasks, the overcloud must be in the state expected by the task - starting task. Meaning, the state of the overcloud should be the same as if - all the skipped tasks had been applied. Otherwise, the result of the tasks - that get executed will be undefined and could leave the cloud in an - inoperable state. - - Likewise, the deployed cloud may not be left in its fully configured state - if tasks are skipped at the end of the deployment. - -Complete the :ref:`manual-config-download` steps to create the ansible project -directory, or use the existing project directory at -``$HOME/overcloud-deploy//config-download/``. - - -Tags -^^^^ -The playbooks use tagged tasks for finer-grained control of what to apply if -desired. Tags can be used with the ``ansible-playbook`` CLI arguments ``--tags`` or -``--skip-tags`` to control what tasks are executed. The enabled tags are: - -facts - fact gathering -common_roles - ansible roles common to all nodes -overcloud - all plays for overcloud deployment -pre_deploy_steps - deployments that happen pre deploy_steps -host_prep_steps - Host preparation steps -deploy_steps - deployment steps -post_deploy_steps - deployments that happen post deploy_steps -external - all external deployments -external_deploy_steps - external deployments that run on the undercloud - -See :ref:`deploy_steps_playbook.yaml` for a description of which tags apply to -specific plays in the deployment playbook. - -Server specific pre and post deployments -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -The list of server specific pre and post deployments run during the `Server -deployments` and `Server Post Deployments` plays (see -:ref:`deploy_steps_playbook.yaml`) are dependent upon what custom roles and -templates are used with the deployment. - -The list of these tasks are defined in an ansible group variable that applies -to each server in the inventory group named after the Heat role. From the -ansible project directory, the value can be seen within the group variable file -named after the Heat role:: - - $ cat group_vars/Compute - Compute_pre_deployments: - - UpgradeInitDeployment - - HostsEntryDeployment - - DeployedServerBootstrapDeployment - - InstanceIdDeployment - - NetworkDeployment - - ComputeUpgradeInitDeployment - - ComputeDeployment - - ComputeHostsDeployment - - ComputeAllNodesDeployment - - ComputeAllNodesValidationDeployment - - ComputeHostPrepDeployment - - ComputeArtifactsDeploy - - Compute_post_deployments: [] - -``_pre_deployments`` is the list of pre deployments, and -``_post_deployments`` is the list of post deployments. 
- -To specify the specific task to run for each deployment, the value of the -variable can be defined on the command line when running ``ansible-playbook``, -which will overwrite the value from the group variable file for that role. - -For example:: - - ansible-playbook \ - -e Compute_pre_deployments=NetworkDeployment \ - --tags pre_deploy_steps - # other CLI arguments - -Using the above example, only the task for the ``NetworkDeployment`` resource -would get applied since it would be the only value defined in -``Compute_pre_deployments``, and ``--tags pre_deploy_steps`` is also specified, -causing all other plays to get skipped. - -Starting at a specific task -^^^^^^^^^^^^^^^^^^^^^^^^^^^ -To start the deployment at a specific task, use the ``ansible-playbook`` CLI -argument ``--start-at-task``. To see a list of task names for a given playbook, -``--list-tasks`` can be used to list the task names. - -.. note:: - - Some tasks that include the ``step`` variable or other ansible variables in - the task name do not work with ``--start-at-task`` due to a limitation in - ansible. For example the task with the name:: - - Start containers for step 1 - - won't work with ``--start-at-task`` since the step number is in the name - (1). - -When using ``--start-at-task``, the tasks that gather facts and load global -variables for the playbook execution are skipped by default. Skipping those -tasks can cause unexpected errors in later tasks. To avoid errors, those tasks -can be forced to execute when using ``--start-at-task`` by including the -following options to the ``ansible-playbook`` command:: - - ansible-playbook \ - \ - -e gather_facts=true \ - -e @global_vars.yaml - -The ``global_vars.yaml`` variable file exists in the config-download directory -that was either generated manually or under ``$HOME/config-download``. - -Previewing changes ------------------- -Changes can be previewed to see what will be changed before any changes are -applied to the overcloud. To preview changes, the stack update must be run with -the ``--stack-only`` cli argument:: - - openstack overcloud deploy \ - --stack-only - # other CLI arguments - - -When ansible-playbook is run, use the ``--check`` CLI argument with -ansible-playbook to preview any changes. The extent to which changes can be -previewed is dependent on many factors such as the underlying tools in use -(puppet, docker, etc) and the support for ansible check mode in the given -ansible module. - -The ``--diff`` option can also be used with ``--check`` to show the -differences that would result from changes. - -See `Ansible Check Mode ("Dry Run") -`_ -for more details. diff --git a/deploy-guide/source/deployment/ansible_config_download_differences.rst b/deploy-guide/source/deployment/ansible_config_download_differences.rst deleted file mode 100644 index 399ef4e7..00000000 --- a/deploy-guide/source/deployment/ansible_config_download_differences.rst +++ /dev/null @@ -1,133 +0,0 @@ -.. _config_download_differences: - -Ansible config-download differences -=================================== -With the Queens release, it became possible to use Ansible to apply the -overcloud configuration and this method became the default behavior with -the Rockt release. - -The feature is fully documented at -:doc:`ansible_config_download`, while this page details -the differences to the deployer experience with config-download. - -Ansible vs. 
os-collect-config ------------------------------ -Previously, TripleO used an agent running on each overcloud node called -``os-collect-config``. This agent periodically polled the undercloud Heat API for -software configuration changes that needed to be applied to the node. - -``os-collect-config`` ran ``os-refresh-config`` and ``os-apply-config`` as -needed whenever new software configuration changes were detected. This model -is a **"pull"** style model given each node polled the Heat API and pulled changes, -then applied them locally. - -With config-download, TripleO has switched to a **"push"** style model. Ansible -is run from a central control node which is the undercloud. -``ansible-playbook`` is run from the undercloud and software configuration -changes are pushed out to each overcloud node via ssh. - -With the new model, ``os-collect-config``, ``os-refresh-config``, and -``os-apply-config`` are no longer used in a TripleO deployment. The -``os-collect-config`` service is now disabled by default and won't start on -boot. - -.. note:: - - Heat standalone software deployments still rely on ``os-collect-config``. - They are a type of deployment that can be applied to overcloud nodes - directly via Heat outside of the overcloud stack, and without having to do - a full stack update of the overcloud stack. - - These types of deployments are **NOT** typically used when doing TripleO. - - However, if these deployments are being used in an environment to manage - overcloud nodes, then the ``os-collect-config`` service must be started and - enabled on the overcloud nodes where these types of deployments are - applied. - - For reference, the Heat CLI commands that are used to create these types of - deployments are:: - - openstack software config create ... - openstack software deployment create ... - - If these commands are not being used in the environment, then - ``os-collect-config`` can be left disabled. - -Deployment workflow -------------------- -The default workflow executed by ``openstack overcloud deploy`` takes care of -all the necessary changes when using config-download. In both the previous and -new workflows, ``openstack overcloud deploy`` (tripleoclient) takes care of -automating all the steps through Mistral workflow(s). Therefore, existing CLI -scripts that called ``openstack overcloud deploy`` will continue to work with -no changes. - -It's important to recognize the differences in the workflow to aid in -understanding the deployment and operator experience. Previously, Heat was -responsible for: - -#. (Heat) Creating OpenStack resources (Neutron networks, Nova/Ironic instances, etc) -#. (Heat) Creating software configuration -#. (Heat) Applying the created software configuration to the Nova/Ironic instances - -With config-download, Heat is no longer responsible for the last item of -applying the created software configuration as ``ansible-playbook`` is used -instead. - -Therefore, only creating the Heat stack for an overcloud is no longer all that -is required to fully deploy the overcloud. Ansible also must be run from the -undercloud to apply the software configuration, and do all the required tasks -to fully deploy an overcloud such as configuring services, bootstrap tasks, and -starting containers. - -The new steps are summarized as: - -#. (Heat) Creating OpenStack resources (Neutron networks, Nova/Ironic instances, etc) -#. (Heat) Creating software configuration -#. (tripleoclient) Enable tripleo-admin ssh user -#. 
(ansible) Applying the created software configuration to the Nova/Ironic instances - -See :doc:`ansible_config_download` for details on the -tripleo-admin ssh user step. - -Deployment CLI output ---------------------- -During a deployment, the expected output from ``openstack overcloud deploy`` -has changed. Output up to and including the stack create/update is similar to -previous releases. Stack events will be shown until the stack operation is -complete. - -After the stack goes to ``CREATE_COMPLETE`` (or ``UPDATE_COMPLETE``), output -from the steps to enable the tripleo-admin user via ssh are shown. - -.. include:: deployment_output.rst - -.. include:: deployment_status.rst - -.. include:: deployment_log.rst - -config-download Use Cases -------------------------- -config-download exposes the ability to manually run the ``ansible-playbook`` -command against the playbooks that are generated for the deployment. This leads -to many advantages over the older Heat deployment model. - -- Test deployments. Using the - ``ansible-playbook --check --diff deploy_steps_playbook.yaml`` - arguments will not modify an existing deployment. Instead, it will only show - any changes that would be made. -- Development environment testing. Ansible variables can be modified to do - quick testing. Once verified, Heat environment templates need to be updated - to reflect the change permanently. Then the config-download content should - be re-generated by running ``openstack overcloud deploy --stack-only``. -- Run specific tasks. It is possible to run certain parts of a deployment by - using ``--tags``. -- Prepare the deployment or update ahead of time and then run the playbooks - later. The operations around a deployment can be done at different times to - minimize risk. -- Integration with CI/CD. Additional checks and verification can be added to - a CI/CD pipeline relating to updating Heat templates and the Ansible - config-download content. -- AWX or Ansible Tower integration. Ansible content can be imported and ran - through a scalable and distributed system. diff --git a/deploy-guide/source/deployment/architecture.rst b/deploy-guide/source/deployment/architecture.rst deleted file mode 100644 index 528531c0..00000000 --- a/deploy-guide/source/deployment/architecture.rst +++ /dev/null @@ -1,335 +0,0 @@ -TripleO Containers Architecture -=============================== - -This document explains the details around TripleO's containers architecture. The -document goes into the details of how the containers are built for TripleO, -how the configuration files are generated and how the containers are eventually -run. - -Like other areas of TripleO, the containers based deployment requires a couple -of different projects to play together. The next section will cover each of the -parts that allow for deploying OpenStack in containers using TripleO. - - -Containers runtime deployment and configuration notes ------------------------------------------------------ - -TripleO has transitioned to the `podman`_ container runtime. Podman does not -use a persistent daemon to manage containers. TripleO wraps the container -service execution in systemd managed services. These services are named -tripleo_. Prior to Stein, TripleO deployed the containers -runtime and image components from the docker packages. The installed components -include the docker daemon system service and `OCI`_ compliant `Moby`_ and -`Containerd`_ - the building blocks for the container system. 
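-
-On a deployed node, the systemd wrapping of the container service execution
-described above can be inspected directly. The commands below are only an
-illustration; the exact unit and container names depend on which services run
-on that node::
-
-    sudo systemctl list-units 'tripleo_*'
-    sudo podman ps --format '{{.Names}} {{.Status}}'
-
-Each ``tripleo_*`` unit manages the matching podman container, so the usual
-``systemctl status`` and ``systemctl restart`` commands apply to it.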
- -Containers control plane includes `Paunch`_ or tripleo_container_manage_ and -systemd for the stateless services, and Pacemaker `Bundle`_ for the -containerized stateful services, like the messaging system or database. - -.. _podman: https://podman.io/ -.. _OCI: https://www.opencontainers.org/ -.. _Moby: https://mobyproject.org/ -.. _Containerd: https://github.com/containerd/containerd -.. _Bundle: https://wiki.clusterlabs.org/wiki/Bundle_Walk-Through - -Currently we provide a ``ContainerCli`` parameter which can be used to change -the container runtimes, but only podman is supported for both undercloud and -overcloud. - -We have provided various ``Container*`` configuration parameters in TripleO -Heat Templates for operators to tune some of the container based settings. -There are still some ``Docker*`` configuration parameters in TripleO Heat -Templates available for operators which are left over for the Docker based -deployment or historical reasons. -Parameter override example:: - - parameter_defaults: - DockerDebug: true - DockerOptions: '--log-driver=syslog --live-restore' - DockerNetworkOptions: '--bip=10.10.0.1/16' - DockerInsecureRegistryAddress: ['myregistry.local:8787'] - DockerRegistryMirror: 'mirror.regionone.local:8081/myregistry-1.local/' - -* ``DockerDebug`` adds more framework-specific details to the deployment logs. - -* ``DockerOptions``, ``DockerNetworkOptions``, ``DockerAdditionalSockets`` define - the docker service startup options, like the default IP address for the - `docker0` bridge interface (``--bip``) or SELinux mode (``--selinux-enabled``). - - .. note:: Make sure the default CIDR assigned for the `docker0` bridge interface - does not conflict to other network ranges defined for your deployment. - - .. note:: These options have no effect when using podman. - -* ``DockerInsecureRegistryAddress``, ``DockerRegistryMirror`` allow you to - specify a custom registry mirror which can optionally be accessed insecurely - by using the ``DockerInsecureRegistryAddress`` parameter. - -See the official dockerd `documentation`_ for the reference. - -.. _documentation: https://docs.docker.com/engine/reference/commandline/dockerd/ - - -Building Containers -------------------- - -The containers used for TripleO are sourced from Kolla. Kolla is an OpenStack -team that aims to create tools to allow for deploying OpenStack on container -technologies. Kolla (or Kolla Build) is one of the tools produced by this team -and it allows for building and customizing container images for OpenStack -services and their dependencies. - -TripleO consumes these images and takes advantage of the customization -capabilities provided by the `Kolla`_ build tool to install some packages that -are required by other parts of TripleO. - -TripleO maintains its complete list of kolla customization in the -`tripleo-common`_ project. - -.. _Kolla: https://docs.openstack.org/kolla/latest/admin/image-building.html#dockerfile-customisation -.. _tripleo-common: https://github.com/openstack/tripleo-common/blob/master/container-images/tripleo_kolla_template_overrides.j2 - - -Paunch ------- - -.. note:: During Ussuri cycle, Paunch has been replaced by the - tripleo_container_manage_ Ansible role. Therefore, the following block - is deprecated in favor of the new role. However, the JSON input remains - backward compatible and the containers are configured the same way as it - was with Paunch. - -The `paunch`_ hook is used to manage containers. 
This hook takes json -as input and uses it to create and run containers on demand. The json -describes how the container will be started. Some example keys are: - -* **net**: To specify what network to use. This is commonly set to host. - -* **privileged**: Whether to give full access to the host's devices to the - container, similar to what happens when the service runs directly on the host. - -* **volumes**: List of host path volumes, named volumes, or dynamic volumes to - bind on the container. - -* **environment**: List of environment variables to set on the container. - -.. note:: The list above is not exhaustive and you should refer to the - `paunch` docs for the complete list. - -The json file passed to this hook is built out of the `docker_config` attribute -defined in the service's yaml file. Refer to the `Docker specific settings`_ -section for more info on this. - -.. _paunch: https://github.com/openstack/paunch -.. _tripleo_container_manage: https://docs.openstack.org/tripleo-ansible/latest/roles/role-tripleo_container_manage.html - -TripleO Heat Templates ----------------------- -.. _containers_arch_tht: - -The `TripleO Heat Templates`_ repo is where most of the logic resides in the form -of heat templates. These templates define each service, the containers' -configuration and the initialization or post-execution operations. - -.. _TripleO Heat Templates: https://opendev.org/openstack/tripleo-heat-templates - -Understanding container related files -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The docker templates can be found under the `docker` sub directory in the -`tripleo-heat-templates` root. The services files are under `docker/service` but -the `docker` directory contains a bit more than just service files and some of -them are worth diving into: - -deploy-steps.j2 -............... - -This file is a jinja template and it's rendered before the deployment is -started. This file defines the resources that are executed before and after the -container initialization. - -.. _container-puppet.py: - -container-puppet.py -................... - -This script is responsible for generating the config files for each service. The -script is called from the `deploy-steps.j2` file and it takes a `json` file as -configuration. The json files passed to this script are built out of the -`puppet_config` parameter set in every service template (explained in the -`Docker specific settings`_ section). - -The `container-puppet.py` execution results in a oneshot container being executed -(usually named `puppet-$service_name`) to generate the configuration options or -run other service specific initialization tasks. Example: Create Keystone endpoints. - -.. note:: container-puppet.py was previously docker-puppet.py prior to the Train - cycle. - -Anatomy of a containerized service template -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Containerized services templates inherit almost everything from the puppet based -templates, with some exceptions for some services. New properties have been -added to define container specific configurations, which will be covered in this -section. - -Docker specific settings -........................ - -Each service may define output variable(s) which control config file generation, -initialization, and stepwise deployment of all the containers for this service. -The following sections are available: - -* config_settings: This setting containers hiera data that is used - to control how the Puppet modules generate config files for each service. 
- -* step_config: This setting controls the manifest that is used to - create docker config files via puppet. The puppet tags below are - used along with this manifest to generate a config directory for - this container. - -* kolla_config: Contains YAML that represents how to map config files - into the kolla container. This config file is typically mapped into - the container itself at the /var/lib/kolla/config_files/config.json - location and drives how kolla's external config mechanisms work. - -* docker_config: Data that is passed to the docker-cmd hook to configure - a container, or step of containers at each step. See the available steps - below and the related docker-cmd hook documentation in the heat-agents - project. - -* puppet_config: This section is a nested set of key value pairs - that drive the creation of config files using puppet. - Required parameters include: - - * puppet_tags: Puppet resource tag names that are used to generate config - files with puppet. Only the named config resources are used to generate - a config file. Any service that specifies tags will have the default - tags of 'file,concat,file_line,augeas,cron' appended to the setting. - Example: keystone_config - - * config_volume: The name of the volume (directory) where config files - will be generated for this service. Use this as the location to - bind mount into the running Kolla container for configuration. - - * config_image: The name of the docker image that will be used for - generating configuration files. This is often the same container - that the runtime service uses. Some services share a common set of - config files which are generated in a common base container. - - * step_config: This setting controls the manifest that is used to - create docker config files via puppet. The puppet tags below are - used along with this manifest to generate a config directory for - this container. - -* container_puppet_tasks: This section provides data to drive the - container-puppet.py tool directly. The task is executed only once - within the cluster (not on each node) and is useful for several - puppet snippets we require for initialization of things like - keystone endpoints, database users, etc. See container-puppet.py - for formatting. NOTE: these tasks were docker_puppet_tasks prior to the - Train cycle. - - -Container steps -............... - -Similar to baremetal, containers are brought up in a stepwise manner. The -current architecture supports bringing up baremetal services alongside of -containers. Therefore, baremetal steps may be required depending on the service -and they are always executed before the corresponding container step. - -The list below represents the correlation between the baremetal and the -containers steps. These steps are executed sequentially: - -* Containers config files generated per hiera settings. -* Host Prep -* Load Balancer configuration baremetal - - * Step 1 external steps (execute Ansible on Undercloud) - * Step 1 deployment steps (Ansible) - * Common Deployment steps - - * Step 1 baremetal (Puppet) - * Step 1 containers - -* Core Services (Database/Rabbit/NTP/etc.) - - * Step 2 external steps (execute Ansible on Undercloud) - * Step 2 deployment steps (Ansible) - * Common Deployment steps - - * Step 2 baremetal (Puppet) - * Step 2 containers - -* Early Openstack Service setup (Ringbuilder, etc.) 
- - * Step 3 external steps (execute Ansible on Undercloud) - * Step 3 deployment steps (Ansible) - * Common Deployment steps - - * Step 3 baremetal (Puppet) - * Step 3 containers - -* General OpenStack Services - - * Step 4 external steps (execute Ansible on Undercloud) - * Step 4 deployment steps (Ansible) - * Common Deployment steps - - * Step 4 baremetal (Puppet) - * Step 4 containers (Keystone initialization occurs here) - -* Service activation (Pacemaker) - - * Step 5 external steps (execute Ansible on Undercloud) - * Step 5 deployment steps (Ansible) - * Common Deployment steps - - * Step 5 baremetal (Puppet) - * Step 5 containers - - -Service Bootstrap -~~~~~~~~~~~~~~~~~ - -Bootstrapping services is a one-shot operation for most services and it's done -by defining a separate container that shares the same structure as the main -service container commonly defined under the `docker_step` number 3 (see `Container -steps`_ section above). - -Unlike normal service containers, the bootstrap container should be run in the -foreground - `detach: false` - so there can be more control on when the -execution is done and whether it succeeded or not. - -Example taken from Glance's service file:: - - - docker_config: - step_3: - glance_api_db_sync: - image: *glance_image - net: host - privileged: false - detach: false - volumes: &glance_volumes - - /var/lib/kolla/config_files/glance-api.json:/var/lib/kolla/config_files/config.json - - /etc/localtime:/etc/localtime:ro - - /lib/modules:/lib/modules:ro - - /var/lib/config-data/glance_api/:/var/lib/kolla/config_files/src:ro - - /run:/run - - /dev:/dev - - /etc/hosts:/etc/hosts:ro - environment: - - KOLLA_BOOTSTRAP=True - - KOLLA_CONFIG_STRATEGY=COPY_ALWAYS - step_4: - glance_api: - image: *glance_image - net: host - privileged: false - restart: always - volumes: *glance_volumes - environment: - - KOLLA_CONFIG_STRATEGY=COPY_ALWAYS diff --git a/deploy-guide/source/deployment/build_single_image.rst b/deploy-guide/source/deployment/build_single_image.rst deleted file mode 100644 index 7191f6e6..00000000 --- a/deploy-guide/source/deployment/build_single_image.rst +++ /dev/null @@ -1,30 +0,0 @@ -Building a Single Image -======================= - -The ``openstack overcloud image build --all`` command builds all the images -needed for an overcloud deploy. However, you may need to rebuild a single -one of them. Use the following commands if you want to do it:: - - openstack overcloud image build --type {agent-ramdisk|deploy-ramdisk|fedora-user|overcloud-full} - -If the target image exist, this commands ends silently. Make sure to delete a -previous version of the image to run the command as you expect. - -Uploading the New Single Image ------------------------------- - -After the new image is built, it can be uploaded using the same command as -before, with the ``--update-existing`` flag added:: - - openstack overcloud image upload --update-existing - -Note that if the new image is a ramdisk, the Ironic nodes need to be -re-configured to use it. This can be done by re-running:: - - openstack overcloud node configure --all-manageable - -.. note:: - If you want to use custom images for boot configuration, specify their names in - ``--deploy-kernel`` and ``--deploy-ramdisk`` options. - -Now the new image should be fully ready for use by new deployments. 
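-
-After the upload, it can be useful to confirm that Glance picked up the
-replacement. A minimal check, assuming the ``overcloud-full`` image was the one
-rebuilt::
-
-    openstack image list
-    openstack image show overcloud-full
-
-The ``updated_at`` field in the output should reflect the new build.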
diff --git a/deploy-guide/source/deployment/container_image_prepare.rst b/deploy-guide/source/deployment/container_image_prepare.rst deleted file mode 100644 index 39aba3ef..00000000 --- a/deploy-guide/source/deployment/container_image_prepare.rst +++ /dev/null @@ -1,552 +0,0 @@ -.. _prepare-environment-containers: - -Container Image Preparation -=========================== - -This documentation explains how to instruct container image preparation to do -different preparation tasks. - -Choosing an image registry strategy -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Container images need to be pulled from an image registry which is reliably -available to overcloud nodes. The three common options to serve images are to -use the default registry, the registry available on the undercloud, or an -independently managed registry. - -.. note:: Private SSL-enabled registries with a custom CA are not tested. - If you have to use one, the custom CA (certificate authority) that is needed - for the registry should be installed before deploying the overcloud. For - example, it can be injected into the overcloud image, or installed via first - boot scripts. - -During deployment the environment parameter -`ContainerImagePrepare` is used to specify any desired behaviour, including: - -- Where to pull images from -- Optionally, which local repository to push images to -- How to discover the latest versioned tag for each image - -In the following examples, the parameter `ContainerImagePrepare` will be -specified in its own file `containers-prepare-parameters.yaml`. - -Default registry -................ - -By default the images will be pulled from a remote registry namespace such as -`docker.io/tripleomaster`. This is fine for development or POC clouds but is -not appropriate for production clouds due to the transfer of large amounts of -duplicate image data over a potentially unreliable internet connection. - -During deployment with this default, any heat parameters which refer to -required container images will be populated with a value pointing at the -default registry, with a tag representing the latest image version. - -To generate the `containers-prepare-parameters.yaml` containing these defaults, -run this command:: - - openstack tripleo container image prepare default \ - --output-env-file containers-prepare-parameters.yaml - -This will generate a file containing a `ContainerImagePrepare` similar to the -following:: - - parameter_defaults: - ContainerImagePrepare: - - set: - ceph_image: daemon - ceph_namespace: docker.io/ceph - ceph_tag: v4.0.0-stable-4.0-nautilus-centos-7-x86_64 - name_prefix: centos-binary- - name_suffix: '' - namespace: docker.io/tripleomaster - neutron_driver: null - tag: current-tripleo - tag_from_label: rdo_version - -During deployment, this will lookup images in `docker.io/tripleomaster` tagged -with `current-tripleo` and discover a versioned tag by looking up the label -`rdo_version`. This will result in the heat image parameters in the plan being -set with appropriate values, such as:: - - DockerNeutronMetadataImage: docker.io/tripleomaster/centos-binary-neutron-metadata-agent:35414701c176a6288fc2ad141dad0f73624dcb94_43527485 - DockerNovaApiImage: docker.io/tripleomaster/centos-binary-nova-api:35414701c176a6288fc2ad141dad0f73624dcb94_43527485 - -.. note:: The tag is actually a Delorean hash. You can find out the versions - of packages by using this tag. - For example, `35414701c176a6288fc2ad141dad0f73624dcb94_43527485` tag, - is in fact using this `Delorean repository`_. - -.. 
_populate-local-registry-containers: - -Undercloud registry -................... - -As part of the undercloud install, an image registry is configured on port -`8787`. This can be used to increase reliability of image pulls, and minimise -overall network transfers. -The undercloud registry can be used by generating the following -`containers-prepare-parameters.yaml` file:: - - openstack tripleo container image prepare default \ - --local-push-destination \ - --output-env-file containers-prepare-parameters.yaml - -This will generate a file containing a `ContainerImagePrepare` similar to the -following:: - - parameter_defaults: - ContainerImagePrepare: - - push_destination: true - set: - ceph_image: daemon - ceph_namespace: docker.io/ceph - ceph_tag: v4.0.0-stable-4.0-nautilus-centos-7-x86_64 - name_prefix: centos-binary- - name_suffix: '' - namespace: docker.io/tripleomaster - neutron_driver: null - tag: current-tripleo - tag_from_label: rdo_version - -This is identical to the default registry, except for the `push_destination: -true` entry which indicates that the address of the local undercloud registry -will be discovered at upload time. - -By specifying a `push_destination` value such as `192.168.24.1:8787`, during -deployment all images will be pulled from the remote registry then pushed to -the specified registry. The resulting image parameters will also be modified to -refer to the images in `push_destination` instead of `namespace`. - -.. admonition:: Stein and newer - :class: stein - - Prior to Stein, Docker Registry v2 (provided by "Docker - Distribution" package), was the service running on tcp 8787. - Since Stein it has been replaced with an Apache vhost called - "image-serve", which serves the containers on tcp 8787 and - supports podman or buildah pull commands. Though podman or buildah - tag, push, and commit commands are not supported, they are not - necessary because the same functionality may be achieved through - use of the "sudo openstack tripleo container image prepare" - commands described in this document. - - -Running container image prepare -............................... -The prepare operations are run at the following times: - -#. During ``undercloud install`` when `undercloud.conf` has - `container_images_file=$HOME/containers-prepare-parameters.yaml` (see - :ref:`install_undercloud`) -#. During ``overcloud deploy`` when a `ContainerImagePrepare` parameter is - provided by including the argument `-e - $HOME/containers-prepare-parameters.yaml` - (see :ref:`overcloud-prepare-container-images`) -#. Any other time when ``sudo openstack tripleo container image prepare`` is run - -As seen in the last of the above commands, ``sudo openstack tripleo -container image prepare`` may be run without ``default`` to set up an -undercloud registry without deploying the overcloud. It is run with -``sudo`` because it needs to write to `/var/lib/image-serve` on the -undercloud. - - -Options available in heat parameter ContainerImagePrepare -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -To do something different to the above two registry scenarios, your custom -environment can set the value of the ContainerImagePrepare heat parameter to -result in any desired registry and image scenario. - -Discovering versioned tags with tag_from_label -.............................................. 
- -If you want these parameters to have the actual tag `current-tripleo` instead of -the discovered tag (in this case the Delorean hash, -`35414701c176a6288fc2ad141dad0f73624dcb94_43527485` ) then the `tag_from_label` -entry can be omitted. - -Likewise, if all images should be deployed with a different tag, the value of -`tag` can be set to the desired tag. - -Some build pipelines have a versioned tag which can only be discovered via a -combination of labels. For this case, a template format can be specified -instead:: - - tag_from_label: {version}-{release} - -It's possible to use the above feature while also disabling it only -for a subset of images by using an `includes` and `excludes` list as -described later in this document. This is useful when using the above -but also using containers from external projects which doesn't follow -the same convention like Ceph. - -Copying images with push_destination -.................................... - -By specifying a `push_destination`, the required images will be copied from -`namespace` to this registry, for example:: - - ContainerImagePrepare: - - push_destination: 192.168.24.1:8787 - set: - namespace: docker.io/tripleomaster - ... - -This will result in images being copied from `docker.io/tripleomaster` to -`192.168.24.1:8787/tripleomaster` and heat parameters set with values such as:: - - DockerNeutronMetadataImage: 192.168.24.1:8787/tripleomaster/centos-binary-neutron-metadata-agent:35414701c176a6288fc2ad141dad0f73624dcb94_43527485 - DockerNovaApiImage: 192.168.24.1:8787/tripleomaster/centos-binary-nova-api:35414701c176a6288fc2ad141dad0f73624dcb94_43527485 - -.. note:: Use the IP address of your undercloud, which you previously set with - the `local_ip` parameter in your `undercloud.conf` file. For these example - commands, the address is assumed to be `192.168.24.1:8787`. - -By setting different values for `namespace` and `push_destination` any -alternative registry strategy can be specified. - -Ceph and other set options -.......................... - -The options `ceph_namespace`, `ceph_image`, and `ceph_tag` are similar to -`namespace` and `tag` but they specify the values for the ceph image. It will -often come from a different registry, and have a different versioned tag -policy. - -The values in the `set` map are used when evaluating the file -`/usr/share/openstack-tripleo-common/container-images/tripleo_containers.yaml.j2` -as a Jinja2 template. This file contains the list of every container image and -how it relates to TripleO services and heat parameters. -If Ceph is not part of the overcloud deployment, it's possible to skip pulling -the related containers by setting the `ceph_images` parameter to false as shown -in the example below:: - - ContainerImagePrepare: - - push_destination: 192.168.24.1:8787 - set: - ceph_images: false - -By doing this, the Ceph container images are not pulled from the remote registry -during the deployment. - -Authenticated Registries -........................ - -If a container registry requires a username and password, then those -values may be passed using the following syntax:: - - ContainerImagePrepare: - - push_destination: 192.168.24.1:8787 - set: - namespace: quay.io/... - ... - ContainerImageRegistryCredentials: - 'quay.io': {'': ''} - -.. 
note:: If the `ContainerImageRegistryCredentials` contain the credentials - for a registry whose name matches the `ceph_namespace` parameter, those - credentials will be extracted and passed to ceph-ansible as the - `ceph_docker_registry_username` and `ceph_docker_registry_password` parameters. - -Layering image preparation entries -.................................. - -Since the value of `ContainerImagePrepare` is a list, multiple entries can be -specified, and later entries will overwrite any earlier ones. Consider the -following:: - - ContainerImagePrepare: - - tag_from_label: rdo_version - push_destination: true - excludes: - - nova-api - set: - namespace: docker.io/tripleomaster - name_prefix: centos-binary- - name_suffix: '' - tag: current-tripleo - - push_destination: true - includes: - - nova-api - set: - namespace: mylocal - tag: myhotfix - -This will result in the following heat parameters which shows a `locally built -` -and tagged `centos-binary-nova-api` being used for `DockerNovaApiImage`:: - - DockerNeutronMetadataImage: 192.168.24.1:8787/tripleomaster/centos-binary-neutron-metadata-agent:35414701c176a6288fc2ad141dad0f73624dcb94_43527485 - DockerNovaApiImage: 192.168.24.1:8787/mylocal/centos-binary-nova-api:myhotfix - -The `includes` and `excludes` entries can control the resulting image list in -addition to the filtering which is determined by roles and containerized -services in the plan. `includes` matches take precedence over `excludes` -matches, followed by role/service filtering. The image name must contain the -value within it to be considered a match. - -The `includes` and `excludes` list is useful when pulling OpenStack -images using `tag_from_label: '{version}-{release}'` while also -pulling images which are not tagged the same way. The following -example shows how to do this with Ceph:: - - ContainerImagePrepare: - - push_destination: true - set: - namespace: docker.io/tripleomaster - name_prefix: centos-binary- - name_suffix: '' - tag: current-tripleo - tag_from_label: '{version}-{release}' - excludes: [ceph] - - push_destination: true - set: - ceph_image: ceph - ceph_namespace: docker.io/ceph - ceph_tag: latest - includes: [ceph] - -Modifying images during prepare -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -It is possible to modify images during prepare to make any required changes, -then immediately deploy with those changes. The use-cases for modifying images -include: - -- As part of a Continuous Integration pipeline where images are modified with - the changes being tested before deployment -- As part of a development workflow where local changes need to be deployed for - testing and development -- When changes need to be deployed but are not available through an image - build pipeline (proprietary addons, emergency fixes) - -The modification is done by invoking an ansible role on each image which needs -to be modified. The role takes a source image, makes the requested changes, -then tags the result. The prepare can then push the image and set the heat -parameters to refer to the modified image. The modification is done in -the undercloud registry so it is not possible to use this feature when -using the Default registry, where images are pulled directly from a -remote registry during deployment. - -The ansible role `tripleo-modify-image`_ conforms with the required role -interface, and provides the required behaviour for the modify use-cases. 
Modification is controlled via modify-specific keys in the -`ContainerImagePrepare` parameter: - -- `modify_role` specifies what ansible role to invoke for each image to modify. -- `modify_append_tag` is used to append to the end of the - source image tag. This makes it obvious that the resulting image has been - modified. It is also used to skip modification if the `push_destination` - registry already has that image, so it is recommended to change - `modify_append_tag` whenever the image must be modified. -- `modify_vars` is a dictionary of ansible variables to pass to the role. - -The different use-cases handled by role `tripleo-modify-image`_ are selected by -setting the `tasks_from` variable to the required file in that role. For all of -the following examples, see the documentation for the role -`tripleo-modify-image`_ for the other variables supported by that `tasks_from`. - -While developing and testing the `ContainerImagePrepare` entries which modify -images, it is recommended to run prepare on its own to confirm it is being -modified as expected:: - - sudo openstack tripleo container image prepare \ - -e ~/containers-prepare-parameters.yaml - -Updating existing packages -.......................... - -The following entries will result in all packages being updated in the images, -but using the undercloud host's yum repository configuration:: - - ContainerImagePrepare: - - push_destination: true - ... - modify_role: tripleo-modify-image - modify_append_tag: "-updated" - modify_vars: - tasks_from: yum_update.yml - compare_host_packages: true - yum_repos_dir_path: /etc/yum.repos.d - ... - -Install RPM files -................. - -It is possible to install a directory of RPM files, which is useful for -installing hotfixes, local package builds, or any package which is not -available through a package repository. For example the following would install -some hotfix packages only in the `centos-binary-nova-compute` image:: - - ContainerImagePrepare: - - push_destination: true - ... - includes: - - nova-compute - modify_role: tripleo-modify-image - modify_append_tag: "-hotfix" - modify_vars: - tasks_from: rpm_install.yml - rpms_path: /home/stack/nova-hotfix-pkgs - ... - -Modify with custom Dockerfile -............................. - -For maximum flexibility, it is possible to specify a directory containing a -`Dockerfile` to make the required changes. When the role is invoked, a -`Dockerfile.modified` is generated which changes the `FROM` directive and adds -extra `LABEL` directives. The following example runs the custom -`Dockerfile` on the `centos-binary-nova-compute` image:: - - ContainerImagePrepare: - - push_destination: true - ... - includes: - - nova-compute - modify_role: tripleo-modify-image - modify_append_tag: "-hotfix" - modify_vars: - tasks_from: modify_image.yml - modify_dir_path: /home/stack/nova-custom - ... - -An example `/home/stack/nova-custom/Dockerfile` follows. Note that after any -`USER root` directives have been run, it is necessary to switch back to the -original image default user:: - - FROM docker.io/tripleomaster/centos-binary-nova-compute:latest - - USER root - - COPY customize.sh /tmp/ - RUN /tmp/customize.sh - - USER "nova" - -.. _Delorean repository: https://trunk.rdoproject.org/centos7-master/ac/82/ac82ea9271a4ae3860528eaf8a813da7209e62a6_28eeb6c7/ -.. 
_tripleo-modify-image: https://github.com/openstack/ansible-role-tripleo-modify-image - - -Modify with Python source code installed via pip from OpenDev Gerrit -.................................................................... - - -If you would like to build an image and apply your patch in a Python project in -OpenStack, you can use this example:: - - ContainerImagePrepare: - - push_destination: true - ... - includes: - - heat-api - modify_role: tripleo-modify-image - modify_append_tag: "-devel" - modify_vars: - tasks_from: dev_install.yml - source_image: docker.io/tripleomaster/centos-binary-heat-api:current-tripleo - refspecs: - - - project: heat - refspec: refs/changes/12/1234/3 - ... - -It will produce a modified image with Python source code installed via pip. - -Building hotfixed containers -............................ - -The `tripleoclient` OpenStack plugin provides a command line interface which -will allow operators to apply packages (hotfixes) to running containers. This -capability leverages the **tripleo-modify-image** role, and automates its -application to a set of containers for a given collection of packages. - -Using the provided command line interface is simple. The interface has very few -required options. The noted options below inform the tooling which containers -need to have the hotfix(es) applied, and where to find the hotfixed package(s). - -============ ================================================================= - option Description -============ ================================================================= ---image The `--image` argument requires the use fully qualified image - name, something like *localhost/image/name:tag-data*. The - `--image` option can be used more than once, which will inform - the tooling that multiple containers need to have the same - hotfix packages applied. ---rpms-path The `--rpms-path` argument requires the full path to a - directory where RPMs exist. The RPMs within this directory will - be installed into the container, producing a new layer for an - existing container. ---tag The `--tag` argument is optional, though it is recommended to - be used. The value of this option will append to the tag of the - running container. By using the tag argument, images that have - been modified can be easily identified. -============ ================================================================= - -With all of the required information, the command to modify existing container -images can be executed like so. - -.. code-block:: shell - - # The shell variables need to be replaced with data that pertains to the given environment. - openstack tripleo container image hotfix --image ${FULLY_QUALIFIED_IMAGE_NAME} \ - --rpms-path ${RPM_DIRECTORY} \ - --tag ${TAG_VALUE} - -When this command completes, new container images will be available on the -local system and are ready to be integrated into the environment. - -You should see the image built on your local system via buildah CLI: - -.. code-block:: shell - - # The shell variables need to be replaced with data that pertains to the given environment. - sudo buildah images | grep ${TAG_VALUE} - -Here is an example on how to push it into the TripleO Container registry: - -.. code-block:: shell - - # ${IMAGE} is in this format: //: - sudo openstack tripleo container image push --local \ - --registry-url 192.168.24.1:8787 ${IMAGE} - -.. note:: - - Container images can be pushed to the TripleO Container registry or - a Docker Registry (using basic auth or the bearer token auth). 
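-
-To put the preceding commands together, a complete hotfix build might look like
-the following; the image name, RPM directory and tag are illustrative values
-only, not defaults:
-
-.. code-block:: shell
-
-   openstack tripleo container image hotfix \
-       --image docker.io/tripleomaster/centos-binary-nova-compute:current-tripleo \
-       --rpms-path /home/stack/nova-hotfix-pkgs \
-       --tag hotfix-1
-
-   sudo buildah images | grep hotfix-1
-
-The resulting image, whose tag has the appended value, can then be pushed with
-the ``openstack tripleo container image push`` command shown above.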
- -Now that your container image is pushed into a registry, you can deploy it -where it's needed. Two ways are supported: - -* (Long but persistent): Update Container$NameImage where $Name is the name of - the service we update (e.g. ContainerNovaComputeImage). The parameters - can be found in TripleO Heat Templates. Once you update it into your - environment, you need to re-run the "openstack overcloud deploy" command - again and the necessary hosts will get the new container. - - Example:: - - parameter_defaults: - # Replace the values by where the image is stored - ContainerNovaComputeImage: //: - -* (Short but not persistent after a minor update): Run Paunch or Ansible - to update the container on a host. The procedure is already documented - in the :doc:`./tips_tricks` manual. - - -Once the hotfixed container image has been deployed, it's very important to -check that the container is running with the right rpm version. -For example, if the nova-compute container was updated with a new hotfix image, -we want to check that the right nova-compute rpm is installed: - -.. code-block:: shell - - sudo podman exec -ti -u root nova_compute rpm -qa | grep nova-compute - -It will return the version of the openstack-nova-compute rpm and we can compare -it with the one that was delivered via rpm. If the version is not correct (e.g. -older), it means that the hotfix image is wrong and doesn't contain the rpm -provided to build the new image. The image has to be rebuilt and redeployed. diff --git a/deploy-guide/source/deployment/deployment_log.rst b/deploy-guide/source/deployment/deployment_log.rst deleted file mode 100644 index b2d98c1f..00000000 --- a/deploy-guide/source/deployment/deployment_log.rst +++ /dev/null @@ -1,4 +0,0 @@ -Deployment Log -^^^^^^^^^^^^^^ -The ansible part of the deployment creates a log file that is saved on the -undercloud. The log file is available at ``$HOME/ansible.log``. diff --git a/deploy-guide/source/deployment/deployment_output.rst b/deploy-guide/source/deployment/deployment_output.rst deleted file mode 100644 index 1bef7893..00000000 --- a/deploy-guide/source/deployment/deployment_output.rst +++ /dev/null @@ -1,31 +0,0 @@ -Deployment Output -^^^^^^^^^^^^^^^^^ -After the tripleo-admin user is created, ``ansible-playbook`` will be used to -configure the overcloud nodes. - -The output from ``ansible-playbook`` will begin to appear in the console -and will be updated periodically as more tasks are applied. - -When ansible is finished a play recap will be shown, and the usual overcloudrc -details will then be displayed. The following is an example of the end of the -output from a successful deployment:: - - PLAY RECAP **************************************************************** - compute-0 : ok=134 changed=48 unreachable=0 failed=0 - openstack-0 : ok=164 changed=28 unreachable=0 failed=1 - openstack-1 : ok=160 changed=28 unreachable=0 failed=0 - openstack-2 : ok=160 changed=28 unreachable=0 failed=0 - pacemaker-0 : ok=138 changed=30 unreachable=0 failed=0 - pacemaker-1 : ok=138 changed=30 unreachable=0 failed=0 - pacemaker-2 : ok=138 changed=30 unreachable=0 failed=0 - undercloud : ok=2 changed=0 unreachable=0 failed=0 - - Overcloud configuration completed. - Overcloud Endpoint: http://192.168.24.8:5000/ - Overcloud rc file: /home/stack/overcloudrc - Overcloud Deployed - -When a failure happens, the deployment will stop and the error will be shown. 
- -Review the ``PLAY RECAP`` which will show each host that is part of the -overcloud and the grouped count of each task status. diff --git a/deploy-guide/source/deployment/deployment_status.rst b/deploy-guide/source/deployment/deployment_status.rst deleted file mode 100644 index 8acc64a1..00000000 --- a/deploy-guide/source/deployment/deployment_status.rst +++ /dev/null @@ -1,33 +0,0 @@ -Deployment Status -^^^^^^^^^^^^^^^^^ -Since Heat is no longer the source of authority on the status of the overcloud -deployment, a new tripleoclient command is available to show the overcloud -deployment status:: - - openstack overcloud status - -The output will report the status of the deployment, taking into consideration -the result of all the steps to do the full deployment. The following is an -example of the output:: - - [stack@undercloud ]$ openstack overcloud status - - +------------+-------------------+ - | Stack Name | Deployment Status | - +------------+-------------------+ - | overcloud | DEPLOY_SUCCESS | - +------------+-------------------+ - -A different stack name can be specified with ``--stack``:: - - [stack@undercloud ]$ openstack overcloud status --stack my-deployment - - +---------------+-------------------+ - | Stack Name | Deployment Status | - +-----------+-----------------------+ - | my-deployment | DEPLOY_SUCCESS | - +---------------+-------------------+ - -The deployment status is stored in the YAML file, generated at -``$HOME/overcloud-deploy//-deployment_status.yaml`` in -the undercloud node. diff --git a/deploy-guide/source/deployment/ephemeral_heat.rst b/deploy-guide/source/deployment/ephemeral_heat.rst deleted file mode 100644 index 6e39c9dd..00000000 --- a/deploy-guide/source/deployment/ephemeral_heat.rst +++ /dev/null @@ -1,169 +0,0 @@ -.. _ephemeral_heat: - -Ephemeral Heat -============== - -Introduction ------------- - -Ephemeral Heat is a means to install the overcloud by using an ephemeral Heat -process instead of a system installed Heat process. This change is possible -beginning in the Wallaby release. - -In a typical undercloud, Heat is installed on the undercloud and processes are -run in podman containers for heat-api and heat-engine. When using ephemeral -Heat, there is no longer a requirement that Heat is installed on the -undercloud, instead these processes are started on demand by the deployment, -update, and upgrade commands. - -This model has been in use within TripleO already for both the undercloud and -:ref:`standalone ` installation methods, which start an on demand -all in one heat-all process in order to perform only the installation. Using -ephemeral Heat in this way allows for re-use of the Heat templates from -tripleo-heat-templates without having to require an already fully installed -undercloud. - -Description ------------ - -Ephemeral Heat is enabled by passing the ``--heat-type`` argument to -``openstack overcloud deploy``. The ephemeral process can also be launched -outside of a deployment with the ``openstack tripleo launch heat`` command. The -latter command also takes a ``--heat-type`` argument to enable selecting the -type of Heat process to use. - -Heat types -__________ - -The ``--heat-type`` argument allows for the following options described below. - -installed - Use the system Heat installation. This is the historical TripleO usage of - Heat with Heat fully installed on the undercloud. This is the default - value, and requires a fully installed undercloud. - -native - Use an ephemeral ``heat-all`` process. 
The process will be started natively - on the system executing tripleoclient commands by way of an OS (operating - system) fork. - -container - A podman container will be started on the executing system that runs a - single ``heat-all`` process. - -pod - A podman pod will be started on the executing system that runs containers - for ``heat-api`` and ``heat-engine``. - -In all cases, the process(es) are terminated at the end of the deployment. - -.. note:: - - The native and container methods are limited in scale due to being a single - Heat process. Deploying more than 3 nodes or 2 roles will significantly - impact the deployment time with these methods as Heat has only a single - worker thread. - - Using the installed or pod methods enable scaling node and role counts as - is typically required. - -Using ------ - -The following example shows using ``--heat-type`` to enable ephemeral Heat:: - - openstack overcloud deploy \ - --stack overcloud \ - --work-dir ~/overcloud-deploy/overcloud \ - --heat-type \ - - -With ephemeral Heat enabled, several additional deployment artifacts are -generated related to the management of the Heat process(es). These artifacts -are generated under the working directory of the deployment in a -``heat-launcher`` subdirectory. The working directory can be overridden with -the ``--work-dir`` argument. - -Using the above example, the Heat artifact directory would be located at -``~/overcloud-deploy/overcloud/heat-launcher``. An example of the directory -contents is shown below:: - - [centos@ephemeral-heat ~]$ ls -l ~/overcloud-deploy/overcloud/heat-launcher/ - total 41864 - -rw-rw-r--. 1 centos centos 650 Mar 24 18:39 api-paste.ini - -rw-rw-r--. 1 centos centos 1054 Mar 24 18:39 heat.conf - -rw-rw-r--. 1 centos centos 42852118 Mar 24 18:31 heat-db-dump.sql - -rw-rw-r--. 1 centos centos 2704 Mar 24 18:39 heat-pod.yaml - drwxrwxr-x. 2 centos centos 49 Mar 24 16:02 log - -rw-rw-r--. 1 centos centos 1589 Mar 24 18:39 token_file.json - -The directory contains the necessary files to inspect and debug the Heat -process(es), and if necessary reproduce the deployment. - -.. note:: - - The consolidated log file for the Heat process is the ``log`` file in the - ``heat-launcher`` directory. - -Launching Ephemeral Heat -________________________ - -Outside of a deployment, the ephemeral Heat process can also be started with the -``openstack tripleo launch heat`` command. This can be used to interactively -use the ephemeral Heat process or to debug a previous deployment. - -When combined with ``--heat-dir`` and ``--restore-db``, the command can be used -to restore the Heat process and database from a previous deployment:: - - openstack tripleo launch heat \ - --heat-type pod \ - --heat-dir ~/overcloud-deploy/overcloud/heat-launcher \ - --restore-db - -The command will exit after launching the Heat process, and the Heat process -will continue to run in the background. - -Interacting with ephemeral Heat -............................... - -With the ephemeral Heat process launched and running, ``openstackclient`` can be -used to interact with the Heat API. 
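If you first want to confirm that the ephemeral Heat containers are actually up, a simple sketch for the ``pod`` heat type is to list the pods and filter the running containers on ``heat`` (the pod and container names are not fixed here and may differ in your environment)::

    sudo podman pod ps
    sudo podman ps --filter name=heat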
The following shell environment -configuration must set up access to the Heat API:: - - unset OS_CLOUD - unset OS_PROJECT_NAME - unset OS_PROJECT_DOMAIN_NAME - unset OS_USER_DOMAIN_NAME - export OS_AUTH_TYPE=none - export OS_ENDPOINT=http://127.0.0.1:8006/v1/admin - -You can also use the ``OS_CLOUD`` environment to set up the same:: - - export OS_CLOUD=heat - -Once the environment is configured, ``openstackclient`` work as expected -against the Heat API:: - - [centos@ephemeral-heat ~]$ openstack stack list - +--------------------------------------+------------+---------+-----------------+----------------------+--------------+ - | ID | Stack Name | Project | Stack Status | Creation Time | Updated Time | - +--------------------------------------+------------+---------+-----------------+----------------------+--------------+ - | 761e2a54-c6f9-4e0f-abe6-c8e0ad51a76c | overcloud | admin | CREATE_COMPLETE | 2021-03-22T20:48:37Z | None | - +--------------------------------------+------------+---------+-----------------+----------------------+--------------+ - -Killing ephemeral Heat -...................... - -To stop the ephemeral Heat process previously started with ``openstack tripleo -launch heat``, use the ``--kill`` argument:: - - openstack tripleo launch heat \ - --heat-type pod \ - --heat-dir ~/overcloud-deploy/overcloud/heat-launcher \ - --kill - -Limitations ------------ -Ephemeral Heat currently only supports new deployments. Update and Upgrade -support for deployments that previously used the system installed Heat will be -coming. diff --git a/deploy-guide/source/deployment/index.rst b/deploy-guide/source/deployment/index.rst deleted file mode 100644 index b0cc7fa4..00000000 --- a/deploy-guide/source/deployment/index.rst +++ /dev/null @@ -1,35 +0,0 @@ -TripleO OpenStack Deployment -============================ - -This section describes how to deploy OpenStack clouds on containers, either on -the undercloud or the overcloud. - -.. toctree:: - :maxdepth: 1 - - undercloud - install_undercloud - overcloud - install_overcloud - -TripleO Deployment Advanced Topics -================================== - -This section has additional documentation around advanced deployment related topics. - -.. toctree:: - :maxdepth: 1 - - 3rd_party - ansible_config_download - ansible_config_download_differences - architecture - build_single_image - container_image_prepare - ephemeral_heat - instack_undercloud - network_v2 - standalone - template_deploy - tips_tricks - upload_single_image diff --git a/deploy-guide/source/deployment/instack_undercloud.rst b/deploy-guide/source/deployment/instack_undercloud.rst deleted file mode 100644 index a4d076f7..00000000 --- a/deploy-guide/source/deployment/instack_undercloud.rst +++ /dev/null @@ -1,227 +0,0 @@ -(DEPRECATED) Installing the Undercloud --------------------------------------- - -.. note:: - Instack-undercloud is deprecated in Rocky cycle. Containerized undercloud - should be installed instead. See :doc:`undercloud` for backward - compatibility related information. - -.. note:: - Please ensure all your nodes (undercloud, compute, controllers, etc) have - their internal clock set to UTC in order to prevent any issue with possible - file future-dated timestamp if hwclock is synced before any timezone offset - is applied. - - -#. Log in to your machine (baremetal or VM) where you want to install the - undercloud as a non-root user (such as the stack user):: - - ssh @ - - .. 
note:: - If you don't have a non-root user created yet, log in as root and create - one with following commands:: - - sudo useradd stack - sudo passwd stack # specify a password - - echo "stack ALL=(root) NOPASSWD:ALL" | sudo tee -a /etc/sudoers.d/stack - sudo chmod 0440 /etc/sudoers.d/stack - - su - stack - - .. note:: - The undercloud is intended to work correctly with SELinux enforcing. - Installations with the permissive/disabled SELinux are not recommended. - The ``undercloud_enable_selinux`` config option controls that setting. - - .. note:: - vlan tagged interfaces must follow the if_name.vlan_id convention, like for - example: eth0.vlan100 or bond0.vlan120. - - .. admonition:: Baremetal - :class: baremetal - - Ensure that there is a FQDN hostname set and that the $HOSTNAME environment - variable matches that value. The easiest way to do this is to set the - ``undercloud_hostname`` option in undercloud.conf before running the - install. This will allow the installer to configure all of the hostname- - related settings appropriately. - - Alternatively the hostname settings can be configured manually, but - this is strongly discouraged. The manual steps are as follows:: - - sudo hostnamectl set-hostname myhost.mydomain - sudo hostnamectl set-hostname --transient myhost.mydomain - - An entry for the system's FQDN hostname is also needed in /etc/hosts. For - example, if the system is named *myhost.mydomain*, /etc/hosts should have - an entry like:: - - 127.0.0.1 myhost.mydomain myhost - - -#. Enable needed repositories: - - .. admonition:: RHEL - :class: rhel - - Enable optional repo:: - - sudo yum install -y yum-utils - sudo yum-config-manager --enable rhelosp-rhel-7-server-opt - - .. include:: ../repositories.rst - - -#. Install the TripleO CLI, which will pull in all other necessary packages as dependencies:: - - sudo yum install -y python-tripleoclient - - .. admonition:: Ceph - :class: ceph - - If you intend to deploy Ceph in the overcloud, or configure the overcloud to use an external Ceph cluster, and are running Pike or newer, then install ceph-ansible on the undercloud:: - - sudo yum install -y ceph-ansible - -#. Prepare the configuration file:: - - cp /usr/share/python-tripleoclient/undercloud.conf.sample ~/undercloud.conf - - It is backwards compatible with non-containerized instack underclouds. - - .. admonition:: Stable Branch - :class: stable - - For a non-containerized undercloud, copy in the sample configuration - file and edit it to reflect your environment:: - - cp /usr/share/instack-undercloud/undercloud.conf.sample ~/undercloud.conf - - .. note:: There is a tool available that can help with writing a basic - ``undercloud.conf``: - `Undercloud Configuration Wizard `_ - It takes some basic information about the intended overcloud - environment and generates sane values for a number of the important - options. - -#. (OPTIONAL) Generate configuration for preparing container images - - As part of the undercloud install, an image registry is configured on port - `8787`. This is used to increase reliability of overcloud image pulls, and - minimise overall network transfers. 
The undercloud registry will be - populated with images required by the undercloud by generating the following - `containers-prepare-parameter.yaml` file and including it in - ``undercloud.conf: - container_images_file=$HOME/containers-prepare-parameter.yaml``:: - - openstack tripleo container image prepare default \ - --local-push-destination \ - --output-env-file ~/containers-prepare-parameter.yaml - - .. note:: - This command is available since Rocky. - - See :ref:`prepare-environment-containers` for details on using - `containers-prepare-parameter.yaml` to control what can be done - during the container images prepare phase of an undercloud install. - - Additionally, ``docker_insecure_registries`` and ``docker_registry_mirror`` - parameters allow to customize container registries via the - ``undercloud.conf`` file. - -#. (OPTIONAL) Override heat parameters and environment files used for undercloud - deployment. - - Similarly to overcloud deployments, see :ref:`override-heat-templates` and - :ref:`custom-template-location`, the ``undercloud.conf: custom_env_files`` - and ``undercloud.conf: templates`` configuration parameters allow to - use a custom heat templates location and override or specify additional - information for Heat resources used for undercloud deployment. - - Additionally, the ``undercloud.conf: roles_file`` parameter brings in the - ultimate flexibility of :ref:`custom_roles` and :ref:`composable_services`. - This allows you to deploy an undercloud composed of highly customized - containerized services, with the same workflow that TripleO uses for - overcloud deployments. - - .. note:: The CLI and configuration interface used to deploy a containerized - undercloud is the same as that used by 'legacy' non-containerized - underclouds. As noted above however mechanism by which the undercloud is - actually deployed is completely changed and what is more, for the first - time aligns with the overcloud deployment. See the command - ``openstack tripleo deploy --standalone`` help for details. - That interface extension for standalone clouds is experimental for Rocky. - It is normally should not be used directly for undercloud installations. - -#. Run the command to install the undercloud: - - .. admonition:: SSL - :class: optional - - To deploy an undercloud with SSL, see :doc:`../features/ssl`. - - .. admonition:: Validations - :class: validations - - :doc:`../post_deployment/validations/index` will be installed and - configured during undercloud installation. You can set - ``enable_validations = false`` in ``undercloud.conf`` to prevent - that. - - To deploy an undercloud:: - - openstack undercloud install - -.. note:: - The undercloud is containerized by default as of Rocky. - -.. note:: - It's possible to enable verbose logging with ``--verbose`` option. - -Since Rocky, we run all the OpenStack services in a moby container runtime -unless the default settings are overwritten. -This command requires 2 services to be running at all times. The first one is a -basic keystone service, which is currently executed by `tripleoclient` itself, the -second one is `heat-all` which executes the templates and installs the services. -The latter can be run on baremetal or in a container (tripleoclient will run it -in a container by default). - -Once the install has completed, you should take note of the files ``stackrc`` and -``undercloud-passwords.conf``. You can source ``stackrc`` to interact with the -undercloud via the OpenStack command-line client. 
The ``undercloud-passwords.conf`` -file contains the passwords used for each service in the undercloud. These passwords -will be automatically reused if the undercloud is reinstalled on the same system, -so it is not necessary to copy them to ``undercloud.conf``. - -.. note:: Heat installer configuration, logs and state is ephemeral for - undercloud deployments. Generated artifacts for consequent deployments get - overwritten or removed (when ``undercloud.conf: cleanup = true``). - Although, you can still find them stored in compressed files. - -Miscellaneous undercloud deployment artifacts, like processed heat templates and -compressed files, can be found in ``undercloud.conf: output_dir`` locations -like ``~/tripleo-heat-installer-templates``. - -There is also a compressed file created and placed into the output dir, named as -``undercloud-install-.tar.bzip2``, where TS represents a timestamp. - -Downloaded ansible playbooks and inventory files (see :ref:`config_download`) -used for undercloud deployment are stored in the tempdir -``~/undercloud-ansible-`` by default. - -.. note:: - Any passwords set in ``undercloud.conf`` will take precedence over the ones in - ``undercloud-passwords.conf``. - -.. note:: - The used undercloud installation command can be rerun to reapply changes from - ``undercloud.conf`` to the undercloud. Note that this should **not** be done - if an overcloud has already been deployed or is in progress. - -.. note:: - If running ``docker`` commands as a stack user after an undercloud install fail - with a permission error, log out and log in again. The stack user does get added - to the docker group during install, but that change gets reflected only after a - new login. diff --git a/deploy-guide/source/deployment/install_overcloud.rst b/deploy-guide/source/deployment/install_overcloud.rst deleted file mode 100644 index d3a805ee..00000000 --- a/deploy-guide/source/deployment/install_overcloud.rst +++ /dev/null @@ -1,712 +0,0 @@ -.. _basic-deployment-cli: - -Basic Deployment (CLI) -====================== - -These steps document a basic deployment with |project| in an environment using -the project defaults. - -.. note:: - - Since Rocky, Ansible is used to deploy the software configuration of - the overcloud nodes using a feature called **config-download**. While - there are no necessary changes to the default deployment commands, - there are several differences to the deployer experience. - - It's recommended to review these differences as documented at - :doc:`ansible_config_download_differences` - - **config-download** is fully documented at - :doc:`ansible_config_download` - - -Prepare Your Environment ------------------------- - -#. Make sure you have your environment ready and undercloud running: - - * :doc:`../environments/index` - * :doc:`undercloud` - -#. Log into your undercloud virtual machine and become the non-root user (stack - by default):: - - ssh root@ - - su - stack - -#. In order to use CLI commands easily you need to source needed environment - variables:: - - source stackrc - -.. _basic-deployment-cli-get-images: - -Get Images ----------- - -.. note:: - - If you already have images built, perhaps from a previous installation of - |project|, you can simply copy those image files into your non-root user's - home directory and skip this section. 
- - If you do this, be aware that sometimes newer versions of |project| do not - work with older images, so if the deployment fails it may be necessary to - delete the older images and restart the process from this step. - - Alternatively, images are available via RDO at - https://images.rdoproject.org/centos9/master/rdo_trunk/ which offers images from both the - CentOS Build System (cbs) and RDO Trunk (called rdo_trunk or delorean). - However this mirror is slow so if you experience slow download speeds - you should skip to building the images instead. - - The image files required are:: - - ironic-python-agent.initramfs - ironic-python-agent.kernel - overcloud-full.initrd - overcloud-full.qcow2 - overcloud-full.vmlinuz - -Images must be built prior to doing a deployment. An IPA ramdisk and -openstack-full image can all be built using tripleo-common. - -It's recommended to build images on the installed undercloud directly since all -the dependencies are already present, but this is not a requirement. - -The following steps can be used to build images. They should be run as the same -non-root user that was used to install the undercloud. If the images are not -created on the undercloud, one should use a non-root user. - - -#. Choose image operating system: - - .. admonition:: CentOS - :class: centos - - The image build with no arguments will build CentOS 8. It will include the - common YAML of - ``/usr/share/openstack-tripleo-common/image-yaml/overcloud-images-python3.yaml`` - and the CentOS YAML at - ``/usr/share/openstack-tripleo-common/image-yaml/overcloud-images-centos8.yaml``. - - .. admonition:: CentOS 9 - :class: centos9 - - The default YAML for Centos 9 is - ``/usr/share/openstack-tripleo-common/image-yaml/overcloud-images-centos9.yaml`` - - :: - - export OS_YAML="/usr/share/openstack-tripleo-common/image-yaml/overcloud-images-centos9.yaml" - - .. admonition:: RHEL - :class: rhel - - The common YAML is - ``/usr/share/openstack-tripleo-common/image-yaml/overcloud-images-python3.yaml``. - It must be specified along with the following. - - The default YAML for RHEL is - ``/usr/share/openstack-tripleo-common/image-yaml/overcloud-images-rhel8.yaml`` - - :: - - export OS_YAML="/usr/share/openstack-tripleo-common/image-yaml/overcloud-images-rhel8.yaml" - - -#. Install the ``current-tripleo`` delorean repository and deps repository: - - .. include:: ../repositories.rst - - -3. Export environment variables - - :: - - export DIB_YUM_REPO_CONF="/etc/yum.repos.d/delorean*" - - .. admonition:: Ceph - :class: ceph - - :: - - export DIB_YUM_REPO_CONF="$DIB_YUM_REPO_CONF /etc/yum.repos.d/tripleo-centos-ceph*.repo" - - .. admonition:: CentOS 9 - :class: centos9 - - :: - - export DIB_YUM_REPO_CONF="/etc/yum.repos.d/delorean* /etc/yum.repos.d/tripleo-centos-*" - - .. admonition:: Stable Branch - :class: stable - - .. admonition:: Victoria - :class: victoria - - :: - - export STABLE_RELEASE="victoria" - - .. admonition:: Ussuri - :class: ussuri - - :: - - export STABLE_RELEASE="ussuri" - - .. admonition:: Train - :class: train - - :: - - export STABLE_RELEASE="train" - -#. Build the required images: - - - .. admonition:: RHEL - :class: rhel - - Download the RHEL 7.4 cloud image or copy it over from a different location, - for example: - ``https://access.redhat.com/downloads/content/69/ver=/rhel---7/7.4/x86_64/product-software``, - and define the needed environment variables for RHEL 7.4 prior to running - ``tripleo-build-images``:: - - export DIB_LOCAL_IMAGE=rhel-server-7.4-x86_64-kvm.qcow2 - - .. 
admonition:: RHEL Portal Registration - :class: portal - - To register the image builds to the Red Hat Portal define the following variables:: - - export REG_METHOD=portal - export REG_USER="[your username]" - export REG_PASSWORD="[your password]" - # Find this with `sudo subscription-manager list --available` - export REG_POOL_ID="[pool id]" - export REG_REPOS="rhel-7-server-rpms rhel-7-server-extras-rpms rhel-ha-for-rhel-7-server-rpms \ - rhel-7-server-optional-rpms rhel-7-server-openstack-6.0-rpms" - - .. admonition:: Ceph - :class: ceph - - If using Ceph, additional channels need to be added to `REG_REPOS`. - Enable the appropriate channels for the desired release, as indicated below. - Do not enable any other channels not explicitly marked for that release. - - :: - - rhel-7-server-rhceph-2-mon-rpms - rhel-7-server-rhceph-2-osd-rpms - rhel-7-server-rhceph-2-tools-rpms - - - .. admonition:: RHEL Satellite Registration - :class: satellite - - To register the image builds to a Satellite define the following - variables. Only using an activation key is supported when registering to - Satellite, username/password is not supported for security reasons. The - activation key must enable the repos shown:: - - export REG_METHOD=satellite - # REG_SAT_URL should be in the format of: - # http:// - export REG_SAT_URL="[satellite url]" - export REG_ORG="[satellite org]" - # Activation key must enable these repos: - # rhel-7-server-rpms - # rhel-7-server-optional-rpms - # rhel-7-server-extras-rpms - # rhel-7-server-openstack-6.0-rpms - # rhel-7-server-rhceph-{2,1.3}-mon-rpms - # rhel-7-server-rhceph-{2,1.3}-osd-rpms - # rhel-7-server-rhceph-{2,1.3}-tools-rpms - export REG_ACTIVATION_KEY="[activation key]" - - :: - - openstack overcloud image build - - .. - - .. admonition:: RHEL 9 - :class: rhel9 - - :: - - openstack overcloud image build \ - --config-file /usr/share/openstack-tripleo-common/image-yaml/overcloud-images-python3.yaml \ - --config-file /usr/share/openstack-tripleo-common/image-yaml/overcloud-images-rhel9.yaml \ - --config-file $OS_YAML - - .. admonition:: CentOS 9 - :class: centos9 - - :: - - openstack overcloud image build \ - --config-file /usr/share/openstack-tripleo-common/image-yaml/overcloud-images-python3.yaml \ - --config-file /usr/share/openstack-tripleo-common/image-yaml/overcloud-images-centos9.yaml \ - --config-file $OS_YAML - - See the help for ``openstack overcloud image build`` for further options. - - The YAML files are cumulative. Order on the command line is important. The - packages, elements, and options sections will append. All others will overwrite - previously read values. - - .. note:: - This command will build **overcloud-full** images (\*.qcow2, \*.initrd, - \*.vmlinuz) and **ironic-python-agent** images (\*.initramfs, \*.kernel) - - In order to build specific images, one can use the ``--image-name`` flag - to ``openstack overcloud image build``. It can be specified multiple times. - -.. note:: - - If you want to use whole disk images with TripleO, please see :doc:`../provisioning/whole_disk_images`. - -.. _basic-deployment-cli-upload-images: - -Upload Images -------------- - -Load the images into the containerized undercloud Glance:: - - openstack overcloud image upload - - -To upload a single image, see :doc:`upload_single_image`. 
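To verify the upload, the images can be listed from the undercloud image service (a sketch; the exact image names depend on the release and on what was built)::

    source ~/stackrc
    openstack image list | grep -i -E 'overcloud|deploy'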
- -If working with multiple architectures and/or platforms with an architecture these -attributes can be specified at upload time as in:: - - openstack overcloud image upload - openstack overcloud image upload --arch x86_64 \ - --httpboot /var/lib/ironic/httpboot/x86_64 - openstack overcloud image upload --arch x86_64 --platform SNB \ - --httpboot /var/lib/ironic/httpboot/x86_64-SNB - -.. note:: - - Adding ``--httpboot`` is optional but suggested if you need to ensure that - the ``agent`` images are unique within your environment. - -.. admonition:: Prior to Rocky release - :class: stable - - Before Rocky, the undercloud isn't containerized by default. Hence - you should use the ``/httpboot/*`` paths instead. - -This will create 3 sets of images with in the undercloud image service for later -use in deployment, see :doc:`../environments/baremetal` - -.. _node-registration: - -Register Nodes --------------- - -Register and configure nodes for your deployment with Ironic:: - - openstack overcloud node import instackenv.json - -The file to be imported may be either JSON, YAML or CSV format, and -the type is detected via the file extension (json, yaml, csv). -The file format is documented in :ref:`instackenv`. - -The nodes status will be set to ``manageable`` by default, so that -introspection may later be run. To also run introspection and make the -nodes available for deployment in one step, the following flags can be -used:: - - openstack overcloud node import --introspect --provide instackenv.json - -Starting with the Newton release you can take advantage of the ``enroll`` -provisioning state - see :doc:`../provisioning/node_states` for details. - -If your hardware has several hard drives, it's highly recommended that you -specify the exact device to be used during introspection and deployment -as a root device. Please see :ref:`root_device` for details. - -.. warning:: - If you don't specify the root device explicitly, any device may be picked. - Also the device chosen automatically is **NOT** guaranteed to be the same - across rebuilds. Make sure to wipe the previous installation before - rebuilding in this case. - -If there is information from previous deployments on the nodes' disks, it is -recommended to at least remove the partitions and partition table(s). See -:doc:`../provisioning/cleaning` for information on how to do it. - -Finally, if you want your nodes to boot in the UEFI mode, additional steps may -have to be taken - see :doc:`../provisioning/uefi_boot` for details. - -.. warning:: - It's not recommended to delete nodes and/or rerun this command after - you have proceeded to the next steps. Particularly, if you start introspection - and then re-register nodes, you won't be able to retry introspection until - the previous one times out (1 hour by default). If you are having issues - with nodes after registration, please follow - :ref:`node_registration_problems`. - -Another approach to enrolling node is -:doc:`../provisioning/node_discovery`. - -.. _introspection: - -Introspect Nodes ----------------- - - -.. admonition:: Validations - :class: validations - - Once the undercloud is installed, you can run the - ``pre-introspection`` validations:: - - openstack tripleo validator run --group pre-introspection - - Then verify the results as described in :ref:`running_validation_group`. - -Nodes must be in the ``manageable`` provisioning state in order to run -introspection. 
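The current state of each node can be checked first with the standard baremetal client, for example::

    openstack baremetal node list -c UUID -c Name -c "Provisioning State"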
Introspect hardware attributes of nodes with:: - - openstack overcloud node introspect --all-manageable - -Nodes can also be specified individually by UUID. The ``--provide`` -flag can be used in order to move the nodes automatically to the -``available`` provisioning state once the introspection is finished, -making the nodes available for deployment. -:: - - openstack overcloud node introspect --all-manageable --provide - -.. note:: **Introspection has to finish without errors.** - The process can take up to 5 minutes for VM / 15 minutes for baremetal. If - the process takes longer, see :ref:`introspection_problems`. - -.. note:: If you need to introspect just a single node, see - :doc:`../provisioning/introspect_single_node` - -Provide Nodes -------------- - -Only nodes in the ``available`` provisioning state can be deployed to -(see :doc:`../provisioning/node_states` for details). To move -nodes from ``manageable`` to ``available`` the following command can be -used:: - - openstack overcloud node provide --all-manageable - -Flavor Details --------------- - -The undercloud will have a number of default flavors created at install time. -In most cases these flavors do not need to be modified, but they can be if -desired. By default, all overcloud instances will be booted with the -``baremetal`` flavor, so all baremetal nodes must have at least as much -memory, disk, and cpu as that flavor. - -In addition, there are profile-specific flavors created which can be used with -the profile-matching feature. For more details on deploying with profiles, -see :doc:`../provisioning/profile_matching`. - -.. _basic-deployment-cli-configure-namserver: - -Configure a nameserver for the Overcloud ----------------------------------------- - -Overcloud nodes can have a nameserver configured in order to resolve -hostnames via DNS. The nameserver is defined in the undercloud's neutron -subnet. If needed, define the nameserver to be used for the environment:: - - # List the available subnets - openstack subnet list - openstack subnet set --dns-nameserver - -.. admonition:: Stable Branch - :class: stable - - For Mitaka release and older, the subnet commands are executed within the - `neutron` command:: - - neutron subnet-list - neutron subnet-update --dns-nameserver - -.. note:: - A public DNS server, such as 8.8.8.8 or the undercloud DNS name server - can be used if there is no internal DNS server. - -.. admonition:: Virtual - :class: virtual - - In virtual environments, the libvirt default network DHCP server address, - typically 192.168.122.1, can be used as the overcloud nameserver. - -.. _deploy-the-overcloud: - -Deploy the Overcloud --------------------- - -.. admonition:: Validations - :class: validations - - Before you start the deployment, you may want to run the - ``pre-deployment`` validations:: - - openstack tripleo validator run --group pre-deployment - - Then verify the results as described in :ref:`running_validation_group`. - - -By default 1 compute and 1 control node will be deployed, with networking -configured for the virtual environment. To customize this, see the output of:: - - openstack help overcloud deploy - -.. admonition:: Swap - :class: optional - - Swap files or partitions can be installed as part of an Overcloud deployment. - For adding swap files there is no restriction besides having - 4GB available on / (by default). When using a swap partition, - the partition must exist and be tagged as `swap1` (by default). 
- To deploy a swap file or partition in each Overcloud node use one - of the following arguments when deploying:: - - -e /usr/share/openstack-tripleo-heat-templates/environments/enable-swap-partition.yaml - -e /usr/share/openstack-tripleo-heat-templates/environments/enable-swap.yaml - -.. admonition:: Ceph - :class: ceph - - When deploying Ceph with dedicated CephStorage nodes to host the CephOSD - service it is necessary to specify the number of CephStorage nodes - to be deployed and to provide some additional parameters to enable usage - of Ceph for Glance, Cinder, Nova or all of them. To do so, use the - following arguments when deploying:: - - --ceph-storage-scale -e /usr/share/openstack-tripleo-heat-templates/environments/ceph-ansible/ceph-ansible.yaml - - When deploying Ceph without dedicated CephStorage nodes, opting for an HCI - architecture instead, where the CephOSD service is colocated with the - NovaCompute service on the Compute nodes, use the following arguments:: - - -e /usr/share/openstack-tripleo-heat-templates/environments/hyperconverged-ceph.yaml -e /usr/share/openstack-tripleo-heat-templates/environments/ceph-ansible/ceph-ansible.yaml - - The `hyperconverged-ceph.yaml` environment file will also enable a port on the - `StorageMgmt` network for the Compute nodes. This will be the Ceph private - network and the Compute NIC templates have to be configured to use that, see - :doc:`../features/network_isolation` for more details on how to do - it. - -.. admonition:: RHEL Satellite Registration - :class: satellite - - To register the Overcloud nodes to a Satellite add the following flags - to the deploy command:: - - --rhel-reg --reg-method satellite --reg-org --reg-sat-url --reg-activation-key - - .. note:: - - Only using an activation key is supported when registering to - Satellite, username/password is not supported for security reasons. - The activation key must enable the following repos: - - rhel-7-server-rpms - - rhel-7-server-optional-rpms - - rhel-7-server-extras-rpms - - rhel-7-server-openstack-6.0-rpms - -.. admonition:: SSL - :class: optional - - To deploy an overcloud with SSL, see :doc:`../features/ssl`. - -Run the deploy command, including any additional parameters as necessary:: - - openstack overcloud deploy --templates [additional parameters] - -.. note:: - - When deploying a new stack or updating a preexisting deployment, it is - important to avoid using component cli along side the unified cli. This - will lead to unexpected results. - - Example: - - The following will present a behavior where the my_roles_data will persist, - due to the location of the custom roles data, which is stored in swift:: - - openstack overcloud deploy --templates -r my_roles_data.yaml - heat stack-delete overcloud - - Allow the stack to be deleted then continue:: - - openstack overcloud deploy --templates - - The execution of the above will still reference my_roles_data as the - unified command line client will perform a look up against the swift - storage. The reason for the unexpected behavior is due to the heatclient - lack of awareness of the swift storage. - - The correct course of action should be as followed:: - - openstack overcloud deploy --templates -r my_roles_data.yaml - openstack overcloud delete - - Allow the stack to be deleted then continue:: - - openstack overcloud deploy --templates - -To deploy an overcloud with multiple controllers and achieve HA, -follow :doc:`../features/high_availability`. - -.. 
admonition:: Virtual - :class: virtual - - When deploying the Compute node in a virtual machine - without nested guest support, add ``--libvirt-type qemu`` - or launching instances on the deployed overcloud will fail. - -.. note:: - - To deploy the overcloud with network isolation, bonds, and/or custom - network interface configurations, instead follow the workflow here to - deploy: :doc:`../features/network_isolation` - -.. note:: - - Previous versions of the client had many parameters defaulted. Some of these - parameters are now pulling defaults directly from the Heat templates. In - order to override these parameters, one should use an environment file to - specify these overrides, via 'parameter_defaults'. - - The parameters that controlled these parameters will be deprecated in the - client, and eventually removed in favor of using environment files. - - -Post-Deployment ---------------- - -.. admonition:: Validations - :class: validations - - After the deployment finishes, you can run the ``post-deployment`` - validations:: - - openstack tripleo validator run --group post-deployment - - Then verify the results as described in :ref:`running_validation_group`. - -Deployment artifacts -^^^^^^^^^^^^^^^^^^^^ - -Artifacts from the deployment, including log files, rendered -templates, and generated environment files are saved under the working -directory which can be specified with the ``--work-dir`` argument to -``openstack overcloud deploy``. By default, the location is -``~/overcloud-deploy/``. - -Access the Overcloud -^^^^^^^^^^^^^^^^^^^^ - -``openstack overcloud deploy`` generates an overcloudrc file appropriate for -interacting with the deployed overcloud in the current user's home directory. -To use it, simply source the file:: - - source ~/overcloudrc - -To return to working with the undercloud, source the ``stackrc`` file again:: - - source ~/stackrc - - -Add entries to /etc/hosts -^^^^^^^^^^^^^^^^^^^^^^^^^ - -In cases where the overcloud hostnames are not already resolvable with DNS, -entries can be added to /etc/hosts to make them resolvable. This is -particularly convenient on the undercloud. The Heat stack provides an output -value that can be appended to /etc/hosts easily. Run the following command to -get the output value and add it to /etc/hosts wherever the hostnames should -be resolvable:: - - openstack stack output show overcloud HostsEntry -f value -c output_value - - -Setup the Overcloud network -^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Initial networks in Neutron in the overcloud need to be created for tenant -instances. The following are example commands to create the initial networks. -Edit the address ranges, or use the necessary ``neutron`` commands to match the -environment appropriately. This assumes a dedicated interface or native VLAN:: - - openstack network create public --external --provider-network-type flat \ - --provider-physical-network datacentre - openstack subnet create --allocation-pool start=172.16.23.140,end=172.16.23.240 \ - --network public --gateway 172.16.23.251 --no-dhcp --subnet-range \ - 172.16.23.128/25 public - -The example shows naming the network "public" because that will allow tempest -tests to pass, based on the default floating pool name set in ``nova.conf``. 
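Because the network is external, it can also serve as the floating IP pool. As a sketch, assuming the overcloud credentials are sourced, a floating IP can be allocated from it::

    source ~/overcloudrc
    openstack floating ip create public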
-You can confirm that the network was created with:: - - openstack network list - -Sample output of the command:: - - +--------------------------------------+----------+--------------------------------------+ - | ID | Name | Subnets | - +--------------------------------------+----------+--------------------------------------+ - | 4db8dd5d-fab5-4ea9-83e5-bdedbf3e9ee6 | public | 7a315c5e-f8e2-495b-95e2-48af9442af01 | - +--------------------------------------+----------+--------------------------------------+ - -To use a VLAN, the following example should work. Customize the address ranges -and VLAN id based on the environment:: - - openstack network create public --external --provider-network-type vlan \ - --provider-physical-network datacentre --provider-segment 195 - openstack subnet create --allocation-pool start=172.16.23.140,end=172.16.23.240 \ - --network public --no-dhcp --gateway 172.16.23.251 \ - --subnet-range 172.16.23.128/25 public - - -Validate the Overcloud -^^^^^^^^^^^^^^^^^^^^^^ - -Check the `Tempest`_ documentation on how to run tempest. - -.. _tempest: ../post_deployment/tempest/tempest.html - -Redeploy the Overcloud -^^^^^^^^^^^^^^^^^^^^^^ - -The overcloud can be redeployed when desired. - -#. First, delete any existing Overcloud:: - - openstack overcloud delete overcloud - -#. Confirm the Overcloud has deleted. It may take a few minutes to delete:: - - # This command should show no stack once the Delete has completed - openstack stack list - -#. It is recommended that you delete existing partitions from all nodes before - redeploying, see :doc:`../provisioning/cleaning` for details. - -#. Deploy the Overcloud again:: - - openstack overcloud deploy --templates diff --git a/deploy-guide/source/deployment/install_undercloud.rst b/deploy-guide/source/deployment/install_undercloud.rst deleted file mode 100644 index 1f0bf687..00000000 --- a/deploy-guide/source/deployment/install_undercloud.rst +++ /dev/null @@ -1,325 +0,0 @@ -Undercloud Installation -======================= - -This section contains instructions on how to install the undercloud. For update -or upgrade to a deployed undercloud see undercloud_upgrade_. - -.. _undercloud_upgrade: ../post_deployment/upgrade/undercloud.html - - -.. _install_undercloud: - -Installing the Undercloud --------------------------- - -.. note:: - Instack-undercloud was deprecated in Rocky cycle. Containerized undercloud - should be installed instead. See :doc:`undercloud` - for backward compatibility related information. - -.. note:: - Please ensure all your nodes (undercloud, compute, controllers, etc) have - their internal clock set to UTC in order to prevent any issue with possible - file future-dated timestamp if hwclock is synced before any timezone offset - is applied. - - -#. Log in to your machine (baremetal or VM) where you want to install the - undercloud as a non-root user (such as the stack user):: - - ssh @ - - .. note:: - If you don't have a non-root user created yet, log in as root and create - one with following commands:: - - sudo useradd stack - sudo passwd stack # specify a password - - echo "stack ALL=(root) NOPASSWD:ALL" | sudo tee -a /etc/sudoers.d/stack - sudo chmod 0440 /etc/sudoers.d/stack - - su - stack - - .. note:: - The undercloud is intended to work correctly with SELinux enforcing. - Installations with the permissive/disabled SELinux are not recommended. - The ``undercloud_enable_selinux`` config option controls that setting. - - .. 
note:: - vlan tagged interfaces must follow the if_name.vlan_id convention, like for - example: eth0.vlan100 or bond0.vlan120. - - .. admonition:: Baremetal - :class: baremetal - - Ensure that there is a FQDN hostname set and that the $HOSTNAME environment - variable matches that value. The easiest way to do this is to set the - ``undercloud_hostname`` option in undercloud.conf before running the - install. This will allow the installer to configure all of the hostname- - related settings appropriately. - - Alternatively the hostname settings can be configured manually, but - this is strongly discouraged. The manual steps are as follows:: - - sudo hostnamectl set-hostname myhost.mydomain - sudo hostnamectl set-hostname --transient myhost.mydomain - - An entry for the system's FQDN hostname is also needed in /etc/hosts. For - example, if the system is named *myhost.mydomain*, /etc/hosts should have - an entry like:: - - 127.0.0.1 myhost.mydomain myhost - - -#. Enable needed repositories: - - .. admonition:: RHEL - :class: rhel - - Enable optional repo for RHEL7:: - - sudo yum install -y yum-utils - sudo yum-config-manager --enable rhelosp-rhel-7-server-opt - - .. include:: ../repositories.rst - - -#. Install the TripleO CLI, which will pull in all other necessary packages as dependencies:: - - sudo dnf install -y python*-tripleoclient - - .. admonition:: RHEL7 / CentOS - - For RHEL or CentOS 7 the command would be:: - - sudo yum install -y python-tripleoclient - - - .. admonition:: Ceph - :class: ceph - - If you intend to deploy Ceph in the overcloud, or configure the overcloud to use an external Ceph cluster, and are running Pike or newer, then install ceph-ansible on the undercloud:: - - sudo dnf install -y ceph-ansible - - .. admonition:: TLS - :class: tls - - If you intend to deploy *TLS-everywhere* in the overcloud and are - deploying Train with python3 or Ussuri+, install the following packages:: - - sudo yum install -y python3-ipalib python3-ipaclient krb5-devel - - If you're deploying Train with python2, install the corresponding python2 - version of the above packages:: - - sudo yum install -y python-ipalib python-ipaclient krb5-devel - - if you intend to use Novajoin to implement *TLS-everywhere* install the - following package:: - - sudo yum install -y python-novajoin - - You can find more information about deploying with TLS in the - :doc:`../features/tls-introduction` documentation. - - -#. Prepare the configuration file:: - - cp /usr/share/python-tripleoclient/undercloud.conf.sample ~/undercloud.conf - - It is backwards compatible with non-containerized instack underclouds. - - .. admonition:: Stable Branch - :class: stable - - For a non-containerized undercloud, copy in the sample configuration - file and edit it to reflect your environment:: - - cp /usr/share/instack-undercloud/undercloud.conf.sample ~/undercloud.conf - - .. note:: There is a tool available that can help with writing a basic - ``undercloud.conf``: - `Undercloud Configuration Wizard `_ - It takes some basic information about the intended overcloud - environment and generates sane values for a number of the important - options. - -#. (OPTIONAL) Generate configuration for preparing container images - - As part of the undercloud install, an image registry is configured on port - `8787`. This is used to increase reliability of overcloud image pulls, and - minimise overall network transfers. 
The undercloud registry will be - populated with images required by the undercloud by generating the following - `containers-prepare-parameter.yaml` file and including it in - ``undercloud.conf: - container_images_file=$HOME/containers-prepare-parameter.yaml``:: - - openstack tripleo container image prepare default \ - --local-push-destination \ - --output-env-file ~/containers-prepare-parameter.yaml - - .. note:: - This command is available since Rocky. - - See :ref:`prepare-environment-containers` for details on using - `containers-prepare-parameter.yaml` to control what can be done - during the container images prepare phase of an undercloud install. - - Additionally, ``docker_insecure_registries`` and ``docker_registry_mirror`` - parameters allow to customize container registries via the - ``undercloud.conf`` file. - -#. (OPTIONAL) Override heat parameters and environment files used for undercloud - deployment. - - Similarly to overcloud deployments, see :ref:`override-heat-templates` and - :ref:`custom-template-location`, the ``undercloud.conf: custom_env_files`` - and ``undercloud.conf: templates`` configuration parameters allow to - use a custom heat templates location and override or specify additional - information for Heat resources used for undercloud deployment. - - Additionally, the ``undercloud.conf: roles_file`` parameter brings in the - ultimate flexibility of :ref:`custom_roles` and :ref:`composable_services`. - This allows you to deploy an undercloud composed of highly customized - containerized services, with the same workflow that TripleO uses for - overcloud deployments. - - .. note:: The CLI and configuration interface used to deploy a containerized - undercloud is the same as that used by 'legacy' non-containerized - underclouds. As noted above however mechanism by which the undercloud is - actually deployed is completely changed and what is more, for the first - time aligns with the overcloud deployment. See the command - ``openstack tripleo deploy --standalone`` help for details. - It normally should not be used directly for undercloud installations. - -#. Run the command to install the undercloud: - - .. admonition:: SSL - :class: optional - - To deploy an undercloud with SSL, see :doc:`../features/ssl`. - - .. admonition:: Validations - :class: validations - - :doc:`../post_deployment/validations/index` will be installed and - configured during undercloud installation. You can set - ``enable_validations = false`` in ``undercloud.conf`` to prevent - that. - - To deploy an undercloud:: - - openstack undercloud install - -.. note:: - The undercloud is containerized by default as of Rocky. - -.. note:: - It's possible to enable verbose logging with ``--verbose`` option. - -.. note:: - To install a deprecated instack undercloud, you'll need to deploy - with ``--use-heat=False`` option. - -Since Rocky, we will run all the OpenStack services in a moby container runtime -unless the default settings are overwritten. -This command requires 2 services to be running at all times. The first one is a -basic keystone service, which is currently executed by `tripleoclient` itself, the -second one is `heat-all` which executes the templates and installs the services. -The latter can be run on baremetal or in a container (tripleoclient will run it -in a container by default). - -Once the install has completed, you should take note of the files ``stackrc`` and -``undercloud-passwords.conf``. 
You can source ``stackrc`` to interact with the -undercloud via the OpenStack command-line client. The ``undercloud-passwords.conf`` -file contains the passwords used for each service in the undercloud. These passwords -will be automatically reused if the undercloud is reinstalled on the same system, -so it is not necessary to copy them to ``undercloud.conf``. - -.. note:: Heat installer configuration, logs and state is ephemeral for - undercloud deployments. Generated artifacts for consequent deployments get - overwritten or removed (when ``undercloud.conf: cleanup = true``). - Although, you can still find them stored in compressed files. - -Miscellaneous undercloud deployment artifacts, like processed heat templates and -compressed files, can be found in ``undercloud.conf: output_dir`` locations -like ``~/tripleo-heat-installer-templates``. - -There is also a compressed file created and placed into the output dir, named as -``undercloud-install-.tar.bzip2``, where TS represents a timestamp. - -Downloaded ansible playbooks and inventory files (see :ref:`config_download`) -used for undercloud deployment are stored in the tempdir -``~/undercloud-ansible-`` by default. - -.. note:: - In order to obtain the ansible command used for the installation of the - Undercloud in the artifacts directory, it is necessary to pass the option - ``--reproduce-command`` in the Undercloud deployment command. - -.. note:: - Any passwords set in ``undercloud.conf`` will take precedence over the ones in - ``undercloud-passwords.conf``. - -.. note:: - The undercloud installation command can be rerun to reapply changes from - ``undercloud.conf`` to the undercloud. Note that this should be done with - caution if an overcloud has already been deployed or is in progress as some - configuration changes could affect the overcloud. These changes include but - are not limited to: - - #. Package repository changes on the undercloud, followed by running the - installation command could update the undercloud such that further - management operations are not possible on the overcloud until the - overcloud update or upgrade procedure is followed. - #. Reconfiguration of the undercloud container registry if the - overcloud is using the undercloud as the source for container images. - #. Networking configuration changes on the undercloud which may affect - the overcloud's ability to connect to the undercloud for - instance metadata services. - - -.. note:: - If running ``docker`` commands as a stack user after an undercloud install fail - with a permission error, log out and log in again. The stack user does get added - to the docker group during install, but that change gets reflected only after a - new login. - -Cleaning the Undercloud ------------------------ - -This procedure isn't cleaning everything that TripleO generates, but enough -so an Undercloud could be re-deployed. - -.. note:: - This procedure has been tested on Train and onward. There is no guarantee - that it works before this version, due to container commands and - new directories. - -#. Log in to your machine (baremetal or VM) where you want to cleanup the - undercloud as a non-root user (such as the stack user):: - - ssh @ - -#. Cleanup the containers and their images:: - - sudo podman rm -af - sudo podman rmi -af - -#. 
Remove directories generated by TripleO:: - - sudo rm -rf \ - /var/lib/tripleo-config \ - /var/lib/config-data \ - /var/lib/container-config-scripts \ - /var/lib/container-puppet \ - /var/lib/heat-config \ - /var/lib/image-service \ - /var/lib/mysql - -#. Cleanup systemd:: - - sudo rm -rf /etc/systemd/system/tripleo* - sudo systemctl daemon-reload diff --git a/deploy-guide/source/deployment/network_v2.rst b/deploy-guide/source/deployment/network_v2.rst deleted file mode 100644 index ac03387b..00000000 --- a/deploy-guide/source/deployment/network_v2.rst +++ /dev/null @@ -1,485 +0,0 @@ -.. _network_v2: - -Networking Version 2 (Two) -========================== - -Introduction ------------- - -In the Wallaby cycle TripleO Networking has been refactored so that no -OS::Neutron heat resources are used. This was a pre-requisite for -:doc:`./ephemeral_heat`. Managing non-ephemeral neutron resources with an -ephemeral heat stack is not feasible, so the management of neutron resources -has been externalized from the overcloud heat stack. - -High level overview of the changes -.................................. - -* NIC config templates was migrated to ansible j2 templates during the - Victoria release. Replacing the heat templates previously used for NIC - configuration. Sample ansible j2 templates are available in the - `tripleo-ansible `_ - git repository as well as in - ``/usr/share/ansible/roles/tripleo_network_config/templates/`` on a deployed - undercloud. - - Please refer to :ref:`creating_custom_interface_templates` on the - :ref:`network_isolation` documentation page for further details on writing - custom Ansible j2 NIC config templates. - -* A new schema for the network definitions used for Jinja2 rendering of the - ``tripleo-heat-templates`` was introduced, in addition to tripleoclient - commands to provision networks using the new network definitions schema. - -* A new schema for network Virtual IPs was introduced in conjunction with - tripleoclient commands to provision the Virtual IPs. - -* Service Virtual IPs (redis and ovsdb) was refactored so that the neutron - resources are created by the deploy-steps playbook post-stack create/update. - -* The baremetal provisioning schema was extended to include advanced network - layouts. The ``overcloud node provision`` command was extended so that it - also provision neutron port resources for all networks defined for instances/ - roles in the baremetal provisioning definition. - -* The tool (``tripleo-ansible-inventory``) used to generate the ansible - inventory was extended to use neutron as a source for the inventory in - addition to the overcloud heat stack outputs. - -* With the TripleO ansible inventory's support to use neutron resources as a - data source, the baremetal provisioning schema and ``overcloud node - provision`` command was extended to allow arbitrary playbook - execute against the provisioned nodes, as well as applying node network - configuration utilizing the ``tripleo_network_config`` ansible role and the - ansible j2 NIC config templates. - -With all of the above in place the ``overcloud deploy`` command was extended so -that it can run all the steps: - -#. Create Networks - - Run the ``cli-overcloud-network-provision.yaml`` ansible playbook using the - network definitions provided via the ``--network-file`` argument. 
This - playbook creates/updates the neutron networks on the undercloud and - generates the ``networks-deployed.yaml`` environment file which is included - as a user-environment when creating the overcloud heat stack. - -#. Create Virtual IPs - - Run the ``cli-overcloud-network-vip-provision.yaml`` ansible playbook using - the Virtual IP definitions provided via the ``--vip-file`` argument. This - playbook creates/updates the Virtual IP port resources in neutron on the - undercloud and generates the ``virtual-ips-deployed.yaml`` environment file - which is included as a user-environment when creating the overcloud heat - stack. - -#. Provision Baremetal Instances - - Run the ``cli-overcloud-node-provision.yaml`` ansible playbook using the - baremetal instance definitions provided via the ``--baremetal-deployment`` - argument in combination with the ``--network-config`` argument so that - baremetal nodes are provisioned and network port resources are created. Also - run any arbitrary Ansible playbooks provided by the user on the provisioned - nodes before finally configured overcloud node networking using the - ``tripleo_network_config`` ansible role. - -#. Create the overcloud Ephemeral Heat stack - - The environment files with the parameters and resource registry overrides - required is automatically included when the ``overcloud deploy`` command is - run with the arguments: ``--vip-file``, ``--baremetal-deployment`` and - ``--network-config``. - -#. Run Config-Download and the deploy-steps playbook - - As an external deploy step the neutron ports for Service Virtual IPs are - created, and the properties of the Virtual IPs are included in hieradata. - - .. admonition:: Ceph - :class: ceph - - Optionally Ceph may be deployed after the baremetal instances - are provisioned but before the ephemeral Heat stack is created - as described in :doc:`../features/deployed_ceph`. - -Using ------ - -Pre-Provision networks -...................... - -The command to pre-provision networks for one or more overcloud stack(s) is -``openstack overcloud network provision``. The command takes a network-v2 -version networks definitions YAML file as input, and writes a heat environment -file to the file specified using the ``--output`` argument. - -Please refer to the :ref:`network_definition_opts` reference section on the -:ref:`custom_networks` document page for a reference on available options in -the network data YAML schema. - -Sample network definition YAML files can be located in the -`tripleo-heat-templates git repository -`_, -or in the ``/usr/share/openstack-tripleo-heat-templates/network-data-samples`` -directory on the undercloud. - - -**Example**: Networks definition YAML file defining the external network. - -.. code-block:: yaml - - - name: External - name_lower: external - vip: true - mtu: 1500 - subnets: - external_subnet: - ip_subnet: 10.0.0.0/24 - allocation_pools: - - start: 10.0.0.4 - end: 10.0.0.250 - gateway_ip: 10.0.0.1 - vlan: 10 - -**Example**: Create or update networks - -.. code-block:: bash - - $ openstack overcloud network provision \ - --output ~/overcloud-networks-deployed.yaml \ - ~/network_data_v2.yaml - -When deploying the overcloud include the environment file generated by the -``overcloud network provision`` command. - -.. code-block:: bash - - $ openstack overcloud deploy --templates \ - -e ~/overcloud-networks-deployed.yaml - -Pre-Provision network Virtual IPs -................................. 
- -The command to pre-provision Virtual IPs for an overcloud stack is: -``openstack overcloud network vip provision``. The command takes a Virtual IPs -definitions YAML file as input, and writes a heat environment file to the file -specified using the ``--output`` argument. The ``--stack`` argument defines the -name of the overcloud stack for which Virtual IPs will be provisioned. - -Please refer to the :ref:`virtual_ips_definition_opts` reference section on the -:ref:`custom_networks` document page for a reference on available options in -the Virtual IPs YAML schema. - -Sample network definition YAML files can be located in the -`tripleo-heat-templates git repository -`_, -or in the ``/usr/share/openstack-tripleo-heat-templates/network-data-samples`` -directory on the undercloud. - -**Example**: Virtual IPs definition YAML file defining the ctlplane and the -external network Virtual IPs. - -.. code-block:: yaml - - - network: ctlplane - dns_name: overcloud - - network: external - dns_name: overcloud - -**Example**: Create or update Virtual IPs - -.. code-block:: bash - - $ openstack overcloud network vip provision \ - --stack overcloud \ - --output ~/overcloud-vip-deployed.yaml \ - ~/vip_data.yaml - -When deploying the overcloud include the environment file generated by the -``overcloud network provision`` command. For example: - -.. code-block:: bash - - $ openstack overcloud deploy --templates \ - -e ~/overcloud-vip-deployed.yaml - - -Service Virtual IPs -................... - -Service Virtual IPs are created as needed when the service is enabled. To -configure the subnet to use the existing ``ServiceVipMap`` heat parameter. -For a fixed IP allocation the existing heat parameters ``RedisVirtualFixedIPs`` -and/or ``OVNDBsVirtualFixedIPs`` can be used. - -**Example**: Setting fixed ips: - -.. code-block:: yaml - - parameter_defaults: - RedisVirtualFixedIPs: [{'ip_address': '172.20.0.11'}] - OVNDBsVirtualFixedIPs: [{'ip_address': '172.20.0.12'}] - -**Example**: Setting fixed IP address and not creating a neutron resource: - -.. code-block:: yaml - - parameter_defaults: - RedisVirtualFixedIPs: [{'ip_address': '172.20.0.11', 'use_neutron': false}] - OVNDBsVirtualFixedIPs: [{'ip_address': '172.20.0.12', 'use_neutron': false}] - -.. note:: Overriding the Service Virtual IPs using the resource registry - entries ``OS::TripleO::Network::Ports::RedisVipPort`` and - ``OS::TripleO::Network::Ports::OVNDBsVipPort`` is no longer - supported. - - -Provision Baremetal Instances -............................. - -Pre provisioning baremetal instances using Metalsmith has been supported for a -while. The TripleO Network v2 work extended the workflow that provision -baremetal instances to also provision the neutron network port resources and -added the interface to run arbitrary Ansible playbooks after node provisioning. - -Please refer to the :ref:`baremetal_provision` document page for a reference on -available options in the Baremetal Deployment YAML schema. - -**Example**: Baremetal Deployment YAML set up for default the default -network-isolation scenario, including one pre-network config Ansible playbook -that will be run against the nodes in each role. - -.. 
code-block:: yaml - - - name: Controller - count: 1 - hostname_format: controller-%index% - ansible_playbooks: - - playbook: bm-deploy-playbook.yaml - defaults: - profile: control - networks: - - network: external - subnet: external_subnet - - network: internal_api - subnet: internal_api_subnet01 - - network: storage - subnet: storage_subnet01 - - network: storage_mgmt - subnet: storage_mgmt_subnet01 - - network: tenant - subnet: tenant_subnet01 - network_config: - template: templates/multiple_nics/multiple_nics_dvr.j2 - default_route_network: - - external - - name: Compute - count: 1 - hostname_format: compute-%index% - ansible_playbooks: - - playbook: bm-deploy-playbook.yaml - defaults: - profile: compute-leaf2 - networks: - - network: internal_api - subnet: internal_api_subnet02 - - network: tenant - subnet: tenant_subnet02 - - network: storage - subnet: storage_subnet02 - network_config: - template: templates/multiple_nics/multiple_nics_dvr.j2 - -**Example**: Arbitrary Ansible playbook example bm-deploy-playbook.yaml - -.. code-block:: yaml - - - name: Overcloud Node Network Config - hosts: allovercloud - any_errors_fatal: true - gather_facts: false - tasks: - - name: A task - debug: - msg: "A message" - -To provision baremetal nodes, create neutron port resource and apply network -configuration as defined in the above definition run the ``openstack overcloud -node provision`` command including the ``--network-config`` argument as shown -in the below example: - -.. code-block:: bash - - $ openstack overcloud node provision \ - --stack overcloud \ - --network-config \ - --output ~/overcloud-baremetal-deployed.yaml \ - ~/baremetal_deployment.yaml - -When deploying the overcloud include the environment file generated by the -``overcloud node provision`` command and enable the ``--deployed-server`` -argument. - -.. code-block:: bash - - $ openstack overcloud deploy --templates \ - --deployed-server \ - -e ~/overcloud-baremetal-deployed.yaml - -The *All-in-One* alternative using overcloud deploy command -............................................................. - -It is possible to instruct the ``openstack overcloud deploy`` command to do all -of the above steps in one go. The same YAML definitions can be used and the -environment files will be automatically included. - -**Example**: Use the **All-in-One** deploy command: - -.. code-block:: bash - - $ openstack overcloud deploy \ - --templates \ - --stack overcloud \ - --network-config \ - --deployed-server \ - --roles-file ~/my_roles_data.yaml \ - --networks-file ~/network_data_v2.yaml \ - --vip-file ~/vip_data.yaml \ - --baremetal-deployment ~/baremetal_deployment.yaml - - -Managing Multiple Overclouds -............................ - -When managing multiple overclouds using a single undercloud one would have to -use a different ``--stack`` name and ``--output`` as well as per-overcloud -YAML definitions for provisioning Virtual IPs and baremetal nodes. - -Networks can be shared, or separate for each overcloud stack. If they are -shared, use the same network definition YAML and deployed network environment -for all stacks. In the case where networks are not shared, a separate network -definitions YAML and a separate deployed network environment file must be used -by each stack. - -.. note:: The ``ctlplane`` provisioning network will always be shared. 
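**Example**: A minimal sketch of preparing a second stack named ``overcloud2``
that shares the networks of the first stack (reusing the previously generated
``~/overcloud-networks-deployed.yaml``) while using its own Virtual IP and
baremetal definition files. The stack name and file names below are
illustrative only, not fixed conventions:

.. code-block:: bash

   # Provision Virtual IPs for the second stack
   $ openstack overcloud network vip provision \
       --stack overcloud2 \
       --output ~/overcloud2-vip-deployed.yaml \
       ~/vip_data_overcloud2.yaml

   # Provision baremetal nodes and neutron ports for the second stack
   $ openstack overcloud node provision \
       --stack overcloud2 \
       --network-config \
       --output ~/overcloud2-baremetal-deployed.yaml \
       ~/baremetal_deployment_overcloud2.yaml

   # Deploy the second stack, reusing the shared network environment
   $ openstack overcloud deploy --templates \
       --stack overcloud2 \
       --deployed-server \
       -e ~/overcloud-networks-deployed.yaml \
       -e ~/overcloud2-vip-deployed.yaml \
       -e ~/overcloud2-baremetal-deployed.yaml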
- - -Migrating existing deployments ------------------------------- - -To facilitate the migration for deployed overclouds tripleoclient commands to -extract information from deployed overcloud stacks has been added. During the -upgrade to Wallaby these tools will be executed as part of the undercloud -upgrade, placing the generated YAML definition files in the working directory -(Defaults to: ``~/overcloud-deploy/$STACK_NAME/``). Below each export command -is described, and examples to use them manually with the intent for developers -and operators to be able to better understand what happens "under the hood" -during the undercloud upgrade. - -There is also a tool ``convert_heat_nic_config_to_ansible_j2.py`` that can be -used to convert heat template NIC config to Ansible j2 templates. - -.. warning:: If migrating to use Networking v2 while using the non-Ephemeral - heat i.e ``--heat-type installed``, the existing overcloud stack - must **first** be updated to set the ``deletion_policy`` for - ``OS::Nova`` and ``OS::Neutron`` resources. This can be done - using a ``--stack-only`` update, including an environment file - setting the following tripleo-heat-templates parameters - ``NetworkDeletionPolicy``, ``PortDeletionPolicy`` and - ``ServerDeletionPolicy`` to ``retain``. - - If the deletion policy is not set to ``retain`` the - orchestration service will **delete** the existing resources - when an update using the Networking v2 environments is - performed. - -Conflicting legacy environment files -.................................... - -The heat environment files created by the Networking v2 commands uses resource -registry overrides to replace the existing resources with *pre-deployed* -resource types. These resource registry entries was also used by legacy -environment files, such as ``network-isolation.yaml``. The legacy files should -no longer be used, as they will nullify the new overrides. - -It is recommended to compare the generated environment files with existing -environment files used with the overcloud deployment prior to the migration and -remove all settings that overlap with the settings in the generated environment -files. - -Convert NIC configs -................... - -In the tripleo-heat-templates ``tools`` directory there is a script -``convert_heat_nic_config_to_ansible_j2.py`` that can be used to convert heat -NIC config templates to Ansible j2 NIC config templates. - -**Example**: Convert the compute.yaml heat NIC config template to Ansible j2. - -.. code-block:: bash - - $ /usr/share/openstack-tripleo-heat-templates/convert_heat_nic_config_to_ansible_j2.py \ - --stack overcloud \ - --networks-file network_data.yaml \ - ~/nic-configs/compute.yaml - -.. warning:: The tool does a best-effort to fully automate the conversion. The - new Ansible j2 template files should be inspected, there may be - a need to manually edit the new Ansible j2 template. The tool will - try to highlight any issues that need manual intervention by - adding comments in the Ansible j2 file. - -The :ref:`migrating_existing_network_interface_templates` section on the -:ref:`network_isolation` documentation page provides a guide for manual -migration. - -Generate Network YAML -..................... - -The command ``openstack overcloud network extract`` can be used to generate -a Network definition YAML file from a deployed overcloud stack. The YAML -definition file can then be used with ``openstack overcloud network provision`` -and the ``openstack overcloud deploy`` command. 
- -**Example**: Generate a Network definition YAML for the ``overcloud`` stack: - -.. code-block:: bash - - $ openstack overcloud network extract \ - --stack overcloud \ - --output ~/network_data_v2.yaml - -Generate Virtual IPs YAML -......................... - -The command ``openstack overcloud network vip extract`` can be used to generate -a Virtual IPs definition YAML file from a deployed overcloud stack. The YAML -definition file can then be used with ``openstack overcloud network vip -provision`` command and/or the ``openstack overcloud deploy`` command. - -**Example**: Generate a Virtual IPs definition YAML for the ``overcloud`` -stack: - -.. code-block:: bash - - $ openstack overcloud network vip extract \ - --stack overcloud \ - --output /home/centos/overcloud/network_vips_data.yaml - -Generate Baremetal Provision YAML -................................. - -The command ``openstack overcloud node extract provisioned`` can be used to -generate a Baremetal Provision definition YAML file from a deployed overcloud -stack. The YAML definition file can then be used with ``openstack overcloud -node provision`` command and/or the ``openstack overcloud deploy`` command. - -**Example**: Export deployed overcloud nodes to Baremetal Deployment YAML -definition - -.. code-block:: bash - - $ openstack overcloud node extract provisioned \ - --stack overcloud \ - --roles-file ~/tht_roles_data.yaml \ - --output ~/baremetal_deployment.yaml diff --git a/deploy-guide/source/deployment/overcloud.rst b/deploy-guide/source/deployment/overcloud.rst deleted file mode 100644 index 9113e276..00000000 --- a/deploy-guide/source/deployment/overcloud.rst +++ /dev/null @@ -1,84 +0,0 @@ -Containers based Overcloud Deployment -====================================== - -This documentation explains how to deploy a fully containerized overcloud -utilizing Podman which is the default since the Stein release. - -The requirements for a containerized overcloud are the same as for any other -overcloud deployment. The real difference is in where the overcloud services -will be deployed (containers vs base OS). - -Architecture ------------- - -The container-based overcloud architecture is not very different from the -baremetal/VM based one. The services deployed in the traditional baremetal -overcloud are also deployed in the docker-based one. - -One obvious difference between these two types of deployments is that the -Openstack services are deployed as containers in a container runtime rather -than directly on the host operating system. This reduces the required packages -in the host to the bare minimum for running the container runtime and managing -the base network layer. - - -Manual overcloud deployment ----------------------------- - -This section explains how to deploy a containerized overcloud manually. For an -automated overcloud deployment, please follow the steps in the -`Using TripleO Quickstart`_ section below. - -Preparing overcloud images -.......................... - -As part of the undercloud install, an image registry is configured on port -`8787`. This is used to increase reliability of overcloud image pulls, and -minimise overall network transfers. The undercloud registry will be populated -with images required by the overcloud deploy by generating the following -`containers-prepare-parameter.yaml` file and using that for the prepare call:: - - openstack tripleo container image prepare default \ - --local-push-destination \ - --output-env-file containers-prepare-parameter.yaml - -.. 
note:: The file `containers-prepare-parameter.yaml` may have already been - created during :ref:`install_undercloud`. It is - encouraged to share the same `containers-prepare-parameter.yaml` file - for undercloud install and overcloud deploy. - -See :ref:`prepare-environment-containers` for details on using -`containers-prepare-parameter.yaml` to control what can be done -with image preparation during overcloud deployment. - -.. _overcloud-prepare-container-images: - -Deploying the containerized Overcloud -------------------------------------- - -A containerized overcloud deployment follows all the steps described in the -baremetal :ref:`deploy-the-overcloud` documentation with the exception that it -requires an extra environment file to be added to the ``openstack overcloud -deploy`` command:: - - -e ~/containers-prepare-parameter.yaml - -If deploying with highly available controller nodes, include the -following extra environment file in addition to the above and in place -of the `environments/puppet-pacemaker.yaml` file:: - - -e /usr/share/openstack-tripleo-heat-templates/environments/docker-ha.yaml - -Using TripleO Quickstart ------------------------- - -.. note:: Please refer to the `TripleO Quickstart`_ docs for more info about - quickstart, the minimum requirements, the setup process and the - available plugins. - - -The command below will deploy a containerized overcloud on top of a baremetal undercloud:: - - bash quickstart.sh --config=~/.quickstart/config/general_config/containers_minimal.yml $VIRTHOST - -.. _TripleO Quickstart: https://docs.openstack.org/tripleo-quickstart/ diff --git a/deploy-guide/source/deployment/repositories.rst b/deploy-guide/source/deployment/repositories.rst deleted file mode 100644 index 4e5c6488..00000000 --- a/deploy-guide/source/deployment/repositories.rst +++ /dev/null @@ -1,6 +0,0 @@ -:orphan: - -Repository Enablement -===================== - -.. include:: ../repositories.rst diff --git a/deploy-guide/source/deployment/standalone.rst b/deploy-guide/source/deployment/standalone.rst deleted file mode 100644 index 9f18ba9f..00000000 --- a/deploy-guide/source/deployment/standalone.rst +++ /dev/null @@ -1,1132 +0,0 @@ -.. _standalone: - -Standalone Containers based Deployment -====================================== - -This documentation explains how the underlying framework used by the -Containerized Undercloud deployment mechanism can be reused to deploy a -single node capable of running OpenStack services for development. Optional -instructions for installing Ceph are included as well. - -System Requirements for a Standalone Deployment ------------------------------------------------ - - .. include:: ../environments/standalone.rst - :start-after: .. include_after_header - -Deploying a Standalone OpenStack node -------------------------------------- - -#. Copy your SSH key to a non-root user on your machine (baremetal or VM) - where you want to install the standalone services.:: - - ssh-copy-id -i ~/.ssh/ @ - -#. Connect to your machine as the non-root user.:: - - ssh @ - -#. Ensure a fully qualified hostname has been configured on the host being - deployed on. For example:: - - sudo hostnamectl set-hostname standalone.localdomain - sudo hostnamectl set-hostname standalone.localdomain --transient - -#. Enable needed repositories: - - .. include:: ../repositories.rst - -#. Install the TripleO CLI, which will pull in all other necessary packages as dependencies:: - - sudo dnf install -y python3-tripleoclient - - .. 
admonition:: Ceph - :class: ceph - - Install the packages necessary to deploy Ceph. - - .. code-block:: bash - - sudo dnf install -y util-linux lvm2 cephadm - -#. Generate a file with the default ContainerImagePrepare value:: - - openstack tripleo container image prepare default \ - --output-env-file $HOME/containers-prepare-parameters.yaml - - .. note:: - Update containers-prepare-parameters.yaml for your own needs. - See :ref:`prepare-environment-containers` for more details. - - .. admonition:: Ceph - :class: ceph - - Create a block device with logical volumes to be used as an OSD. - - .. code-block:: bash - - sudo dd if=/dev/zero of=/var/lib/ceph-osd.img bs=1 count=0 seek=7G - sudo losetup /dev/loop3 /var/lib/ceph-osd.img - sudo pvcreate /dev/loop3 - sudo vgcreate vg2 /dev/loop3 - sudo lvcreate -n data-lv2 -l +100%FREE vg2 - - Create a systemd service that restores the device on startup. - - .. code-block:: bash - - cat < /tmp/ceph-osd-losetup.service - [Unit] - Description=Ceph OSD losetup - After=syslog.target - - [Service] - Type=oneshot - ExecStart=/bin/bash -c '/sbin/losetup /dev/loop3 || \ - /sbin/losetup /dev/loop3 /var/lib/ceph-osd.img ; partprobe /dev/loop3' - ExecStop=/sbin/losetup -d /dev/loop3 - RemainAfterExit=yes - - [Install] - WantedBy=multi-user.target - EOF - - sudo mv /tmp/ceph-osd-losetup.service /etc/systemd/system/ - sudo systemctl enable ceph-osd-losetup.service - -#. Configure basic standalone parameters which include network configuration - and some deployment options. - - .. warning:: - The standalone deployment requires one network interface on the deployment - machine and **that interface will be reconfigured as per the parameters - you specify below**. The interface you want to use is specified by name - in the $INTERFACE parameter below and passed as the NeutronPublicInterface - in the standalone_parameters.yaml. If you only have one interface on your - machine be advised that it will be reconfigured to have the IP address - specified in $IP. If that is a remote box you may lose connectivity to it. - Any other network interfaces are left untouched. - - For the standalone deployment we use a single NIC on the target machine - which is reconfigured and set as a member of an ovs bridge, **br-ctlplane**. Two - examples follow which can be copy/pasted as is - depending on your setup. - You should only have to change the name of the interface to match whatever - it is called on your system. Ideally you will have two network interfaces, - so that one is used for the standalone deployment, whilst the other will be - left untouched. This can be especially important if you are deploying on a - remote box (e.g. via ssh). - - The following configuration can be used for a system with 2 network - interfaces. This configuration assumes the first interface is used for - management and we will only configure the second interface. The deployment - assumes the second interface has a "public" /24 network which will be used - for the cloud endpoints and public VM connectivity. - - In addition to the IPs used on eth1, a virtual IP will be added and managed - by pacemaker. This must be a different address to the other IP as one will - be bound to by haproxy while the other by backend services on the same. - - .. Note: The following example utilizes 2 interfaces. NIC1 which will serve as - the management interface. It can have any address and will be left untouched. - NIC2 will serve as the OpenStack & Provider network NIC. 
The following - exports should be configured for your network and interface. - - .. code-block:: bash - - export IP=192.168.24.2 - export VIP=192.168.24.3 - export NETMASK=24 - export INTERFACE=eth1 - - You will now create the standalone_parameters.yaml. The $IP, $VIP, $NETMASK, - and $INTERFACE will be replaced with the values from the export commands. - - .. code-block:: bash - - cat < $HOME/standalone_parameters.yaml - parameter_defaults: - CloudName: $IP - ControlPlaneStaticRoutes: [] - Debug: true - DeploymentUser: $USER - DnsServers: - - 1.1.1.1 - - 8.8.8.8 - DockerInsecureRegistryAddress: - - $IP:8787 - NeutronPublicInterface: $INTERFACE - # domain name used by the host - CloudDomain: localdomain - NeutronDnsDomain: localdomain - # re-use ctlplane bridge for public net, defined in the standalone - # net config (do not change unless you know what you're doing) - NeutronBridgeMappings: datacentre:br-ctlplane - NeutronPhysicalBridge: br-ctlplane - # enable to force metadata for public net - #NeutronEnableForceMetadata: true - StandaloneEnableRoutedNetworks: false - StandaloneHomeDir: $HOME - InterfaceLocalMtu: 1500 - # Needed if running in a VM, not needed if on baremetal - NovaComputeLibvirtType: qemu - EOF - - The following configuration can be used for a system with a single network - interface. This configuration assumes that the interface is shared for - management and cloud functions. This configuration requires there be at - least 3 ip addresses available for configuration. 1 ip is used for the - cloud endpoints, 1 is used for an internal router and 1 is used as a - floating IP. - - .. Note: NIC1 will serve as the management, OpenStack and Provider network - interface. The exports should be configured for your network and interface. - - .. code-block:: bash - - export IP=192.168.24.2 - export VIP=192.168.24.3 - export NETMASK=24 - export GATEWAY=192.168.24.1 - export INTERFACE=eth0 - - You will now create the standalone_parameters.yaml. The $IP, $NETMASK, - $GATEWAY, and $INTERFACE will be replaced with the values from the export - commands. - - .. code-block:: bash - - cat < $HOME/standalone_parameters.yaml - parameter_defaults: - CloudName: $IP - # default gateway - ControlPlaneStaticRoutes: - - ip_netmask: 0.0.0.0/0 - next_hop: $GATEWAY - default: true - Debug: true - DeploymentUser: $USER - DnsServers: - - 1.1.1.1 - - 8.8.8.8 - # needed for vip & pacemaker - KernelIpNonLocalBind: 1 - DockerInsecureRegistryAddress: - - $IP:8787 - NeutronPublicInterface: $INTERFACE - # domain name used by the host - CloudDomain: localdomain - NeutronDnsDomain: localdomain - # re-use ctlplane bridge for public net, defined in the standalone - # net config (do not change unless you know what you're doing) - NeutronBridgeMappings: datacentre:br-ctlplane - NeutronPhysicalBridge: br-ctlplane - # enable to force metadata for public net - #NeutronEnableForceMetadata: true - StandaloneEnableRoutedNetworks: false - StandaloneHomeDir: $HOME - InterfaceLocalMtu: 1500 - # Needed if running in a VM, not needed if on baremetal - NovaComputeLibvirtType: qemu - EOF - - .. admonition:: Ceph - :class: ceph - - Establish an IP address on which Ceph will listen. Because the - 192.168.24.0/24 network containing the cloud IP and VIP defined - earlier is not configured until `openstack tripleo deploy` is - run, we need Ceph to run on a different network if we're going - to deploy Ceph before the overcloud as described in - :doc:`../features/deployed_ceph`. 
Any IP on the VM may be used - including the IP on the default libvirt network - 192.168.122.0/24. For example: - - .. code-block:: bash - - export CEPH_IP=192.168.122.252 - - Create an OSD spec file which references the block device with - the logical volumes created earlier. - - .. code-block:: bash - - cat < $HOME/osd_spec.yaml - data_devices: - paths: - - /dev/vg2/data-lv2 - EOF - - Use the Ceph IP and OSD spec file to create a Ceph spec file - which will describe the Ceph cluster in a format `cephadm` can - parse. The `--standalone` option covers a special case for this - scenario because 'openstack overcloud node provision' is not - used. - - .. code-block:: bash - - sudo openstack overcloud ceph spec \ - --standalone \ - --mon-ip $CEPH_IP \ - --osd-spec $HOME/osd_spec.yaml \ - --output $HOME/ceph_spec.yaml - - Create the ceph-admin user by passing the Ceph spec created - earlier and use the `--standalone` option. - - .. code-block:: bash - - sudo openstack overcloud ceph user enable \ - --standalone \ - $HOME/ceph_spec.yaml \ - - Though Ceph will be configured to run on a single host via the - `--single-host-defaults` option, this deployment only has a - single OSD so it cannot replicate data even on the same host. - Create an initial Ceph configuration to disable replication: - - .. code-block:: bash - - cat < $HOME/initial_ceph.conf - [global] - osd pool default size = 1 - [mon] - mon_warn_on_pool_no_redundancy = false - EOF - - Additional Ceph daemons can be added to the Ceph cluster, but only Ceph - mds and nfs daemons are supported. Create a Ceph_daemon spec definition, - and add the required information for each daemon: - - .. code-block:: bash - - cat < $HOME/ceph_daemon.yaml - ceph_nfs: - cephfs_data: 'manila_data' - cephfs_metadata: 'manila_metadata' - EOF - - Deploy Ceph by passing the IP, Ceph spec, Ceph conf and Ceph daemon - definition created above. Use the options `--standalone`, - `--single-host-defaults`, `--skip-hosts-config` and - `--skip-container-registry-config`. Use `openstack overcloud ceph deploy - --help` for details on what these options do. User creation is skipped - via `--skip-user-create` because it was handled in the previous step. - Specify what the output deployed Ceph file should be called. The - CephIngress deployment can be skipped in a standalone environment if - there is no network isolation and the CephNFS daemon already has a stable - floating ip. - - .. code-block:: bash - - sudo openstack overcloud ceph deploy \ - --mon-ip $CEPH_IP \ - --ceph-spec $HOME/ceph_spec.yaml \ - --config $HOME/initial_ceph.conf \ - --daemon $HOME/ceph_daemon.yaml \ - --standalone \ - --single-host-defaults \ - --skip-hosts-config \ - --skip-container-registry-config \ - --skip-user-create \ - --output $HOME/deployed_ceph.yaml - - A Ceph RBD cluster should now be deployed and `sudo cephadm - shell -- ceph -s` may be used to check its status. The - `deployed_ceph.yaml` file is a Heat environment file describing - the deployed Ceph cluster and should be used during overcloud - deployment. - - -#. Run the deploy command: - - .. code-block:: bash - - sudo openstack tripleo deploy \ - --templates \ - --local-ip=$IP/$NETMASK \ - --control-virtual-ip $VIP \ - -e /usr/share/openstack-tripleo-heat-templates/environments/standalone/standalone-tripleo.yaml \ - -r /usr/share/openstack-tripleo-heat-templates/roles/Standalone.yaml \ - -e $HOME/containers-prepare-parameters.yaml \ - -e $HOME/standalone_parameters.yaml \ - --output-dir $HOME - - .. 
admonition:: Ceph - :class: ceph - - Include the Ceph environment files in the deploy command: - - .. code-block:: bash - - sudo openstack tripleo deploy \ - --templates \ - --local-ip=$IP/$NETMASK \ - --control-virtual-ip $VIP \ - -e /usr/share/openstack-tripleo-heat-templates/environments/standalone/standalone-tripleo.yaml \ - -e /usr/share/openstack-tripleo-heat-templates/environments/cephadm/cephadm.yaml \ - -r /usr/share/openstack-tripleo-heat-templates/roles/Standalone.yaml \ - -e $HOME/containers-prepare-parameters.yaml \ - -e $HOME/standalone_parameters.yaml \ - -e $HOME/deployed_ceph.yaml \ - --output-dir $HOME - - -#. Check the deployed OpenStack Services - - At the end of the deployment, a clouds.yaml configuration file is placed in - the /root/.config/openstack folder. This can be used with the openstack - client to query the OpenStack services. - - .. code-block:: bash - - export OS_CLOUD=standalone - openstack endpoint list - -#. Cleanup a deployment - - If you want to remove the services and files installed by Standalone after - a deployment failure, or just to re-deploy from scratch, you can run the - following script: - - .. code-block:: bash - - #!/bin/bash - echo "Tearing down TripleO environment" - if type pcs &> /dev/null; then - sudo pcs cluster destroy - fi - if type podman &> /dev/null; then - echo "Removing podman containers and images (takes times...)" - sudo podman rm -af - sudo podman rmi -af - fi - sudo rm -rf \ - /var/lib/tripleo-config \ - /var/lib/config-data /var/lib/container-config-scripts \ - /var/lib/container-puppet \ - /var/lib/heat-config \ - /var/lib/image-serve \ - /var/lib/containers \ - /etc/systemd/system/tripleo* \ - /var/lib/mysql/* \ - /etc/openstack - rm -rf ~/.config/openstack - sudo systemctl daemon-reload - - .. admonition:: Ceph - :class: ceph - - To remove Ceph and its block device run the following. - - .. code-block:: bash - - FSID=$(sudo ls /var/lib/ceph) - sudo cephadm rm-cluster --force --fsid $FSID - - sudo systemctl stop ceph-osd-losetup.service - sudo systemctl disable ceph-osd-losetup.service - sudo lvremove --force /dev/vg2/data-lv2 - sudo vgremove --force vg2 - sudo pvremove --force /dev/loop3 - sudo losetup -d /dev/loop3 - sudo rm -f /var/lib/ceph-osd.img - sudo partprobe - -Manual deployments with ansible -------------------------------- - -With the ``--output-only`` option enabled, the installation stops before Ansible -playbooks would be normally executed. Instead, it only creates a Heat stack, -then downloads the ansible deployment data and playbooks to ``--output-dir`` for -the manual execution. - -.. note:: - When updating the existing standalone installation, keep in mind the - special cases described in :ref:`notes-for-stack-updates`. There is an - additional case for the ``--force-stack-update`` flag that might need to be - used, when in the ``--output-only`` mode. That is when you cannot know the - results of the actual deployment before ansible has started. - -Example: 1 NIC, Using Compute with Tenant and Provider Networks ---------------------------------------------------------------- - -The following example is based on the single NIC configuration and assumes that -the environment had at least 3 total IP addresses available to it. 
The IPs are -used for the following: - -- 1 IP address for the OpenStack services (this is the ``--local-ip`` from the - deploy command) -- 1 IP used as a Virtual Router to provide connectivity to the Tenant network - is used for the OpenStack services (is automatically assigned in this example) -- The remaining IP addresses (at least 1) are used for Floating IPs on the - provider network. - -The following is an example post deployment launching of a VM using the -private tenant network and the provider network. - -#. Create helper variables for the configuration:: - - # standalone with tenant networking and provider networking - export OS_CLOUD=standalone - export GATEWAY=192.168.24.1 - export STANDALONE_HOST=192.168.24.2 - export PUBLIC_NETWORK_CIDR=192.168.24.0/24 - export PRIVATE_NETWORK_CIDR=192.168.100.0/24 - export PUBLIC_NET_START=192.168.24.4 - export PUBLIC_NET_END=192.168.24.5 - export DNS_SERVER=1.1.1.1 - -#. Initial Nova and Glance setup:: - - # nova flavor - openstack flavor create --ram 512 --disk 1 --vcpu 1 --public tiny - # basic cirros image - wget https://download.cirros-cloud.net/0.4.0/cirros-0.4.0-x86_64-disk.img - openstack image create cirros --container-format bare --disk-format qcow2 --public --file cirros-0.4.0-x86_64-disk.img - # nova keypair for ssh - ssh-keygen - openstack keypair create --public-key ~/.ssh/id_rsa.pub default - -#. Setup a simple network security group:: - - # create basic security group to allow ssh/ping/dns - openstack security group create basic - # allow ssh - openstack security group rule create basic --protocol tcp --dst-port 22:22 --remote-ip 0.0.0.0/0 - # allow ping - openstack security group rule create --protocol icmp basic - # allow DNS - openstack security group rule create --protocol udp --dst-port 53:53 basic - -#. Create Neutron Networks:: - - openstack network create --external --provider-physical-network datacentre --provider-network-type flat public - openstack network create --internal private - openstack subnet create public-net \ - --subnet-range $PUBLIC_NETWORK_CIDR \ - --no-dhcp \ - --gateway $GATEWAY \ - --allocation-pool start=$PUBLIC_NET_START,end=$PUBLIC_NET_END \ - --network public - openstack subnet create private-net \ - --subnet-range $PRIVATE_NETWORK_CIDR \ - --network private - -#. Create Virtual Router:: - - # create router - # NOTE(aschultz): In this case an IP will be automatically assigned - # out of the allocation pool for the subnet. - openstack router create vrouter - openstack router set vrouter --external-gateway public - openstack router add subnet vrouter private-net - -#. Create floating IP:: - - # create floating ip - openstack floating ip create public - -#. Launch Instance:: - - # launch instance - openstack server create --flavor tiny --image cirros --key-name default --network private --security-group basic myserver - -#. Assign Floating IP:: - - openstack server add floating ip myserver - -#. 
Test SSH:: - - # login to vm - ssh cirros@ - - -Networking Details -~~~~~~~~~~~~~~~~~~ - -Here's a basic diagram of where the connections occur in the system for this -example:: - - +-------------------------------------------------------+ - |Standalone Host | - | | - | +----------------------------+ | - | | vrouter | | - | | | | - | +------------+ +-------------+ | - | |192.168.24.4| | | | - | |192.168.24.3| |192.168.100.1| | - | +---------+------+-----------+ | - | +-------------+ | | | - | | myserver | | | | - | |192.168.100.2| | | | - | +-------+-----+ | +-+ | - | | | | | - | | | | | - | ++---------+----+-+ +-----------------+ | - | | br-int +---+ br-ctlplane | | - | | | | 192.168.24.2 | | - | +------+----------+ +--------+--------+ | - | | | | - | +------+----------+ | | - | | br-tun | | | - | | | | | - | +-----------------+ +----+---+ | - | | eth0 | | - +---------------------------------------+----+---+------+ - | - | - +-------+-----+ - | switch | - +-------------+ - -Example: 1 NIC, Using Compute with Provider Network ---------------------------------------------------- - -The following example is based on the single NIC configuration and assumes that -the environment had at least 4 total IP addresses available to it. The IPs are -used for the following: - -- 1 IP address for the OpenStack services (this is the ``--local-ip`` from the - deploy command) -- 1 IP used as a Virtual Router to provide connectivity to the Tenant network - is used for the OpenStack services -- 1 IP used for DHCP on the provider network -- The remaining IP addresses (at least 1) are used for Floating IPs on the - provider network. - -The following is an example post deployment launching of a VM using the -private tenant network and the provider network. - -#. Create helper variables for the configuration:: - - # standalone with provider networking - export OS_CLOUD=standalone - export GATEWAY=192.168.24.1 - export STANDALONE_HOST=192.168.24.2 - export VROUTER_IP=192.168.24.3 - export PUBLIC_NETWORK_CIDR=192.168.24.0/24 - export PUBLIC_NET_START=192.168.24.4 - export PUBLIC_NET_END=192.168.24.5 - export DNS_SERVER=1.1.1.1 - -#. Initial Nova and Glance setup:: - - # nova flavor - openstack flavor create --ram 512 --disk 1 --vcpu 1 --public tiny - # basic cirros image - wget https://download.cirros-cloud.net/0.4.0/cirros-0.4.0-x86_64-disk.img - openstack image create cirros --container-format bare --disk-format qcow2 --public --file cirros-0.4.0-x86_64-disk.img - # nova keypair for ssh - ssh-keygen - openstack keypair create --public-key ~/.ssh/id_rsa.pub default - -#. Setup a simple network security group:: - - # create basic security group to allow ssh/ping/dns - openstack security group create basic - # allow ssh - openstack security group rule create basic --protocol tcp --dst-port 22:22 --remote-ip 0.0.0.0/0 - # allow ping - openstack security group rule create --protocol icmp basic - # allow DNS - openstack security group rule create --protocol udp --dst-port 53:53 basic - -#. Create Neutron Networks:: - - openstack network create --external --provider-physical-network datacentre --provider-network-type flat public - openstack subnet create public-net \ - --subnet-range $PUBLIC_NETWORK_CIDR \ - --gateway $GATEWAY \ - --allocation-pool start=$PUBLIC_NET_START,end=$PUBLIC_NET_END \ - --network public \ - --host-route destination=169.254.169.254/32,gateway=$VROUTER_IP \ - --host-route destination=0.0.0.0/0,gateway=$GATEWAY \ - --dns-nameserver $DNS_SERVER - -#. 
Create Virtual Router:: - - # vrouter needed for metadata route - # NOTE(aschultz): In this case we're creating a fixed IP because we need - # to create a manual route in the subnet for the metadata service - openstack router create vrouter - openstack port create --network public --fixed-ip subnet=public-net,ip-address=$VROUTER_IP vrouter-port - openstack router add port vrouter vrouter-port - -#. Launch Instance:: - - # launch instance - openstack server create --flavor tiny --image cirros --key-name default --network public --security-group basic myserver - -#. Test SSH:: - - # login to vm - ssh cirros@ - -Networking Details -~~~~~~~~~~~~~~~~~~ - -Here's a basic diagram of where the connections occur in the system for this -example:: - - +----------------------------------------------------+ - |Standalone Host | - | | - | +------------+ +------------+ | - | | myserver | | vrouter | | - | |192.168.24.4| |192.168.24.3| | - | +---------+--+ +-+----------+ | - | | | | - | +---+--------+----+ +-----------------+ | - | | br-int +---+ br-ctlplane | | - | | | | 192.168.24.2 | | - | +------+----------+ +--------+--------+ | - | | | | - | +------+----------+ | | - | | br-tun | | | - | | | | | - | +-----------------+ +----+---+ | - | | eth0 | | - +------------------------------------+----+---+------+ - | - | - +-------+-----+ - | switch | - +-------------+ - -Example: 2 NIC, Using Compute with Tenant and Provider Networks ---------------------------------------------------------------- - -The following example is based on the dual NIC configuration and assumes that -the environment has an entire IP range available to it on the provider network. -We are assuming the following would be reserved on the provider network: - -- 1 IP address for a gateway on the provider network -- 1 IP address for OpenStack Endpoints -- 1 IP used as a Virtual Router to provide connectivity to the Tenant network - is used for the OpenStack services (is automatically assigned in this example) -- The remaining IP addresses (at least 1) are used for Floating IPs on the - provider network. - -The following is an example post deployment launching of a VM using the -private tenant network and the provider network. - -#. Create helper variables for the configuration:: - - # standalone with tenant networking and provider networking - export OS_CLOUD=standalone - export GATEWAY=192.168.24.1 - export STANDALONE_HOST=192.168.0.2 - export PUBLIC_NETWORK_CIDR=192.168.24.0/24 - export PRIVATE_NETWORK_CIDR=192.168.100.0/24 - export PUBLIC_NET_START=192.168.0.3 - export PUBLIC_NET_END=192.168.24.254 - export DNS_SERVER=1.1.1.1 - -#. Initial Nova and Glance setup:: - - # nova flavor - openstack flavor create --ram 512 --disk 1 --vcpu 1 --public tiny - # basic cirros image - wget https://download.cirros-cloud.net/0.4.0/cirros-0.4.0-x86_64-disk.img - openstack image create cirros --container-format bare --disk-format qcow2 --public --file cirros-0.4.0-x86_64-disk.img - # nova keypair for ssh - ssh-keygen - openstack keypair create --public-key ~/.ssh/id_rsa.pub default - -#. Setup a simple network security group:: - - # create basic security group to allow ssh/ping/dns - openstack security group create basic - # allow ssh - openstack security group rule create basic --protocol tcp --dst-port 22:22 --remote-ip 0.0.0.0/0 - # allow ping - openstack security group rule create --protocol icmp basic - # allow DNS - openstack security group rule create --protocol udp --dst-port 53:53 basic - -#. 
Create Neutron Networks:: - - openstack network create --external --provider-physical-network datacentre --provider-network-type flat public - openstack network create --internal private - openstack subnet create public-net \ - --subnet-range $PUBLIC_NETWORK_CIDR \ - --no-dhcp \ - --gateway $GATEWAY \ - --allocation-pool start=$PUBLIC_NET_START,end=$PUBLIC_NET_END \ - --network public - openstack subnet create private-net \ - --subnet-range $PRIVATE_NETWORK_CIDR \ - --network private - -#. Create Virtual Router:: - - # create router - # NOTE(aschultz): In this case an IP will be automatically assigned - # out of the allocation pool for the subnet. - openstack router create vrouter - openstack router set vrouter --external-gateway public - openstack router add subnet vrouter private-net - -#. Create floating IP:: - - # create floating ip - openstack floating ip create public - -#. Launch Instance:: - - # launch instance - openstack server create --flavor tiny --image cirros --key-name default --network private --security-group basic myserver - -#. Assign Floating IP:: - - openstack server add floating ip myserver - -#. Test SSH:: - - # login to vm - ssh cirros@ - -Networking Details -~~~~~~~~~~~~~~~~~~ - -Here's a basic diagram of where the connections occur in the system for this -example:: - - +---------------------------------------------------------------------+ - |Standalone Host | - | | - | +----------------------------+ | - | | vrouter | | - | | | | - | +------------+ +-------------+ | - | |192.168.24.4| | | | - | |192.168.24.3| |192.168.100.1| | - | +---------+------+-----------+ | - | +-------------+ | | | - | | myserver | | | | - | |192.168.100.2| | | | - | +-------+-----+ | +-+ | - | | | | | - | ++---------+----+-+ +-----------------+ | - | | br-int +---+ br-ctlplane | | - | | | | 192.168.24.2 | | - | +------+----------+ +------------+----+ | - | | | | - | +------+----------+ | | - | | br-tun | | | - | | | | | - | +-----------------+ | +----------+ | - | +-----+---+ | eth0 | | - | | eth1 | | 10.0.1.4 | | - +----------------------------------------+-----+---+---+-----+----+---+ - | | - | | - +------+------+ | - | switch +------+ - +-------------+ - -Example: 2 nodes, 2 NIC, Using remote Compute with Tenant and Provider Networks -------------------------------------------------------------------------------- - -The following example uses two nodes and the split control plane -method to simulate a distributed edge computing deployment. The first -Heat stack deploys a controller node which could run in a Centralized -Data Center. The second Heat stack deploys a second node which could -run at another location on the Aggregation Edge Layer. The second node -runs the nova-compute service, Ceph, and the cinder-volume service. -Both nodes use the networking configuration found in the 2 NIC, Using -Compute with Tenant and Provider Network example. - -Deploy the central controller node -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -To deploy the first node, follow the Deploying a Standalone OpenStack -node section described earlier in the document but also include the -following parameters: - -.. code-block:: yaml - - parameter_defaults: - GlanceBackend: swift - StandaloneExtraConfig: - oslo_messaging_notify_use_ssl: false - oslo_messaging_rpc_use_ssl: false - -The above configures the Swift backend for Glance so that images are -pulled by the remote compute node over HTTP and ensures that Oslo -messaging does not use SSL for RPC and notifications. 
Note that in a -production deployment this will result in sending unencrypted traffic -over WAN connections. - -When configuring the network keep in mind that it will be necessary -for both standalone systems to be able to communicate with each -other. E.g. the $IP for the first node will be in the endpoint map -that later will be extracted from the first node and passed as a -parameter to the second node for it to access its endpoints. In this -standalone example both servers share an L2 network. In a production -edge deployment it may be necessary instead to route. - -When deploying the first node with ``openstack tripleo deploy``, pass -the ``--keep-running`` option so the Heat processes continue to run. - -Extract deployment information from the controller node -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The Heat processes were kept running in the previous step because -this allows the Heat stack to be queried after the deployment in order -to extract parameters that the second node's deployment will need as -input. To extract these parameters into separate files in a directory, -(e.g. `DIR=export_control_plane`), which may then be exported to the -second node, run the following: - -.. code-block:: bash - - unset OS_CLOUD - export OS_AUTH_TYPE=none - export OS_ENDPOINT=http://127.0.0.1:8006/v1/admin - - openstack stack output show standalone EndpointMap --format json \ - | jq '{"parameter_defaults": {"EndpointMapOverride": .output_value}}' \ - > $DIR/endpoint-map.json - - openstack stack output show standalone HostsEntry -f json \ - | jq -r '{"parameter_defaults":{"ExtraHostFileEntries": .output_value}}' \ - > $DIR/extra-host-file-entries.json - -In addition to the above create a file in the same directory, -e.g. `$DIR/oslo.yaml`, containing Oslo overrides for the second -compute node: - -.. code-block:: yaml - - parameter_defaults: - StandaloneExtraConfig: - oslo_messaging_notify_use_ssl: false - oslo_messaging_rpc_use_ssl: false - -In addition to the parameters above, add the -`oslo_messaging_notify_password` and `oslo_messaging_rpc_password` -parameters. Their values may be extracted from -`/etc/puppet/hieradata/service_configs.json` on the first node. The -following command will do this for you: - -.. code-block:: bash - - sudo egrep "oslo.*password" /etc/puppet/hieradata/service_configs.json \ - | sed -e s/\"//g -e s/,//g >> $DIR/oslo.yaml - -In addition to the above, you need to create $DIR/$HOME/export_control_plane/all-nodes-extra-map-data.json -which will contain the following AllNodesExtraMapData. You first need to locate the -group_vars generated by tripleo-ansible, located in the config-download directory. - -Then you can generate the correct Heat environment with the following command: - -.. code-block:: bash - - STANDALONE_LATEST=$(find $HOME/standalone-ansible-* -type d -printf "%T@ %p\n" | sort -n | cut -d' ' -f 2- | tail -n 1) - python3 -c "import json; t = {'parameter_defaults': {'AllNodesExtraMapData': json.loads(open('$HOME/$STANDALONE_LATEST/group_vars/overcloud.json').read()) }}; print(t)" > $DIR/all-nodes-extra-map-data.json - -Set a copy of the first node's passwords aside for the second node: - -.. code-block:: bash - - cp $HOME/tripleo-standalone-passwords.yaml $DIR/passwords.yaml - -Put a copy of the directory containing the extracted information, -e.g. `$DIR`, on the second node to be deployed. 
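For example, a minimal way to do this, assuming the second node is reachable
over SSH as the stack user (the address below is just a placeholder), is:

.. code-block:: bash

   # Copy the extracted control plane data to the second node
   scp -r $DIR stack@<second-node-ip>:~/export_control_plane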
- -Deploy the remote compute node -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -On a second node, follow the procedure at the beginning of this -document to deploy a standalone OpenStack node with Ceph up to the -point where you have the following files: - -- `$HOME/standalone_parameters.yaml` -- `$HOME/containers-prepare-parameters.yaml` -- `$HOME/deployed_ceph.yaml` - -When setting the `$IP` of the second node, keep in mind that it should -have a way to reach the endpoints of the first node as found in the -endpoint-map.json, which was extracted from the first node. - -Create an environment file, e.g. `$HOME/standalone_edge.yaml`, with the -following content: - -.. code-block:: yaml - - resource_registry: - OS::TripleO::Services::CACerts: OS::Heat::None - OS::TripleO::Services::CinderApi: OS::Heat::None - OS::TripleO::Services::CinderScheduler: OS::Heat::None - OS::TripleO::Services::Clustercheck: OS::Heat::None - OS::TripleO::Services::HAproxy: OS::Heat::None - OS::TripleO::Services::Horizon: OS::Heat::None - OS::TripleO::Services::Keystone: OS::Heat::None - OS::TripleO::Services::Memcached: OS::Heat::None - OS::TripleO::Services::MySQL: OS::Heat::None - OS::TripleO::Services::NeutronApi: OS::Heat::None - OS::TripleO::Services::NeutronDhcpAgent: OS::Heat::None - OS::TripleO::Services::NovaApi: OS::Heat::None - OS::TripleO::Services::NovaConductor: OS::Heat::None - OS::TripleO::Services::NovaConsoleauth: OS::Heat::None - OS::TripleO::Services::NovaIronic: OS::Heat::None - OS::TripleO::Services::NovaMetadata: OS::Heat::None - OS::TripleO::Services::NovaPlacement: OS::Heat::None - OS::TripleO::Services::NovaScheduler: OS::Heat::None - OS::TripleO::Services::NovaVncProxy: OS::Heat::None - OS::TripleO::Services::OsloMessagingNotify: OS::Heat::None - OS::TripleO::Services::OsloMessagingRpc: OS::Heat::None - OS::TripleO::Services::Redis: OS::Heat::None - OS::TripleO::Services::SwiftProxy: OS::Heat::None - OS::TripleO::Services::SwiftStorage: OS::Heat::None - OS::TripleO::Services::SwiftRingBuilder: OS::Heat::None - - parameter_defaults: - CinderRbdAvailabilityZone: edge1 - GlanceBackend: swift - GlanceCacheEnabled: true - -The above file disables additional resources which -`/usr/share/openstack-tripleo-heat-templates/environments/standalone/standalone-tripleo.yaml` -does not disable since it represents a compute node which will consume -those resources from the earlier deployed controller node. It also -sets the Glance blackened to Swift and enables Glance caching so that -after images are pulled from the central node once, they do not need -to be pulled again. Finally the above sets the Cinder RBD availability -zone a separate availability zone for the remote compute and cinder -volume service. - -Deploy the second node with the following: - -.. 
code-block:: bash - - sudo openstack tripleo deploy \ - --templates \ - --local-ip=$IP/$NETMASK \ - -r /usr/share/openstack-tripleo-heat-templates/roles/Standalone.yaml \ - -e /usr/share/openstack-tripleo-heat-templates/environments/standalone/standalone-tripleo.yaml \ - -e /usr/share/openstack-tripleo-heat-templates/environments/cephadm/cephadm-rbd-only.yaml \ - -e $HOME/containers-prepare-parameters.yaml \ - -e $HOME/standalone_parameters.yaml \ - -e $HOME/deployed_ceph.yaml \ - -e $HOME/standalone_edge.yaml \ - -e $HOME/export_control_plane/passwords.yaml \ - -e $HOME/export_control_plane/endpoint-map.json \ - -e $HOME/export_control_plane/all-nodes-extra-map-data.json \ - -e $HOME/export_control_plane/extra-host-file-entries.json \ - -e $HOME/export_control_plane/oslo.yaml \ - --output-dir $HOME - -The example above assumes that ``export_control_plane`` is the name -of the directory which contains the content extracted from the -controller node. - -Discover the remote compute node from the central controller node -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -After completing the prior steps, the `openstack` command will only -work on the central node because of how the ``OS_CLOUD`` environment -variable works with that nodes /root/.config/openstack folder, which -in turn assumes that keystone is running the central node and not -the edge nodes. To run `openstack` commands on edge nodes, override -the auth URL to point to keystone on the central node. - -On the central controller node run the following command to discover -the new compute node: - -.. code-block:: bash - - sudo docker exec -it nova_api nova-manage cell_v2 discover_hosts --verbose - -List the available zones, hosts, and hypervisors and look for the new node: - -.. code-block:: bash - - export OS_CLOUD=standalone - openstack availability zone list - openstack host list - openstack hypervisor list - -Take note of the zone and host list so that you can use that -information to schedule an instance on the new compute node. The -following example shows the result of deploying two new external -compute nodes:: - - [root@overcloud0 ~]# sudo docker exec -it nova_api nova-manage cell_v2 discover_hosts --verbose - Found 2 cell mappings. - Skipping cell0 since it does not contain hosts. 
- Getting computes from cell 'default': 631301c8-1744-4beb-8aa0-6a90aef6cd2d - Checking host mapping for compute host 'overcloud0.localdomain': 0884a9fc-9ef6-451c-ab22-06f825484e5e - Checking host mapping for compute host 'overcloud1.localdomain': 00fb920d-ef12-4a2a-9aa4-ba987d8a5e17 - Creating host mapping for compute host 'overcloud1.localdomain': 00fb920d-ef12-4a2a-9aa4-ba987d8a5e17 - Checking host mapping for compute host 'overcloud2.localdomain': 3e3a3cd4-5959-405a-b632-0b64415c43f2 - Creating host mapping for compute host 'overcloud2.localdomain': 3e3a3cd4-5959-405a-b632-0b64415c43f2 - Found 2 unmapped computes in cell: 631301c8-1744-4beb-8aa0-6a90aef6cd2d - [root@overcloud0 ~]# openstack hypervisor list - +----+------------------------+-----------------+--------------+-------+ - | ID | Hypervisor Hostname | Hypervisor Type | Host IP | State | - +----+------------------------+-----------------+--------------+-------+ - | 1 | overcloud0.example.com | QEMU | 192.168.24.2 | up | - | 2 | overcloud1.example.com | QEMU | 192.168.24.7 | up | - | 3 | overcloud2.example.com | QEMU | 192.168.24.8 | up | - +----+------------------------+-----------------+--------------+-------+ - [root@overcloud0 ~]# - -Note that the hostnames of the hypervisors above were set prior to the -deployment. - -On the central controller node run the following to create a host -aggregate for a remote compute node: - -.. code-block:: bash - - openstack aggregate create HA-edge1 --zone edge1 - openstack aggregate add host HA-edge1 overcloud1.localdomain - -To test, follow the example from "2 NIC, Using remote Compute with -Tenant and Provider Networks", except when creating the instance use -the `--availability-zone` option to schedule the instance on the new -remote compute node: - -.. code-block:: bash - - openstack server create --flavor tiny --image cirros \ - --key-name demokp --network private --security-group basic \ - myserver --availability-zone edge1 - -On the first node, run the following command to create a volume on the -second node: - -.. code-block:: bash - - openstack volume create --size 1 --availability-zone edge1 myvol - -On the second node, verify that the instance is running locally and -and that the Cinder volume was created on the local Ceph server:: - - [root@overcloud1 ~]# docker exec nova_libvirt virsh list - Id Name State - ---------------------------------------------------- - 1 instance-00000001 running - - [root@overcloud1 ~]# docker exec -ti ceph-mon rbd -p volumes ls -l - NAME SIZE PARENT FMT PROT LOCK - volume-f84ae4f5-cc25-4ed4-8a58-8b1408160e03 1GiB 2 - [root@overcloud1 ~]# - -Topology Details -~~~~~~~~~~~~~~~~ - -Here's a basic diagram of where the connections occur in the system for this -example:: - - +-------------------------+ +-------------------------+ - |standalone|compute|edge|1| |standalone|compute|edge|2| - +-----------------------+-+ +-+-----------------------+ - | | - +----+-------------+----------+ - |standalone|controller|central| - +-----------------------------+ - diff --git a/deploy-guide/source/deployment/template_deploy.rst b/deploy-guide/source/deployment/template_deploy.rst deleted file mode 100644 index 5a7c27b7..00000000 --- a/deploy-guide/source/deployment/template_deploy.rst +++ /dev/null @@ -1,85 +0,0 @@ -Deploying with Heat Templates -============================= - -It is possible to use the ``--templates`` and ``--environment-file`` -options to override specific templates or even deploy using a separate -set of templates entirely. 
- - -Deploying an Overcloud using the default templates --------------------------------------------------- - -The ``--templates`` option without an argument enables deploying using -the packaged Heat templates:: - - openstack overcloud deploy --templates - -.. note:: - - The default location for the templates is - `/usr/share/openstack-tripleo-heat-templates`. - - -.. _override-heat-templates: - -Overriding specific templates with local versions -------------------------------------------------- - -You may use heat environment files (via the ``--environment-file`` or ``-e`` -option), combined with the ``--templates`` option to override specific -templates, e.g to test a bugfix outside of the location of the packaged -templates. - -The mapping between heat resource types and the underlying templates can be -found in -`/usr/share/\ -openstack-tripleo-heat-templates/overcloud-resource-registry-puppet.j2.yaml` - -Here is an example of copying a specific resource template and overriding -so the deployment uses the local version:: - - mkdir local_templates - cp /usr/share/openstack-tripleo-heat-templates/puppet/controller-puppet.yaml local_templates - cat > override_templates.yaml << EOF - resource_registry: - OS::TripleO::Controller: local_templates/controller-puppet.yaml - EOF - openstack overcloud deploy --templates --environment-file override_templates.yaml - -.. note:: - - The ``--environment-file``/``-e`` option may be specified multiple times, - if duplicate keys are specified in the environment files, the last one - takes precedence. - -.. note:: - - You must also pass the environment files (again using the ``-e`` or - ``--environment-file`` option) whenever you make subsequent changes to the - overcloud, such as :doc:`../post_deployment/scale_roles`, - :doc:`../post_deployment/delete_nodes` or - :doc:`../post_deployment/upgrade/minor_update`. - -.. _custom-template-location: - -Using a custom location for all templates ------------------------------------------ - -You may specify a path to the ``--templates`` option, such that the packaged -tree may be copied to another location, which is useful e.g for developer usage -where you wish to check the templates into a revision control system. - -.. note:: - - Use caution when using this approach as you will need to rebase any local - changes on updates to the openstack-tripleo-heat-templates package, and - care will be needed to avoid modifying anything in the tree which the CLI - tools rely on (such as top-level parameters). In many cases using the - :doc:`ExtraConfig <../features/extra_config>` interfaces or specific template overrides - as outlined above may be preferable. - -Here is an example of copying the entire tripleo-heat-templates tree to a -local directory and launching a deployment using the new location:: - - cp -r /usr/share/openstack-tripleo-heat-templates /home/stack/ - openstack overcloud deploy --templates /home/stack/openstack-tripleo-heat-templates diff --git a/deploy-guide/source/deployment/tips_tricks.rst b/deploy-guide/source/deployment/tips_tricks.rst deleted file mode 100644 index d89ab2c2..00000000 --- a/deploy-guide/source/deployment/tips_tricks.rst +++ /dev/null @@ -1,380 +0,0 @@ -Tips and Tricks for containerizing services -=========================================== - -This document contains a list of tips and tricks that are useful when -containerizing an OpenStack service. - -Important Notes ---------------- - -Podman ------- - -Prior to Stein, containerized OpenStack deployments used Docker. 
- -Starting with the Stein release, Docker is no longer part of OpenStack, -and Podman has taken its place. The notes here are regarding Stein and later. - -Monitoring containers ---------------------- - -It's often useful to monitor the running containers and see what has been -executed and what not. The puppet containers are created and removed -automatically unless they fail. For all the other containers, it's enough to -monitor the output of the command below:: - - $ watch -n 0.5 sudo podman ps -a --filter label=managed_by=tripleo_ansible - -.. admonition:: Train - :class: stable - - :: - - $ watch -n 0.5 sudo podman ps -a --filter label=managed_by=paunch - -.. _debug-containers: - -Viewing container logs ----------------------- - -You can view the output of the main process running in a container by running:: - - $ sudo podman logs $CONTAINER_ID_OR_NAME - -Since the Stein release, standard out and standard error from containers are -captured in `/var/log/containers/stdouts`. - -We export traditional logs from containers into the `/var/log/containers` -directory on the host, where you can look at them. - -systemd and podman ------------------- - -Throughout this document you'll find references to direct podman commands -for things like restarting services. These are valid and supported methods, -but it's worth noting that services are tied into the systemd management -system, which is often the preferred way to operate. - -Restarting nova_scheduler for example:: - - $ sudo systemctl restart tripleo_nova_scheduler - -Stopping a container with systemd:: - - $ sudo systemctl stop tripleo_nova_scheduler - - -.. _toggle_debug: - -Toggle debug ------------- - -For services that support `reloading their configuration at runtime`_:: - - $ sudo podman exec -u root nova_scheduler crudini --set /etc/nova/nova.conf DEFAULT debug true - $ sudo podman kill -s SIGHUP nova_scheduler - -.. _reloading their configuration at runtime: https://storyboard.openstack.org/#!/story/2001545 - -Restart the container to turn back the configuration to normal:: - - $ sudo podman restart nova_scheduler - -Otherwise, if the service does not yet support reloading its configuration, it -is necessary to change the configuration on the host filesystem and restart the -container:: - - $ sudo crudini --set /var/lib/config-data/puppet-generated/nova/etc/nova/nova.conf DEFAULT debug true - $ sudo podman restart nova_scheduler - -Apply the inverse change to restore the default log verbosity:: - - $ sudo crudini --set /var/lib/config-data/puppet-generated/nova/etc/nova/nova.conf DEFAULT debug false - $ sudo podman restart nova_scheduler - -Debugging container failures ----------------------------- - -The following commands are useful for debugging containers. - -* **inspect**: This command allows for inspecting the container's structure and - metadata. It provides info about the bind mounts on the container, the - container's labels, the container's command, etc:: - - $ sudo podman inspect $CONTAINER_ID_OR_NAME - -* **top**: Viewing processes running within a container is trivial with Podman:: - - $ sudo podman top $CONTAINER_ID_OR_NAME - -* **exec**: Running commands on or attaching to a running container is extremely - useful to get a better understanding of what's happening in the container. - It's possible to do so by running the following command:: - - $ sudo podman exec -ti $CONTAINER_ID_OR_NAME /bin/bash - - Replace the `/bin/bash` above with other commands to run oneshot commands. 
For - example:: - - $ sudo podman exec -ti mysql mysql -u root -p $PASSWORD - - The above will start a mysql shell on the mysql container. - -* **export** When the container fails, it's basically impossible to know what - happened. It's possible to get the logs from docker but those will contain - things that were printed on the stdout by the entrypoint. Exporting the - filesystem structure from the container will allow for checking other logs - files that may not be in the mounted volumes:: - - $ sudo podman export $CONTAINER_ID_OR_NAME -o $CONTAINER_ID_OR_NAME.tar - -Debugging with tripleo_container_manage Ansible role ----------------------------------------------------- - -The debugging manual for tripleo_container_manage is documented in the role_ -directly. - -.. _role: https://docs.openstack.org/tripleo-ansible/latest/roles/role-tripleo_container_manage.html#debug - -Debugging with Paunch ---------------------- - -.. note:: During Ussuri cycle, Paunch has been replaced by the - tripleo_container_manage Ansible role. Therefore, the following block - is deprecated in favor of the new role which contains a Debug manual. - -The ``paunch debug`` command allows you to perform specific actions on a given -container. This can be used to: - -* Run a container with a specific configuration. -* Dump the configuration of a given container in either json or yaml. -* Output the docker command line used to start the container. -* Run a container with any configuration additions you wish such that you can - run it with a shell as any user etc. - -The configuration options you will likely be interested in include: - -:: - - --file YAML or JSON file containing configuration data - --action Action can be one of: "dump-json", "dump-yaml", - "print-cmd", or "run" - --container Name of the container you wish to manipulate - --interactive Run container in interactive mode - modifies config - and execution of container - --shell Similar to interactive but drops you into a shell - --user Start container as the specified user - --overrides JSON configuration information used to override - default config values - --default-runtime Default runtime for containers. Can be docker or - podman. - -``file`` is the name of the configuration file to use -containing the configuration for the container you wish to use. -TripleO creates configuration files for starting containers in -``/var/lib/tripleo-config/container-startup-config``. If you look in this directory -you will see a number of files corresponding with the steps in -TripleO heat templates. Most of the time, you will likely want to use -``/var/lib/tripleo-config/container-startup-config/step_4`` -as it contains most of the final startup configurations for the running -containers. - -``shell``, ``user`` and ``interactive`` are available as shortcuts that -modify the configuration to easily allow you to run an interactive session -in a given container. - -To make sure you get the right container you can use the ``paunch list`` -command to see what containers are running and which config id they -are using. This config id corresponds to which file you will find the -container configuration in. - -TripleO uses ``managed_by`` and ``config_id`` labels to help identify the -containers it is managing. 
These can be checked by inspecting the labels section -like so: - -:: - - # podman inspect nova_api | jq '.[0].Config.Labels | "managed_by=\(.managed_by) config_id=\(.config_id)"' - "managed_by=tripleo-Controller config_id=tripleo_step4" - -Note that if you wish to replace a currently running container you will -want to ``sudo podman rm -f`` the running container before starting a new one. - -Here is an example of using ``paunch debug`` to start a root shell inside the -heat api container: - -:: - - # paunch debug --file /var/lib/tripleo-config/container-startup-config/step_4 --managed-by=tripleo-Controller --config-id=tripleo_step4 --interactive --shell --user root --container nova_api --action run - -This will drop you into an interactive session inside the heat api container, -starting /bin/bash running as root. - -To see how this container is started by TripleO: - -:: - - # paunch debug --file /var/lib/tripleo-config/container-startup-config/step_4 --managed-by=tripleo-Controller --config-id=tripleo_step4 --container nova_api --action print-cmd - - podman run --name nova_api-1jpm5kyv --label config_id=tripleo_step4 --label container_name=nova_api --label managed_by=tripleo-Controller --label config_data={"environment": {"KOLLA_CONFIG_STRATEGY": "COPY_ALWAYS", "TRIPLEO_CONFIG_HASH": "5cbcd2d39667626874f547214d3980ec"}, "healthcheck": {"test": "/openstack/healthcheck"}, "image": "undercloud-0.ctlplane.redhat.local:8787/rh-osbs/rhosp16-openstack-nova-api:16.1_20210726.1", "net": "host", "privileged": false, "restart": "always", "start_order": 2, "user": "root", "volumes": ["/etc/hosts:/etc/hosts:ro", "/etc/localtime:/etc/localtime:ro", "/etc/pki/ca-trust/extracted:/etc/pki/ca-trust/extracted:ro", "/etc/pki/ca-trust/source/anchors:/etc/pki/ca-trust/source/anchors:ro", "/etc/pki/tls/certs/ca-bundle.crt:/etc/pki/tls/certs/ca-bundle.crt:ro", "/etc/pki/tls/certs/ca-bundle.trust.crt:/etc/pki/tls/certs/ca-bundle.trust.crt:ro", "/etc/pki/tls/cert.pem:/etc/pki/tls/cert.pem:ro", "/dev/log:/dev/log", "/etc/puppet:/etc/puppet:ro", "/var/log/containers/nova:/var/log/nova:z", "/var/log/containers/httpd/nova-api:/var/log/httpd:z", "/var/lib/kolla/config_files/nova_api.json:/var/lib/kolla/config_files/config.json:ro", "/var/lib/config-data/puppet-generated/nova:/var/lib/kolla/config_files/src:ro"]} --conmon-pidfile=/var/run/nova_api-1jpm5kyv.pid --detach=true --env=KOLLA_CONFIG_STRATEGY=COPY_ALWAYS --env=TRIPLEO_CONFIG_HASH=5cbcd2d39667626874f547214d3980ec --net=host --privileged=false --user=root --volume=/etc/hosts:/etc/hosts:ro --volume=/etc/localtime:/etc/localtime:ro --volume=/etc/pki/ca-trust/extracted:/etc/pki/ca-trust/extracted:ro --volume=/etc/pki/ca-trust/source/anchors:/etc/pki/ca-trust/source/anchors:ro --volume=/etc/pki/tls/certs/ca-bundle.crt:/etc/pki/tls/certs/ca-bundle.crt:ro --volume=/etc/pki/tls/certs/ca-bundle.trust.crt:/etc/pki/tls/certs/ca-bundle.trust.crt:ro --volume=/etc/pki/tls/cert.pem:/etc/pki/tls/cert.pem:ro --volume=/dev/log:/dev/log --volume=/etc/puppet:/etc/puppet:ro --volume=/var/log/containers/nova:/var/log/nova:z --volume=/var/log/containers/httpd/nova-api:/var/log/httpd:z --volume=/var/lib/kolla/config_files/nova_api.json:/var/lib/kolla/config_files/config.json:ro --volume=/var/lib/config-data/puppet-generated/nova:/var/lib/kolla/config_files/src:ro undercloud-0.ctlplane.redhat.local:8787/rh-osbs/rhosp16-openstack-nova-api:16.1_20210726.1 - -You can also dump the configuration of a container to a file so you can -edit it and rerun it with different a different 
configuration: - -:: - - # paunch debug --file /var/lib/tripleo-config/container-startup-config/step_4 --container nova_api --action dump-json > nova_api.json - -You can then use ``nova_api.json`` as your ``--file`` argument after -editing it to your liking. - -To add configuration elements on the command line you can use the -``overrides`` option. In this example I'm adding a health check to -the container: - -:: - - # paunch debug --file nova_api.json --overrides '{"health-cmd": "/usr/bin/curl -f http://localhost:8004/v1/", "health-interval": "30s"}' --container nova_api --managed-by=tripleo-Controller --config-id=tripleo_step4 --action run - f47949a7cb205083a3adaa1530fcdd4ed7dcfa9b9afb4639468357b36786ecf0 - -Debugging container-puppet.py ------------------------------ - -The :ref:`container-puppet.py` script manages the config file generation and -puppet tasks for each service. This also exists in the `common` directory -of tripleo-heat-templates. When writing these tasks, it's useful to be -able to run them manually instead of running them as part of the entire -stack. To do so, one can run the script as shown below:: - - CONFIG=/path/to/task.json /path/to/container-puppet.py - -.. note:: Prior to the Train cycle, container-puppet.py was called - docker-puppet.py which was located in the `docker` directory. - -The json file must follow the following form:: - - [ - { - "config_image": ..., - "config_volume": ..., - "puppet_tags": ..., - "step_config": ... - } - ] - - -Using a more realistic example. Given a `puppet_config` section like this:: - - puppet_config: - config_volume: glance_api - puppet_tags: glance_api_config,glance_api_paste_ini,glance_swift_config,glance_cache_config - step_config: {get_attr: [GlanceApiPuppetBase, role_data, step_config]} - config_image: {get_param: DockerGlanceApiConfigImage} - - -Would generated a json file called `/var/lib/container-puppet/container-puppet-tasks2.json` that looks like:: - - [ - { - "config_image": "tripleomaster/centos-binary-glance-api:latest", - "config_volume": "glance_api", - "puppet_tags": "glance_api_config,glance_api_paste_ini,glance_swift_config,glance_cache_config", - "step_config": "include ::tripleo::profile::base::glance::api\n" - } - ] - - -Setting the path to the above json file as the `CONFIG` environment -variable passed to `container-puppet.py` will create a container using -the `centos-binary-glance-api:latest` image and it and run puppet on a -catalog restricted to the given puppet `puppet_tags`. - -As mentioned above, it's possible to create custom json files and call -`container-puppet.py` manually, which makes developing and debugging puppet -steps easier. - -`container-puppet.py` also supports the environment variable `SHOW_DIFF`, -which causes it to print out a docker diff of the container before and -after the configuration step has occurred. - -By default `container-puppet.py` runs things in parallel. This can make -it hard to see the debug output of a given container so there is a -`PROCESS_COUNT` variable that lets you override this. A typical debug -run for container-puppet might look like:: - - SHOW_DIFF=True PROCESS_COUNT=1 CONFIG=glance_api.json ./container-puppet.py - -Testing a code fix in a container ---------------------------------- -Let's assume that we need to test a code patch or an updated package in a -container. We will look at a few steps that can be taken to test a fix -in a container on an existing deployment. 
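Before building a patched image, it can help to confirm exactly which image a running container was started from. A minimal sketch, assuming the database container is named ``mysql`` as in the earlier examples::

    $ sudo podman inspect mysql --format '{{.ImageName}}'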
- -For example let's update packages for the mariadb container:: - - (undercloud) [stack@undercloud ~]$ sudo podman images | grep mariadb - 192.168.24.1:8787/tripleomaster/centos-binary-mariadb latest 035a8237c376 2 weeks ago 723.5 MB - -So container image `035a8237c376` is the one we need to base our work on. Since -container images are supposed to be immutable we will base our work off of -`035a8237c376` and create a new one:: - - mkdir -p galera-workaround - cat > galera-workaround/Dockerfile < - - For minimal **HA (high availability)** deployment you need at least 3 Overcloud - Controller machines and 2 Overcloud Compute machines. - -The baremetal machines must meet the following minimum specifications: - -* 8 core CPU -* 12 GB memory -* 60 GB free disk space - -Larger systems are recommended for production deployments, however. - -For instance, the undercloud needs a bit more capacity, especially regarding RAM (minimum of 16G is advised) -and is pretty intense for the I/O - fast disks (SSD, SAS) are strongly advised. - -Please also note the undercloud needs space in order to store twice the "overcloud-full" image (one time -in its glance, one time in /var/lib subdirectories for PXE/TFTP). - -TripleO is supporting only the following operating systems: - -* RHEL 9 (x86_64) -* CentOS Stream 9 (x86_64) - -Please also ensure your node clock is set to UTC in order to prevent any issue -when the OS hwclock syncs to the BIOS clock before applying timezone offset, -causing files to have a future-dated timestamp. - - -Preparing the Baremetal Environment -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Networking -^^^^^^^^^^ - -The overcloud nodes will be deployed from the undercloud machine and therefore the machines need to have their network settings modified to allow for the overcloud nodes to be PXE booted using the undercloud machine. As such, the setup requires that: - -* All overcloud machines in the setup must support IPMI -* A management provisioning network is setup for all of the overcloud machines. - One NIC from every machine needs to be in the same broadcast domain of the - provisioning network. In the tested environment, this required setting up a new - VLAN on the switch. Note that you should use the same NIC on each of the - overcloud machines ( for example: use the second NIC on each overcloud - machine). This is because during installation we will need to refer to that NIC - using a single name across all overcloud machines e.g. em2 -* The provisioning network NIC should not be the same NIC that you are using - for remote connectivity to the undercloud machine. During the undercloud - installation, a openvswitch bridge will be created for Neutron and the - provisioning NIC will be bridged to the openvswitch bridge. As such, - connectivity would be lost if the provisioning NIC was also used for remote - connectivity to the undercloud machine. -* The overcloud machines can PXE boot off the NIC that is on the private VLAN. - In the tested environment, this required disabling network booting in the BIOS - for all NICs other than the one we wanted to boot and then ensuring that the - chosen NIC is at the top of the boot order (ahead of the local hard disk drive - and CD/DVD drives). -* For each overcloud machine you have: the MAC address of the NIC that will PXE - boot on the provisioning network the IPMI information for the machine (i.e. IP - address of the IPMI NIC, IPMI username and password) - -Refer to the following diagram for more information - -.. 
image:: ../_images/TripleO_Network_Diagram_.jpg - -Setting Up The Undercloud Machine -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -#. Select a machine within the baremetal environment on which to install the - undercloud. -#. Install RHEL 9 x86_64 or CentOS Stream 9 x86_64 on this machine. -#. If needed, create a non-root user with sudo access to use for installing the - Undercloud:: - - sudo useradd stack - sudo passwd stack # specify a password - echo "stack ALL=(root) NOPASSWD:ALL" | sudo tee -a /etc/sudoers.d/stack - sudo chmod 0440 /etc/sudoers.d/stack - -.. admonition:: RHEL - :class: rhel - - If using RHEL, register the Undercloud for package installations/updates. - - .. admonition:: RHEL Portal Registration - :class: portal - - Register the host machine using Subscription Management:: - - sudo subscription-manager register --username="[your username]" --password="[your password]" - # Find this with `subscription-manager list --available` - sudo subscription-manager attach --pool="[pool id]" - # Verify repositories are available - sudo subscription-manager repos --list - # Enable repositories needed - sudo subscription-manager repos \ - --enable=rhel-8-for-x86_64-baseos-eus-rpms \ - --enable=rhel-8-for-x86_64-appstream-eus-rpms \ - --enable=rhel-8-for-x86_64-highavailability-eus-rpms \ - --enable=ansible-2.9-for-rhel-8-x86_64-rpms - - .. admonition:: RHEL Satellite Registration - :class: satellite - - To register the host machine to a Satellite, the following repos must - be synchronized on the Satellite and enabled for registered systems:: - - rhel-8-for-x86_64-baseos-eus-rpms - rhel-8-for-x86_64-appstream-eus-rpms - rhel-8-for-x86_64-highavailability-eus-rpms - ansible-2.9-for-rhel-8-x86_64-rpms - - See the `Red Hat Satellite User Guide`_ for how to configure the system to - register with a Satellite server. It is suggested to use an activation - key that automatically enables the above repos for registered systems. - -.. _Red Hat Satellite User Guide: https://access.redhat.com/documentation/en-US/Red_Hat_Satellite/ - - -Validations -^^^^^^^^^^^ - -You can run the ``prep`` validations to verify the hardware. Later in -the process, the validations will be run by the undercloud processes. -Refer to the Ansible section for running directly the validations -over baremetal nodes `validations_no_undercloud`_. - -Configuration Files -^^^^^^^^^^^^^^^^^^^ - -.. _instackenv: - -instackenv.json -^^^^^^^^^^^^^^^ - -Create a JSON file describing your Overcloud baremetal nodes, call it -``instackenv.json`` and place in your home directory. The file should contain -a JSON object with the only field ``nodes`` containing list of node -descriptions. - -Each node description should contains required fields: - -* ``pm_type`` - driver for Ironic nodes, see `Ironic Hardware Types`_ - for details - -* ``pm_addr`` - node BMC IP address (hypervisor address in case of virtual - environment) - -* ``pm_user``, ``pm_password`` - node BMC credentials - -Some fields are optional if you're going to use introspection later: - -* ``ports`` - list of baremetal port objects, a map specifying the following - keys: address, physical_network (optional) and local_link_connection - (optional). Optional for bare metal. 
Example:: - - "ports": [ - { - "address": "52:54:00:87:c8:2f", - "physical_network": "physical-network", - "local_link_connection": { - "switch_info": "switch", - "port_id": "gi1/0/11", - "switch_id": "a6:18:66:33:cb:48" - } - } - ] - -* ``cpu`` - number of CPUs in the system - -* ``arch`` - CPU architecture (common values are ``i386`` and ``x86_64``) - -* ``memory`` - memory size in MiB - -* ``disk`` - hard drive size in GiB - -It is also possible (but optional) to set Ironic node capabilities directly -in the JSON file. This can be useful for assigning node profiles or setting -boot options at registration time: - -* ``capabilities`` - Ironic node capabilities. For example:: - - "capabilities": "profile:compute,boot_option:local" - -There are also two additional and optional fields that can be used to help a -user identify machines inside the ``instackenv.json`` file: - -* ``name`` - name associated with the node; it will appear in the ``Name`` - column while listing nodes - -* ``_comment`` to associate a comment with the node (like position, long - description and so on). Note that this field will not be considered by - Ironic during the import - -Also if you're working in a diverse environment with multiple architectures -and/or platforms within an architecture you may find it necessary to include a -platform field: - -* ``platform`` - String paired with images to fine tune image selection - -For example:: - - { - "nodes": [ - { - "name": "node-a", - "pm_type": "ipmi", - "ports": [ - { - "address": "fa:16:3e:2a:0e:36", - "physical_network": "ctlplane" - } - ], - "cpu": "2", - "memory": "4096", - "disk": "40", - "arch": "x86_64", - "pm_user": "admin", - "pm_password": "password", - "pm_addr": "10.0.0.8", - "_comment": "Room 1 - Rack A - Unit 22/24" - }, - { - "name": "node-b", - "pm_type": "ipmi", - "ports": [ - { - "address": "fa:16:3e:da:39:c9", - "physical_network": "ctlplane" - } - ], - "cpu": "2", - "memory": "4096", - "disk": "40", - "arch": "x86_64", - "pm_user": "admin", - "pm_password": "password", - "pm_addr": "10.0.0.15", - "_comment": "Room 1 - Rack A - Unit 26/28" - }, - { - "name": "node-n", - "pm_type": "ipmi", - "ports": [ - { - "address": "fa:16:3e:51:9b:68", - "physical_network": "leaf1" - } - ], - "cpu": "2", - "memory": "4096", - "disk": "40", - "arch": "x86_64", - "pm_user": "admin", - "pm_password": "password", - "pm_addr": "10.0.0.16", - "_comment": "Room 1 - Rack B - Unit 10/12" - } - ] - } - - -.. note:: - You don't need to create this file if you plan on using - :doc:`../provisioning/node_discovery`. - -Ironic Hardware Types -^^^^^^^^^^^^^^^^^^^^^ - -Ironic *hardware types* provide various levels of support for different -hardware. Hardware types, introduced in the Ocata cycle, are a new generation -of Ironic *drivers*. Previously, the word *drivers* was used to refer to what -is now called *classic drivers*. See `Ironic drivers documentation`_ for a full -explanation of similarities and differences between the two types. - -Hardware types are enabled in the ``undercloud.conf`` using the -``enabled_hardware_types`` configuration option. Classic drivers are enabled -using the ``enabled_drivers`` option. The latter has been deprecated since the Queens -release and should no longer be used. See the `hardware types migration guide`_ -for information on how to migrate existing nodes. - -Both hardware types and classic drivers can be equally used in the -``pm_type`` field of the ``instackenv.json``. 
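As a quick illustration of the paragraph above, the ``undercloud.conf`` entry is a comma-separated list in the ``[DEFAULT]`` section; the exact set of types shown here is only an example and depends on your hardware::

    [DEFAULT]
    enabled_hardware_types = ipmi,redfish,ilo,idrac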
- -See https://docs.openstack.org/ironic/latest/admin/drivers.html for the most -up-to-date information about Ironic hardware types and hardware -interfaces, but note that this page always targets Ironic git master, not the -release we use. - -Generic Hardware Types -~~~~~~~~~~~~~~~~~~~~~~~ - -* This most generic hardware type is ipmi_. It uses the `ipmitool`_ utility - to manage a bare metal node, and supports a vast variety of hardware. - - .. admonition:: Stable Branch - :class: stable - - This hardware type is supported starting with the Pike release. For older - releases use the functionally equivalent ``pxe_ipmitool`` driver. - - .. admonition:: Virtual - :class: virtual - - This hardware type can be used for developing and testing TripleO in a - :doc:`virtual` as well. - -* Another generic hardware type is redfish_. It provides support for the - quite new `Redfish standard`_, which aims to replace IPMI eventually as - a generic protocol for managing hardware. In addition to the ``pm_*`` fields - mentioned above, this hardware type also requires setting ``pm_system_id`` - to the full identifier of the node in the controller (e.g. - ``/redfish/v1/Systems/42``). - - .. admonition:: Stable Branch - :class: stable - - Redfish support was introduced in the Pike release. - -The following generic hardware types are not enabled by default: - -* The snmp_ hardware type supports controlling PDUs for power management. - It requires boot device to be manually configured on the nodes. - -* Finally, the ``manual-management`` hardware type (not enabled by default) - skips power and boot device management completely. It requires manual power - and boot operations to be done at the right moments, so it's not recommended - for a generic production. - - .. admonition:: Stable Branch - :class: stable - - The functional analog of this hardware type before the Queens release - was the ``fake_pxe`` driver. - -Vendor Hardware Types -~~~~~~~~~~~~~~~~~~~~~ - -TripleO also supports vendor-specific hardware types for some types -of hardware: - -* ilo_ targets HPE Proliant Gen 8 and Gen 9 systems. - - .. admonition:: Stable Branch - :class: stable - - Use the ``pxe_ilo`` classic driver before the Queens release. - -* idrac_ targets DELL 12G and newer systems. - - .. admonition:: Stable Branch - :class: stable - - Use the ``pxe_drac`` classic driver before the Queens release. - -The following hardware types are supported but not enabled by default: - -* irmc_ targets FUJITSU PRIMERGY servers. - -* cisco-ucs-managed_ targets UCS Manager managed Cisco UCS B/C series servers. - -* cisco-ucs-standalone_ targets standalone Cisco UCS C series servers. - -.. note:: - Contact a specific vendor team if you have problems with any of these - drivers, as the TripleO team often cannot assist with them. - -.. _Ironic drivers documentation: https://docs.openstack.org/ironic/latest/install/enabling-drivers.html -.. _hardware types migration guide: https://docs.openstack.org/ironic/latest/admin/upgrade-to-hardware-types.html -.. _ipmitool: http://sourceforge.net/projects/ipmitool/ -.. _Redfish standard: https://www.dmtf.org/standards/redfish -.. _ipmi: https://docs.openstack.org/ironic/latest/admin/drivers/ipmitool.html -.. _redfish: https://docs.openstack.org/ironic/latest/admin/drivers/redfish.html -.. _snmp: https://docs.openstack.org/ironic/latest/admin/drivers/snmp.html -.. _ilo: https://docs.openstack.org/ironic/latest/admin/drivers/ilo.html -.. _idrac: https://docs.openstack.org/ironic/latest/admin/drivers/idrac.html -.. 
_irmc: https://docs.openstack.org/ironic/latest/admin/drivers/irmc.html -.. _cisco-ucs-managed: https://docs.openstack.org/ironic/latest/admin/drivers/ucs.html -.. _cisco-ucs-standalone: https://docs.openstack.org/ironic/latest/admin/drivers/cimc.html -.. _validations_no_undercloud: ../../validations/ansible.html diff --git a/deploy-guide/source/environments/index.rst b/deploy-guide/source/environments/index.rst deleted file mode 100644 index 44701836..00000000 --- a/deploy-guide/source/environments/index.rst +++ /dev/null @@ -1,12 +0,0 @@ -Environment Setup -================= - -|project| can be used in baremetal as well as in virtual environments. This -section contains instructions on how to setup your environments properly. - -.. toctree:: - :maxdepth: 2 - - standalone - virtual - baremetal diff --git a/deploy-guide/source/environments/standalone.rst b/deploy-guide/source/environments/standalone.rst deleted file mode 100644 index 7ee4a1f3..00000000 --- a/deploy-guide/source/environments/standalone.rst +++ /dev/null @@ -1,12 +0,0 @@ -Standalone Environment ----------------------- -.. include_after_header - -|project| can be used as a standalone environment with all services installed -on a single virtual or baremetal machine. - -The machine you are deploying on must meet the following minimum specifications: - -* 4 core CPU -* 8 GB memory -* 60 GB free disk space diff --git a/deploy-guide/source/environments/virtual.rst b/deploy-guide/source/environments/virtual.rst deleted file mode 100644 index 97678885..00000000 --- a/deploy-guide/source/environments/virtual.rst +++ /dev/null @@ -1,14 +0,0 @@ -Virtual Environment -------------------- - -|project| can be used in a virtual environment using virtual machines instead -of actual baremetal. However, one baremetal machine is still -needed to act as the host for the virtual machines. - -.. warning:: Virtual deployments with TripleO are for development and testing - purposes only. This method cannot be used for production-ready - deployments. - -The tripleo-quickstart project is used for creating virtual environments -for use with TripleO. Please see that documentation at -https://docs.openstack.org/tripleo-quickstart/ diff --git a/deploy-guide/source/features/api_policies.rst b/deploy-guide/source/features/api_policies.rst deleted file mode 100644 index 59483350..00000000 --- a/deploy-guide/source/features/api_policies.rst +++ /dev/null @@ -1,28 +0,0 @@ -Configuring API access policies -=============================== - -Each OpenStack service, has its own role-based access policies. -They determine which user can access which resources in which way, -and are defined in the service’s policy.json file. - -.. Warning:: - - While editing policy.json is supported, modifying the policy can - have unexpected side effects and is not encouraged. - -|project| supports custom API access policies through parameters in -TripleO Heat Templates. -To enable this feature, you need to use some parameters to enable -the custom policies on the services you want. - -Creating an environment file and adding the following arguments to your -``openstack overcloud deploy`` command will do the trick:: - - $ cat ~/nova-policies.yaml - parameter_defaults: - NovaApiPolicies: { nova-context_is_admin: { key: 'compute:get_all', value: '' } } - - -e nova-policies.yaml - -In this example, we allow anyone to list Nova instances, which is very insecure but -can be done with this feature. 
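The policy override is then applied like any other environment file; a sketch of the corresponding deploy command, reusing the file created above::

    openstack overcloud deploy --templates \
        -e ~/nova-policies.yaml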
diff --git a/deploy-guide/source/features/backends.rst b/deploy-guide/source/features/backends.rst deleted file mode 100644 index 4cd7b050..00000000 --- a/deploy-guide/source/features/backends.rst +++ /dev/null @@ -1,16 +0,0 @@ -Backend Configuration -===================== - -Documentation on how to enable and configure various backends available for -OpenStack projects. - - -.. toctree:: - - deploy_manila - cinder_custom_backend - cinder_netapp - deployed_ceph - ceph_external - domain_specific_ldap_backends - swift_external diff --git a/deploy-guide/source/features/baremetal_overcloud.rst b/deploy-guide/source/features/baremetal_overcloud.rst deleted file mode 100644 index b05bd3ff..00000000 --- a/deploy-guide/source/features/baremetal_overcloud.rst +++ /dev/null @@ -1,1169 +0,0 @@ -Bare Metal Instances in Overcloud -================================= - -This documentation explains installing Ironic for providing bare metal -instances in the overcloud to end users. This feature is supported starting -with Newton. - -Architecture and requirements ------------------------------ - -By default, TripleO installs ironic API and conductor services on the -controller nodes. In an HA configuration the 3 conductor services form a hash -ring and balance the nodes across it. For a really big bare metal cloud it's -highly recommended to move ironic-conductor services to separate roles, use -the `IronicConductor role shipped with TripleO`_ as an example. - -.. note:: - Ironic services and API in the overcloud and in the undercloud are - completely independent. - -It is recommended to have at least 12 GiB of RAM on the undercloud and -controllers. The controllers (or separate ironic-conductor roles) should have -enough disk space to keep a cache of user instance images, at least 50 GiB -is recommended. - -It's also highly recommended that you use at least two networks: - -* Undercloud provisioning network (connects undercloud and overcloud nodes) - -* Overcloud provisioning network (connects overcloud nodes and tenant bare - metal instances) - -Preparing undercloud --------------------- - -If you already have an ``instackenv.json`` file with all nodes prepared, you -might want to leave some of the nodes for overcloud instances. E.g. if you have -three nodes in the ``instackenv.json``, you can split them:: - - jq '.nodes[0:2] | {nodes: .}' instackenv.json > undercloud.json - -The format of the remaining nodes is TripleO-specific, so we need -to convert it to something Ironic can understand without using -TripleO workflows. E.g. for node using IPMI:: - - jq '.nodes[2:3] | {nodes: map({driver: .pm_type, name: .name, - driver_info: {ipmi_username: .pm_user, ipmi_address: .pm_addr, - ipmi_password: .pm_password, ipmi_port: .pm_port}, - properties: {cpus: .cpu, cpu_arch: .arch, - local_gb: .disk, memory_mb: .memory}, - ports: .mac | map({address: .})})}' instackenv.json > overcloud-nodes.yaml - -.. note:: - This command intentionally omits the capabilities, as they are often - TripleO-specific, e.g. they force local boot instead of network boot used - by default in Ironic. - -Then enroll only ``undercloud.json`` in your undercloud:: - - source stackrc - openstack overcloud node import --provide undercloud.json - -.. admonition:: Virtual - :class: virtual - - If you used **tripleo-quickstart**, you may have to delete the nodes that - did not end up in undercloud.json. 
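A minimal sketch of removing such leftover nodes from the undercloud's Ironic, where ``<node>`` is a placeholder for the name or UUID of each node you do not want managed::

    source stackrc
    openstack baremetal node list
    # repeat for every node that should not be part of the undercloud
    openstack baremetal node delete <node>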
- -Configuring and deploying ironic --------------------------------- - -Ironic can be installed by including one of the environment files shipped with -TripleO, however, in most of the cases you'll want to tweak certain parameters. -This section assumes that a custom environment file called -``ironic-config.yaml`` exists. Please pay particular attention to parameters -described in `Essential configuration`_. - -Essential configuration -~~~~~~~~~~~~~~~~~~~~~~~ - -The following parameters should be configured in advance for overcloud Ironic -in an environment file: - -* ``IronicEnabledHardwareTypes`` configures which hardware types will be - supported in Ironic. - - .. note:: - Hardware types are the new generation of Ironic drivers. For example, - the ``ipmi`` hardware type roughly corresponds to the ``pxe_ipmitool`` - driver. Check `driver configuration guide`_ and `driver-specific - documentation`_ for more details. - - When enabling hardware types, you usually have to enable more hardware - interfaces that these types are compatible with. For example, when enabling - the ``redfish`` hardware type, also enable ``redfish`` power and management - interfaces. For example:: - - parameter_defaults: - IronicEnabledHardwareTypes: - - ipmi - - redfish - IronicEnabledPowerInterfaces: - - ipmitool - - redfish - IronicEnabledManagementInterfaces: - - ipmitool - - redfish - - Some drivers might require additional configuration to work properly. Check - `driver configuration guide`_ and `driver-specific documentation`_ for more - details. - - By default, the ``ipmi`` hardware type is enabled. - - .. admonition:: Stable Branches - :class: stable - - The ``IronicEnabledDrivers`` option can also be used for releases prior - to Queens. It sets the list of enabled classic drivers. The most often used - bare metal driver is ``pxe_ipmitool``. Also enabled by default are - ``pxe_ilo`` and ``pxe_drac`` drivers. - -* ``IronicCleaningDiskErase`` configures erasing hard drives - before the first and after every deployment. There are two recommended - values: ``full`` erases all data and ``metadata`` erases only disk metadata. - The former is more secure, the latter is faster. - - .. admonition:: Virtual - :class: virtual - - It is highly recommended to set this parameter to ``metadata`` - for virtual environments, as full cleaning can be extremely slow there. - -.. admonition:: Stable Branches - :class: stable - - ``NovaSchedulerDefaultFilters`` configures available scheduler filters. - Before the Stein release, the ``AggregateInstanceExtraSpecsFilter`` could be - used to separate flavors targeting virtual and bare metal instances. - Starting with the Stein release, a flavor can only target one of them, so - no additional actions are needed. - -Additional configuration -~~~~~~~~~~~~~~~~~~~~~~~~ - -* ``IronicCleaningNetwork`` sets the name or UUID of the **overcloud** network - to use for node cleaning. Initially is set to ``provisioning`` and should be - set to an actual UUID later when `Configuring networks`_. - - Similarly, there are ``IronicProvisioningNetwork`` and - ``IronicRescuingNetwork``. See `Configuring networks`_ for details. - -* ``IronicDefaultBootOption`` specifies whether the instances will boot from - local disk (``local``) or from PXE or iPXE (``netboot``). This parameter was - introduced in the Pike release with the default value of ``local``. Before - that ``netboot`` was used by default. - - .. 
note:: - This value can be overridden per node by setting the ``boot_option`` - capability on both the node and a flavor. - -* ``IronicDefaultDeployInterface`` specifies the way a node is deployed, see - the `deploy interfaces documentation`_ for details. The default is ``iscsi``, - starting with the Rocky release the ``direct`` deploy is also configured out - of box. The ``ansible`` deploy interface requires extensive configuration as - described in :doc:`../provisioning/ansible_deploy_interface`. - -* ``IronicDefaultNetworkInterface`` specifies the network management - implementation for bare metal nodes. The default value of ``flat`` means - that the provisioning network is shared between all nodes, and will also be - available to tenants. - - If you configure an ML2 mechanism driver that supports bare metal port - binding (networking-fujitsu, networking-cisco and some others), then you can - use the ``neutron`` implementation. In that case, Ironic and Neutron will - fully manage networking for nodes, including plugging and unplugging - the provision and cleaning network. The ``IronicProvisioningNetwork`` - parameter has to be configured in a similar way to ``IronicCleaningNetwork`` - (and in most cases to the same value). See - `Configuring ml2-ansible for multi-tenant networking`_ for a brief example - and `multi-tenant networking documentation`_ for more details. - - .. note:: - Please check with your switch vendor to learn if your switch and its - ML2 driver support bare metal port binding. - - Alternatively, you can use the networking-ansible_ ML2 plugin, which - supports a large variety of switch vendors and models. It is supported - by TripleO starting with the Rocky release. - -* ``IronicImageDownloadSource`` when using the ``direct`` deploy interface this - option (introduced in the Stein release) specifies what serves as a source - for pulling the image from **ironic-python-agent**: - - * ``swift`` (the default) pulls the image from an Object Storage service - (swift) temporary URL. This requires the Image service (glance) to be - backed by the Object Storage service. If the image is not in the *raw* - format, it will be converted in memory on the target node, so enough RAM - is required. - - * ``http`` makes **ironic-conductor** cache the image on the local HTTP - server (the same as for iPXE) and serve it from there. The image gets - converted to *raw* format by default and thus can be served directly to the - target block device without in-memory conversion. - -Using a Custom Network for Overcloud Provisioning -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The Pike release provided the ability to define a custom network, -this has been further enhanced in Queens to allow for the definition -of a VLAN in the network definition. Using a custom network to provision -Overcloud nodes for Ironic has the advantage of moving all Ironic services -off of the Undercloud Provisioning network (control plane) so that routing or -bridging to the control plane is not necessary. This can increase security, -and isolates tenant bare metal node provisioning from the overcloud node -provisioning done by the undercloud. 
- -Follow the instructions in :doc:`custom_networks` to add an additional network, -in this example called OcProvisioning, to ``network_data.yaml``:: - - # custom network for Overcloud provisioning - - name: OcProvisioning - name_lower: oc_provisioning - vip: true - vlan: 205 - ip_subnet: '172.23.3.0/24' - allocation_pools: [{'start': '172.23.3.10', 'end': '172.23.3.200'}] - -The ServiceNetMap can be updated in ``network-environment.yaml`` to move the -Ironic services used for Overcloud provisioning to the new network:: - - ServiceNetMap: - IronicApiNetwork: oc_provisioning # changed from ctlplane - IronicNetwork: oc_provisioning # changed from ctlplane - -Add the new network to the roles file ``roles_data.yaml`` for -controller:: - - networks: - - External - - InternalApi - - Storage - - StorageMgmt - - Tenant - - OcProvisioning - -Add the new network to the NIC config controller.yaml file. Starting in Queens, -the example NIC config files will automatically populated with this new network -when it is in ``network_data.yaml`` and ``roles_data.yaml`` so this step is -not necessary:: - - - type: vlan - vlan_id: - get_param: OcProvisioningNetworkVlanID - addresses: - - ip_netmask: - get_param: OcProvisioningIpSubnet - -.. note:: - The baremetal nodes will send and received untagged VLAN traffic - in order to properly run DHCP and PXE boot. - -Deployment -~~~~~~~~~~ - -Add the ironic environment file when deploying:: - - openstack overcloud deploy --templates \ - -e /usr/share/openstack-tripleo-heat-templates/environments/services/ironic-overcloud.yaml \ - -e ironic-config.yaml - -To deploy Ironic in containers for Pike-Rocky releases please, use -``/usr/share/openstack-tripleo-heat-templates/environments/services-docker/ironic.yaml`` -instead. - -.. note:: - We don't require any virtual compute nodes for the bare metal only case, - so feel free to set ``ComputeCount: 0`` in your environment file, if you - don't need them. - -If using a custom network in Pike or later, include the ``network_data.yaml`` -and ``roles_data.yaml`` files in the deployment:: - - -n /home/stack/network_data.yaml \ - -r /home/stack/roles_data.yaml \ - -In addition, if ``network-environment.yaml`` was updated to include the -ServiceNetMap changes, include the updated and generated -``network-environment.yaml`` files:: - - -e /usr/share/openstack-tripleo-heat-templates/environments/network-environment.yaml \ - -e /home/stack/templates/environments/network-environment.yaml \ - -Validation -~~~~~~~~~~ - -Check that Ironic works by connecting to the overcloud and trying to list the -nodes (you should see an empty response, but not an error):: - - source overcloudrc - baremetal node list - -You can also check the enabled driver list:: - - $ baremetal driver list - +---------------------+-------------------------+ - | Supported driver(s) | Active host(s) | - +---------------------+-------------------------+ - | ipmi | overcloud-controller-0. | - | pxe_drac | overcloud-controller-0. | - | pxe_ilo | overcloud-controller-0. | - | pxe_ipmitool | overcloud-controller-0. | - | redfish | overcloud-controller-0. | - +---------------------+-------------------------+ - -.. note:: - This commands shows both hardware types and classic drivers combined. 
- -For HA configuration you should see all three controllers:: - - $ baremetal driver list - +---------------------+------------------------------------------------------------------------------------------------------------+ - | Supported driver(s) | Active host(s) | - +---------------------+------------------------------------------------------------------------------------------------------------+ - | ipmi | overcloud-controller-0.localdomain, overcloud-controller-1.localdomain, overcloud-controller-2.localdomain | - | pxe_drac | overcloud-controller-0.localdomain, overcloud-controller-1.localdomain, overcloud-controller-2.localdomain | - | pxe_ilo | overcloud-controller-0.localdomain, overcloud-controller-1.localdomain, overcloud-controller-2.localdomain | - | pxe_ipmitool | overcloud-controller-0.localdomain, overcloud-controller-1.localdomain, overcloud-controller-2.localdomain | - | redfish | overcloud-controller-0.localdomain, overcloud-controller-1.localdomain, overcloud-controller-2.localdomain | - +---------------------+------------------------------------------------------------------------------------------------------------+ - -If this list is empty or does not show any of the controllers, then the -``openstack-ironic-conductor`` service on this controller failed to start. -The likely cause is missing dependencies for vendor drivers. - -Finally, check that Nova recognizes both virtual and bare metal compute -services. In HA case there should be at least 4 services in total:: - - $ openstack compute service list --service nova-compute - +----+--------------+-------------------------------------+------+---------+-------+----------------------------+ - | ID | Binary | Host | Zone | Status | State | Updated At | - +----+--------------+-------------------------------------+------+---------+-------+----------------------------+ - | 21 | nova-compute | overcloud-novacompute-0.localdomain | nova | enabled | up | 2017-10-11T13:57:21.000000 | - | 30 | nova-compute | overcloud-controller-2.localdomain | nova | enabled | up | 2017-10-11T13:57:16.000000 | - | 33 | nova-compute | overcloud-controller-1.localdomain | nova | enabled | up | 2017-10-11T13:57:16.000000 | - | 54 | nova-compute | overcloud-controller-0.localdomain | nova | enabled | up | 2017-10-11T13:57:14.000000 | - +----+--------------+-------------------------------------+------+---------+-------+----------------------------+ - -Post-deployment configuration ------------------------------ - -In this section we configure OpenStack for both bare metal and virtual -machines provisioning. - -You need at least 3 nodes to use bare metal provisioning: one for the -undercloud, one for the controller and one for the actual instance. -This guide assumes using both virtual and bare metal computes, so to follow it -you need at least one more node, 4 in total for a non-HA configuration or 6 -for HA. - -This guide uses one network for simplicity. If you encounter weird DHCP, PXE -or networking issues with such a single-network configuration, try shutting -down the introspection DHCP server on the undercloud after the initial -introspection is finished:: - - sudo systemctl stop openstack-ironic-inspector-dnsmasq - -Resource classes -~~~~~~~~~~~~~~~~ - -Starting with the Pike release, bare metal instances are scheduled based on -*custom resource classes*. In case of Ironic, a resource class will correspond -to a flavor. When planning your bare metal cloud, think of a way to split all -nodes into classes, and create flavors accordingly. 
See `bare metal flavor -documentation`_ for more details. - -Preparing networking -~~~~~~~~~~~~~~~~~~~~ - -Next, we need to create at least one network for nodes to use. By default -Ironic uses the tenant network for the provisioning process, and the same -network is often configured for cleaning. - -As already mentioned, this guide assumes only one physical network shared -between undercloud and overcloud. In this case the subnet address must match -the one on the undercloud, but the allocation pools must not overlap (including -the pool used by undercloud introspection). - -For example, the following commands will work with the default undercloud -parameters:: - - source overcloudrc - openstack network create --share --provider-network-type flat \ - --provider-physical-network datacentre --external provisioning - openstack subnet create --network provisioning \ - --subnet-range 192.168.24.0/24 --gateway 192.168.24.40 \ - --allocation-pool start=192.168.24.41,end=192.168.24.100 provisioning-subnet - openstack router create default-router - openstack router add subnet default-router provisioning-subnet - -We will use this network for bare metal instances (both for provisioning and -as a tenant network), as well as an external network for virtual instances. -In a real situation you will only use it as provisioning, and create a separate -physical network as external. - -Now you can create a regular tenant network to use for virtual instances -and use the ``default-router`` to link the provisioning and tenant networks:: - - openstack network create tenant-net - openstack subnet create --network tenant-net --subnet-range 192.0.3.0/24 \ - --allocation-pool start=192.0.3.10,end=192.0.3.20 tenant-subnet - openstack router add subnet default-router tenant-subnet - -Networking using a custom network -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -If using a custom network for overcloud provisioning, create a network of -type ``vlan`` with VlanID matching the ``OcProvisioning`` network created -during deployment:: - - openstack network create --share --provider-network-type vlan \ - --provider-physical-network datacentre --provider-segment 205 provisioning - -Use a subnet range outside of the ``allocation_pool`` defined in -``network_data.yaml``, for example:: - - openstack subnet create --network provisioning --subnet-range \ - 172.21.2.0/24 --gateway 172.21.2.1 --allocation-pool \ - start=172.21.2.201,end=172.21.2.250 provisioning-subnet - -As defined in ``Preparing networking``, you can create a tenant network along -with a ``default-router`` to link the provisioning and tenant networks. - -Configuring networks -~~~~~~~~~~~~~~~~~~~~ - -Ironic has to be configured to use three networks for its internal purposes: - -* *Cleaning* network is used during cleaning and is mandatory to configure. - - This network can be configured to a name or UUID during deployment via - the ``IronicCleaningNetwork`` parameter. - -* *Provisioning* network is used during deployment if the *network interface* - is set to ``neutron`` (either explicitly or via setting - ``IronicDefaultNetworkInterface`` during installation). - - This network is supported by TripleO starting with the Pike release and - can be configured to a name or UUID during deployment via - the ``IronicProvisioningNetwork`` parameter. - -* *Rescuing* network is used when starting the *rescue* process - repairing - broken instances through a special ramdisk. 
- - This network is supported by TripleO starting wince the Rocky release and - can be configured to a name or UUID during deployment via - the ``IronicRescuingNetwork`` parameter. - -Starting with the Ocata release, Ironic is configured to use network called -``provisioning`` for all three networks by default. However, network names are -not unique. A user creating another network with the same name will break bare -metal provisioning. Thus, it's highly recommended to update the deployment, -providing the provider network UUID. - -Use the following command to get the UUID:: - - openstack network show provisioning -f value -c id - -Configuring networks on deployment -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -To update the whole deployment update the environment file you've created, -setting ``IronicCleaningNetwork`` to the this UUID, for example:: - - parameter_defaults: - IronicCleaningNetwork: c71f4bfe-409b-4292-818f-21cdf910ee06 - -In the Pike release or newer, also set the provisioning network. You can use -the same network or create a new one:: - - parameter_defaults: - IronicCleaningNetwork: c71f4bfe-409b-4292-818f-21cdf910ee06 - IronicProvisioningNetwork: c71f4bfe-409b-4292-818f-21cdf910ee06 - -In the Rocky release or newer, also set the rescuing network. You can use -the same network or create a new one:: - - parameter_defaults: - IronicCleaningNetwork: c71f4bfe-409b-4292-818f-21cdf910ee06 - IronicProvisioningNetwork: c71f4bfe-409b-4292-818f-21cdf910ee06 - IronicRescuingNetwork: c71f4bfe-409b-4292-818f-21cdf910ee06 - -Finally, run the deploy command with exactly the same arguments as before -(don't forget to include the environment file if it was not included -previously). - -Configuring networks per node -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Alternatively, you can set the networks per node starting with the Queens -release. - -When enrolling nodes, add ``cleaning_network``, ``provisioning_network`` -and/or ``rescuing_network`` to the ``driver_info`` dictionary when -`Preparing inventory`_. - -After enrolling nodes, you can update each of them with the following -command (adjusting it for your release):: - - baremetal node set \ - --driver-info cleaning_network= \ - --driver-info provisioning_network= \ - --driver-info rescuing_network= - -Adding deployment images -~~~~~~~~~~~~~~~~~~~~~~~~ - -Ironic requires the ironic-python-agent image stored in Glance. -You can use the same images you already have on the undercloud:: - - source overcloudrc - openstack image create --public --container-format aki \ - --disk-format aki --file ~/ironic-python-agent.kernel deploy-kernel - openstack image create --public --container-format ari \ - --disk-format ari --file ~/ironic-python-agent.initramfs deploy-ramdisk - -.. note:: - These commands assume that the images are in the home directory, which is - often the case for TripleO. - -Creating flavors -~~~~~~~~~~~~~~~~ - -As usual with OpenStack, you need to create at least one flavor to be used -during deployment. As bare metal resources are inherently not divisible, -the flavor will set minimum requirements (CPU count, RAM and disk sizes) that -a node must fulfil, see `bare metal flavor documentation`_ for details. - -Creating a single flavor is sufficient for the simplest case:: - - source overcloudrc - openstack flavor create --ram 1024 --disk 20 --vcpus 1 baremetal - -.. note:: - The ``disk`` argument will be used to determine the size of the root - partition. 
The ``ram`` and ``vcpus`` arguments are ignored for bare metal, - starting with the Pike release, if the flavor is configured as explained - below. - -Starting with the Pike release, switch to scheduling based on resource -classes, as explained in the `bare metal flavor documentation`_:: - - openstack flavor set baremetal --property resources:CUSTOM_BAREMETAL=1 - openstack flavor set baremetal --property resources:VCPU=0 - openstack flavor set baremetal --property resources:MEMORY_MB=0 - openstack flavor set baremetal --property resources:DISK_GB=0 - -Creating host aggregates -~~~~~~~~~~~~~~~~~~~~~~~~ - -.. note:: - If you don't plan on using virtual instances, you can skip this step. - It also won't be required in the Stein release, after bare metal nodes - stopped report CPU, memory and disk properties. - -.. admonition:: Stable Branches - :class: stable - - For a hybrid bare metal and virtual environment before the Pike release - you have to set up *host aggregates* for virtual and bare metal hosts. You - can also optionally follow this procedure until the Stein release. We will - use a property called ``baremetal`` to link flavors to host aggregates:: - - openstack aggregate create --property baremetal=true baremetal-hosts - openstack aggregate create --property baremetal=false virtual-hosts - openstack flavor set baremetal --property baremetal=true - - .. warning:: - This association won't work without ``AggregateInstanceExtraSpecsFilter`` - enabled as described in `Essential configuration`_. - - .. warning:: - Any property you set on flavors has to be duplicated on aggregates, - otherwise scheduling will fail. - - Then for all flavors you've created for virtual instances set the same - ``baremetal`` property to ``false``, for example:: - - openstack flavor create --ram 1024 --disk 20 --vcpus 1 virtual - openstack flavor set virtual --property baremetal=false - -Creating instance images -~~~~~~~~~~~~~~~~~~~~~~~~ - -You can build your images using ``diskimage-builder`` tool already available -on the undercloud, for example:: - - disk-image-create centos7 baremetal dhcp-all-interfaces grub2 -o centos-image - -.. note:: - The following elements are actually optional: - - * ``dhcp-all-interfaces`` makes the resulting instance get IP addresses for - all NICs via DHCP. - - * ``grub2`` installs the grub bootloader on the image, so that local boot - can be used in additional to PXE booting. - -This command creates a so called *partition image*, i.e. an image containing -only root partition. Ironic also supports *whole disk images*, i.e. images -with the whole partition table embedded. This may be the only option when -running non-Linux images. Please check the `images documentation`_ -for more details on building and using images. - -Three components are created for every partition image: the main image with -``qcow2`` extension, the kernel with ``vmlinuz`` extension and the initrd -image with ``initrd`` extension. - -Upload them with the following command:: - - source overcloudrc - KERNEL_ID=$(openstack image create --file centos-image.vmlinuz --public \ - --container-format aki --disk-format aki -f value -c id \ - centos-image.vmlinuz) - RAMDISK_ID=$(openstack image create --file centos-image.initrd --public \ - --container-format ari --disk-format ari -f value -c id \ - centos-image.initrd) - openstack image create --file centos-image.qcow2 --public \ - --container-format bare --disk-format qcow2 \ - --property kernel_id=$KERNEL_ID --property ramdisk_id=$RAMDISK_ID \ - centos-image - -.. 
note:: - A whole disk image will only have one component - the image itself with - ``qcow2`` extension. Do not set ``kernel_id`` and ``ramdisk_id`` - properties for such images. - -Enrolling nodes ---------------- - -For all nodes you're enrolling you need to know: - -* BMC (IPMI, iDRAC, iLO, etc) address and credentials, - -* MAC address of the PXE booting NIC, - -* CPU count and architecture, memory size in MiB and root disk size in GiB, - -* Serial number or WWN of the root device, if the node has several hard drives. - -In the future some of this data will be provided by the introspection process, -which is not currently available in the overcloud. - -This guide uses inventory files to enroll nodes. Alternatively, you can enroll -nodes directly from CLI, see the `enrollment documentation`_ for details. - -Preparing inventory -~~~~~~~~~~~~~~~~~~~ - -Your inventory file (e.g. ``overcloud-nodes.yaml`` from `Preparing -undercloud`_) should be in the following format: - -.. code-block:: yaml - - nodes: - - name: node-0 - driver: ipmi - driver_info: - ipmi_address: - ipmi_username: - ipmi_password: - ipmi_port: - resource_class: baremetal - properties: - cpu_arch: - local_gb: - root_device: - serial: - ports: - - address: - pxe_enabled: true - local_link_connection: - switch_id: - switch_info: - port_id: - -* The ``driver`` field must be one of ``IronicEnabledDrivers`` or - ``IronicEnabledHardwareTypes``, which we set when `Configuring and deploying - ironic`_. - - .. admonition:: Stable Branch - :class: stable - - Hardware types are only available since the Pike release. In the example - above use ``pxe_ipmitool`` instead of ``ipmi`` for older releases. - -* The ``resource_class`` field corresponds to a custom resource - class, as explained in `Resource classes`_. - -* The ``root_device`` property is optional, but it's highly recommended - to set it if the bare metal node has more than one hard drive. - There are several properties that can be used instead of the serial number - to designate the root device, see the `root device hints documentation`_ - for details. - -* The ``local_gb`` field specifies the size (in GiB) of the root device. Its - value must match the size of the device specified by the ``root_device`` - property. However, to allow for partitioning, it's highly recommended to - subtract 1 GiB from it. - -* Exactly one port with ``pxe_enabled`` set to ``true`` must be specified in - the ``ports`` list. It has to match the NIC used for provisioning. - - .. note:: - More ports with ``pxe_enabled=false`` can be specified safely here. They - won't be used for provisioning, but they are used with the ``neutron`` - network interface. - -.. admonition:: Stable Branch - :class: stable - - * The ``memory_mb`` and ``cpus`` properties are mandatory before the Pike - release and can optionally be used before Stein. - - .. warning:: - Do not populate ``memory_mb`` and ``cpus`` before the Stein release if - you do **not** use host aggregates for separating virtual and bare - metal flavors as described in `Creating host aggregates`_. - -* ``local_link_connection`` is required when using the `neutron` network - interface. This information is needed so ironic/neutron can identify which - interfaces on switches corresponding to the ports defined in ironic. - - * ``switch_id`` the ID the switch uses to identify itself over LLDP(usually - the switch MAC). 
- - * ``switch_info`` the name associated with the switch in ``ML2HostConfigs`` - (see ML2HostConfigs in `ml2-ansible example`_) - - * ``port_id`` the name associated with the interface on the switch. - -Enrolling nodes -~~~~~~~~~~~~~~~ - -The ``overcloud-nodes.yaml`` file prepared in the previous steps can now be -imported in Ironic:: - - source overcloudrc - baremetal create overcloud-nodes.yaml - -.. warning:: - This command is provided by Ironic, not TripleO. It also does not feature - support for updates, so if you need to change something, you have to use - ``baremetal node set`` and similar commands. - -The nodes appear in the ``enroll`` provision state, you need to check their BMC -credentials and make them available:: - - DEPLOY_KERNEL=$(openstack image show deploy-kernel -f value -c id) - DEPLOY_RAMDISK=$(openstack image show deploy-ramdisk -f value -c id) - - for uuid in $(baremetal node list --provision-state enroll -f value -c UUID); - do - baremetal node set $uuid \ - --driver-info deploy_kernel=$DEPLOY_KERNEL \ - --driver-info deploy_ramdisk=$DEPLOY_RAMDISK \ - --driver-info rescue_kernel=$DEPLOY_KERNEL \ - --driver-info rescue_ramdisk=$DEPLOY_RAMDISK - baremetal node manage $uuid --wait && - baremetal node provide $uuid - done - -The deploy kernel and ramdisk were created as part of `Adding deployment -images`_. - -The ``baremetal node provide`` command makes a node go through cleaning -procedure, so it might take some time depending on the configuration. Check -your nodes status with:: - - baremetal node list --fields uuid name provision_state last_error - -Wait for all nodes to reach the ``available`` state. Any failures during -cleaning has to be corrected before proceeding with deployment. - -Populating host aggregates -~~~~~~~~~~~~~~~~~~~~~~~~~~ - -.. note:: - If you don't plan on using virtual instances, you can skip this step. - It also won't be required in the Stein release, after bare metal nodes - stopped report CPU, memory and disk properties. - -.. admonition:: Stable Branch - :class: stable - - For hybrid bare metal and virtual case you need to specify which host - belongs to which host aggregates (``virtual`` or ``baremetal`` as created in - `Creating host aggregates`_). - - When the default host names are used, we can take advantage of the fact - that every virtual host will have ``compute`` in its name. All bare metal - hypervisors will be assigned to one (non-HA) or three (HA) controller hosts. - So we can do the assignment with the following commands:: - - source overcloudrc - for vm_host in $(openstack hypervisor list -f value -c "Hypervisor Hostname" | grep compute); - do - openstack aggregate add host virtual-hosts $vm_host - done - - openstack aggregate add host baremetal-hosts overcloud-controller-0.localdomain - # Ignore the following two for a non-HA environment - openstack aggregate add host baremetal-hosts overcloud-controller-1.localdomain - openstack aggregate add host baremetal-hosts overcloud-controller-2.localdomain - - .. note:: - Every time you scale out compute nodes, you need to add newly added - hosts to the ``virtual-hosts`` aggregate. 
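If you rely on the host aggregates described above, the aggregate membership can be refreshed safely after every scale-out. The following is only a minimal sketch (assuming the default host naming used in this guide) that re-adds all compute hypervisors to the ``virtual-hosts`` aggregate and ignores hosts that are already members::

    source overcloudrc
    for vm_host in $(openstack hypervisor list -f value -c "Hypervisor Hostname" | grep compute);
    do
        # Re-adding an existing member returns an error, which is safe to ignore here.
        openstack aggregate add host virtual-hosts $vm_host || true
    done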
- -Checking available resources -~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Check that nodes are really enrolled and the power state is reflected correctly -(it may take some time):: - - $ source overcloudrc - $ baremetal node list - +--------------------------------------+------------+---------------+-------------+--------------------+-------------+ - | UUID | Name | Instance UUID | Power State | Provisioning State | Maintenance | - +--------------------------------------+------------+---------------+-------------+--------------------+-------------+ - | a970c5db-67dd-4676-95ba-af1edc74b2ee | instance-0 | None | power off | available | False | - | bd99ec64-4bfc-491b-99e6-49bd384b526d | instance-1 | None | power off | available | False | - +--------------------------------------+------------+---------------+-------------+--------------------+-------------+ - -After a few minutes, new hypervisors should appear in Nova and the stats -should display the sum of bare metal and virtual resources:: - - $ openstack hypervisor list - +----+--------------------------------------+ - | ID | Hypervisor Hostname | - +----+--------------------------------------+ - | 2 | overcloud-novacompute-0.localdomain | - | 17 | bd99ec64-4bfc-491b-99e6-49bd384b526d | - | 20 | a970c5db-67dd-4676-95ba-af1edc74b2ee | - +----+--------------------------------------+ - -.. note:: - Each bare metal node becomes a separate hypervisor in Nova. The hypervisor - host name always matches the associated node UUID. - -Next you can use the Placement API (available only via cURL for the time being) -to check that bare metal resources are properly exposed. Start with checking -that all nodes are recorded:: - - $ token=$(openstack token issue -f value -c id) - $ endpoint=$(openstack endpoint show placement -f value -c publicurl) - $ curl -sH "X-Auth-Token: $token" $endpoint/resource_providers | jq -r '.resource_providers | map({node: .name, uuid})' - [ - { - "uuid": "9dff98a8-6fc9-4a05-8d78-c1d5888d9fde", - "node": "overcloud-novacompute-0.localdomain" - }, - { - "uuid": "61d741b5-33d6-40a1-bcbe-b38ea34ca6c8", - "node": "bd99ec64-4bfc-491b-99e6-49bd384b526d" - }, - { - "uuid": "e22bc261-53be-43b3-848f-e29c728142d3", - "node": "a970c5db-67dd-4676-95ba-af1edc74b2ee" - } - ] - -Then for each of the bare metal resource providers (having node UUIDs as -names) check their inventory:: - - $ curl -sH "X-Auth-Token: $token" $endpoint/resource_providers/e22bc261-53be-43b3-848f-e29c728142d3/inventories | jq .inventories - { - "CUSTOM_BAREMETAL": { - "max_unit": 1, - "min_unit": 1, - "step_size": 1, - "reserved": 0, - "total": 1, - "allocation_ratio": 1 - } - } - -You see the custom ``baremetal`` resource class reported, as well as available -disk space (only before the Queens release). If you see an empty inventory, -nova probably consider the node unavailable. Check :ref:`no-valid-host` for -tips on a potential cause. - -Booting a bare metal instance ------------------------------ - -You will probably want to create a keypair to use for logging into instances. 
-For example, using SSH public key from undercloud:: - - source overcloudrc - openstack keypair create --public-key ~/.ssh/id_rsa.pub undercloud-key - -Now you're ready to boot your first bare metal instance:: - - openstack server create --image centos-image --flavor baremetal \ - --nic net-id=$(openstack network show provisioning -f value -c id) \ - --key-name undercloud-key instance-0 - -After some time (depending on the image), you will see the prepared instance:: - - $ openstack server list - +--------------------------------------+------------+--------+-----------------------------+ - | ID | Name | Status | Networks | - +--------------------------------------+------------+--------+-----------------------------+ - | 2022d237-e249-44bd-b864-e7f536a8e439 | instance-0 | ACTIVE | provisioning=192.168.24.50 | - +--------------------------------------+------------+--------+-----------------------------+ - -.. note:: - If you encounter *"No valid host found"* error from Nova, make sure to read - the undercloud troubleshooting guide on this topic: :ref:`no-valid-host`. - -Let's check that it actually got scheduled on a bare metal machine:: - - $ openstack server show instance-0 -c "OS-EXT-SRV-ATTR:host" -c "OS-EXT-SRV-ATTR:hypervisor_hostname" - +-------------------------------------+--------------------------------------+ - | Field | Value | - +-------------------------------------+--------------------------------------+ - | OS-EXT-SRV-ATTR:host | overcloud-controller-0.localdomain | - | OS-EXT-SRV-ATTR:hypervisor_hostname | bd99ec64-4bfc-491b-99e6-49bd384b526d | - +-------------------------------------+--------------------------------------+ - -You can now log into it:: - - $ ssh centos@192.168.24.50 - The authenticity of host '192.168.24.50 (192.168.24.50)' can't be established. - ECDSA key fingerprint is eb:35:45:c5:ed:d9:8a:e8:4b:20:db:06:10:6f:05:74. - Are you sure you want to continue connecting (yes/no)? yes - Warning: Permanently added '192.168.24.50' (ECDSA) to the list of known hosts. - [centos@instance-0 ~]$ - -Now let's try the same with a virtual instance:: - - openstack server create --image centos-image --flavor virtual \ - --nic net-id=$(openstack network show tenant-net -f value -c id) \ - --key-name undercloud-key instance-1 - -This instance gets scheduled on a virtual host:: - - $ openstack server show instance-1 -c "OS-EXT-SRV-ATTR:host" -c "OS-EXT-SRV-ATTR:hypervisor_hostname" - +-------------------------------------+-------------------------------------+ - | Field | Value | - +-------------------------------------+-------------------------------------+ - | OS-EXT-SRV-ATTR:host | overcloud-novacompute-0.localdomain | - | OS-EXT-SRV-ATTR:hypervisor_hostname | overcloud-novacompute-0.localdomain | - +-------------------------------------+-------------------------------------+ - -Booting a bare metal instance from a cinder volume --------------------------------------------------- - -Cinder volumes can be used to back a baremetal node over iSCSI, in order to -do this each baremetal node must first be configured to boot from a volume. 
-The connector ID for each node should be unique; below we achieve this by
-incrementing the value of ::
-
-    $ baremetal node set --property capabilities=iscsi_boot:true --storage-interface cinder
-    $ baremetal volume connector create --node --type iqn --connector-id iqn.2010-10.org.openstack.node
-
-The image used should be configured to boot from an iSCSI root disk. On CentOS
-7 this is achieved by ensuring that the `iscsi` module is added to the ramdisk
-and passing `rd.iscsi.firmware=1` to the kernel in the grub config::
-
-    $ mkdir /tmp/mountpoint
-    $ guestmount -i -a /tmp/CentOS-7-x86_64-GenericCloud.qcow2 /tmp/mountpoint
-    $ mount -o bind /dev /tmp/mountpoint/dev
-    $ chroot /tmp/mountpoint /bin/bash
-    chroot> mv /etc/resolv.conf /etc/resolv.conf_
-    chroot> echo "nameserver 8.8.8.8" > /etc/resolv.conf
-    chroot> yum install -y iscsi-initiator-utils
-    chroot> mv /etc/resolv.conf_ /etc/resolv.conf
-    # Be careful here to update the correct ramdisk (check /boot/grub2/grub.cfg)
-    chroot> dracut --force --add "network iscsi" /boot/initramfs-3.10.0-693.5.2.el7.x86_64.img 3.10.0-693.5.2.el7.x86_64
-    # Edit the file /etc/default/grub and add rd.iscsi.firmware=1 to GRUB_CMDLINE_LINUX=...
-    chroot> vi /etc/default/grub
-    chroot> exit
-    $ umount /tmp/mountpoint/dev
-    $ guestunmount /tmp/mountpoint
-    $ guestfish -a /tmp/CentOS-7-x86_64-GenericCloud.qcow2 -m /dev/sda1 sh "/sbin/grub2-mkconfig -o /boot/grub2/grub.cfg"
-
-.. note::
-    This image can no longer be used to do regular local boot, a situation
-    that should be fixed in future versions.
-
-This image can then be added to Glance and a volume created from it::
-
-    $ openstack image create --disk-format qcow2 --container-format bare --file /tmp/CentOS-7-x86_64-GenericCloud.qcow2 centos-bfv
-    $ openstack volume create --size 10 --image centos-bfv --bootable centos-test-volume
-
-Finally this volume can be used to back a baremetal instance::
-
-    $ openstack server create --flavor baremetal --volume centos-test-volume --key default centos-test
-
-Configuring ml2-ansible for multi-tenant networking
----------------------------------------------------
-
-Ironic can be configured to use a neutron ML2 mechanism driver for baremetal
-port binding. In this example we use the ml2-ansible plugin to configure
-ports on a Juniper switch (the plugin supports multiple switch types) to ensure
-baremetal networks are isolated from each other.
-
-ml2-ansible configuration
-~~~~~~~~~~~~~~~~~~~~~~~~~
-
-The following parameters must be configured in an environment file and used
-when deploying the overcloud:
-
-* ``ML2HostConfigs:`` this mapping contains an entry for each switch that
-  networking-ansible will configure. For each switch there should be a key
-  (used to identify the switch) and a mapping containing details specific to
-  that switch. The following details should be provided:
-
-  * ``ansible_network_os``: network platform the switch corresponds to.
-  * ``ansible_host``: switch IP
-  * ``ansible_user``: user to connect to the switch as
-  * ``ansible_ssh_pass``: (optional, alternatively use a private key) password
-  * ``ansible_ssh_private_key_file``: (optional, alternatively use a password) private key
-  * ``manage_vlans``: (optional, boolean) - If the vlan networks have not been defined on
-    your switch and the ansible_user has permission to create them, this should be left as
-    ``true``. If not, set it to ``false`` and ensure they are created by a user
-    with the appropriate permissions.
- * ``mac``: (optional) - Chassis MAC ID of the switch - -* ``IronicDefaultNetworkInterface`` set the default network type for nodes being - deployed. In most cases when using multi-tenant networking you'll want to set - this to ``neutron``. If the default isn't set to ``neutron`` here then the - ``network-interface`` needs to be set on a per node bases. This can be done with - the ``--network-interface`` parameter to either the ``node create`` or ``node set`` - command. - -The overcloud deploy command must also include -``-e /usr/share/openstack-tripleo-heat-templates/environments/services/neutron-ml2-ansible.yaml`` -in order to set ``OS::TripleO::Services::NeutronCorePlugin`` and ``NeutronMechanismDrivers``. - -ml2-ansible example -~~~~~~~~~~~~~~~~~~~ - -In this minimalistic example we have a baremetal node (ironic-0) being -controlled by ironic in the overcloud. This node is connected to a juniper -switch with ironic/neutron controlling the vlan id for the switch:: - - - +-------------------------------+ - | xe-0/0/7+-+ - | switch1 | | - |xe-0/0/1 | | - +-------------------------------+ | - | | - | | - +---------------+ +-----------------+ - | | | | | - | br-baremetal | | | - | | | | - | | | | - | | | | - | Overcloud | | Ironic-0 | - | | | | - | | | | - | | | | - | | | | - | | | | - | | | | - +---------------+ +-----------------+ - -Switch config for xe-0/0/7 should be removed before deployment, and -xe-0/0/1 should be a member of the vlan range 1200-1299:: - - xe-0/0/1 { - native-vlan-id XXX; - unit 0 { - family ethernet-switching { - interface-mode trunk; - vlan { - members [ XXX 1200-1299 ]; - } - } - } - } - -We first need to deploy ironic in the overcloud and include the following -configuration:: - - parameter_defaults: - IronicProvisioningNetwork: baremetal - IronicCleaningNetwork: baremetal - IronicDefaultNetworkInterface: neutron - NeutronMechanismDrivers: openvswitch,ansible - NeutronNetworkVLANRanges: baremetal:1200:1299 - NeutronFlatNetworks: datacentre,baremetal - NeutronBridgeMappings: datacentre:br-ex,baremetal:br-baremetal - ML2HostConfigs: - switch1: - ansible_network_os: junos - ansible_host: 10.9.95.25 - ansible_user: ansible - ansible_ssh_pass: ansible_password - manage_vlans: false - - -Once the overcloud is deployed, we need to create a network that will be used -as a provisioning (and cleaning) network:: - - openstack network create --provider-network-type vlan --provider-physical-network baremetal \ - --provider-segment 1200 baremetal - openstack subnet create --network baremetal --subnet-range 192.168.25.0/24 --ip-version 4 \ - --allocation-pool start=192.168.25.30,end=192.168.25.50 baremetal-subnet - -.. 
note:: - This network should be routed to the ctlplane network on the overcloud (while - on this network the ironic-0 will need access to the TFTP/HTTP and the ironic - API), one way to achieve this would be to set up a network representing the - ctlplane network and add a router between them:: - - openstack network create --provider-network-type flat --provider-physical-network \ - baremetal ctlplane - openstack subnet create --network ctlplane --subnet-range 192.168.24.0/24 \ - --ip-version 4 --gateway 192.168.24.254 --no-dhcp ctlplane-subnet - openstack router create provisionrouter - openstack router add subnet provisionrouter baremetal-subnet - openstack router add subnet provisionrouter ctlplane-subnet - - Each overcloud controller will also need a route added to route traffic - bound for 192.168.25.0/24 via 192.168.24.254, this can be done in the - network template when deploying the overcloud. - -If not already provided in ``overcloud-nodes.yaml`` above, the -local-link-connection values for `switch_info`, `port_id` and `switch_id` -can be provided here:: - - baremetal port set --local-link-connection switch_info=switch1 \ - --local-link-connection port_id=xe-0/0/7 \ - --local-link-connection switch_id=00:00:00:00:00:00 - -The node can now be registered with ironic and cleaned in the usual way, -once the node is available it can be used by another tenant in a regular -VLAN network:: - - openstack network create tenant-net - openstack subnet create --network tenant-net --subnet-range 192.168.3.0/24 \ - --allocation-pool start=192.168.3.10,end=192.168.3.20 tenant-subnet - openstack server create --flavor baremetal --image overcloud-full \ - --key default --network tenant-net test1 - -Assuming an external network is available the server can then be allocated a floating ip:: - - openstack router create external - openstack router add subnet external tenant-subnet - openstack router set --external-gateway external external - openstack floating ip create external - openstack server add floating ip test1 - - -.. _IronicConductor role shipped with TripleO: https://opendev.org/openstack/tripleo-heat-templates/src/branch/master/roles/IronicConductor.yaml -.. _driver configuration guide: https://docs.openstack.org/ironic/latest/install/enabling-drivers.html -.. _driver-specific documentation: https://docs.openstack.org/ironic/latest/admin/drivers.html -.. _bare metal flavor documentation: https://docs.openstack.org/ironic/latest/install/configure-nova-flavors.html -.. _enrollment documentation: https://docs.openstack.org/ironic/latest/install/enrollment.html -.. _root device hints documentation: https://docs.openstack.org/ironic/latest/install/advanced.html#specifying-the-disk-for-deployment-root-device-hints -.. _images documentation: https://docs.openstack.org/ironic/latest/install/configure-glance-images.html -.. _multi-tenant networking documentation: https://docs.openstack.org/ironic/latest/admin/multitenancy.html -.. _networking-ansible: https://github.com/openstack/networking-ansible -.. 
_deploy interfaces documentation: https://docs.openstack.org/ironic/latest/admin/interfaces/deploy.html diff --git a/deploy-guide/source/features/ceph_external.rst b/deploy-guide/source/features/ceph_external.rst deleted file mode 100644 index 0523b5cd..00000000 --- a/deploy-guide/source/features/ceph_external.rst +++ /dev/null @@ -1,394 +0,0 @@ -Use an external Ceph cluster with the Overcloud -=============================================== - -|project| supports use of an external Ceph cluster for certain services deployed -in the Overcloud. - -Deploying Cinder, Glance, Nova, Gnocchi with an external Ceph RBD service -------------------------------------------------------------------------- - -The overcloud may be configured to use an external Ceph RBD service by -enabling a particular environment file when deploying the -Overcloud. For Wallaby and newer include -`environments/external-ceph.yaml`. - -For Ocata and earlier use -`environments/puppet-ceph-external.yaml`. For Pike through Victoria -use `environments/ceph-ansible/ceph-ansible-external.yaml` and install -ceph-ansible on the Undercloud as described in -:doc:`../deployment/index`. For Pike through Victoria a Ceph container -is downloaded and executed on Overcloud nodes to use Ceph binaries -only available within the container. These binaries are used to create -the CephX client keyrings on the overcloud. Thus, between Pike and -Victoria it was necessary when preparing to deploy a containerized -overcloud, as described in -:doc:`../deployment/container_image_prepare`, to include the Ceph -container even if that overcloud will only connect to an external Ceph -cluster. Starting in Wallaby neither ceph-ansible or cephadm configure -Ceph clients and instead the tripleo-ansible role tripleo_ceph_client -is used. Thus, it is not necessary to install ceph-ansible nor prepare -a Ceph container when configuring external Ceph in Wallaby and -newer. Simply include `environments/external-ceph.yaml` in the -deployment. All parameters described below remain consistent -regardless of external Ceph configuration method. - -Some of the parameters in the above environment files can be overridden:: - - parameter_defaults: - # Enable use of RBD backend in nova-compute - NovaEnableRbdBackend: true - # Enable use of RBD backend in cinder-volume - CinderEnableRbdBackend: true - # Backend to use for cinder-backup - CinderBackupBackend: ceph - # Backend to use for glance - GlanceBackend: rbd - # Backend to use for gnocchi-metricsd - GnocchiBackend: rbd - # Name of the Ceph pool hosting Nova ephemeral images - NovaRbdPoolName: vms - # Name of the Ceph pool hosting Cinder volumes - CinderRbdPoolName: volumes - # Name of the Ceph pool hosting Cinder backups - CinderBackupRbdPoolName: backups - # Name of the Ceph pool hosting Glance images - GlanceRbdPoolName: images - # Name of the Ceph pool hosting Gnocchi metrics - GnocchiRbdPoolName: metrics - # Name of the user to authenticate with the external Ceph cluster - CephClientUserName: openstack - -The pools and the CephX user **must** be created on the external Ceph cluster -before deploying the Overcloud. 
TripleO expects a single user, configured via -CephClientUserName, to have the capabilities to use all the OpenStack pools; -the user could be created with a command like this:: - - ceph auth add client.openstack mon 'allow r' osd 'allow class-read object_prefix rbd_children, allow rwx pool=volumes, allow rwx pool=vms, allow rwx pool=images, allow rwx pool=backups, allow rwx pool=metrics' - -In addition to the above customizations, the deployer **needs** to provide -at least three required parameters related to the external Ceph cluster:: - - parameter_defaults: - # The cluster FSID - CephClusterFSID: '4b5c8c0a-ff60-454b-a1b4-9747aa737d19' - # The CephX user auth key - CephClientKey: 'AQDLOh1VgEp6FRAAFzT7Zw+Y9V6JJExQAsRnRQ==' - # The list of Ceph monitors - CephExternalMonHost: '172.16.1.7, 172.16.1.8, 172.16.1.9' - -The above parameters will result in TripleO creating a Ceph -configuration file and cephx keyring in /etc/ceph on every -node which needs to connect to Ceph to use the RBD service. - -Configuring Ceph Clients for Multiple External Ceph RBD Services ----------------------------------------------------------------- - -In Train and newer it's possible to use TripleO to deploy an -overcloud which is capable of using the RBD services of multiple -external Ceph clusters. A separate keyring and Ceph configuration file -is created for each external Ceph cluster in /etc/ceph on every -overcloud node which needs to connect to Ceph. This functionality is -provided by the `CephExternalMultiConfig` parameter. - -Do not use `CephExternalMultiConfig` when configuring an overcloud to -use only one external Ceph cluster. Instead follow the example in the -previous section. The example in the previous section and the method -of deploying an internal Ceph cluster documented in -:doc:`deployed_ceph` are mutually exclusive per Heat stack. The -following scenarios are the only supported ones in which -`CephExternalMultiConfig` may be used per Heat stack: - -* One external Ceph cluster configured, as described in previous - section, in addition to multiple external Ceph clusters configured - via `CephExternalMultiConfig`. - -* One internal Ceph cluster, as described in :doc:`deployed_ceph` in - addition to multiple external ceph clusters configured via - `CephExternalMultiConfig`. 
- -The `CephExternalMultiConfig` parameter is used like this:: - - CephExternalMultiConfig: - - cluster: 'ceph2' - fsid: 'af25554b-42f6-4d2b-9b9b-d08a1132d3e8' - external_cluster_mon_ips: '172.18.0.5,172.18.0.6,172.18.0.7' - keys: - - name: "client.openstack" - caps: - mgr: "allow *" - mon: "profile rbd" - osd: "profile rbd pool=volumes, profile rbd pool=backups, profile rbd pool=vms, profile rbd pool=images" - key: "AQCwmeRcAAAAABAA6SQU/bGqFjlfLro5KxrB1Q==" - mode: "0600" - dashboard_enabled: false - - cluster: 'ceph3' - fsid: 'e2cba068-5f14-4b0f-b047-acf375c0004a' - external_cluster_mon_ips: '172.18.0.8,172.18.0.9,172.18.0.10' - keys: - - name: "client.openstack" - caps: - mgr: "allow *" - mon: "profile rbd" - osd: "profile rbd pool=volumes, profile rbd pool=backups, profile rbd pool=vms, profile rbd pool=images" - key: "AQCwmeRcAAAAABAA6SQU/bGqFjlfLro5KxrB2Q==" - mode: "0600" - dashboard_enabled: false - -The above, in addition to the parameters from the previous section, -will result in an overcloud with the following files in /etc/ceph: - -* ceph.client.openstack.keyring -* ceph.conf -* ceph2.client.openstack.keyring -* ceph2.conf -* ceph3.client.openstack.keyring -* ceph3.conf - -The first two files which start with `ceph` will be created based on -the parameters discussed in the previous section. The next two files -which start with `ceph2` will be created based on the parameters from -the first list item within the `CephExternalMultiConfig` parameter -(e.g. `cluster: ceph2`). The last two files which start with `ceph3` -will be created based on the parameters from the last list item within -the `CephExternalMultiConfig` parameter (e.g. `cluster: ceph3`). - -The last four files in the list which start with `ceph2` or `ceph3` -will also contain parameters found in the first two files which -start with `ceph` except where those parameters intersect. When -there's an intersection those parameters will be overridden with the -values from the `CephExternalMultiConfig` parameter. For example there -will only be one FSID in each Ceph configuration file with the -following values per file: - -* ceph.conf will have `fsid = 4b5c8c0a-ff60-454b-a1b4-9747aa737d19` - (as seen in the previous section) -* ceph2.conf will have `fsid = af25554b-42f6-4d2b-9b9b-d08a1132d3e8` -* ceph3.conf will have `fsid = e2cba068-5f14-4b0f-b047-acf375c0004a` - -However, if the `external_cluster_mon_ips` key was not set within -the `CephExternalMultiConfig` parameter, then all three Ceph -configuration files would contain `mon host = 172.16.1.7, 172.16.1.8, -172.16.1.9`, as seen in the previous section. Thus, it is necessary to -override the `external_cluster_mon_ips` key within each list item of -the `CephExternalMultiConfig` parameter because each external Ceph -cluster will have its own set of unique monitor IPs. - -The `CephExternalMultiConfig` and `external_cluster_mon_ips` keys map -one to one but have different names because each element of the -`CephExternalMultiConfig` list should contain a map of keys and values -directly supported by ceph-ansible. See `ceph-ansible/group_vars`_ for -an example of all possible keys. - -The following parameters are the minimum necessary to configure an -overcloud to connect to an external ceph cluster: - -* cluster: The name of the configuration file and key name prefix. - This name defaults to "ceph" so if this parameter is not overridden - there will be a name collision. It is not relevant if the - external ceph cluster's name is already "ceph". 
For client role - configuration this parameter is only used for setting a unique name - for the configuration and key files. -* fsid: The FSID of the external ceph cluster. -* external_cluster_mon_ips: The list of monitor IPs of the external - ceph cluster as a single string where each IP is comma delimited. - If the external Ceph cluster is using both the v1 and v2 MSGR - protocol this value may look like '[v2:10.0.0.1:3300, - v1:10.0.0.1:6789], [v2:10.0.0.2:3300, v1:10.0.0.2:6789], - [v2:10.0.0.3:3300, v1:10.0.0.3:6789]'. -* dashboard_enabled: Always set this value to false when using - `CephExternalMultiConfig`. It ensures that the Ceph Dashboard is not - installed. It is not supported to use ceph-ansible dashboard roles - to communicate with an external Ceph cluster so not passing this - parameter with a value of false within `CephExternalMultiConfig` - will result in a failed deployment because the default value of true - will be used. -* keys: This is a list of maps where each map defines CephX keys which - OpenStack clients will use to connect to an external Ceph cluster. - As stated in the previous section, the pools and the CephX user must - be created on the external Ceph cluster before deploying the - overcloud. The format of each map is the same as found in - ceph-ansible. Thus, if the external Ceph cluster was deployed by - ceph-ansible, then the deployer of that cluster could share that map - with the TripleO deployer so that it could be used as a list item of - `CephExternalMultiConfig`. Similarly, the `CephExtraKeys` parameter, - described in the :doc:`deployed_ceph` documentation, has the same - syntax. - -Deploying Manila with an External CephFS Service ------------------------------------------------- - -If choosing to configure Manila with Ganesha as NFS gateway for CephFS, -with an external Ceph cluster, then add `environments/manila-cephfsganesha-config.yaml` -to the list of environment files used to deploy the overcloud and also -configure the following parameters:: - - parameter_defaults: - ManilaCephFSDataPoolName: manila_data - ManilaCephFSMetadataPoolName: manila_metadata - ManilaCephFSCephFSAuthId: 'manila' - CephManilaClientKey: 'AQDLOh1VgEp6FRAAFzT7Zw+Y9V6JJExQAsRnRQ==' - -Which represent the data and metadata pools in use by the MDS for -the CephFS filesystems, the CephX keyring to use and its secret. - -Like for the other services, the pools and keyring must be created on the -external Ceph cluster before attempting the deployment of the overcloud. -The keyring should look like the following:: - - ceph auth add client.manila mgr "allow *" mon "allow r, allow command 'auth del', allow command 'auth caps', allow command 'auth get', allow command 'auth get-or-create'" mds "allow *" osd "allow rw" - -Compatibility Options ---------------------- - -As of the Train release TripleO will install Ceph Nautilus. If the -external Ceph cluster uses the Hammer release instead, pass the -following parameters to enable backward compatibility features:: - - parameter_defaults: - ExtraConfig: - ceph::profile::params::rbd_default_features: '1' - -Deployment of an Overcloud with External Ceph ---------------------------------------------- - -Finally add the above environment files to the deploy commandline. 
For -Wallaby and newer use:: - - openstack overcloud deploy --templates -e /usr/share/openstack-tripleo-heat-templates/environments/external-ceph.yaml -e ~/my-additional-ceph-settings.yaml - -For Train use:: - - openstack overcloud deploy --templates -e /usr/share/openstack-tripleo-heat-templates/environments/ceph-ansible/ceph-ansible-external.yaml -e ~/my-additional-ceph-settings.yaml - -Standalone Ansible Roles for External Ceph ------------------------------------------- - -To configure an overcloud to use an external Ceph cluster, a directory -(e.g. /etc/ceph) in the overcloud containers should be populated with -Ceph configuration files and overcloud services (e.g. Nova) should be -configured to use those files. Tripleo provides Ansible roles to do -this standalone without tripleo-heat-templates or config-download. - -Single Ceph Cluster -^^^^^^^^^^^^^^^^^^^ - -The `tripleo_ceph_client_files` Ansible role copies files from a -source directory (`tripleo_ceph_client_files_source`) on the host -where Ansible is run to a destination directory -(`tripleo_ceph_client_config_home`) on the overcloud nodes. -The user must create and populate the -`tripleo_ceph_client_files_source` directory with actual Ceph -configuration and cephx key files before running the role. For -example:: - - $ ls -l /home/stack/ceph_files/ - total 16 - -rw-r--r--. 1 stack stack 245 Nov 14 13:40 ceph.client.openstack.keyring - -rw-r--r--. 1 stack stack 173 Nov 14 13:40 ceph.conf - -If the above directory exists on the host where the `ansible-playbook` -command is run, then the `tripleo_ceph_client_files_source` parameter -should be set to `/home/stack/ceph_files/`. The optional parameter -`tripleo_ceph_client_config_home` defaults to -`/var/lib/tripleo-config/ceph` since OpenStack containers will bind -mount this directory to `/etc/ceph`. The `tripleo_nova_libvirt` -Ansible role will add a secret key to libvirt so that it uses the -cephx key put in place by the `tripleo_ceph_client_files` role; it -does this if either `tripleo_nova_libvirt_enable_rbd_backend` or -`tripleo_cinder_enable_rbd_backend` are true. When these roles -are used to configure a compute node the following `group_vars` should -be set:: - - tripleo_ceph_client_files_source: /home/stack/ceph_files - tripleo_ceph_client_config_home: /var/lib/tripleo-config/ceph - tripleo_nova_libvirt_enable_rbd_backend: true - tripleo_cinder_enable_rbd_backend: true - -The `tripleo_ceph_client_files` role may then be included in a -playbook as follows in order to configure a standalone compute node to -use a single Ceph cluster:: - - - name: configure ceph client - import_role: - name: tripleo_ceph_client_files - -In order for Nova to use the Ceph cluster, the `libvirt` section of -the `nova.conf` file should be configured. The `tripleo_nova_compute` -role `tripleo_nova_compute_config_overrides` variable may be set as -follows in the inventory to set the `libvirt` values along with -others:: - - Compute: - vars: - tripleo_nova_compute_config_overrides: - libvirt: - images_rbd_ceph_conf: /etc/ceph/ceph.conf - images_rbd_glance_copy_poll_interval: '15' - images_rbd_glance_copy_timeout: '600' - images_rbd_glance_store_name: default_backend - images_rbd_pool: vms - images_type: rbd - rbd_secret_uuid: 604c9994-1d82-11ed-8ae5-5254003d6107 - rbd_user: openstack - -TripleO's convention is to set the `rbd_secret_uuid` to the FSID of -the Ceph cluster. The FSID should be in the ceph.conf file. 
The
-`tripleo_nova_libvirt` role will use `virsh secret-*` commands so that
-libvirt can retrieve the cephx secret using the FSID as a key. This
-can be confirmed after running Ansible with `podman exec
-nova_virtsecretd virsh secret-get-value $FSID`.
-
-The `tripleo_ceph_client_files` role only supports the _configure_
-aspect of the standalone tripleo-ansible roles because it just
-configures one or more pairs of files on its target nodes. Thus, the
-`import_role` example above could be placed in a playbook file like
-`deploy-tripleo-openstack-configure.yml`, before the roles for
-`tripleo_nova_libvirt` and `tripleo_nova_compute` are imported.
-
-Multiple Ceph Clusters
-^^^^^^^^^^^^^^^^^^^^^^
-
-To configure more than one Ceph backend, include the
-`tripleo_ceph_client_files` role from the single cluster example
-above. Populate the `tripleo_ceph_client_files_source` directory with
-all of the Ceph configuration and cephx key files. For example::
-
-    $ ls -l /home/stack/ceph_files/
-    total 16
-    -rw-r--r--. 1 stack stack 213 Nov 14 13:41 ceph2.client.openstack.keyring
-    -rw-r--r--. 1 stack stack 228 Nov 14 13:41 ceph2.conf
-    -rw-r--r--. 1 stack stack 245 Nov 14 13:40 ceph.client.openstack.keyring
-    -rw-r--r--. 1 stack stack 173 Nov 14 13:40 ceph.conf
-
-For multiple Ceph clusters, the `tripleo_nova_libvirt` role expects a
-`tripleo_cinder_rbd_multi_config` Ansible variable like this::
-
-    tripleo_cinder_rbd_multi_config:
-      ceph2:
-        CephClusterName: ceph2
-        CephClientUserName: openstack
-
-It is not necessary to put the default Ceph cluster (named "ceph" from
-the single cluster example) in `tripleo_cinder_rbd_multi_config`. Only
-the additional clusters (e.g. ceph2) need to be included; name their
-keys so they match the `CephClusterName`. In the above example, the
-`CephClusterName` value "ceph2" matches the "ceph2.conf" and
-"ceph2.client.openstack.keyring". Also, the `CephClientUserName` value
-"openstack" matches "ceph2.client.openstack.keyring". The
-`tripleo_nova_libvirt` Ansible role uses the
-`tripleo_cinder_rbd_multi_config` map as a guide to know which libvirt
-secrets to create and which cephx keys to make available within the
-Nova containers.
-
-If the combined examples above from the single cluster section for
-the primary cluster "ceph" and this section for the secondary Ceph
-cluster "ceph2" are used, then the directory defined by
-`tripleo_ceph_client_config_home` will be populated with four files:
-`ceph.conf`, `ceph2.conf`, `ceph.client.openstack.keyring` and
-`ceph2.client.openstack.keyring`, which will be mounted into the Nova
-containers, and two libvirt secrets will be created, one for each
-cephx key. To add more Ceph clusters, extend the list
-`tripleo_cinder_rbd_multi_config` and populate
-`tripleo_ceph_client_files_source` with additional files.
-
-.. _`ceph-ansible/group_vars`: https://github.com/ceph/ceph-ansible/tree/master/group_vars
diff --git a/deploy-guide/source/features/cinder_custom_backend.rst b/deploy-guide/source/features/cinder_custom_backend.rst
deleted file mode 100644
index 04b79fa6..00000000
--- a/deploy-guide/source/features/cinder_custom_backend.rst
+++ /dev/null
@@ -1,69 +0,0 @@
-Configuring Cinder with a Custom Unmanaged Backend
-==================================================
-
-This guide assumes that your undercloud is already installed and ready to
-deploy an overcloud.
- -Adding a custom backend to Cinder ---------------------------------- - -It is possible to provide the config settings to add an arbitrary and -unmanaged backend to Cinder at deployment time via Heat environment files. - -Each backend is represented in `cinder.conf` with a ``stanza`` and a -reference to it from the `enabled_backends` key. The keys valid in the -backend ``stanza`` are dependent on the actual backend driver and -unknown to Cinder. - -For example, to provision in Cinder two additional backends one could -create a Heat environment file with the following contents:: - - parameter_defaults: - ExtraConfig: - cinder::config::cinder_config: - netapp1/volume_driver: - value: cinder.volume.drivers.netapp.common.NetAppDriver - netapp1/netapp_storage_family: - value: ontap_7mode - netapp1/netapp_storage_protocol: - value: iscsi - netapp1/netapp_server_hostname: - value: 1.1.1.1 - netapp1/netapp_server_port: - value: 80 - netapp1/netapp_login: - value: root - netapp1/netapp_password: - value: 123456 - netapp1/volume_backend_name: - value: netapp_1 - netapp2/volume_driver: - value: cinder.volume.drivers.netapp.common.NetAppDriver - netapp2/netapp_storage_family: - value: ontap_7mode - netapp2/netapp_storage_protocol: - value: iscsi - netapp2/netapp_server_hostname: - value: 2.2.2.2 - netapp2/netapp_server_port: - value: 80 - netapp2/netapp_login: - value: root - netapp2/netapp_password: - value: 123456 - netapp2/volume_backend_name: - value: netapp_2 - cinder_user_enabled_backends: ['netapp1','netapp2'] - -This will not interfere with the deployment of the other backends managed by -TripleO, like Ceph or NFS and will just add these two to the list of the -backends enabled in Cinder. - -Remember to add such an environment file to the deploy commandline:: - - openstack overcloud deploy [other overcloud deploy options] -e ~/my-backends.yaml - -.. note:: - - The :doc:`extra_config` doc has more details on the usage of the different - ExtraConfig interfaces. diff --git a/deploy-guide/source/features/cinder_netapp.rst b/deploy-guide/source/features/cinder_netapp.rst deleted file mode 100644 index c0bb3d11..00000000 --- a/deploy-guide/source/features/cinder_netapp.rst +++ /dev/null @@ -1,60 +0,0 @@ -Configuring Cinder with a NetApp Backend -======================================== - -This guide assumes that your undercloud is already installed and ready to -deploy an overcloud. - -Deploying the Overcloud ------------------------ -.. note:: - - The :doc:`../deployment/template_deploy` doc has a more detailed explanation of the - following steps. - -#. Copy the NetApp configuration file to your home directory:: - - sudo cp /usr/share/openstack-tripleo-heat-templates/environments/cinder-netapp-config.yaml ~ - -#. Edit the permissions (user is typically ``stack``):: - - sudo chown $USER ~/cinder-netapp-config.yaml - sudo chmod 755 ~/cinder-netapp-config.yaml - - -#. Edit the parameters in this file to fit your requirements. Ensure that the following line is changed:: - - OS::TripleO::ControllerExtraConfigPre: /usr/share/openstack-tripleo-heat-templates/puppet/extraconfig/pre_deploy/controller/cinder-netapp.yaml - - -#. Continue following the TripleO instructions for deploying an overcloud. - Before entering the command to deploy the overcloud, add the environment - file that you just configured as an argument:: - - openstack overcloud deploy --templates -e ~/cinder-netapp-config.yaml - -#. Wait for the completion of the overcloud deployment process. 
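Once the deployment completes, it can be useful to confirm that the cinder-volume service for the new backend is up before creating any volumes. The check below is only a sketch; the exact host and backend names depend on your configuration::

    source overcloudrc
    openstack volume service list

The NetApp backend should appear as a ``cinder-volume`` entry whose host typically includes the backend name (``tripleo_netapp`` in this guide), with its state reported as ``up``.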
- - -Creating a NetApp Volume ------------------------- - -.. note:: - - The following steps will refer to running commands as an admin user or a - tenant user. Sourcing the ``overcloudrc`` file will authenticate you as - the admin user. You can then create a tenant user and use environment - files to switch between them. - -#. Create a new volume type that maps to the new NetApp backend [admin]:: - - cinder type-create [name] - cinder type-key [name] set volume_backend_name=tripleo_netapp - -#. Create the volume [admin]:: - - cinder create --volume-type [type name] [size of volume] - -#. Attach the volume to a server:: - - nova volume-attach - diff --git a/deploy-guide/source/features/composable_services.rst b/deploy-guide/source/features/composable_services.rst deleted file mode 100644 index 13296376..00000000 --- a/deploy-guide/source/features/composable_services.rst +++ /dev/null @@ -1,58 +0,0 @@ -.. _composable_services: - -Deploying with Composable Services -================================== - -TripleO offers the option of deploying with a user-defined list of services -per role (where "role" means group of nodes, e.g "Controller", and "service" -refers to the individual services or configurations e.g "Nova API"). - - -Deploying with custom service lists ------------------------------------ - -Each role to be used in the deployment is defined in a `roles_data.yaml` file. -There is a sample file in `/usr/share/openstack-tripleo-heat-templates`, or the -tripleo-heat-templates_ git repository. Additional example roles are located in -the `/usr/share/openstack-tripleo-heat-templates/roles` directory and can be used -to create a custom `roles_data.yaml` file. See :doc:`custom_roles` for additional -usage details. - -The data in `roles_data.yaml` is used to set the defaults for per-role parameters -e.g `ControllerServices`. These defaults can be overridden via environment -files, e.g:: - - cat > keystone_only_params.yaml << EOF - - parameter_defaults: - ControllerServices: - - OS::TripleO::Services::Keystone - - OS::TripleO::Services::RabbitMQ - - OS::TripleO::Services::HAproxy - - OS::TripleO::Services::MySQL - - OS::TripleO::Services::Keepalived - ComputeCount: 0 - EOF - -The example above overrides the default list of services, and instead deploys -Keystone and the services it requires. It also sets the ComputeCount to zero -to enable a minimal "keystone only" deployment on a single node. - -You can then pass the environment file on deployment as follows:: - - openstack overcloud deploy -e keystone_only_params.yaml - -The same approach can be used for any role. - -.. warning:: - While considerable flexibility is available regarding service placement with - these interfaces, the flexible placement of pacemaker managed services is only - available since the Ocata release. - -.. warning:: - In general moving control-plane services to the Compute role is not - recommended, as the compute nodes require a different upgrade lifecycle - and thus control-plane services on this role may present problems during - major upgrades between releases. - -.. 
_tripleo-heat-templates: https://opendev.org/openstack/tripleo-heat-templates
diff --git a/deploy-guide/source/features/compute_nvdimm.rst b/deploy-guide/source/features/compute_nvdimm.rst
deleted file mode 100644
index 8c277817..00000000
--- a/deploy-guide/source/features/compute_nvdimm.rst
+++ /dev/null
@@ -1,83 +0,0 @@
-Manage Virtual Persistent Memory (vPMEM)
-=====================================================
-Virtual Persistent Memory (vPMEM) is a Nova feature that allows exposing
-Persistent Memory (PMEM) namespaces to guests using the libvirt compute driver.
-This guide shows how the vPMEM feature is supported in the TripleO deployment
-framework. For an in-depth description of Nova's vPMEM feature, check the Nova
-documentation: `Attaching virtual persistent memory to guests
-`_
-
-.. warning::
-
-   The vPMEM feature is only available in Train (20.0.0) or later releases.
-
-.. contents::
-   :depth: 3
-   :backlinks: none
-
-Prerequisite
-------------
-Operators need to properly configure the PMEM hardware before deploying an
-Overcloud with vPMEM support. An example of such hardware is Intel Optane DC
-Persistent Memory. Intel provides a tool (`ipmctl `_)
-to configure the PMEM hardware.
-
-Operators need to configure the hardware in such a way that TripleO can create
-`PMEM namespaces `_ in **devdax** mode.
-TripleO currently supports one backend NVDIMM region, so in the case of multiple
-NVDIMMs an Interleaved Region needs to be configured.
-
-TripleO vPMEM parameters
-------------------------
-
-The following parameters are used within TripleO to configure vPMEM:
-
-.. code::
-
-    NovaPMEMMappings:
-      type: string
-      description: >
-        PMEM namespace mappings as backend for vPMEM feature. This parameter
-        sets Nova's `pmem_namespaces` configuration options. PMEM namespaces
-        need to be created manually or in conjunction with the `NovaPMEMNamespaces`
-        parameter.
-        Requires format: $LABEL:$NSNAME[|$NSNAME][,$LABEL:$NSNAME[|$NSNAME]].
-      default: ""
-      tags:
-        - role_specific
-    NovaPMEMNamespaces:
-      type: string
-      description: >
-        Creates PMEM namespaces on the host server using the `ndctl` tool
-        through Ansible.
-        Requires format: $SIZE:$NSNAME[,$SIZE:$NSNAME...].
-        $SIZE supports the suffixes "k" or "K" for KiB, "m" or "M" for MiB, "g"
-        or "G" for GiB and "t" or "T" for TiB.
-        NOTE: This requires properly configured NVDIMM regions and enough space
-        for requested namespaces.
-      default: ""
-      tags:
-        - role_specific
-
-Both parameters are role specific and should be used with a custom role. Please
-check the documentation on how to use `Role-Specific Parameters `_.
-
-Examples
---------
-.. code::
-
-    parameter_defaults:
-      ComputePMEMParameters:
-        NovaPMEMMappings: "6GB:ns0|ns1|ns2,LARGE:ns3"
-        NovaPMEMNamespaces: "6G:ns0,6G:ns1,6G:ns2,100G:ns3"
-
-
-The example above will perform the following steps:
-
-* ensure the **ndctl** tool is installed on hosts with the **ComputePMEM** role
-* create PMEM namespaces as specified in the **NovaPMEMNamespaces** parameter:
-
-  - ns0, ns1, ns2 with size 6GiB
-  - ns3 with size 100GiB
-
-* set the Nova parameter **pmem_namespaces** in nova.conf to map the created
-  namespaces to vPMEM as specified in **NovaPMEMMappings**.
-
-In this example the label '6GB' will map to one of the ns0, ns1 or ns2 namespaces
-and the label 'LARGE' will map to the ns3 namespace.
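To verify the result, you can log in to one of the **ComputePMEM** hosts after the deployment and list the namespaces with ``ndctl``. This is only a sketch; the namespace names correspond to the example parameters above::

    sudo ndctl list --namespaces

Each namespace requested via ``NovaPMEMNamespaces`` (ns0 through ns3 in this example) should be reported with ``"mode":"devdax"``.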
- -After deployment you need to configure flavors as described in documentation `Nova: Configure a flavor `_ diff --git a/deploy-guide/source/features/custom_networks.rst b/deploy-guide/source/features/custom_networks.rst deleted file mode 100644 index 27220832..00000000 --- a/deploy-guide/source/features/custom_networks.rst +++ /dev/null @@ -1,579 +0,0 @@ -.. _custom_networks: - -Deploying with Custom Networks -============================== - -TripleO offers the option of deploying with a user-defined list of networks, -where each network can be enabled (or not) for each role (group of servers) in -the deployment. - -Default networks ----------------- - -TripleO offers a default network topology when deploying with network isolation -enabled, and this is reflected in the default-network-isolation_ file in -tripleo-heat-templates_. - -.. admonition:: Victoria and prior releases - - In Victoria and prior releases the default network topology is reflected in - the network_data.yaml_ file in tripleo-heat-templates_. - -These default networks are as follows: - -* ``External`` - External network traffic (disabled by default for - Compute/Storage nodes) - -* ``InternalApi`` - Internal API traffic, most intra-service traffic uses this - network by default - -* ``Storage`` - Storage traffic - -* ``StorageMgmt`` - Storage management traffic (such as replication traffic - between storage nodes) - -* ``Tenant`` - Tenant networks for compute workloads running on the cloud - -Deploying with custom networks ------------------------------- - -Each network is defined in the ``network_data`` YAML file. There are sample -files in ``/usr/share/openstack-tripleo-heat-templates/network-data-samples``, -or the tripleo-heat-templates_ git repository which can be copied and modified -as needed. - -The ``network_data`` YAML file contains a list of networks, with definitions -like: - -.. code-block:: yaml - - - name: CustomNetwork - vip: false - name_lower: custom_network - subnets: - custom_network_subnet: - ip_subnet: 172.16.6.0/24 - allocation_pools: - - start: 172.16.6.4 - - end: 172.16.6.250 - gateway_ip: 172.16.6.1 - -.. admonition:: Victoria and prior releases - - Victoria and releases prior to it used a slightly different ``network_data`` - YAML. - - .. code-block:: yaml - - - name: CustomNetwork - vip: false - name_lower: custom_network - ip_subnet: '172.16.6.0/24' - allocation_pools: [{'start': '172.16.6.4', 'end': '172.16.6.250'}] - gateway_ip: '172.16.6.1' - -The data in the ``network_data`` YAML definition is used to create and update -the network and subnet API resources in Neutron on the undercloud. It is also -used to perform templating with jinja2_ such that arbitrary user-defined -networks may be added, and the default networks may be modified or removed. - -The steps to define your custom networks are: - -#. Copy one of the sample ``network_data`` YAML definitions provided by - tripleo-heat-templates_, for example:: - - cp /usr/share/openstack-tripleo-heat-templates/network-data-samples/default-network-isolation.yaml \ - custom_network_data.yaml - - - .. admonition:: Victoria and prior releases - - In Victoria and earlier releases the sample network data YAML was in a - different location. - - :: - - cp /usr/share/openstack-tripleo-heat-templates/network_data.yaml custom_network_data.yaml - -#. Modify the ``custom_network_data.yaml`` file as required. 
The network data - is a list of networks, where each network contains at least the - following items: - - :name: Name of the network (mandatory) - :vip: Enable creation of a virtual IP on this network - :subnets: Dictionary's, one or more subnet definition items keyed by the - subnet name. - - :subnet_name: Name of the subnet - - :ip_subnet: IP/CIDR, e.g. ``'10.0.0.0/24'`` - - :allocation_pools: IP range list, e.g. ``[{'start':'10.0.0.4', 'end':'10.0.0.250'}]`` - - :gateway_ip: Gateway for the network - - :vlan: Vlan ID for this network. (supported in Queens and later) - - See `Network data YAML options`_ for a list of all documented options for - the ``network_data`` YAML network definition. - - .. admonition:: Victoria and prior releases - - Victoria and earlier releases requires the first subnet definition **not** - to be in the *subnets* dictionary. - - :name: Name of the network (mandatory) - :vip: Enable creation of a virtual IP on this network - :vlan: Vlan ID for this network. (supported in Queens and later) - :ip_subnet: IP/CIDR, e.g. ``'10.0.0.0/24'`` - :allocation_pools: IP range list, e.g. ``[{'start':'10.0.0.4', 'end':'10.0.0.250'}]`` - :gateway_ip: Gateway for the network - - Other options are supported, see the documentation in the default - network_data.yaml_ for details. - - .. warning:: - Currently there is no validation of the network subnet and - allocation_pools, so care must be take to ensure these are consistent, - and do not conflict with any existing networks, otherwise your deployment - may fail or result in unexpected results. - -#. Copy one of the sample ``vip_data`` YAML definitions provided by - tripleo-heat-templates_, for example:: - - cp /usr/share/openstack-tripleo-heat-templates/network-data-samples/vip-data-default-network-isolation.yaml \ - custom_vip_data.yaml - - .. admonition:: Victoria and prior releases - - For Victoria and prior releases the Virtual IP resources are created as - part of the overcloud heat stack. This step is not valid for these - releases. - -#. Modify the ``custom_vip_data.yaml`` file as required. The Virtual IP data - is a list of Virtual IP address definitions, each containing at a minimum - the name of the network where the IP address should be allocated. - - See `Network Virtual IPs data YAML options`_ for a list of all documented - options for the ``vip_data`` YAML network Virtual IPs definition. - - .. admonition:: Victoria and prior releases - - For Victoria and prior releases the Virtual IP resources are created as - part of the overcloud heat stack. This step is not valid for these - releases. - -#. Copy network configuration templates, add new networks. - - Prior to Victoria, Heat templates were used to define nic configuration - templates. With the Victoria release, Ansible jinja2_ templates were - introduced, and replaced the heat templates. - - The nic configuration examples in tripleo-heat-templates_ was ported to - Ansible jinja2_ templates located in the tripleo_network_config role in - tripleo-ansible_. - - If one of the shipped examples match, use it! If not, be inspired by the - shipped examples and create a set of custom Ansible jinja2 templates. Please - refer to the :ref:`creating_custom_interface_templates` documentation page - which provide a detailed guide on how to create custom Ansible jinja2 - nic config templates. 
- - For example, copy a sample template to a custom location:: - - cp -r /usr/share/ansible/roles/tripleo_network_config/templates/single_nic_vlans custom-single-nic-vlans - - Modify the templates in custom-single-nic-vlans to match your needs. - - .. admonition:: Ussuri and prior releases - - Prior to Queens, the nic config templates were not dynamically generated, - so it was necessary to copy those that were in use, and add parameters for - any additional networks, for example:: - - cp -r /usr/share/openstack-tripleo-heat-templates/network/config/single-nic-vlans custom-single-nic-vlans - - Each file in ``single-nic-vlans`` will require updating to add - parameters for each custom network. Copy those that exist for the - default networks, and rename to match the *name* field in - ``custom_network_data.yaml``. - - .. note:: - Since Queens, the NIC config templates are dynamically - generated so this step is only necessary when creating custom NIC - config templates, not when just adding a custom network. - - -#. Set your environment overrides to enable your nic config templates. - - Create or update an existing environment file and set the parameter values - to enable your custom nic config templates, for example create a file - ``custom-net-single-nic-with-vlans.yaml`` with these parameter settings:: - - parameter_defaults: - ControllerNetworkConfigTemplate: '/path/to/custom-single-nic-vlans/single_nic_vlans.j2' - CephStorageNetworkConfigTemplate: '/path/to/custom-single-nic-vlans/single_nic_vlans_storage.j2' - ComputeNetworkConfigTemplate: '/path/to/custom-single-nic-vlans/single_nic_vlans.j2' - -#. Create the networks on the undercloud and generate the - ``networks-deployed-environment.yaml`` which will be used as an environment - file when deploying the overcloud. - - :: - - openstack overcloud network provision \ - --output networks-deployed-environment.yaml \ - custom_network_data.yaml - - .. admonition:: Victoria and prior releases - - For Victoria and earlier releases *skip* this step. - - There was no command ``openstack overcloud network provision`` in these - releases. Network resources was created as part of the overcloud heat - stack. - - .. note:: This step is optional when using the ``--baremetal-deployment`` - and ``--vip-data`` options with the ``overcloud deploy`` command. - The deploy command will detect the new format of the network data - YAML definition, run the workflow to create the networks and - include the ``networks-deployed-environment.yaml`` automatically. - -#. Create the overcloud network Virtual IPs and generate the - ``vip-deployed-environment.yaml`` which will be used as an environment file - when deploying the overcloud. - - .. code-block:: bash - - $ openstack overcloud network vip provision \ - --output ~/templates/vip-deployed-environment.yaml \ - ~/templates/custom_vip_data.yaml - - .. note:: This step is optional if using the ``--vip-data`` options with the - ``overcloud deploy`` command. In that case workflow to create the - Virtual IPs and including the environment is automated. - -#. To deploy you pass the ``custom_network_data.yaml`` file via the ``-n`` - option to the overcloud deploy, for example: - - .. code-block:: bash - - openstack overcloud deploy --templates \ - -n custom_network_data.yaml \ - -e baremetal-deployed-environment.yaml \ - -e networks-deployed-environment.yaml \ - -e vip-deployed-environment.yaml \ - -e custom-net-single-nic-with-vlans.yaml - - .. 
note:: baremetal-deployed-environment.yaml refers to baremetal which - **has already been** deployed as described - :doc:`../provisioning/baremetal_provision` - - Alternatively include the network, Virtual IPs and baremetal provisioning - in the ``overcloud deploy`` command to do it all in one: - - .. code-block:: bash - - openstack overcloud deploy --templates \ - --networks-file custom_network_data.yaml \ - --vip-file custom_vip_data.yaml \ - --baremetal-deployment baremetal_deployment.yaml \ - --network-config \ - -e custom-net-single-nic-with-vlans.yaml - - .. note:: ``baremetal_deployment.yaml`` refers to baremetal which **will - be** deployed by the above command. Please refer to - :doc:`../provisioning/baremetal_provision` for a reference on the - ``baremetal_deployment.yaml`` used in the above example. - - - .. admonition:: Victoria and prior releases - - :: - - openstack overcloud deploy --templates \ - -n custom_network_data.yaml \ - -e custom-net-single-nic-with-vlans.yaml - - .. note:: - It is also possible to copy the entire tripleo-heat-templates tree, and - modify the ``network_data.yaml`` file in place, then deploy via - ``--templates ``. - - -.. _network_definition_opts: - -Network data YAML options -------------------------- - -:name: - Name of the network - - type: *string* - -:name_lower: - *(optional)* Lower case name of the network - - type: *string* - - default: *name.lower()* - -:dns_domain: - *(optional)* Dns domain name for the network - - type: *string* - -:mtu: - *(optional)* Set the maximum transmission unit (MTU) that is guaranteed to - pass through the data path of the segments in the network. - - type: *number* - - default: 1600 - -:service_net_map_replace: - *(optional)* if name_lower is set to a custom name this should be set to - original default (optional). This field is only necessary when changing the - default network names, not when adding a new custom network. - - type: *string* - - .. warning:: Please avoid using this option, the correct solution when - changing a *name_lower* of one of the default networks is to - also update the ``ServiceNetMap`` parameter to use the same - custom *name_lower*. - -:ipv6: - *(optional)* - - type: *boolean* - - default: *false* - -:vip: - *(optional)* Enable creation of a virtual IP on this network - - type: *boolean* - - default: *false* - -:subnets: - A map of subnets for the network. The collection should contain keys which - define the subnet name. The value for each item is a subnet definition. - - Example: - - .. code-block:: yaml - - subnets: - subnet_name_a: - ip_subnet: 192.0.2.0/24 - allocation_pools: - - start: 192.0.2.50 - end: 192.0.2.99 - gateway_ip: 192.0.2.1 - vlan: 102 - subnet_name_b: - ip_subnet: 198.51.100.0/24 - allocation_pools: - - start: 198.51.100.50 - end: 198.51.100.99 - gateway_ip: 198.51.100.1 - vlan: 101 - - See `Options for network data YAML subnet definitions`_ for a list of all - documented sub-options for the subnet definitions. - - type: *dictionary* - - -Options for network data YAML subnet definitions -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -:ip_subnet: - IPv4 CIDR block notation for this subnet. For example: ``192.0.2.0/24`` - - type: *string* - - .. note:: Optional if ``ipv6_subnet`` is specified. - -:ipv6_subnet: - IPv6 CIDR block notation for this subnet. For example: - ``2001:db8:fd00:1000::/64`` - - type: *string* - - .. note:: Optional if ``ip_subnet`` is specified. 
-
-:gateway_ip:
-  *(optional)* The gateway IPv4 address
-
-  type: *string*
-
-:gateway_ipv6:
-  *(optional)* The gateway IPv6 address
-
-  type: *string*
-
-:allocation_pools:
-  *(optional)* The start and end addresses for the subnet's IPv4 allocation
-  pools.
-
-  type: *list*
-
-  elements: *dictionary*
-
-  :suboptions:
-
-    :start: Start address for the allocation pool.
-
-      type: *string*
-
-    :end: End address for the allocation pool.
-
-      type: *string*
-
-  Example:
-
-  .. code-block:: yaml
-
-    allocation_pools:
-      - start: 192.0.2.50
-        end: 192.0.2.99
-      - start: 192.0.2.150
-        end: 192.0.2.199
-
-:ipv6_allocation_pools:
-  *(optional)* The start and end addresses for the subnet's IPv6 allocation
-  pools.
-
-  type: *list*
-
-  elements: *dictionary*
-
-  :suboptions:
-
-    :start: Start address for the allocation pool.
-
-      type: *string*
-
-    :end: End address for the allocation pool.
-
-      type: *string*
-
-  Example:
-
-  .. code-block:: yaml
-
-    ipv6_allocation_pools:
-      - start: 2001:db8:fd00:1000:100::1
-        end: 2001:db8:fd00:1000:199::1
-      - start: 2001:db8:fd00:1000:300::1
-        end: 2001:db8:fd00:1000:399::1
-
-:routes:
-  *(optional)* List of networks that should be routed via the network gateway.
-  A single /16 supernet route could be used for 256 smaller /24 subnets.
-
-  type: *list*
-
-  elements: *dictionary*
-
-  :suboptions:
-
-    :destination: Destination network,
-      for example: ``198.51.100.0/24``
-
-      type: *string*
-
-    :nexthop: IP address of the router to use for the destination network,
-      for example: ``192.0.2.1``
-
-      type: *string*
-
-  Example:
-
-  .. code-block:: yaml
-
-    routes:
-      - destination: 198.51.100.0/24
-        nexthop: 192.0.2.1
-      - destination: 203.0.113.0/24
-        nexthop: 192.0.2.1
-
-:routes_ipv6:
-  *(optional)* List of IPv6 networks that should be routed via the network
-  gateway.
-
-  type: *list*
-
-  elements: *dictionary*
-
-  :suboptions:
-
-    :destination: Destination network,
-      for example: ``2001:db8:fd00:2000::/64``
-
-      type: *string*
-
-    :nexthop: IP address of the router to use for the destination network,
-      for example: ``2001:db8:fd00:1000::1``
-
-      type: *string*
-
-  Example:
-
-  .. code-block:: yaml
-
-    routes_ipv6:
-      - destination: 2001:db8:fd00:2000::/64
-        nexthop: 2001:db8:fd00:1000:100::1
-      - destination: 2001:db8:fd00:3000::/64
-        nexthop: 2001:db8:fd00:1000:100::1
-
-:vlan:
-  *(optional)* VLAN ID for the network
-
-  type: *number*
-
-
-.. _virtual_ips_definition_opts:
-
-Network Virtual IPs data YAML options
--------------------------------------
-
-:network:
-  Neutron network name
-
-  type: *string*
-
-:ip_address:
-  *(optional)* IP address, a pre-defined fixed IP address.
-
-  type: *string*
-
-:subnet:
-  *(optional)* Neutron subnet name, used to specify the subnet to use when
-  creating the Virtual IP neutron port.
-
-  This is required for deployments using routed networks, to ensure the Virtual
-  IP is allocated on the subnet where controller nodes are attached.
-
-  type: *string*
-
-:dns_name:
-  *(optional)* DNS name, the hostname part of the FQDN (Fully Qualified Domain
-  Name)
-
-  type: *string*
-
-  default: overcloud
-
-:name:
-  *(optional)* Virtual IP name
-
-  type: *string*
-
-  default: $network_name_virtual_ip
-
-.. _tripleo-heat-templates: https://opendev.org/openstack/tripleo-heat-templates
-.. _default-network-isolation: https://opendev.org/openstack/tripleo-heat-templates/src/branch/master/network-data-samples/default-network-isolation.yaml
-.. _network_data.yaml: https://opendev.org/openstack/tripleo-heat-templates/src/branch/master/network_data.yaml
-.. _jinja2: http://jinja.pocoo.org/docs/dev/
-..
_tripleo-ansible: https://opendev.org/openstack/tripleo-ansible/src/branch/master/tripleo_ansible/roles/tripleo_network_config/templates diff --git a/deploy-guide/source/features/custom_roles.rst b/deploy-guide/source/features/custom_roles.rst deleted file mode 100644 index ee406436..00000000 --- a/deploy-guide/source/features/custom_roles.rst +++ /dev/null @@ -1,141 +0,0 @@ -.. _custom_roles: - -Deploying with Custom Roles -=========================== - -TripleO offers the option of deploying with a user-defined list of roles, -each running a user defined list of services (where "role" means group of -nodes, e.g "Controller", and "service" refers to the individual services or -configurations e.g "Nova API"). - -See :doc:`composable_services` if you only wish to modify the default list of -deployed services, or see below if you wish to modify the deployed roles. - - -Provided example roles ----------------------- - -TripleO offers examples roles provided in `openstack-tripleo-heat-templates`. -These roles can be listed using the `tripleoclient` by running:: - - openstack overcloud role list - -With these provided roles, the user deploying the overcloud can generate a -`roles_data.yaml` file that contains the roles they would like to use for the -overcloud nodes. Additionally the user can manage their personal custom roles -in a similar manner by storing the individual files in a directory and using -the `tripleoclient` to generate their `roles_data.yaml`. For example, a user -can execute the following to create a `roles_data.yaml` containing only the -`Controller` and `Compute` roles:: - - openstack overcloud roles generate -o ~/roles_data.yaml Controller Compute - -These provided roles can be generated with a different `name` in the -`roles_data.yaml` by using a format like, `Compute:ComputeHardwareA`, which -will add the role `Compute` to `roles_data.yaml` by modifying the `name` of -the role to `ComputeHardwareA`. This helps in associating nodes with a specific -hardware group to a role and target parameters specific to this hardware -group. The example command below generates a `role_data.yaml` with two Compute -roles which can be addressed to specific hardware groups.:: - - openstack overcloud roles generate -o ~/roles_data.yaml Controller \ - Compute:ComputeHardwareA Compute:ComputeHardwareB - - -Deploying with custom roles ---------------------------- - -Each role is defined in the `roles_data.yaml` file. There is a sample file in -`/usr/share/openstack-tripleo-heat-templates`, or the tripleo-heat-templates_ git -repository. - -The data in `roles_data.yaml` is used to perform templating with jinja2_ such -that arbitrary user-defined roles may be added, and the default roles may -be modified or removed. - -The steps to define your custom roles configuration are: - -1. Copy the default roles provided by `tripleo-heat-templates`:: - - mkdir ~/roles - cp /usr/share/openstack-tripleo-heat-templates/roles/* ~/roles - -2. Create a new role file with your custom role. - -Additional details about the format for the roles file can be found in the -`README.rst `_ -in the roles/ directory from `tripleo-heat-templates`. The filename should -match the name of the role. For example if adding a new role named `Galera`, -the role file name should be `Galera.yaml`. 
The file should at least contain -the following items: - -* name: Name of the role e.g "CustomController", mandatory -* ServicesDefault: List of services, optional, defaults to an empty list - See the default roles_data.yaml or overcloud-resource-registry-puppet.j2.yaml - for the list of supported services. Both files can be found in the top - tripleo-heat-templates folder - -Additional items like the ones below should be included as well: - -* CountDefault: Default number of nodes, defaults to zero -* HostnameFormatDefault: Format string for hostname, optional -* Description: A few sentences describing the role and information - pertaining to the usage of the role. - -The role file format is a basic yaml structure. The expectation is that there -is a single role per file. See the roles `README.rst` for additional details. For -example the following role might be used to deploy a pacemaker managed galera -cluster:: - - - name: Galera - HostnameFormatDefault: '%stackname%-galera-%index%' - ServicesDefault: - - OS::TripleO::Services::CACerts - - OS::TripleO::Services::Timezone - - OS::TripleO::Services::Timesync - - OS::TripleO::Services::Snmp - - OS::TripleO::Services::Kernel - - OS::TripleO::Services::Pacemaker - - OS::TripleO::Services::MySQL - - OS::TripleO::Services::TripleoPackages - - OS::TripleO::Services::TripleoFirewall - - OS::TripleO::Services::SensuClient - - OS::TripleO::Services::FluentdClient - -.. note:: - In the example above, if you wanted to deploy the Galera role on specific nodes - you would either use predictable placement :doc:`../provisioning/node_placement` or add a custom - parameter called OvercloudGaleraFlavor:: - - - parameter_defaults: - OvercloudGaleraFlavor: oooq_galera - -.. warning:: - When scaling your deployment out, you need as well set the role counts in the - "parameter_defaults" section. The ``--control-scale`` and ``--compute-scale`` - CLI args are hardcoded to the "Control" and "Compute" role names, so they're in - fact ignored when using custom roles. - -3. Create a `roles_data.yaml` file that contains the custom role in addition - to the other roles that will be deployed. For example:: - - openstack overcloud roles generate --roles-path ~/roles -o ~/my_roles_data.yaml Controller Compute Galera - -4. Pass the modified roles_data on deployment as follows:: - - openstack overcloud deploy --templates -r ~/my_roles_data.yaml - -.. note:: - It is also possible to copy the entire tripleo-heat-templates tree, and modify - the roles_data.yaml file in place, then deploy via ``--templates `` - -.. warning:: - Note that in your custom roles you may not use any already predefined name - So in practice you may not override the following roles: Controller, Compute, - BlockStorage, SwiftStorage and CephStorage. You need to use different names - instead. - - -.. _tripleo-heat-templates: https://opendev.org/openstack/tripleo-heat-templates -.. _jinja2: http://jinja.pocoo.org/docs/dev/ diff --git a/deploy-guide/source/features/deploy_cellv2.rst b/deploy-guide/source/features/deploy_cellv2.rst deleted file mode 100644 index fd0e1cc9..00000000 --- a/deploy-guide/source/features/deploy_cellv2.rst +++ /dev/null @@ -1,30 +0,0 @@ -Deploy an additional nova cell v2 -================================= - -.. warning:: - Multi cell support is only supported in Stein and later versions. - -The different sections in this guide assume that you are ready to deploy a new -overcloud, or already have installed an overcloud (min Stein release). - -.. 
note:: - - Starting with CentOS 8 and the TripleO Stein release, podman is the CONTAINERCLI - to be used in the following steps. - -The minimum requirement for having multiple cells is to have a central OpenStack -controller cluster running all controller services. Additional cells will -have cell controllers running the cell DB, cell MQ and a nova cell conductor -service. In addition there are 1..n compute nodes. The central nova conductor -service acts as a super conductor of the whole environment. - -For more details on the cells v2 layout check `Cells Layout (v2) -`_ - -.. toctree:: - - deploy_cellv2_basic.rst - deploy_cellv2_advanced.rst - deploy_cellv2_routed.rst - deploy_cellv2_additional.rst - deploy_cellv2_manage_cell.rst diff --git a/deploy-guide/source/features/deploy_cellv2_additional.rst b/deploy-guide/source/features/deploy_cellv2_additional.rst deleted file mode 100644 index 0d655d14..00000000 --- a/deploy-guide/source/features/deploy_cellv2_additional.rst +++ /dev/null @@ -1,155 +0,0 @@ -Additional cell considerations and features -=========================================== - -.. warning:: - Multi cell support is only supported in Stein or later versions. - -.. contents:: - :depth: 3 - :backlinks: none - -.. _cell_availability_zone: - -Availability Zones (AZ) ------------------------ -A nova AZ must be configured for each cell to make sure instances stay in the -cell when performing migration and to be able to target a cell when an instance -gets created. The central cell must also be configured as a specific AZs -(or multiple AZs) rather than the default. - -Configuring AZs for Nova (compute) -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -It's also possible to configure the AZ for a compute node by adding it to a -host aggregate after the deployment is completed. The following commands show -creating a host aggregate, an associated AZ, and adding compute nodes to a -`cell-1` AZ: - -.. code-block:: bash - - source overcloudrc - openstack aggregate create cell1 --zone cell1 - openstack aggregate add host cell1 hostA - openstack aggregate add host cell1 hostB - -.. note:: - - Right now we can not use `OS::TripleO::Services::NovaAZConfig` to auto - create the AZ during the deployment as at this stage the initial cell - creation is not complete. Further work is needed to fully automate the - post cell creation steps before `OS::TripleO::Services::NovaAZConfig` - can be used. - - -Routed networks ---------------- - -A routed spine and leaf networking layout can be used to deploy the additional -cell nodes in a distributed nature. Not all nodes need to be co-located at the -same physical location or datacenter. See :ref:`routed_spine_leaf_network` for -more details. - -Reusing networks from an already deployed stack ------------------------------------------------ -When deploying separate stacks it may be necessary to reuse networks, subnets, -and VIP resources between stacks if desired. Only a single Heat stack can own a -resource and be responsible for its creation and deletion, however the -resources can be reused in other stacks. - -Usually the internal api network in case of split cell controller and cell -compute stacks are shared. - -To reuse network related resources between stacks, the following parameters -have been added to the network definitions in the `network_data.yaml` file -format: - -.. 
code-block:: bash - - external_resource_network_id: Existing Network UUID - external_resource_subnet_id: Existing Subnet UUID - external_resource_segment_id: Existing Segment UUID - external_resource_vip_id: Existing VIP UUID - -These parameters can be set on each network definition in the -`network_data.yaml` file used for the deployment of the separate stack. - -Not all networks need to be reused or shared across stacks. The -`external_resource_*` parameters can be set for only the networks that are -meant to be shared, while the other networks can be newly created and managed. - -For example, to reuse the `internal_api` network from the cell controller stack -in the compute stack, run the following commands to show the UUIDs for the -related network resources: - -.. code-block:: bash - - openstack network show internal_api -c id -f value - openstack subnet show internal_api_subnet -c id -f value - openstack port show internal_api_virtual_ip -c id -f value - -Save the values shown in the output of the above commands and add them to the -network definition for the `internal_api` network in the `network_data.yaml` -file for the separate stack. - -In case the overcloud and the cell controller stack uses the same internal -api network there are two ports with the name `internal_api_virtual_ip`. -In this case it is required to identify the correct port and use the id -instead of the name in the `openstack port show` command. - -An example network definition would look like: - -.. code-block:: bash - - - name: InternalApi - external_resource_network_id: 93861871-7814-4dbc-9e6c-7f51496b43af - external_resource_subnet_id: c85c8670-51c1-4b17-a580-1cfb4344de27 - external_resource_vip_id: 8bb9d96f-72bf-4964-a05c-5d3fed203eb7 - name_lower: internal_api - vip: true - ip_subnet: '172.16.2.0/24' - allocation_pools: [{'start': '172.16.2.4', 'end': '172.16.2.250'}] - ipv6_subnet: 'fd00:fd00:fd00:2000::/64' - ipv6_allocation_pools: [{'start': 'fd00:fd00:fd00:2000::10', 'end': 'fd00:fd00:fd00:2000:ffff:ffff:ffff:fffe'}] - mtu: 1400 - -.. note:: - - When *not* sharing networks between stacks, each network defined in - `network_data.yaml` must have a unique name across all deployed stacks. - This requirement is necessary since regardless of the stack, all networks are - created in the same tenant in Neutron on the undercloud. - - For example, the network name `internal_api` can't be reused between - stacks, unless the intent is to share the network between the stacks. - The network would need to be given a different `name` and `name_lower` - property such as `InternalApiCompute0` and `internal_api_compute_0`. - -Configuring nova-metadata API per-cell --------------------------------------- - -.. note:: - Deploying nova-metadata API per-cell is only supported in Train - and later. - -.. note:: - - NovaLocalMetadataPerCell is only tested with ovn metadata agent to - automatically forward requests to the nova metadata api. - -It is possible to configure the nova-metadata API service local per-cell. -In this situation the cell controllers also host the nova-metadata API -service. The `NovaLocalMetadataPerCell` parameter, which defaults to -`false` need to be set to `true`. -Using nova-metadata API service per-cell can have better performance and -data isolation in a multi-cell deployment. Users should consider the use -of this configuration depending on how neutron is setup. If networks span -cells, you might need to run nova-metadata API service centrally. 
-If your networks are segmented along cell boundaries, then you can -run nova-metadata API service per cell. - -.. code-block:: yaml - - parameter_defaults: - NovaLocalMetadataPerCell: True - -See also information on running nova-metadata API per cell as explained -in the cells v2 layout section `Local per cell `_ diff --git a/deploy-guide/source/features/deploy_cellv2_advanced.rst b/deploy-guide/source/features/deploy_cellv2_advanced.rst deleted file mode 100644 index d158d7f9..00000000 --- a/deploy-guide/source/features/deploy_cellv2_advanced.rst +++ /dev/null @@ -1,247 +0,0 @@ -Example 2. - Split Cell controller/compute Architecture in Train release -======================================================================== - -.. warning:: - Multi cell support is only supported in Stein or later versions. - This guide addresses Train release and later! - -.. contents:: - :depth: 3 - :backlinks: none - -This guide assumes that you are ready to deploy a new overcloud, or have -already installed an overcloud (min Train release). - -.. note:: - - Starting with CentOS 8 and the TripleO Stein release, podman is the CONTAINERCLI - to be used in the following steps. - -.. _advanced_cell_arch: - -In this scenario the cell computes get split off in its own stack, e.g. to -manage computes from each edge site in its own stack. - -This section only explains the differences to the :doc:`deploy_cellv2_basic`. - -Like before the following example uses six nodes and the split control plane method -to deploy a distributed cell deployment. The first Heat stack deploys the controller -cluster. The second Heat stack deploys the cell controller. The computes will then -again be split off in its own stack. - -.. _cell_export_cell_controller_info: - -Extract deployment information from the overcloud stack -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Again like in :ref:`cell_export_overcloud_info` information from the control -plane stack needs to be exported: - -.. code-block:: bash - - source stackrc - mkdir cell1 - export DIR=cell1 - - openstack overcloud cell export cell1-ctrl -o cell1/cell1-ctrl-input.yaml - - -Create roles file for the cell stack -____________________________________ - -The same roles get exported as in :ref:`cell_create_roles_file`. - -Create cell parameter file for additional customization (e.g. cell1/cell1.yaml) -_______________________________________________________________________________ - -The cell parameter file remains the same as in :ref:`cell_parameter_file` with -the only difference that the `ComputeCount` gets set to 0. This is required as -we use the roles file contain both `CellController` and `Compute` role and the -default count for the `Compute` role is 1 (e.g. `cell1/cell1.yaml`): - -.. code-block:: yaml - - parameter_defaults: - ... - # number of controllers/computes in the cell - CellControllerCount: 1 - ComputeCount: 0 - ... - -Create the network configuration for `cellcontroller` and add to environment file -_________________________________________________________________________________ -Depending on the network configuration of the used hardware and network -architecture it is required to register a resource for the `CellController` -role. - -.. code-block:: yaml - - resource_registry: - OS::TripleO::CellController::Net::SoftwareConfig: single-nic-vlans/controller.yaml - -.. note:: - - For details on network configuration consult :ref:`network_isolation` guide, chapter *Customizing the Interface Templates*. 
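If you prefer not to mix the nic config mapping into the cell parameter file,
it can live in a small environment file of its own and be passed with an extra
``-e`` on the deploy command in the next section. The sketch below is
illustrative only; it reuses the example template path shown above and the
``cell1`` directory from the export step.

.. code-block:: bash

   # Sketch only: keep the CellController nic config mapping in its own
   # environment file and pass it later with -e on the overcloud deploy
   # command for the cell1-ctrl stack.
   cat > $HOME/$DIR/cell1-net.yaml <<'EOF'
   resource_registry:
     OS::TripleO::CellController::Net::SoftwareConfig: single-nic-vlans/controller.yaml
   EOF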
- -Deploy the cell -^^^^^^^^^^^^^^^ - -Create new flavor used to tag the cell controller -_________________________________________________ - -Follow the instructions in :ref:`cell_create_flavor_and_tag` on how to create -a new flavor and tag the cell controller. - -Run cell deployment -___________________ -To deploy the cell controller stack we use the same `overcloud deploy` -command as it was used to deploy the `overcloud` stack and add the created -export environment files: - -.. code-block:: bash - - openstack overcloud deploy \ - --templates /usr/share/openstack-tripleo-heat-templates \ - -e ... additional environment files used for overcloud stack, like container - prepare parameters, or other specific parameters for the cell - ... -     --stack cell1-ctrl \ -     -r $HOME/$DIR/cell_roles_data.yaml \ -     -e $HOME/$DIR/cell1-ctrl-input.yaml \ -     -e $HOME/$DIR/cell1.yaml - -Wait for the deployment to finish: - -.. code-block:: bash - - openstack stack list - +--------------------------------------+--------------+----------------------------------+-----------------+----------------------+----------------------+ - | ID | Stack Name | Project | Stack Status | Creation Time | Updated Time | - +--------------------------------------+--------------+----------------------------------+-----------------+----------------------+----------------------+ - | 890e4764-1606-4dab-9c2f-6ed853e3fed8 | cell1-ctrl | 2b303a97f4664a69ba2dbcfd723e76a4 | CREATE_COMPLETE | 2019-02-12T08:35:32Z | None | - | 09531653-1074-4568-b50a-48a7b3cc15a6 | overcloud | 2b303a97f4664a69ba2dbcfd723e76a4 | UPDATE_COMPLETE | 2019-02-09T09:52:56Z | 2019-02-11T08:33:37Z | - +--------------------------------------+--------------+----------------------------------+-----------------+----------------------+----------------------+ - -Create the cell -^^^^^^^^^^^^^^^ -As in :ref:`cell_create_cell` create the cell, but we can skip the final host -discovery step as the computes are note yet deployed. - -Extract deployment information from the cell controller stack -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -The cell compute stack again requires input information from both the control -plane stack (`overcloud`) and the cell controller stack (`cell1-ctrl`): - -.. code-block:: bash - - source stackrc - export DIR=cell1 - -Export EndpointMap, HostsEntry, AllNodesConfig, GlobalConfig and passwords information -______________________________________________________________________________________ -As before the `openstack overcloud cell export` functionality of the tripleo-client -is used to export the required data from the cell controller stack. - -.. code-block:: bash - - openstack overcloud cell export cell1-cmp -o cell1/cell1-cmp-input.yaml -e cell1-ctrl - -`cell1-cmp` is the chosen name for the new compute stack. This parameter is used to -set the default export file name, which is then stored on the current directory. -In this case a dedicated export file was set via `-o`. -In addition it is required to use the `--cell-stack ` or `-e ` -parameter to point the export command to the cell controller stack and indicate -that this is a compute child stack. This is required as the input information for -the cell controller and cell compute stack is not the same. - -.. note:: - - If the export file already exists it can be forced to be overwritten using - `--force-overwrite` or `-f`. - -.. note:: - - The services from the cell stacks use the same passwords services as the - control plane services. 
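Before building the compute parameter file in the next step, it can be worth
confirming that the export produced usable input data. The check below is only
a sketch; the keys grepped for reflect the data the export command is
documented to collect and may be named slightly differently depending on the
release.

.. code-block:: bash

   # Sketch: confirm the export file exists and carries the expected data.
   ls -l $HOME/$DIR/cell1-cmp-input.yaml
   grep -E 'EndpointMap|AllNodesConfig|GlobalConfig' \
       $HOME/$DIR/cell1-cmp-input.yaml | head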
- -Create cell compute parameter file for additional customization -_______________________________________________________________ -A new parameter file is used to overwrite, or customize settings which are -different from the cell controller stack. Add the following content into -a parameter file for the cell compute stack, e.g. `cell1/cell1-cmp.yaml`: - -.. code-block:: yaml - - resource_registry: - # Since the compute stack deploys only compute nodes ExternalVIPPorts - # are not required. - OS::TripleO::Network::Ports::ExternalVipPort: /usr/share/openstack-tripleo-heat-templates/network/ports/noop.yaml - - parameter_defaults: - # number of controllers/computes in the cell - CellControllerCount: 0 - ComputeCount: 1 - -The above file overwrites the values from `cell1/cell1.yaml` to not deploy -a controller in the cell compute stack. Since the cell compute stack uses -the same role file the default `CellControllerCount` is 1. -If there are other differences, like network config, parameters, ... for -the computes, add them here. - -Deploy the cell computes -^^^^^^^^^^^^^^^^^^^^^^^^ - -Run cell deployment -___________________ -To deploy the overcloud we can use the same `overcloud deploy` command as -it was used to deploy the `cell1-ctrl` stack and add the created export -environment files: - -.. code-block:: bash - - openstack overcloud deploy \ - --templates /usr/share/openstack-tripleo-heat-templates \ - -e ... additional environment files used for overcloud stack, like container - prepare parameters, or other specific parameters for the cell - ... -     --stack cell1-cmp \ - -n $HOME/$DIR/cell1-cmp/network_data.yaml \ -     -r $HOME/$DIR/cell_roles_data.yaml \ -     -e $HOME/$DIR/cell1-ctrl-input.yaml \ -     -e $HOME/$DIR/cell1-cmp-input.yaml \ -     -e $HOME/$DIR/cell1.yaml \ -     -e $HOME/$DIR/cell1-cmp.yaml - -Wait for the deployment to finish: - -.. code-block:: bash - - openstack stack list - +--------------------------------------+--------------+----------------------------------+-----------------+----------------------+----------------------+ - | ID | Stack Name | Project | Stack Status | Creation Time | Updated Time | - +--------------------------------------+--------------+----------------------------------+-----------------+----------------------+----------------------+ - | 790e4764-2345-4dab-7c2f-7ed853e7e778 | cell1-cmp | 2b303a97f4664a69ba2dbcfd723e76a4 | CREATE_COMPLETE | 2019-02-12T08:35:32Z | None | - | 890e4764-1606-4dab-9c2f-6ed853e3fed8 | cell1-ctrl | 2b303a97f4664a69ba2dbcfd723e76a4 | CREATE_COMPLETE | 2019-02-12T08:35:32Z | None | - | 09531653-1074-4568-b50a-48a7b3cc15a6 | overcloud | 2b303a97f4664a69ba2dbcfd723e76a4 | UPDATE_COMPLETE | 2019-02-09T09:52:56Z | 2019-02-11T08:33:37Z | - +--------------------------------------+--------------+----------------------------------+-----------------+----------------------+----------------------+ - -Perform cell host discovery -___________________________ -The final step is to discover the computes deployed in the cell. Run the host discovery -as explained in :ref:`cell_host_discovery`. - -Create and add the node to an Availability Zone -_______________________________________________ -After a cell got provisioned, it is required to create an availability zone for the -compute stack, it is not enough to just create an availability zone for the complete -cell. In this used case we want to make sure an instance created in the compute group, -stays in it when performing a migration. 
Check :ref:`cell_availability_zone` on more -about how to create an availability zone and add the node. - -After that the cell is deployed and can be used. - -.. note:: - - Migrating instances between cells is not supported. To move an instance to - a different cell it needs to be re-created in the new target cell. - diff --git a/deploy-guide/source/features/deploy_cellv2_basic.rst b/deploy-guide/source/features/deploy_cellv2_basic.rst deleted file mode 100644 index 94bc1017..00000000 --- a/deploy-guide/source/features/deploy_cellv2_basic.rst +++ /dev/null @@ -1,416 +0,0 @@ -Example 1. - Basic Cell Architecture in Train release -===================================================== - -.. warning:: - Multi cell support is only supported in Stein or later versions. - This guide addresses Train release and later! - -.. contents:: - :depth: 3 - :backlinks: none - -This guide assumes that you are ready to deploy a new overcloud, or have -already installed an overcloud (min Train release). - -.. note:: - - Starting with CentOS 8 and the TripleO Stein release, podman is the CONTAINERCLI - to be used in the following steps. - -.. _basic_cell_arch: - -The following example uses six nodes and the split control plane method to -deploy a distributed cell deployment. The first Heat stack deploys a controller -cluster and a compute. The second Heat stack deploys a cell controller and a -compute node: - -.. code-block:: bash - - openstack overcloud status - +-----------+---------------------+---------------------+-------------------+ - | Plan Name | Created | Updated | Deployment Status | - +-----------+---------------------+---------------------+-------------------+ - | overcloud | 2019-02-12 09:00:27 | 2019-02-12 09:00:27 | DEPLOY_SUCCESS | - +-----------+---------------------+---------------------+-------------------+ - - openstack server list -c Name -c Status -c Networks - +----------------------------+--------+------------------------+ - | Name | Status | Networks | - +----------------------------+--------+------------------------+ - | overcloud-controller-1 | ACTIVE | ctlplane=192.168.24.19 | - | overcloud-controller-2 | ACTIVE | ctlplane=192.168.24.11 | - | overcloud-controller-0 | ACTIVE | ctlplane=192.168.24.29 | - | overcloud-novacompute-0 | ACTIVE | ctlplane=192.168.24.15 | - +----------------------------+--------+------------------------+ - -The above deployed overcloud shows the nodes from the first stack. - -.. note:: - - In this example the default cell and the additional cell uses the - same network, When configuring another network scenario keep in - mind that it will be necessary for the systems to be able to - communicate with each other. - -Extract deployment information from the overcloud stack -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Any additional cell stack requires information from the overcloud Heat stack -where the central OpenStack services are located. The extracted parameters are -needed as input for additional cell stacks. To extract these parameters -into separate files in a directory (e.g. DIR=cell1) run the following: - -.. code-block:: bash - - source stackrc - mkdir cell1 - export DIR=cell1 - -.. 
_cell_export_overcloud_info: - -Export EndpointMap, HostsEntry, AllNodesConfig, GlobalConfig and passwords information -______________________________________________________________________________________ -The tripleo-client in Train provides an `openstack overcloud cell export` -functionality to export the required data from the control plane stack which -then is used as an environment file passed to the cell stack. - -.. code-block:: bash - - openstack overcloud cell export cell1 -o cell1/cell1-cell-input.yaml - -`cell1` is the chosen name for the new cell. This parameter is used to -set the default export file name, which is then stored on the current -directory. -In this case a dedicated export file was set via `-o`. - -.. note:: - - If the export file already exists it can be forced to be overwritten using - `--force-overwrite` or `-f`. - -.. note:: - - The services from the cell stacks use the same passwords services as the - control plane services. - -.. _cell_create_roles_file: - -Create roles file for the cell stack -____________________________________ -Different roles are provided within tripleo-heat-templates, depending on -the configuration and desired services to be deployed. - -The default compute role at roles/Compute.yaml can be used for cell computes -if that is sufficient for the use case. - -A dedicated role, `roles/CellController.yaml` is provided. This role includes -the necessary roles for the cell controller, where the main services are -galera database, rabbitmq, nova-conductor, nova novnc proxy and nova metadata -in case `NovaLocalMetadataPerCell` is enabled. - -Create the roles file for the cell: - -.. code-block:: bash - - openstack overcloud roles generate --roles-path \ - /usr/share/openstack-tripleo-heat-templates/roles \ - -o $DIR/cell_roles_data.yaml Compute CellController - -.. _cell_parameter_file: - -Create cell parameter file for additional customization (e.g. cell1/cell1.yaml) -_______________________________________________________________________________ -Each cell has some mandatory parameters which need to be set using an -environment file. -Add the following content into a parameter file for the cell, e.g. `cell1/cell1.yaml`: - -.. code-block:: - - resource_registry: - OS::TripleO::Network::Ports::OVNDBsVipPort: /usr/share/openstack-tripleo-heat-templates/network/ports/noop.yaml - OS::TripleO::Network::Ports::RedisVipPort: /usr/share/openstack-tripleo-heat-templates/network/ports/noop.yaml - - parameter_defaults: - # since the same networks are used in this example, the - # creation of the different networks is omitted - ManageNetworks: false - - # CELL Parameter to reflect that this is an additional CELL - NovaAdditionalCell: True - - # The DNS names for the VIPs for the cell - CloudName: cell1.ooo.test - CloudNameInternal: cell1.internalapi.ooo.test - CloudNameStorage: cell1.storage.ooo.test - CloudNameStorageManagement: cell1.storagemgmt.ooo.test - CloudNameCtlplane: cell1.ctlplane.ooo.test - - # Flavors used for the cell controller and computes - OvercloudCellControllerFlavor: cellcontroller - OvercloudComputeFlavor: compute - - # Number of controllers/computes in the cell - CellControllerCount: 1 - ComputeCount: 1 - -   # Compute names need to be uniq across cells. 
Make sure to have a uniq - # hostname format for cell nodes -   ComputeHostnameFormat: 'cell1-compute-%index%' - - # default gateway - ControlPlaneStaticRoutes: - - ip_netmask: 0.0.0.0/0 - next_hop: 192.168.24.1 - default: true - DnsServers: - - x.x.x.x - -The above file disables creating networks by setting ``ManageNetworks`` parameter -to ``false`` so that the same ``network_data.yaml`` file from the overcloud stack -can be used. When ``ManageNetworks`` is set to false, ports will be created for -the nodes in the separate stacks on the existing networks that were already created -in the ``overcloud`` stack. - -It also specifies that this will be an additional cell using parameter -`NovaAdditionalCell`. - -.. note:: - - Compute hostnames need to be uniq across cells. Make sure to use - `ComputeHostnameFormat` to have uniq hostnames. - - -Create the network configuration for `cellcontroller` and add to environment file -_________________________________________________________________________________ -Depending on the network configuration of the used hardware and network -architecture it is required to register a resource for the `CellController` -role. - -.. code-block:: yaml - - resource_registry: - OS::TripleO::CellController::Net::SoftwareConfig: single-nic-vlans/controller.yaml - OS::TripleO::Compute::Net::SoftwareConfig: single-nic-vlans/compute.yaml - -.. note:: - - This example just reused the exiting network configs as it is a shared L2 - network. For details on network configuration consult :ref:`network_isolation` guide, - chapter *Customizing the Interface Templates*. - -Deploy the cell -^^^^^^^^^^^^^^^ - -.. _cell_create_flavor_and_tag: - -Create new flavor used to tag the cell controller -_________________________________________________ -Depending on the hardware create a flavor and tag the node to be used. - -.. code-block:: bash - - openstack flavor create --id auto --ram 4096 --disk 40 --vcpus 1 cellcontroller - openstack flavor set --property "cpu_arch"="x86_64" \ - --property "capabilities:boot_option"="local" \ - --property "capabilities:profile"="cellcontroller" \ - --property "resources:CUSTOM_BAREMETAL=1" \ - --property "resources:DISK_GB=0" \ - --property "resources:MEMORY_MB=0" \ - --property "resources:VCPU=0" \ - cellcontroller - -The properties need to be modified to the needs of the environment. - -Tag node into the new flavor using the following command - - -.. code-block:: bash - - baremetal node set --property \ - capabilities='profile:cellcontroller,boot_option:local' - -Verify the tagged cellcontroller: - -.. code-block:: bash - - openstack overcloud profiles list - -Run cell deployment -___________________ -To deploy the overcloud we can use the same `overcloud deploy` command as -it was used to deploy the `overcloud` stack and add the created export -environment files: - -.. code-block:: bash - - openstack overcloud deploy \ - --templates /usr/share/openstack-tripleo-heat-templates \ - -e ... additional environment files used for overcloud stack, like container - prepare parameters, or other specific parameters for the cell - ... -     --stack cell1 \ -     -r $HOME/$DIR/cell_roles_data.yaml \ -     -e $HOME/$DIR/cell1-cell-input.yaml \ -     -e $HOME/$DIR/cell1.yaml - -Wait for the deployment to finish: - -.. 
code-block:: bash - - openstack stack list - +--------------------------------------+--------------+----------------------------------+-----------------+----------------------+----------------------+ - | ID | Stack Name | Project | Stack Status | Creation Time | Updated Time | - +--------------------------------------+--------------+----------------------------------+-----------------+----------------------+----------------------+ - | 890e4764-1606-4dab-9c2f-6ed853e3fed8 | cell1 | 2b303a97f4664a69ba2dbcfd723e76a4 | CREATE_COMPLETE | 2019-02-12T08:35:32Z | None | - | 09531653-1074-4568-b50a-48a7b3cc15a6 | overcloud | 2b303a97f4664a69ba2dbcfd723e76a4 | UPDATE_COMPLETE | 2019-02-09T09:52:56Z | 2019-02-11T08:33:37Z | - +--------------------------------------+--------------+----------------------------------+-----------------+----------------------+----------------------+ - -.. _cell_create_cell: - -Create the cell and discover compute nodes (ansible playbook) -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -An ansible role and playbook is available to automate the one time tasks -to create a cell after the deployment steps finished successfully. In -addition :ref:`cell_create_cell_manual` explains the tasks being automated -by this ansible way. - -.. note:: - - When using multiple additional cells, don't place all inventories of the cells - in one directory. The current version of the `create-nova-cell-v2.yaml` playbook - uses `CellController[0]` to get the `database_connection` and `transport_url` - to create the new cell. When all cell inventories get added to the same directory - `CellController[0]` might not be the correct cell controller for the new cell. - -.. code-block:: bash - - export CONTAINERCLI=podman #choose appropriate container cli here - source stackrc - mkdir inventories - for i in overcloud cell1; do \ - /usr/bin/tripleo-ansible-inventory \ - --static-yaml-inventory inventories/${i}.yaml --stack ${i}; \ - done - - ANSIBLE_HOST_KEY_CHECKING=False ANSIBLE_SSH_RETRIES=3 ansible-playbook -i inventories \ - /usr/share/ansible/tripleo-playbooks/create-nova-cell-v2.yaml \ - -e tripleo_cellv2_cell_name=cell1 \ - -e tripleo_cellv2_containercli=${CONTAINERCLI} - -The playbook requires two parameters `tripleo_cellv2_cell_name` to provide -the name of the new cell and until docker got dropped `tripleo_cellv2_containercli` -to specify either if podman or docker is used. - -.. _cell_create_cell_manual: - -Create the cell and discover compute nodes (manual way) -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -The following describes the manual needed steps to finalize the cell -deployment of a new cell. These are the steps automated in the ansible -playbook mentioned in :ref:`cell_create_cell`. - -Get control plane and cell controller IPs: - -.. code-block:: bash - - CTRL_IP=$(openstack server list -f value -c Networks --name overcloud-controller-0 | sed 's/ctlplane=//') - CELL_CTRL_IP=$(openstack server list -f value -c Networks --name cell1-cellcontrol-0 | sed 's/ctlplane=//') - -Add cell information to overcloud controllers -_____________________________________________ -On all central controllers add information on how to reach the cell controller -endpoint (usually internalapi) to `/etc/hosts`, from the undercloud: - -.. 
code-block:: bash - - CELL_INTERNALAPI_INFO=$(ssh heat-admin@${CELL_CTRL_IP} egrep \ - cell1.*\.internalapi /etc/hosts) - ansible -i /usr/bin/tripleo-ansible-inventory Controller -b \ - -m lineinfile -a "dest=/etc/hosts line=\"$CELL_INTERNALAPI_INFO\"" - -.. note:: - - Do this outside the `HEAT_HOSTS_START` .. `HEAT_HOSTS_END` block, or - add it to an `ExtraHostFileEntries` section of an environment file for the - central overcloud controller. Add the environment file to the next - `overcloud deploy` run. - -Extract transport_url and database connection -_____________________________________________ -Get the `transport_url` and database `connection` endpoint information -from the cell controller. This information is used to create the cell in the -next step: - -.. code-block:: bash - - CELL_TRANSPORT_URL=$(ssh heat-admin@${CELL_CTRL_IP} sudo \ - crudini --get /var/lib/config-data/nova/etc/nova/nova.conf DEFAULT transport_url) - CELL_MYSQL_VIP=$(ssh heat-admin@${CELL_CTRL_IP} sudo \ - crudini --get /var/lib/config-data/nova/etc/nova/nova.conf database connection \ - | awk -F[@/] '{print $4}' - -Create the cell -_______________ -Login to one of the central controllers create the cell with reference to -the IP of the cell controller in the `database_connection` and the -`transport_url` extracted from previous step, like: - -.. code-block:: bash - - ssh heat-admin@${CTRL_IP} sudo ${CONTAINERCLI} exec -i -u root nova_api \ - nova-manage cell_v2 create_cell --name computecell1 \ - --database_connection "{scheme}://{username}:{password}@$CELL_MYSQL_VIP/nova?{query}" \ - --transport-url "$CELL_TRANSPORT_URL" - -.. note:: - - Templated transport cells URLs could be used if the same amount of controllers - are in the default and add on cell. For further information about templated - URLs for cell mappings check: `Template URLs in Cell Mappings - `_ - -.. code-block:: bash - - ssh heat-admin@${CTRL_IP} sudo ${CONTAINERCLI} exec -i -u root nova_api \ - nova-manage cell_v2 list_cells --verbose - -After the cell got created the nova services on all central controllers need to -be restarted. - -Docker: - -.. code-block:: bash - - ansible -i /usr/bin/tripleo-ansible-inventory Controller -b -a \ - "docker restart nova_api nova_scheduler nova_conductor" - -Podman: - -.. code-block:: bash - - ansible -i /usr/bin/tripleo-ansible-inventory Controller -b -a \ - "systemctl restart tripleo_nova_api tripleo_nova_conductor tripleo_nova_scheduler" - -We now see the cell controller services registered: - -.. code-block:: bash - - (overcloud) [stack@undercloud ~]$ nova service-list - -Perform cell host discovery -___________________________ -The final step is to discover the computes deployed in the cell. Run the host discovery -as explained in :ref:`cell_host_discovery`. - -Create and add the node to an Availability Zone -_______________________________________________ -After a cell got provisioned, it is required to create an availability zone for the -cell to make sure an instance created in the cell, stays in the cell when performing -a migration. Check :ref:`cell_availability_zone` on more about how to create an -availability zone and add the node. - -After that the cell is deployed and can be used. - -.. note:: - - Migrating instances between cells is not supported. To move an instance to - a different cell it needs to be re-created in the new target cell. 
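A simple end-to-end check of the newly deployed cell is to boot a test
instance pinned to the availability zone created for it. The sketch below
assumes the ``cell1`` availability zone from :ref:`cell_availability_zone` and
uses placeholder image, flavor and network names.

.. code-block:: bash

   # Sketch: boot a test instance into the cell's availability zone.
   # Image, flavor and network names are placeholders for your environment.
   source overcloudrc
   openstack server create --image cirros --flavor m1.tiny \
       --network internal --availability-zone cell1 cell1-test-vm
   openstack server show cell1-test-vm -c status -c OS-EXT-AZ:availability_zone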
diff --git a/deploy-guide/source/features/deploy_cellv2_manage_cell.rst b/deploy-guide/source/features/deploy_cellv2_manage_cell.rst deleted file mode 100644 index 0033558e..00000000 --- a/deploy-guide/source/features/deploy_cellv2_manage_cell.rst +++ /dev/null @@ -1,189 +0,0 @@ -Managing the cell ------------------ - -.. _cell_host_discovery: - -Add a compute to a cell -~~~~~~~~~~~~~~~~~~~~~~~ - -To increase resource capacity of a running cell, you can start more servers of -a selected role. For more details on how to add nodes see :doc:`../post_deployment/scale_roles`. - -After the node got deployed, login to one of the overcloud controllers and run -the cell host discovery: - -.. code-block:: bash - - CTRL=overcloud-controller-0 - CTRL_IP=$(openstack server list -f value -c Networks --name $CTRL | sed 's/ctlplane=//') - - # CONTAINERCLI can be either docker or podman - export CONTAINERCLI='docker' - - # run cell host discovery - ssh heat-admin@${CTRL_IP} sudo ${CONTAINERCLI} exec -i -u root nova_api \ - nova-manage cell_v2 discover_hosts --by-service --verbose - - # verify the cell hosts - ssh heat-admin@${CTRL_IP} sudo ${CONTAINERCLI} exec -i -u root nova_api \ - nova-manage cell_v2 list_hosts - - # add new node to the availability zone - source overcloudrc - (overcloud) $ openstack aggregate add host - -.. note:: - - Optionally the cell uuid can be specified to the `discover_hosts` and - `list_hosts` command to only target against a specific cell. - -Delete a compute from a cell -~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -* As initial step migrate all instances off the compute. - -* From one of the overcloud controllers, delete the computes from the cell: - - .. code-block:: bash - - source stackrc - CTRL=overcloud-controller-0 - CTRL_IP=$(openstack server list -f value -c Networks --name $CTRL | sed 's/ctlplane=//') - - # CONTAINERCLI can be either docker or podman - export CONTAINERCLI='docker' - - # list the cell hosts - ssh heat-admin@${CTRL_IP} sudo ${CONTAINERCLI} exec -i -u root nova_api \ - nova-manage cell_v2 list_hosts - - # delete a node from a cell - ssh heat-admin@${CTRL_IP} sudo ${CONTAINERCLI} exec -i -u root nova_api \ - nova-manage cell_v2 delete_host --cell_uuid --host - -* Delete the node from the cell stack - - See :doc:`../post_deployment/delete_nodes`. - -* Delete the resource providers from placement - - This step is required as otherwise adding a compute node with the same hostname - will make it to fail to register and update the resources with the placement - service.: - - .. code-block:: bash - - sudo dnf install python3-osc-placement - source overcloudrc - openstack resource provider list - +--------------------------------------+---------------------------------------+------------+ - | uuid | name | generation | - +--------------------------------------+---------------------------------------+------------+ - | 9cd04a8b-5e6c-428e-a643-397c9bebcc16 | computecell1-novacompute-0.site1.test | 11 | - +--------------------------------------+---------------------------------------+------------+ - - openstack resource provider delete 9cd04a8b-5e6c-428e-a643-397c9bebcc16 - -Delete a cell -~~~~~~~~~~~~~ - -* As initial step delete all instances from the cell. - -* From one of the overcloud controllers, delete all computes from the cell: - - .. 
code-block:: bash - - CTRL=overcloud-controller-0 - CTRL_IP=$(openstack server list -f value -c Networks --name $CTRL | sed 's/ctlplane=//') - - # CONTAINERCLI can be either docker or podman - export CONTAINERCLI='docker' - - # list the cell hosts - ssh heat-admin@${CTRL_IP} sudo ${CONTAINERCLI} exec -i -u root nova_api \ - nova-manage cell_v2 list_hosts - - # delete a node from a cell - ssh heat-admin@${CTRL_IP} sudo ${CONTAINERCLI} exec -i -u root nova_api \ - nova-manage cell_v2 delete_host --cell_uuid --host - -* On the cell controller delete all deleted instances from the database: - - .. code-block:: bash - - CELL_CTRL=cell1-cellcontrol-0 - CELL_CTRL_IP=$(openstack server list -f value -c Networks --name $CELL_CTRL | sed 's/ctlplane=//') - - # CONTAINERCLI can be either docker or podman - export CONTAINERCLI='docker' - - ssh heat-admin@${CELL_CTRL_IP} sudo ${CONTAINERCLI} exec -i -u root nova_conductor \ - nova-manage db archive_deleted_rows --until-complete --verbose - -* From one of the overcloud controllers, delete the cell: - - .. code-block:: bash - - CTRL=overcloud-controller-0 - CTRL_IP=$(openstack server list -f value -c Networks --name $CTRL | sed 's/ctlplane=//') - - # CONTAINERCLI can be either docker or podman - export CONTAINERCLI='docker' - - # list the cells - ssh heat-admin@${CTRL_IP} sudo ${CONTAINERCLI} exec -i -u root nova_api \ - nova-manage cell_v2 list_cells - - # delete the cell - ssh heat-admin@${CTRL_IP} sudo ${CONTAINERCLI} exec -i -u root nova_api \ - nova-manage cell_v2 delete_cell --cell_uuid - -* Delete the cell stack: - - .. code-block:: bash - - openstack stack delete --wait --yes && openstack overcloud plan delete - - .. note:: - - If the cell consist of a controller and compute stack, delete as a first step the - compute stack and then the controller stack. - -* From a system which can reach the placement endpoint, delete the resource providers from placement - - This step is required as otherwise adding a compute node with the same hostname - will make it to fail to register as a resource with the placement service. - - .. code-block:: bash - - sudo dnf install python3-osc-placement - source overcloudrc - openstack resource provider list - +--------------------------------------+---------------------------------------+------------+ - | uuid | name | generation | - +--------------------------------------+---------------------------------------+------------+ - | 9cd04a8b-5e6c-428e-a643-397c9bebcc16 | computecell1-novacompute-0.site1.test | 11 | - +--------------------------------------+---------------------------------------+------------+ - - openstack resource provider delete 9cd04a8b-5e6c-428e-a643-397c9bebcc16 - -Updating a cell -~~~~~~~~~~~~~~~ -Each stack in a multi-stack cell deployment must be updated to perform a full minor -update across the entire deployment. - -Cells can be updated just like the overcloud nodes following update procedure described -in :ref:`package_update` and using appropriate stack name for update commands. - -The control plane and cell controller stack should be updated first by completing all -the steps from the minor update procedure. - -Once the control plane stack is updated, re-run the export command to recreate the -required input files for each separate cell stack. - -.. note:: - - Before re-running the export command, backup the previously used input file so that - the previous versions are not overwritten. 
In the event that a separate cell stack - needs a stack update operation performed prior to the minor update procedure, the - previous versions of the exported files should be used. diff --git a/deploy-guide/source/features/deploy_cellv2_routed.rst b/deploy-guide/source/features/deploy_cellv2_routed.rst deleted file mode 100644 index bf31df2d..00000000 --- a/deploy-guide/source/features/deploy_cellv2_routed.rst +++ /dev/null @@ -1,718 +0,0 @@ -Example 3. - Advanced example using split cell controller/compute architecture and routed networks in Train release -=================================================================================================================== - -.. warning:: - Multi cell support is only supported in Stein or later versions. - This guide addresses Train release and later! - -.. contents:: - :depth: 3 - :backlinks: none - -This guide assumes that you are ready to deploy a new overcloud, or have -already installed an overcloud (min Train release). - -.. note:: - - Starting with CentOS 8 and the TripleO Stein release, podman is the CONTAINERCLI - to be used in the following steps. - -In this example we use the :doc:`deploy_cellv2_advanced` using a routed spine and -leaf networking layout to deploy an additional cell. Not all nodes need -to be co-located at the same physical location or datacenter. See -:ref:`routed_spine_leaf_network` for more details. - -The nodes deployed to the control plane, which are part of the overcloud stack, -use different networks then the cell stacks which are separated in a cell -controller stack and a cell compute stack. The cell controller and cell compute -stack use the same networks, - -.. note:: - - In this example the routing for the different VLAN subnets is done by - the undercloud, which must _NOT_ be done in a production environment - as it is a single point of failure! - -Used networks -^^^^^^^^^^^^^ -The following provides and overview of the used networks and subnet -details for this example: - -.. code-block:: yaml - - InternalApi - internal_api_subnet - vlan: 20 - net: 172.16.2.0/24 - route: 172.17.2.0/24 gw: 172.16.2.254 - internal_api_cell1 - vlan: 21 - net: 172.17.2.0/24 - gateway: 172.17.2.254 - Storage - storage_subnet - vlan: 30 - net: 172.16.1.0/24 - route: 172.17.1.0/24 gw: 172.16.1.254 - storage_cell1 - vlan: 31 - net: 172.17.1.0/24 - gateway: 172.17.1.254 - StorageMgmt - storage_mgmt_subnet - vlan: 40 - net: 172.16.3.0/24 - route: 172.17.3.0/24 gw: 172.16.3.254 - storage_mgmt_cell1 - vlan: 41 - net: 172.17.3.0/24 - gateway: 172.17.3.254 - Tenant - tenant_subnet - vlan: 50 - net: 172.16.0.0/24 - External - external_subnet - vlan: 10 - net: 10.0.0.0/24 - external_cell1 - vlan: 11 - net: 10.0.1.0/24 - gateway: 10.0.1.254 - -Prepare control plane for cell network routing -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -.. 
code-block:: bash
-
-    openstack overcloud status
-    +-----------+-------------------+
-    | Plan Name | Deployment Status |
-    +-----------+-------------------+
-    | overcloud | DEPLOY_SUCCESS    |
-    +-----------+-------------------+
-
-    openstack server list -c Name -c Status -c Networks
-    +-------------------------+--------+------------------------+
-    | Name                    | Status | Networks               |
-    +-------------------------+--------+------------------------+
-    | overcloud-controller-2  | ACTIVE | ctlplane=192.168.24.29 |
-    | overcloud-controller-0  | ACTIVE | ctlplane=192.168.24.18 |
-    | overcloud-controller-1  | ACTIVE | ctlplane=192.168.24.20 |
-    | overcloud-novacompute-0 | ACTIVE | ctlplane=192.168.24.16 |
-    +-------------------------+--------+------------------------+
-
-The overcloud stack for the control plane was deployed using a `routes.yaml`
-environment file to add the routing information for the new cell subnets:
-
-.. code-block:: yaml
-
-    parameter_defaults:
-      InternalApiInterfaceRoutes:
-        - destination: 172.17.2.0/24
-          nexthop: 172.16.2.254
-      StorageInterfaceRoutes:
-        - destination: 172.17.1.0/24
-          nexthop: 172.16.1.254
-      StorageMgmtInterfaceRoutes:
-        - destination: 172.17.3.0/24
-          nexthop: 172.16.3.254
-
-Reusing networks and adding cell subnets
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-To prepare the `network_data` parameter file for the cell controller stack,
-the file from the control plane is used as a base:
-
-.. code-block:: bash
-
-    cp /usr/share/openstack-tripleo-heat-templates/network_data.yaml cell1/network_data-ctrl.yaml
-
-When deploying a cell in separate stacks it may be necessary to reuse networks,
-subnets, segments, and VIP resources between stacks. Only a single Heat stack
-can own a resource and be responsible for its creation and deletion; however,
-the resources can be reused in other stacks.
-
-To reuse network-related resources between stacks, the following parameters have
-been added to the network definitions in the network_data.yaml file format:
-
-.. code-block:: yaml
-
-    external_resource_network_id: Existing Network UUID
-    external_resource_subnet_id: Existing Subnet UUID
-    external_resource_segment_id: Existing Segment UUID
-    external_resource_vip_id: Existing VIP UUID
-
-.. note::
-
-   The cell controllers use virtual IPs, therefore the existing VIPs from the
-   central overcloud stack should not be referenced. In case cell controllers
-   and cell computes get split into separate stacks, the cell compute stack
-   network_data file needs an external_resource_vip_id reference to the cell
-   controller's VIP resource.
-
-These parameters can be set on each network definition in the `network_data-ctrl.yaml`
-file used for the deployment of the separate stack.
-
-Not all networks need to be reused or shared across stacks. The `external_resource_*`
-parameters can be set for only the networks that are meant to be shared, while
-the other networks can be newly created and managed.
-
-In this example we reuse all networks except the management network, as it is
-not used at all.
-
-The resulting storage network here looks like this:
-
-..
code-block:: - - - name: Storage -   external_resource_network_id: 30e9d52d-1929-47ed-884b-7c6d65fa2e00 -   external_resource_subnet_id: 11a3777a-8c42-4314-a47f-72c86e9e6ad4 -   vip: true -   vlan: 30 -   name_lower: storage -   ip_subnet: '172.16.1.0/24' -   allocation_pools: [{'start': '172.16.1.4', 'end': '172.16.1.250'}] -   ipv6_subnet: 'fd00:fd00:fd00:3000::/64' -   ipv6_allocation_pools: [{'start': 'fd00:fd00:fd00:3000::10', 'end': 'fd00:fd00:fd00:3000:ffff:ffff:ffff:fffe'}] -   mtu: 1500 -   subnets: -     storage_cell1: -       vlan: 31 -       ip_subnet: '172.17.1.0/24' -       allocation_pools: [{'start': '172.17.1.10', 'end': '172.17.1.250'}] -       gateway_ip: '172.17.1.254' - -We added the `external_resource_network_id` and `external_resource_subnet_id` of -the control plane stack as we want to reuse those resources: - -.. code-block:: bash - - openstack network show storage -c id -f value - openstack subnet show storage_subnet -c id -f value - -In addition a new `storage_cell1` subnet is now added to the `subnets` section -to get it created in the cell controller stack for cell1: - -.. code-block:: - - subnets: - storage_cell1: - vlan: 31 - ip_subnet: '172.17.1.0/24' - allocation_pools: [{'start': '172.17.1.10', 'end': '172.17.1.250'}] - gateway_ip: '172.17.1.254' - -.. note:: - - In this example no Management network is used, therefore it was removed. - -Full networks data example: - -.. code-block:: - - - name: Storage - external_resource_network_id: 30e9d52d-1929-47ed-884b-7c6d65fa2e00 -   external_resource_subnet_id: 11a3777a-8c42-4314-a47f-72c86e9e6ad4 -   vip: true -   vlan: 30 -   name_lower: storage -   ip_subnet: '172.16.1.0/24' -   allocation_pools: [{'start': '172.16.1.4', 'end': '172.16.1.250'}] -   ipv6_subnet: 'fd00:fd00:fd00:3000::/64' -   ipv6_allocation_pools: [{'start': 'fd00:fd00:fd00:3000::10', 'end': 'fd00:fd00:fd00:3000:ffff:ffff:ffff:fffe'}] -   mtu: 1500 -   subnets: -     storage_cell1: -       vlan: 31 -       ip_subnet: '172.17.1.0/24' -       allocation_pools: [{'start': '172.17.1.10', 'end': '172.17.1.250'}] -       gateway_ip: '172.17.1.254' - - name: StorageMgmt -   name_lower: storage_mgmt -   external_resource_network_id: 29e85314-2177-4cbd-aac8-6faf2a3f7031 -   external_resource_subnet_id: 01c0a75e-e62f-445d-97ad-b98a141d6082 -   vip: true -   vlan: 40 -   ip_subnet: '172.16.3.0/24' -   allocation_pools: [{'start': '172.16.3.4', 'end': '172.16.3.250'}] -   ipv6_subnet: 'fd00:fd00:fd00:4000::/64' -   ipv6_allocation_pools: [{'start': 'fd00:fd00:fd00:4000::10', 'end': 'fd00:fd00:fd00:4000:ffff:ffff:ffff:fffe'}] -   mtu: 1500 -   subnets: -     storage_mgmt_cell1: -       vlan: 41 -       ip_subnet: '172.17.3.0/24' -       allocation_pools: [{'start': '172.17.3.10', 'end': '172.17.3.250'}] -       gateway_ip: '172.17.3.254' - - name: InternalApi -   name_lower: internal_api -   external_resource_network_id: 5eb79743-7ff4-4f68-9904-6e9c36fbaaa6 -   external_resource_subnet_id: dbc24086-0aa7-421d-857d-4e3956adec10 -   vip: true -   vlan: 20 -   ip_subnet: '172.16.2.0/24' -   allocation_pools: [{'start': '172.16.2.4', 'end': '172.16.2.250'}] -   ipv6_subnet: 'fd00:fd00:fd00:2000::/64' -   ipv6_allocation_pools: [{'start': 'fd00:fd00:fd00:2000::10', 'end': 'fd00:fd00:fd00:2000:ffff:ffff:ffff:fffe'}] -   mtu: 1500 -   subnets: -     internal_api_cell1: -       vlan: 21 -       ip_subnet: '172.17.2.0/24' -       allocation_pools: [{'start': '172.17.2.10', 'end': '172.17.2.250'}] -       gateway_ip: '172.17.2.254' - - name: Tenant -   
external_resource_network_id: ee83d0fb-3bf1-47f2-a02b-ef5dc277afae -   external_resource_subnet_id: 0b6030ae-8445-4480-ab17-dd4c7c8fa64b -   vip: false  # Tenant network does not use VIPs -   name_lower: tenant -   vlan: 50 -   ip_subnet: '172.16.0.0/24' -   allocation_pools: [{'start': '172.16.0.4', 'end': '172.16.0.250'}] -   ipv6_subnet: 'fd00:fd00:fd00:5000::/64' -   ipv6_allocation_pools: [{'start': 'fd00:fd00:fd00:5000::10', 'end': 'fd00:fd00:fd00:5000:ffff:ffff:ffff:fffe'}] -   mtu: 1500 - - name: External -   external_resource_network_id: 89b7b481-f609-45e7-ad5e-e006553c1d3a -   external_resource_subnet_id: dd84112d-2129-430c-a8c2-77d2dee05af2 -   vip: true -   name_lower: external -   vlan: 10 -   ip_subnet: '10.0.0.0/24' -   allocation_pools: [{'start': '10.0.0.4', 'end': '10.0.0.250'}] -   gateway_ip: '10.0.0.1' -   ipv6_subnet: '2001:db8:fd00:1000::/64' -   ipv6_allocation_pools: [{'start': '2001:db8:fd00:1000::10', 'end': '2001:db8:fd00:1000:ffff:ffff:ffff:fffe'}] -   gateway_ipv6: '2001:db8:fd00:1000::1' -   mtu: 1500 -   subnets: -     external_cell1: -       vlan: 11 -       ip_subnet: '10.0.1.0/24' -       allocation_pools: [{'start': '10.0.1.10', 'end': '10.0.1.250'}] -       gateway_ip: '10.0.1.254' - -.. note: - - When not sharing networks between stacks, each network defined in `network_data*.yaml` - must have a unique name across all deployed stacks. This requirement is necessary - since regardless of the stack, all networks are created in the same tenant in - Neutron on the undercloud. - -Export EndpointMap, HostsEntry, AllNodesConfig, GlobalConfig and passwords information -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Follow the steps as explained in :ref:`cell_export_overcloud_info` on how to -export the required data from the overcloud stack. - -Cell roles -^^^^^^^^^^ -Modify the cell roles file to use new subnets for `InternalApi`, `Storage`, -`StorageMgmt` and `External` for cell controller and compute: - -.. code-block:: bash - - openstack overcloud roles generate --roles-path \ - /usr/share/openstack-tripleo-heat-templates/roles \ - -o $DIR/cell_roles_data.yaml Compute CellController - -For each role modify the subnets to match what got defined in the previous step -in `cell1/network_data-ctrl.yaml`: - -.. code-block:: - - - name: Compute -   description: | -     Basic Compute Node role -   CountDefault: 1 -   # Create external Neutron bridge (unset if using ML2/OVS without DVR) -   tags: -     - external_bridge -   networks: -     InternalApi: -       subnet: internal_api_cell1 -     Tenant: -       subnet: tenant_subnet -     Storage: -       subnet: storage_cell1 - ... - - name: CellController -   description: | -     CellController role for the nova cell_v2 controller services -   CountDefault: 1 -   tags: -     - primary -     - controller -   networks: -     External: -       subnet: external_cell1 -     InternalApi: -       subnet: internal_api_cell1 -     Storage: -       subnet: storage_cell1 -     StorageMgmt: -       subnet: storage_mgmt_cell1 -     Tenant: -       subnet: tenant_subnet - -Create the cell parameter file -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Each cell has some mandatory parameters which need to be set using an -environment file. -Add the following content into a parameter file for the cell, e.g. `cell1/cell1.yaml`: - -.. 
code-block:: yaml - - parameter_defaults: - # new CELL Parameter to reflect that this is an additional CELL - NovaAdditionalCell: True - - # The DNS names for the VIPs for the cell - CloudName: cell1.ooo.test - CloudNameInternal: cell1.internalapi.ooo.test - CloudNameStorage: cell1.storage.ooo.test - CloudNameStorageManagement: cell1.storagemgmt.ooo.test - CloudNameCtlplane: cell1.ctlplane.ooo.test - - # Flavors used for the cell controller and computes - OvercloudCellControllerFlavor: cellcontroller - OvercloudComputeFlavor: compute - - # number of controllers/computes in the cell - CellControllerCount: 3 - ComputeCount: 0 - - # Compute names need to be unique, make sure to have a unique - # hostname format for cell nodes - ComputeHostnameFormat: 'cell1-compute-%index%' - - # default gateway - ControlPlaneStaticRoutes: - - ip_netmask: 0.0.0.0/0 - next_hop: 192.168.24.1 - default: true - DnsServers: - - x.x.x.x - -Virtual IP addresses -^^^^^^^^^^^^^^^^^^^^ -The cell controller is hosting VIP’s (Virtual IP addresses) and is not using -the base subnet of one or more networks, therefore additional overrides to the -`VipSubnetMap` are required to ensure VIP’s are created on the subnet associated -with the L2 network segment the controller nodes is connected to. - -Add a `VipSubnetMap` to the `cell1/cell1.yaml` or a new parameter file to -point the VIPs to the correct subnet: - -.. code-block:: yaml - - parameter_defaults: - VipSubnetMap: - InternalApi: internal_api_cell1 - Storage: storage_cell1 - StorageMgmt: storage_mgmt_cell1 - External: external_cell1 - -Create the network configuration for `cellcontroller` and add to environment file -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Depending on the network configuration of the used hardware and network -architecture it is required to register a resource for the `CellController` -role in `cell1/cell1.yaml`. - -.. code-block:: yaml - - resource_registry: - OS::TripleO::CellController::Net::SoftwareConfig: cell1/single-nic-vlans/controller.yaml - OS::TripleO::Compute::Net::SoftwareConfig: cell1/single-nic-vlans/compute.yaml - -.. note:: - - For details on network configuration consult :ref:`network_isolation` guide, chapter *Customizing the Interface Templates*. - -Deploy the cell controllers -^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Create new flavor used to tag the cell controller -_________________________________________________ -Follow the instructions in :ref:`cell_create_flavor_and_tag` on how to create -a new flavor and tag the cell controller. - -Run cell deployment -___________________ -To deploy the overcloud we can use the same `overcloud deploy` command as -it was used to deploy the `overcloud` stack and add the created export -environment files: - -.. code-block:: bash - - openstack overcloud deploy \ - --templates /usr/share/openstack-tripleo-heat-templates \ - -e ... additional environment files used for overcloud stack, like container - prepare parameters, or other specific parameters for the cell - ... - --stack cell1-ctrl \ -   -n $HOME/$DIR/network_data-ctrl.yaml \ -   -r $HOME/$DIR/cell_roles_data.yaml \ - -e $HOME/$DIR/cell1-ctrl-input.yaml \ - -e $HOME/$DIR/cell1.yaml - -Wait for the deployment to finish: - -.. 
code-block:: bash - - openstack stack list - - +--------------------------------------+------------+----------------------------------+-----------------+----------------------+----------------------+ - | ID | Stack Name | Project | Stack Status | Creation Time | Updated Time | - +--------------------------------------+------------+----------------------------------+-----------------+----------------------+----------------------+ - | 6403ed94-7c8f-47eb-bdb8-388a5ac7cb20 | cell1-ctrl | f7736589861c47d8bbf1ecd29f02823d | CREATE_COMPLETE | 2019-08-15T14:46:32Z | None | - | 925a2875-fbbb-41fd-bb06-bf19cded2510 | overcloud | f7736589861c47d8bbf1ecd29f02823d | UPDATE_COMPLETE | 2019-08-13T10:43:20Z | 2019-08-15T10:13:41Z | - +--------------------------------------+------------+----------------------------------+-----------------+----------------------+----------------------+ - -Create the cell -^^^^^^^^^^^^^^^ -As in :ref:`cell_create_cell` create the cell, but we can skip the final host -discovery step as the computes are note yet deployed. - - -Extract deployment information from the cell controller stack -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Follow the steps explained in :ref:`cell_export_cell_controller_info` on -how to export the required input data from the cell controller stack. - -Create cell compute parameter file for additional customization -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Create the `cell1/cell1-cmp.yaml` parameter file to overwrite settings -which are different from the cell controller stack. - -.. code-block:: yaml - - parameter_defaults: - # number of controllers/computes in the cell - CellControllerCount: 0 - ComputeCount: 1 - -The above file overwrites the values from `cell1/cell1.yaml` to not deploy -a controller in the cell compute stack. Since the cell compute stack uses -the same role file the default `CellControllerCount` is 1. - -Reusing networks from control plane and cell controller stack -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -For the cell compute stack we reuse the networks from the control plane -stack and the subnet from the cell controller stack. Therefore references -to the external resources for network, subnet, segment and vip are required: - -.. code-block:: bash - - cp cell1/network_data-ctrl.yaml cell1/network_data-cmp.yaml - -The storage network definition in `cell1/network_data-cmp.yaml` looks -like this: - -.. code-block:: - - - name: Storage -   external_resource_network_id: 30e9d52d-1929-47ed-884b-7c6d65fa2e00 -   external_resource_subnet_id: 11a3777a-8c42-4314-a47f-72c86e9e6ad4 -   external_resource_vip_id: 4ed73ea9-4cf6-42c1-96a5-e32b415c738f -   vip: true -   vlan: 30 -   name_lower: storage -   ip_subnet: '172.16.1.0/24' -   allocation_pools: [{'start': '172.16.1.4', 'end': '172.16.1.250'}] -   ipv6_subnet: 'fd00:fd00:fd00:3000::/64' -   ipv6_allocation_pools: [{'start': 'fd00:fd00:fd00:3000::10', 'end': 'fd00:fd00:fd00:3000:ffff:ffff:ffff:fffe'}] -   mtu: 1500 -   subnets: -     storage_cell1: -       vlan: 31 -       ip_subnet: '172.17.1.0/24' -       allocation_pools: [{'start': '172.17.1.10', 'end': '172.17.1.250'}] -       gateway_ip: '172.17.1.254' -       external_resource_subnet_id: 7930635d-d1d5-4699-b318-00233c73ed6b -       external_resource_segment_id: 730769f8-e78f-42a3-9dd4-367a212e49ff - -Previously we already added the `external_resource_network_id` and `external_resource_subnet_id` -for the network in the upper level hierarchy. 
- -In addition we add the `external_resource_vip_id` of the VIP of the stack which -should be reused for this network (Storage). - -Important is that the `external_resource_vip_id` for the InternalApi points -the VIP of the cell controller stack! - -.. code-block:: bash - - openstack port show -c id -f value - -In the `storage_cell1` subnet section we add the `external_resource_subnet_id` -and `external_resource_segment_id` of the cell controller stack: - -.. code-block:: yaml - - storage_cell1: - vlan: 31 - ip_subnet: '172.17.1.0/24' - allocation_pools: [{'start': '172.17.1.10', 'end': '172.17.1.250'}] - gateway_ip: '172.17.1.254' - external_resource_subnet_id: 7930635d-d1d5-4699-b318-00233c73ed6b - external_resource_segment_id: 730769f8-e78f-42a3-9dd4-367a212e49ff - -.. code-block:: bash - - openstack subnet show storage_cell1 -c id -f value - openstack network segment show storage_storage_cell1 -c id -f value - -Full networks data example for the compute stack: - -.. code-block:: - - - name: Storage -   external_resource_network_id: 30e9d52d-1929-47ed-884b-7c6d65fa2e00 -   external_resource_subnet_id: 11a3777a-8c42-4314-a47f-72c86e9e6ad4 -   external_resource_vip_id: 4ed73ea9-4cf6-42c1-96a5-e32b415c738f -   vip: true -   vlan: 30 -   name_lower: storage -   ip_subnet: '172.16.1.0/24' -   allocation_pools: [{'start': '172.16.1.4', 'end': '172.16.1.250'}] -   ipv6_subnet: 'fd00:fd00:fd00:3000::/64' -   ipv6_allocation_pools: [{'start': 'fd00:fd00:fd00:3000::10', 'end': 'fd00:fd00:fd00:3000:ffff:ffff:ffff:fffe'}] -   mtu: 1500 -   subnets: -     storage_cell1: -       vlan: 31 -       ip_subnet: '172.17.1.0/24' -       allocation_pools: [{'start': '172.17.1.10', 'end': '172.17.1.250'}] -       gateway_ip: '172.17.1.254' -       external_resource_subnet_id: 7930635d-d1d5-4699-b318-00233c73ed6b -       external_resource_segment_id: 730769f8-e78f-42a3-9dd4-367a212e49ff - - name: StorageMgmt -   name_lower: storage_mgmt -   external_resource_network_id: 29e85314-2177-4cbd-aac8-6faf2a3f7031 -   external_resource_subnet_id: 01c0a75e-e62f-445d-97ad-b98a141d6082 -   external_resource_segment_id: 4b4f6f83-f031-4495-84c5-7422db1729d5 -   vip: true -   vlan: 40 -   ip_subnet: '172.16.3.0/24' -   allocation_pools: [{'start': '172.16.3.4', 'end': '172.16.3.250'}] -   ipv6_subnet: 'fd00:fd00:fd00:4000::/64' -   ipv6_allocation_pools: [{'start': 'fd00:fd00:fd00:4000::10', 'end': 'fd00:fd00:fd00:4000:ffff:ffff:ffff:fffe'}] -   mtu: 1500 -   subnets: -     storage_mgmt_cell1: -       vlan: 41 -       ip_subnet: '172.17.3.0/24' -       allocation_pools: [{'start': '172.17.3.10', 'end': '172.17.3.250'}] -       gateway_ip: '172.17.3.254' -       external_resource_subnet_id: de9233d4-53a3-485d-8433-995a9057383f -       external_resource_segment_id: 2400718d-7fbd-4227-8318-245747495241 - - name: InternalApi -   name_lower: internal_api -   external_resource_network_id: 5eb79743-7ff4-4f68-9904-6e9c36fbaaa6 -   external_resource_subnet_id: dbc24086-0aa7-421d-857d-4e3956adec10 -   external_resource_vip_id: 1a287ad7-e574-483a-8288-e7c385ee88a0 -   vip: true -   vlan: 20 -   ip_subnet: '172.16.2.0/24' -   allocation_pools: [{'start': '172.16.2.4', 'end': '172.16.2.250'}] -   ipv6_subnet: 'fd00:fd00:fd00:2000::/64' -   ipv6_allocation_pools: [{'start': 'fd00:fd00:fd00:2000::10', 'end': 'fd00:fd00:fd00:2000:ffff:ffff:ffff:fffe'}] -   mtu: 1500 -   subnets: -     internal_api_cell1: -       external_resource_subnet_id: 16b8cf48-6ca1-4117-ad90-3273396cb41d -       external_resource_segment_id: 
b310daec-7811-46be-a958-a05a5b0569ef -       vlan: 21 -       ip_subnet: '172.17.2.0/24' -       allocation_pools: [{'start': '172.17.2.10', 'end': '172.17.2.250'}] -       gateway_ip: '172.17.2.254' - - name: Tenant -   external_resource_network_id: ee83d0fb-3bf1-47f2-a02b-ef5dc277afae -   external_resource_subnet_id: 0b6030ae-8445-4480-ab17-dd4c7c8fa64b -   vip: false  # Tenant network does not use VIPs -   name_lower: tenant -   vlan: 50 -   ip_subnet: '172.16.0.0/24' -   allocation_pools: [{'start': '172.16.0.4', 'end': '172.16.0.250'}] -   ipv6_subnet: 'fd00:fd00:fd00:5000::/64' -   ipv6_allocation_pools: [{'start': 'fd00:fd00:fd00:5000::10', 'end': 'fd00:fd00:fd00:5000:ffff:ffff:ffff:fffe'}] -   mtu: 1500 - - name: External -   external_resource_network_id: 89b7b481-f609-45e7-ad5e-e006553c1d3a -   external_resource_subnet_id: dd84112d-2129-430c-a8c2-77d2dee05af2 -   external_resource_vip_id: b7a0606d-f598-4dc6-9e85-e023c64fd20b -   vip: true -   name_lower: external -   vlan: 10 -   ip_subnet: '10.0.0.0/24' -   allocation_pools: [{'start': '10.0.0.4', 'end': '10.0.0.250'}] -   gateway_ip: '10.0.0.1' -   ipv6_subnet: '2001:db8:fd00:1000::/64' -   ipv6_allocation_pools: [{'start': '2001:db8:fd00:1000::10', 'end': '2001:db8:fd00:1000:ffff:ffff:ffff:fffe'}] -   gateway_ipv6: '2001:db8:fd00:1000::1' -   mtu: 1500 -   subnets: -     external_cell1: -       vlan: 11 -       ip_subnet: '10.0.1.0/24' -       allocation_pools: [{'start': '10.0.1.10', 'end': '10.0.1.250'}] -       gateway_ip: '10.0.1.254' -       external_resource_subnet_id: 81ac9bc2-4fbe-40be-ac0e-9aa425799626 -       external_resource_segment_id: 8a877c1f-cb47-40dd-a906-6731f042e544 - -Deploy the cell computes -^^^^^^^^^^^^^^^^^^^^^^^^ - -Run cell deployment -___________________ -To deploy the overcloud we can use the same `overcloud deploy` command as -it was used to deploy the `cell1-ctrl` stack and add the created export -environment files: - -.. code-block:: bash - - openstack overcloud deploy \ - --templates /usr/share/openstack-tripleo-heat-templates \ - -e ... additional environment files used for overcloud stack, like container - prepare parameters, or other specific parameters for the cell - ... - --stack cell1-cmp \ - -r $HOME/$DIR/cell_roles_data.yaml \ - -n $HOME/$DIR/network_data-cmp.yaml \ - -e $HOME/$DIR/cell1-ctrl-input.yaml \ - -e $HOME/$DIR/cell1-cmp-input.yaml \ - -e $HOME/$DIR/cell1.yaml \ - -e $HOME/$DIR/cell1-cmp.yaml - -Wait for the deployment to finish: - -.. 
code-block:: bash
-
-    openstack stack list
-    +--------------------------------------+------------+----------------------------------+--------------------+----------------------+----------------------+
-    | ID                                   | Stack Name | Project                          | Stack Status       | Creation Time        | Updated Time         |
-    +--------------------------------------+------------+----------------------------------+--------------------+----------------------+----------------------+
-    | 12e86ea6-3725-482a-9b05-b283378dcf30 | cell1-cmp  | f7736589861c47d8bbf1ecd29f02823d | CREATE_COMPLETE    | 2019-08-15T15:57:19Z | None                 |
-    | 6403ed94-7c8f-47eb-bdb8-388a5ac7cb20 | cell1-ctrl | f7736589861c47d8bbf1ecd29f02823d | CREATE_COMPLETE    | 2019-08-15T14:46:32Z | None                 |
-    | 925a2875-fbbb-41fd-bb06-bf19cded2510 | overcloud  | f7736589861c47d8bbf1ecd29f02823d | UPDATE_COMPLETE    | 2019-08-13T10:43:20Z | 2019-08-15T10:13:41Z |
-    +--------------------------------------+------------+----------------------------------+--------------------+----------------------+----------------------+
-
-Perform cell host discovery
-___________________________
-The final step is to discover the computes deployed in the cell. Run the host
-discovery as explained in :ref:`cell_host_discovery`.
-
-Create and add the node to an Availability Zone
-_______________________________________________
-After a cell has been provisioned, an availability zone must be created for the
-compute stack; it is not enough to just create an availability zone for the
-complete cell. In this use case we want to make sure an instance created in the
-compute group stays in it when a migration is performed. See
-:ref:`cell_availability_zone` for more on how to create an availability zone
-and add the node.
-
-After that the cell is deployed and can be used.
-
-.. note::
-
-   Migrating instances between cells is not supported. To move an instance to
-   a different cell it needs to be re-created in the new target cell.
diff --git a/deploy-guide/source/features/deploy_manila.rst b/deploy-guide/source/features/deploy_manila.rst
deleted file mode 100644
index bf3a8318..00000000
--- a/deploy-guide/source/features/deploy_manila.rst
+++ /dev/null
@@ -1,349 +0,0 @@
-Deploying Manila in the Overcloud
-=================================
-
-This guide assumes that your undercloud is already installed and ready to
-deploy an overcloud with Manila enabled.
-
-Deploying the Overcloud with the Internal Ceph Backend
-------------------------------------------------------
-Ceph deployed by TripleO can be used as a Manila share backend. Make sure that
-Ceph, Ceph MDS and Manila Ceph environment files are included when deploying the
-Overcloud::
-
-    openstack overcloud deploy --templates \
-      -e /usr/share/openstack-tripleo-heat-templates/environments/cephadm/cephadm.yaml \
-      -e /usr/share/openstack-tripleo-heat-templates/environments/cephadm/ceph-mds.yaml \
-      -e /usr/share/openstack-tripleo-heat-templates/environments/manila-cephfsnative-config.yaml
-
-.. note::
-   These and any other environment files or options passed to the overcloud
-   deploy command are referenced below as the "full environment". We assume
-   the ``--plan`` flag is not what we want to use for this example.
-
-Network Isolation
-~~~~~~~~~~~~~~~~~
-When mounting a ceph share from a user instance, the user instance needs
-access to the Ceph public network, which in TripleO maps to the Overcloud
-storage network.
In an Overcloud which uses isolated -networks the tenant network and storage network are isolated from one another -so user instances cannot reach the Ceph public network unless the cloud -administrator creates a provider network in neutron that maps to the storage -network and exposes access to it. - -Before deploying Overcloud make sure that there is a bridge for storage network -interface. If single NIC with VLANs network configuration is used (as in -``/usr/share/openstack-tripleo-heat-templates/network/config/single-nic-vlans/``) -then by default ``br-ex`` bridge is used for storage network and no additional -customization is required for Overcloud deployment. If a dedicated interface is -used for storage network (as in -``/usr/share/openstack-tripleo-heat-templates/network/config/multiple-nics/``) -then update storage interface for each node type (controller, compute, ceph) to -use bridge. The following interface definition:: - - - type: interface - name: nic2 - use_dhcp: false - addresses: - - ip_netmask: - get_param: StorageIpSubnet - -should be replaced with:: - - - type: ovs_bridge - name: br-storage - use_dhcp: false - addresses: - - ip_netmask: - get_param: StorageIpSubnet - members: - - type: interface - name: nic2 - use_dhcp: false - primary: true - -And pass following parameters when deploying Overcloud to allow Neutron to map -provider networks to the storage bridge:: - - parameter_defaults: - NeutronBridgeMappings: datacentre:br-ex,storage:br-storage - NeutronFlatNetworks: datacentre,storage - -If the storage network uses VLAN, include storage network in -``NeutronNetworkVLANRanges`` parameter. For example:: - - NeutronNetworkVLANRanges: 'datacentre:100:1000,storage:30:30' - -.. warning:: - If network isolation is used, make sure that storage provider network - subnet doesn't overlap with IP allocation pool used for Overcloud storage - nodes (controlled by ``StorageAllocationPools`` heat parameter). - ``StorageAllocationPools`` is by default set to - ``[{'start': '172.16.1.4', 'end': '172.16.1.250'}]``. It may be necessary - to shrink this pool, for example:: - - StorageAllocationPools: [{'start': '172.16.1.4', 'end': '172.16.1.99'}] - -When Overcloud is deployed, create a provider network which can be used to -access storage network. - -* If single NIC with VLANs is used, then the provider network is mapped - to the default datacentre network:: - - neutron net-create storage --shared --provider:physical_network \ - datacentre --provider:network_type vlan --provider:segmentation_id 30 - - neutron subnet-create --name storage-subnet \ - --allocation-pool start=172.16.1.100,end=172.16.1.120 \ - --enable-dhcp storage 172.16.1.0/24 - -* If a custom bridge was used for storage network interface (``br-storage`` in - the example above) then provider network is mapped to the network specified - by ``NeutronBridgeMappings`` parameter (``storage`` network in the example - above):: - - neutron net-create storage --shared --provider:physical_network storage \ - --provider:network_type flat - - neutron subnet-create --name storage-subnet \ - --allocation-pool start=172.16.1.200,end=172.16.1.220 --enable-dhcp \ - storage 172.16.1.0/24 --no-gateway - -.. note:: - Allocation pool should not overlap with storage network - pool used for storage nodes (``StorageAllocationPools`` parameter). - You may also need to shrink storage nodes pool size to reserve more IPs - for tenants using the provider network. - -.. 
note:: - - Make sure that subnet CIDR matches storage network CIDR (``StorageNetCidr`` - parameter)and - segmentation_id matches VLAN ID for the storage network traffic - (``StorageNetworkVlanID`` parameter). - -Then Ceph shares can be accessed from a user instance by adding the provider -network to the instance. - -.. note:: - - Cloud-init by default configures only first network interface to use DHCP - which means that user instances will not have network interface for storage - network autoconfigured. You can configure it manually or use - `dhcp-all-interfaces `_. - - -Deploying Manila in the overcloud with CephFS through NFS and a composable network ----------------------------------------------------------------------------------- - -The CephFS through NFS back end is composed of Ceph metadata servers (MDS), -NFS Ganesha (the NFS gateway), and the Ceph cluster service components. -The manila CephFS NFS driver uses NFS-Ganesha gateway to provide NFSv4 protocol -access to CephFS shares. -The Ceph MDS service maps the directories and file names of the file system -to objects that are stored in RADOS clusters. -The NFS-Ganesha service runs on the Controller nodes with the Ceph services. - - -CephFS with NFS-Ganesha deployment -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -CephFS through NFS deployments use an extra isolated network, StorageNFS. -This network is deployed so users can mount shares over NFS on that network -without accessing the Storage or Storage Management networks which are -reserved for infrastructure traffic. - -The ControllerStorageNFS custom role configures the isolated StorageNFS network. -This role is similar to the default `Controller.yaml` role file with the addition -of the StorageNFS network and the CephNfs service, indicated by the `OS::TripleO::Services:CephNfs` -service. - - -#. To create the StorageNFSController role, used later in the process by the - overcloud deploy command, run:: - - openstack overcloud roles generate --roles-path /usr/share/openstack-tripleo-heat-templates/roles \ - -o /home/stack/roles_data.yaml ControllerStorageNfs Compute CephStorage - -#. Run the overcloud deploy command including the new generated `roles_data.yaml` - and the `network_data_ganesha.yaml` file that will trigger the generation of - this new network. The final overcloud command must look like the following:: - - openstack overcloud deploy \ - --templates /usr/share/openstack-tripleo-heat-templates \ - -n /usr/share/openstack-tripleo-heat-templates/network_data_ganesha.yaml \ - -r /home/stack/roles_data.yaml \ - -e /home/stack/containers-default-parameters.yaml \ - -e /usr/share/openstack-tripleo-heat-templates/environments/network-isolation.yaml \ - -e /home/stack/network-environment.yaml \ - -e/usr/share/openstack-tripleo-heat-templates/environments/cephadm/cephadm.yaml \ - -e /usr/share/openstack-tripleo-heat-templates/environments/cephadm/ceph-mds.yaml \ - -e /usr/share/openstack-tripleo-heat-templates/environments/manila-cephfsganesha-config.yaml - - -.. note:: - - The network_data_ganesha.yaml file contains an additional section that defines - the isolated StorageNFS network. 
Although the default settings work for most - installations, you must edit the YAML file to add your network settings, - including the VLAN ID, subnet, and other settings:: - - name: StorageNFS - enabled: true - vip: true - name_lower: storage_nfs - vlan: 70 - ip_subnet: '172.16.4.0/24' - allocation_pools: [{'start': '172.16.4.4', 'end': '172.16.4.149'}] - ipv6_subnet: 'fd00:fd00:fd00:7000::/64' - ipv6_allocation_pools: [{'start': 'fd00:fd00:fd00:7000::10', 'end': 'fd00:fd00:fd00:7000:ffff:ffff:ffff:fffe'}] - - -Configure the StorageNFS network -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -After the overcloud deployment is over, create a corresponding `StorageNFSSubnet` on -the neutron-shared provider network. -The subnet is the same as the storage_nfs network definition in the `network_data_ganesha.yml` -and ensure that the allocation range for the StorageNFS subnet and the corresponding -undercloud subnet do not overlap. - -.. note:: - - No gateway is required because the StorageNFS subnet is dedicated to serving NFS shares - -In order to create the storage_nfs subnet, run:: - - openstack subnet create --allocation-pool start=172.16.4.150,end=172.16.4.250 \ - --dhcp --network StorageNFS --subnet-range 172.16.4.0/24 \ - --gateway none StorageNFSSubnet - -#. Replace the `start=172.16.4.150,end=172.16.4.250` IP values with the IP - values for your network. -#. Replace the `172.16.4.0/24` subnet range with the subnet range for your - network. - - -Deploying the Overcloud with an External Backend ------------------------------------------------- -.. note:: - - The :doc:`../deployment/template_deploy` doc has a more detailed explanation of the - following steps. - -#. Copy the Manila driver-specific configuration file to your home directory: - - - Dell-EMC Isilon driver:: - - sudo cp /usr/share/openstack-tripleo-heat-templates/environments/manila-isilon-config.yaml ~ - - - Dell-EMC Unity driver:: - - sudo cp /usr/share/openstack-tripleo-heat-templates/environments/manila-unity-config.yaml ~ - - - Dell-EMC Vmax driver:: - - sudo cp /usr/share/openstack-tripleo-heat-templates/environments/manila-vmax-config.yaml ~ - - - Dell-EMC VNX driver:: - - sudo cp /usr/share/openstack-tripleo-heat-templates/environments/manila-vnx-config.yaml ~ - - - NetApp driver:: - - sudo cp /usr/share/openstack-tripleo-heat-templates/environments/manila-netapp-config.yaml ~ - -#. Edit the permissions (user is typically ``stack``):: - - sudo chown $USER ~/manila-*-config.yaml - sudo chmod 755 ~/manila-*-config.yaml - -#. Edit the parameters in this file to fit your requirements. - - - Fill in or override the values of parameters for your back end. - - - Since you have copied the file out of its original location, - replace relative paths in the resource_registry with absolute paths - based on ``/usr/share/openstack-tripleo-heat-templates``. - -#. Continue following the TripleO instructions for deploying an overcloud. - Before entering the command to deploy the overcloud, add the environment - file that you just configured as an argument. For example:: - - openstack overcloud deploy --templates \ - -e -e ~/manila-[isilon or unity or vmax or vnx or netapp]-config.yaml - -#. Wait for the completion of the overcloud deployment process. - - -Creating the Share ------------------- - -.. note:: - - The following steps will refer to running commands as an admin user or a - tenant user. Sourcing the ``overcloudrc`` file will authenticate you as - the admin user. 
You can then create a tenant user and use environment - files to switch between them. - -#. Create a share network to host the shares: - - - Create the overcloud networks. The :doc:`../deployment/install_overcloud` - doc has a more detailed explanation about creating the network - and subnet. Note that you may also need to perform the following - steps to get Manila working:: - - neutron router-create router1 - neutron router-interface-add router1 [subnet id] - - - List the networks and subnets [tenant]:: - - neutron net-list && neutron subnet-list - - - Create a share network (typically using the private default-net net/subnet) - [tenant]:: - - manila share-network-create --neutron-net-id [net] --neutron-subnet-id [subnet] - -#. Create a new share type (yes/no is for specifying if the driver handles - share servers) [admin]:: - - manila type-create [name] [yes/no] - -#. Create the share [tenant]:: - - manila create --share-network [share net ID] --share-type [type name] [nfs/cifs] [size of share] - - -Accessing the Share -------------------- - -#. To access the share, create a new VM on the same Neutron network that was - used to create the share network:: - - nova boot --image [image ID] --flavor [flavor ID] --nic net-id=[network ID] [name] - -#. Allow access to the VM you just created:: - - manila access-allow [share ID] ip [IP address of VM] - -#. Run ``manila list`` and ensure that the share is available. - -#. Log into the VM:: - - ssh [user]@[IP] - -.. note:: - - You may need to configure Neutron security rules to access the - VM. That is not in the scope of this document, so it will not be covered - here. - -5. In the VM, execute:: - - sudo mount [export location] [folder to mount to] - -6. Ensure the share is mounted by looking at the bottom of the output of the - ``mount`` command. - -7. That's it - you're ready to start using Manila! diff --git a/deploy-guide/source/features/deploy_swift.rst b/deploy-guide/source/features/deploy_swift.rst deleted file mode 100644 index 4b89c17a..00000000 --- a/deploy-guide/source/features/deploy_swift.rst +++ /dev/null @@ -1,116 +0,0 @@ -Deploy and Scale Swift in the Overcloud -======================================= - -This guide assumes that you are ready to deploy a new overcloud. To ensure -that Swift nodes are all using the same Ring, some manual steps are required. - -Initial Deploy --------------- - -To correctly deploy Swift, we need to manually manage the Swift Rings. This -can be achieved by disabling the Ring building process in TripleO by setting -the ``SwiftRingBuild`` and ``RingBuild`` parameters both to ``false``. For -example:: - - parameter_defaults: - SwiftRingBuild: false - RingBuild: false - -.. note:: - - If this is saved in a file named ``deploy-parameters.yaml`` then it can - be deployed with ``openstack overcloud deploy --templates -e - deploy-parameters.yaml``. - -After the deploy is completed, you will need to ssh onto the overcloud node as -the ``heat-admin`` user and switch to the root user with ``sudo -i``. The IP -addresses is available in the output of ``openstack server list``. Once -connected, in the ``/etc/swift/`` directory follow the instructions in the -`Swift documentation `_ to create the Rings. - -After this is completed you will need to copy the ``/etc/swift/*.ring.gz`` and -``/etc/swift/*.builder`` files from the controller to all other controllers and -Swift storage nodes. These files will also be used when adding additional Swift -nodes. 
You should have six files:: - - /etc/swift/account.builder - /etc/swift/account.ring.gz - /etc/swift/container.builder - /etc/swift/container.ring.gz - /etc/swift/object.builder - /etc/swift/object.ring.gz - -.. note:: - - These files will be updated each time a new node is added with - swift-ring-builder. - - -Scaling Swift -------------- - -TripleO doesn't currently automatically update and scale Swift Rings. This -needs to be done manually, with similar steps to the above initial -deployment. First we need to define how many dedicated Swift nodes we want to -deploy with the ``ObjectStorageCount`` parameter. In this example we are -adding two Swift nodes:: - - parameter_defaults: - SwiftRingBuild: false - RingBuild: false - ObjectStorageCount: 2 - -After we have deployed again with this new environment we will have two Swift -nodes that need to be added to the ring we created during the initial -deployment. Follow the instructions on `Managing the Rings -`_ -to add the new devices to the rings and copy the new rings to *all* nodes in -the Swift cluster. - -.. note:: - - Also read the section on `Scripting ring creation - `_ - to automate this process of scaling the Swift cluster. - - -Viewing the Ring ----------------- - -The swift ring can be viewed on each node with the ``swift-ring-builder`` -command. It can be executed against all of the ``*.builder`` files. Its -output will display all the nodes in the Ring like this:: - - $ swift-ring-builder /etc/swift/object.builder - /etc/swift/object.builder, build version 4 - 1024 partitions, 3.000000 replicas, 1 regions, 1 zones, 3 devices, 0.00 balance, 0.00 dispersion - The minimum number of hours before a partition can be reassigned is 1 - The overload factor is 0.00% (0.000000) - Devices: id region zone ip address port replication ip replication port name weight partitions balance meta - 0 1 1 192.168.24.22 6000 192.168.24.22 6000 d1 100.00 1024 0.00 - 1 1 1 192.168.24.24 6000 192.168.24.24 6000 d1 100.00 1024 0.00 - 2 1 1 192.168.24.6 6000 192.168.24.6 6000 d1 100.00 1024 0.00 - -Ring configuration be verified by checking the hash of the ``*.ring.gz`` -files. It should be the same on all nodes in the ring.:: - - $ sha1sum /etc/swift/*.ring.gz - d41c1b4f93a98a693a6ede074a1b78585af2dc89 /etc/swift/account.ring.gz - 1d10d8cb826308a058c7089fdedfeca122426da9 /etc/swift/container.ring.gz - f26639938660ee0111e4e7bc1b45f28a0b9f6079 /etc/swift/object.ring.gz - -You can also check this by using the ``swift-recon`` command on one of the -overcloud nodes. It will query all other servers and compare all checksums and -a summary like this:: - - [root@overcloud-controller-0 ~]# swift-recon --md5 - =============================================================================== - --> Starting reconnaissance on 3 hosts (object) - =============================================================================== - [2016-10-14 12:37:11] Checking ring md5sums - 3/3 hosts matched, 0 error[s] while checking hosts. - =============================================================================== - [2016-10-14 12:37:11] Checking swift.conf md5sum - 3/3 hosts matched, 0 error[s] while checking hosts. 
- =============================================================================== diff --git a/deploy-guide/source/features/deployed_ceph.rst b/deploy-guide/source/features/deployed_ceph.rst deleted file mode 100644 index eadc2d32..00000000 --- a/deploy-guide/source/features/deployed_ceph.rst +++ /dev/null @@ -1,1968 +0,0 @@ -Deploying Ceph with TripleO -=========================== - -In Wallaby and newer it is possible to have TripleO provision hardware -and deploy Ceph before deploying the overcloud on the same hardware. - -Deployed Ceph Workflow ----------------------- - -As described in the :doc:`../deployment/network_v2` the ``openstack -overcloud`` command was extended so that it can run all of the -following as separate steps: - -#. Create Networks -#. Create Virtual IPs -#. Provision Baremetal Instances -#. Deploy Ceph -#. Create the overcloud Ephemeral Heat stack -#. Run Config-Download and the deploy-steps playbook - -This document covers the "Deploy Ceph" step above. It also covers how -to configure the overcloud deployed in the subsequent steps to use the -Ceph cluster. For details on the earlier steps see -:doc:`../deployment/network_v2`. - -The "Provision Baremetal Instances" step outputs a YAML file -describing the deployed baremetal, for example:: - - openstack overcloud node provision \ - -o ~/deployed_metal.yaml \ - ... - -The deployed_metal.yaml file can be passed as input to the ``openstack -overcloud ceph deploy`` command, which in turn outputs a YAML file -describing the deployed Ceph cluster, for example:: - - openstack overcloud ceph deploy \ - ~/deployed_metal.yaml \ - -o ~/deployed_ceph.yaml \ - ... - -Both the deployed_metal.yaml and deployed_ceph.yaml files may then be -passed as input to the step to "Create the overcloud Ephemeral Heat -stack", for example:: - - openstack overcloud deploy --templates \ - -e ~/deployed_metal.yaml \ - -e ~/deployed_ceph.yaml \ - ... - -While the overcloud is being deployed the data in the -deployed_ceph.yaml file will be used to configure the OpenStack -clients to connect to the Ceph cluster as well as configure the Ceph -cluster to host OpenStack. - -The above workflow is called "Deployed Ceph" because Ceph is already -deployed when the overcloud is configured. - -Deployed Ceph Scope -------------------- - -The "Deployed Ceph" feature deploys a Ceph cluster ready to serve RBD -and CephFS by calling TripleO Ansible roles which execute the -`cephadm` command. When the "Deployed Ceph" process is over you should -expect to find the following: - -- The CephMon, CephMgr and CephOSD services are running on all nodes - which should have those services as defined by the - :doc:`composable_services` interface -- If desired, the CephMds and CephNFS service will also be deployed - and running (this feature is not available in Wallaby however). -- It's possible to SSH into a node with the CephMon service and run - `sudo cephadm shell` -- All OSDs should be running unless there were environmental issues - (e.g. disks were not cleaned) -- A ceph configuration file and client admin keyring file in /etc/ceph - of overcloud nodes with the CephMon service -- The Ceph cluster is ready to serve RBD - -You should not expect the following after "Deployed Ceph" has run: - -- No pools or cephx keys for OpenStack will be created yet -- No CephDashboard or CephRGW services will be running yet - -The above will be configured during overcloud deployment by the -`openstack overcloud deploy` command as they were prior to the -"Deployed Ceph" feature. 
The reasons for this are the following: - -- The Dashboard and RGW services need to integrate with haproxy which - is deployed with the overcloud -- The list of pools to create and their respective cephx keys are a - function of which OpenStack clients (e.g. Nova, Cinder, etc) will be - used so they must be in the overcloud definition. Thus, they are - created during overcloud deployment - -During the overcloud deployment, the above resources will be created -in Ceph using the client admin keyring file and the -``~/deployed_ceph.yaml`` file output by `openstack overcloud ceph -deploy`. Because these resources are created directly on the Ceph -cluster with admin level access, "Deployed Ceph" is different from -the "External Ceph" feature described in :doc:`ceph_external`. - -The main benefits of using "Deployed Ceph" are the following: - -- Use cephadm to deploy Ceph on the hardware managed by TripleO - without having to write your own cephadm spec file (though you may - provide your own if you wish) -- Focus on debugging the basic Ceph deployment without debugging the - overcloud deployment at the same time -- Fix any Ceph deployment problems directly using either Ansible or - the Ceph orchestrator tools before starting the overcloud deployment -- Have the benefits above while maintaining hyperconverged support by - using a tested workflow - -In summary, `openstack overcloud ceph deploy` deploys the Ceph cluster -while `openstack overcloud deploy` (and the commands that follow) -deploy OpenStack and configure that Ceph cluster to be used by -OpenStack. - -Multiple Ceph clusters per deployment -------------------------------------- - -TripleO can only deploy one Ceph cluster in the overcloud per Heat -stack. However, within that Heat stack it's possible to configure -an overcloud to communicate with multiple Ceph clusters which are -external to the overcloud. To do this, follow this document to -configure the "internal" Ceph cluster which is part of the overcloud -and also use the `CephExternalMultiConfig` parameter described in the -:doc:`ceph_external` documentation. - -Prerequisite: Ensure the Ceph container is available ----------------------------------------------------- - -Before deploying Ceph follow the -:ref:`prepare-environment-containers` documentation so -the appropriate Ceph container image is used. -The output of the `openstack tripleo container image prepare` -command should contain a line like the following:: - - ContainerCephDaemonImage: undercloud.ctlplane.mydomain.tld:8787/ceph-ci/daemon:v6.0.0-stable-6.0-pacific-centos-8-x86_64 - -See "Container Options" options below for more details. - -Prerequisite: Ensure the cephadm package is installed ------------------------------------------------------ - -The `cephadm` package needs to be installed on at least one node in -the overcloud in order to bootstrap the first node of the Ceph -cluster. - -The `cephadm` package is pre-built into the overcloud-full image. -The `tripleo_cephadm` role will also use Ansible's package module -to ensure it is present. If `tripleo-repos` is passed the `ceph` -argument for Wallaby or newer, then the CentOS SIG Ceph repository -will be enabled with the appropriate version containing the `cephadm` -package, e.g. for Wallaby the ceph-pacific repository is enabled. - -Prerequisite: Ensure Disks are Clean ------------------------------------- - -cephadm does not reformat the OSD disks and expect them to be clean to -complete successfully. 
Consequently, when reusing the same nodes (or -disks) for new deployments, it is necessary to clean the disks before -every new attempt. One option is to enable the automated cleanup -functionality in Ironic, which will zap the disks every time that a -node is released. The same process can be executed manually or only -for some target nodes, see `cleaning instructions in the Ironic documentation`_. - - -Deployed Ceph Command Line Interface ------------------------------------- - -The command line interface supports the following options:: - - $ openstack overcloud ceph deploy --help - usage: openstack overcloud ceph deploy [-h] -o [-y] - [--skip-user-create] - [--skip-hosts-config] - [--skip-container-registry-config] - [--skip-ntp] - [--cephadm-ssh-user CEPHADM_SSH_USER] - [--stack STACK] - [--working-dir WORKING_DIR] - [--roles-data ROLES_DATA] - [--network-data NETWORK_DATA] - [--public-network-name PUBLIC_NETWORK_NAME] - [--cluster-network-name CLUSTER_NETWORK_NAME] - [--cluster CLUSTER] [--mon-ip MON_IP] - [--config CONFIG] - [--cephadm-extra-args CEPHADM_EXTRA_ARGS] - [--force] - [--ansible-extra-vars ANSIBLE_EXTRA_VARS] - [--ceph-client-username CEPH_CLIENT_USERNAME] - [--ceph-client-key CEPH_CLIENT_KEY] - [--skip-cephx-keys] - [--ceph-vip CEPH_VIP] - [--daemons DAEMONS] - [--single-host-defaults] - [--ntp-server NTP_SERVER] - [--ntp-heat-env-file NTP_HEAT_ENV_FILE] - [--ceph-spec CEPH_SPEC | --osd-spec OSD_SPEC] - [--crush-hierarchy CRUSH_HIERARCHY] - [--standalone] - [--tld] - [--container-image-prepare CONTAINER_IMAGE_PREPARE] - [--cephadm-default-container] - [--container-namespace CONTAINER_NAMESPACE] - [--container-image CONTAINER_IMAGE] - [--container-tag CONTAINER_TAG] - [--registry-url REGISTRY_URL] - [--registry-username REGISTRY_USERNAME] - [--registry-password REGISTRY_PASSWORD] - [] - - positional arguments: - - Path to the environment file output from "openstack - overcloud node provision". This argument may be - excluded only if --ceph-spec is used. - - optional arguments: - -h, --help show this help message and exit - -o , --output - The path to the output environment file describing the - Ceph deployment to pass to the overcloud deployment. - -y, --yes Skip yes/no prompt before overwriting an existing - output file (assume yes). - --skip-user-create Do not create the cephadm SSH user. This user is - necessary to deploy but may be created in a separate - step via 'openstack overcloud ceph user enable'. - --skip-hosts-config Do not update /etc/hosts on deployed servers. By - default this is configured so overcloud nodes can - reach each other and the undercloud by name. - --skip-ntp Do not install/enable ntp chronyd service. By default - time synchronization service chronyd is installed and - enabled later by tripleo. - --skip-container-registry-config - Do not update /etc/containers/registries.conf on - deployed servers. By default this is configured so - overcloud nodes can pull containers from the - undercloud registry. - --cephadm-ssh-user CEPHADM_SSH_USER - Name of the SSH user used by cephadm. Warning: if this - option is used, it must be used consistently for every - 'openstack overcloud ceph' call. Defaults to 'ceph- - admin'. (default=Env: CEPHADM_SSH_USER) - --stack STACK Name or ID of heat stack (default=Env: - OVERCLOUD_STACK_NAME) - --working-dir WORKING_DIR - The working directory for the deployment where all - input, output, and generated files will be stored. 
- Defaults to "$HOME/overcloud-deploy/" - --roles-data ROLES_DATA - Path to an alternative roles_data.yaml. Used to decide - which node gets which Ceph mon, mgr, or osd service - based on the node's role in . - --network-data NETWORK_DATA - Path to an alternative network_data.yaml. Used to - define Ceph public_network and cluster_network. This - file is searched for networks with name_lower values - of storage and storage_mgmt. If none found, then - search repeats but with service_net_map_replace in - place of name_lower. Use --public-network-name or - --cluster-network-name options to override name of the - searched for network from storage or storage_mgmt to a - customized name. If network_data has no storage - networks, both default to ctlplane. If found network - has >1 subnet, they are all combined (for routed - traffic). If a network has ipv6 true, then the - ipv6_subnet is retrieved instead of the ip_subnet, and - the Ceph global ms_bind_ipv4 is set false and the - ms_bind_ipv6 is set true. Use --config to override - these defaults if desired. - --public-network-name PUBLIC_NETWORK_NAME - Name of the network defined in network_data.yaml which - should be used for the Ceph public_network. Defaults - to 'storage'. - --cluster-network-name CLUSTER_NETWORK_NAME - Name of the network defined in network_data.yaml which - should be used for the Ceph cluster_network. Defaults - to 'storage_mgmt'. - --cluster CLUSTER Name of the Ceph cluster. If set to 'foo', then the - files /etc/ceph//foo.conf and - /etc/ceph//foo.client.admin.keyring will be - created. Otherwise these files will use the name - 'ceph'. Changing this means changing command line - calls too, e.g. 'ceph health' will become 'ceph - --cluster foo health' unless export CEPH_ARGS='-- - cluster foo' is used. - --mon-ip MON_IP IP address of the first Ceph monitor. If not set, an - IP from the Ceph public_network of a server with the - mon label from the Ceph spec is used. IP must already - be active on server. - --config CONFIG Path to an existing ceph.conf with settings to be - assimilated by the new cluster via 'cephadm bootstrap - --config' - --cephadm-extra-args CEPHADM_EXTRA_ARGS - String of extra parameters to pass cephadm. E.g. if - --cephadm-extra-args '--log-to-file --skip-prepare- - host', then cephadm boostrap will use those options. - Warning: requires --force as not all possible options - ensure a functional deployment. - --force Run command regardless of consequences. - --ansible-extra-vars ANSIBLE_EXTRA_VARS - Path to an existing Ansible vars file which can - override any variable in tripleo-ansible. If '-- - ansible-extra-vars vars.yaml' is passed, then - 'ansible-playbook -e @vars.yaml ...' is used to call - tripleo-ansible Ceph roles. Warning: requires --force - as not all possible options ensure a functional - deployment. - --ceph-client-username CEPH_CLIENT_USERNAME - Name of the cephx user. E.g. if 'openstack' is used, - then 'ceph auth get client.openstack' will return a - working user with key and capabilities on the deployed - Ceph cluster. Ignored unless tripleo_cephadm_pools is - set via --ansible-extra-vars. If this parameter is not - set and tripleo_cephadm_keys is set via --ansible- - extra-vars, then 'openstack' will be used. Used to set - CephClientUserName in --output. - --ceph-client-key CEPH_CLIENT_KEY - Value of the cephx key. E.g. - 'AQC+vYNXgDAgAhAAc8UoYt+OTz5uhV7ItLdwUw=='. Ignored - unless tripleo_cephadm_pools is set via --ansible- - extra-vars. 
If this parameter is not set and - tripleo_cephadm_keys is set via --ansible-extra-vars, - then a random key will be generated. Used to set - CephClientKey in --output. - --skip-cephx-keys Do not create cephx keys even if tripleo_cephadm_pools - is set via --ansible-extra-vars. If this option is - used, then even the defaults of --ceph-client-key and - --ceph-client-username are ignored, but the pools - defined via --ansible-extra-vars are still be created. - --ceph-vip CEPH_VIP Path to an existing Ceph services/network mapping - file. - --daemons DAEMONS Path to an existing Ceph daemon options definition. - --single-host-defaults - Adjust configuration defaults to suit a single-host - Ceph cluster. - --ntp-server NTP_SERVER - NTP Servers to be used while configuring chronyd - service. e.g. --ntp-server '0.pool.ntp.org, - 1.pool.ntp.org,2.pool.ntp.org' - --ntp-heat-env-file NTP_HEAT_ENV_FILE - Path to existing heat environment file with NTP - servers to be used while configuring chronyd service. - NTP servers are extracted from 'NtpServer' key. - --ceph-spec CEPH_SPEC - Path to an existing Ceph spec file. If not provided a - spec will be generated automatically based on --roles- - data and . The - parameter is optional only - if --ceph-spec is used. - --osd-spec OSD_SPEC Path to an existing OSD spec file. Mutually exclusive - with --ceph-spec. If the Ceph spec file is generated - automatically, then the OSD spec in the Ceph spec file - defaults to {data_devices: {all: true}} for all - service_type osd. Use --osd-spec to override the - data_devices value inside the Ceph spec file. - --crush-hierarchy CRUSH_HIERARCHY - Path to an existing crush hierarchy spec file. - --standalone Use single host Ansible inventory. Used only for - development or testing environments. - --tld Top Level Domain suffix to be added to the short - hostname to represent the fully qualified - domain name. - --container-image-prepare CONTAINER_IMAGE_PREPARE - Path to an alternative - container_image_prepare_defaults.yaml. Used to control - which Ceph container is pulled by cephadm via the - ceph_namespace, ceph_image, and ceph_tag variables in - addition to registry authentication via - ContainerImageRegistryCredentials. - --cephadm-default-container - Use the default container defined in cephadm instead of - container_image_prepare_defaults.yaml. If this is - used, 'cephadm bootstrap' is not passed the --image - parameter. - - container-image-prepare overrides: - The following options may be used to override individual values set via - --container-image-prepare. If the example variables below were set the - image would be concatenated into quay.io/ceph/ceph:latest and a custom - registry login would be used. - - --container-namespace CONTAINER_NAMESPACE - e.g. quay.io/ceph - --container-image CONTAINER_IMAGE - e.g. ceph - --container-tag CONTAINER_TAG - e.g. latest - --registry-url REGISTRY_URL - --registry-username REGISTRY_USERNAME - --registry-password REGISTRY_PASSWORD - - This command is provided by the python-tripleoclient plugin. - $ - -Run `openstack overcloud ceph deploy --help` in your own environment -to see the latest options which you have available. - - -Ceph Configuration Options --------------------------- - -Any initial Ceph configuration options may be passed to a new cluster -by putting them in a standard ini-style configuration file and using -`cephadm bootstrap --config` option. 
The exact same option is passed -through to cephadm with `openstack overcloud ceph deploy --config`:: - - $ cat < initial-ceph.conf - [global] - ms_cluster_mode: secure - ms_service_mode: secure - ms_client_mode: secure - EOF - $ openstack overcloud ceph deploy --config initial-ceph.conf ... - -The above example shows how to configure the messenger v2 protocol to -use a secure mode that encrypts all data passing over the network. - -The `deployed_ceph.yaml` Heat environment file output by `openstack -overcloud ceph deploy` has `ApplyCephConfigOverridesOnUpdate` set to -true. This means that services not covered by deployed ceph, e.g. RGW, -can have the configuration changes that they need applied during -overcloud deployment. After the deployed ceph process has run and -then after the overcloud is deployed, it is recommended to update the -`deployed_ceph.yaml` Heat environment file, or similar, to set -`ApplyCephConfigOverridesOnUpdate` to false. Any subsequent Ceph -configuration changes should then be made by the `ceph config -command`_. - -It is supported to pass through the `cephadm --single-host-defaults` -option, which configures a Ceph cluster to run on a single host:: - - openstack overcloud ceph deploy --single-host-defaults - -Any option available from running `cephadm bootstrap --help` may be -passed through `openstack overcloud ceph deploy` with the -`--cephadm-extra-args` argument. For example:: - - openstack overcloud ceph deploy --force \ - --cephadm-extra-args '--log-to-file --skip-prepare-host' \ - ... - -When the above is run the following will be run on the cephadm -bootstrap node (the first controller node by default) on the -overcloud:: - - cephadm bootstrap --log-to-file --skip-prepare-host ... - -The `--force` option is required when using `--cephadm-extra-args` -because not all possible options ensure a functional deployment. - -Placement Groups (PGs) ----------------------- - -When Ceph is initially deployed with `openstack overcloud ceph deploy` -the PG and replica count settings are not changed from Ceph's own -defaults unless their parameters (osd_pool_default_size, -osd_pool_default_pg_num, osd_pool_default_pgp_num) are included in an -initial Ceph configuration file which can be passed with the --config -option. These settings may also be modified after `openstack overcloud -ceph deploy`. - -The deprecated Heat parameters `CephPoolDefaultSize` and -`CephPoolDefaultPgNum` no longer have any effect as these -configurations are not made during overcloud deployment. -However, during overcloud deployment pools are created and -both the target_size_ratio or pg_num per pool may be set at that -point. See the "Ceph Pool Options" section for more details. - -Ceph Name Options ------------------ - -To deploy with a different cluster name than the default of "ceph" use -the ``--cluster`` option:: - - openstack overcloud ceph deploy \ - --cluster central \ - ... - -The above will result in keyrings and Ceph configuration files being -created with the name passed to cluster, for example:: - - [root@oc0-controller-0 ~]# ls -l /etc/ceph/ - total 16 - -rw-------. 1 root root 63 Mar 26 21:49 central.client.admin.keyring - -rw-------. 1 167 167 201 Mar 26 22:17 central.client.openstack.keyring - -rw-------. 1 167 167 134 Mar 26 22:17 central.client.radosgw.keyring - -rw-r--r--. 
1 root root 177 Mar 26 21:49 central.conf - [root@oc0-controller-0 ~]# - -When `cephadm shell` is run on an overcloud node like the above, Ceph -commands might return the error ``monclient: get_monmap_and_config -cannot identify monitors to contact`` because the default "ceph" name -is not used. Thus, if the ``--cluster`` is used when deploying Ceph, -then use options like the following to run `cephadm shell` after -deployment:: - - cephadm shell --config /etc/ceph/central.conf \ - --keyring /etc/ceph/central.client.admin.keyring - -Another solution is to use the following before running ceph commands:: - - cephadm shell --mount /etc/ceph:/etc/ceph - export CEPH_ARGS='--cluster central' - -After using either of the above standard Ceph commands should work -within the cephadm shell container. - -Ceph Spec Options ------------------ - -The roles file, described in the next section, and the output of -`openstack overcloud node provision` are passed to the -`ceph_spec_bootstrap`_ Ansible module to create a `Ceph Service -Specification`_. The `openstack overcloud ceph deploy` command does -this automatically so that a spec does not usually need to be -generated separately. However, it is possible to generate a ceph spec -before deployment with the following command:: - - $ openstack overcloud ceph spec --help - usage: openstack overcloud ceph spec [-h] -o [-y] - [--stack STACK] - [--working-dir WORKING_DIR] - [--roles-data ROLES_DATA] - [--mon-ip MON_IP] [--standalone] - [--osd-spec OSD_SPEC | --crush-hierarchy CRUSH_HIERARCHY] - [] - - positional arguments: - - Path to the environment file output from "openstack - overcloud node provision". This argument may be - excluded only if --standalone is used. - - optional arguments: - -h, --help show this help message and exit - -o , --output - The path to the output cephadm spec file to pass to - the "openstack overcloud ceph deploy --ceph-spec - " command. - -y, --yes Skip yes/no prompt before overwriting an existing - output file (assume yes). - --stack STACK - Name or ID of heat stack (default=Env: - OVERCLOUD_STACK_NAME) - --working-dir WORKING_DIR - The working directory for the deployment where all - input, output, and generated files will be stored. - Defaults to "$HOME/overcloud-deploy/" - --roles-data ROLES_DATA - Path to an alternative roles_data.yaml. Used to decide - which node gets which Ceph mon, mgr, or osd service - based on the node's role in . - --mon-ip MON_IP - IP address of the first Ceph monitor. Only available - with --standalone. - --standalone Create a spec file for a standalone deployment. Used - for single server development or testing environments. - --tld Top Level Domain suffix to be added to the short - hostname to represent the fully qualified - domain name. - --osd-spec OSD_SPEC - Path to an existing OSD spec file. When the Ceph spec - file is generated its OSD spec defaults to - {data_devices: {all: true}} for all service_type osd. - Use --osd-spec to override the data_devices value - inside the Ceph spec file. - --crush-hierarchy CRUSH_HIERARCHY - Path to an existing crush hierarchy spec file. - $ - -The spec file may then be edited if desired and passed directly like -this:: - - openstack overcloud ceph deploy \ - deployed_metal.yaml \ - -o deployed_ceph.yaml \ - --ceph-spec ~/ceph_spec.yaml - -Overriding which disks should be OSDs -------------------------------------- - -The `Advanced OSD Service Specifications`_ should be used to define -how disks are used as OSDs. 
- -By default all available disks (excluding the disk where the operating -system is installed) are used as OSDs. This is because the default -spec has the following:: - - data_devices: - all: true - -In the above example, the `data_devices` key is valid for any `Ceph -Service Specification`_ whose `service_type` is "osd". Other OSD -service types, as found in the `Advanced OSD Service -Specifications`_, may be set by using the ``--osd-spec`` option. - -If the file ``osd_spec.yaml`` contains the following:: - - data_devices: - rotational: 1 - db_devices: - rotational: 0 - -and the following command is run:: - - openstack overcloud ceph deploy \ - deployed_metal.yaml \ - -o deployed_ceph.yaml \ - --osd-spec osd_spec.yaml - -Then all rotating devices will be data devices and all non-rotating -devices will be used as shared devices (wal, db). This is because when -the dynamic Ceph service specification is built, whatever is in the -file referenced by ``--osd-spec`` will be appended to the section of -the specification if the `service_type` is "osd". The same -``--osd-spec`` is available to the `openstack overcloud ceph spec` -command. - -Another possible variation of the ``osd_spec.yaml`` file is the -following:: - - data_devices: - model: 'SAMSUNG' - osds_per_device: 2 - -In the above example we use the `model` under `data_devices` to only -create OSDs on that model of device and we configure two OSDs per -device. Note that `osds_per_device` does not go under `data_devices` -but on the same level. The above was created after using the following -command on an already running system to find a way to refer only to a -particular device. In this case the ATA_INTEL devices are not -configured as OSDs but the three SAMSUNG devices are configured as -OSDs:: - - [ceph: root@controller-0 /]# ceph orch device ls | grep cephstorage-2 - cephstorage-2 /dev/nvme0n1 ssd SAMSUNG MZ1LW960HMJP-00003_S2X6NY0KB00178 960G Insufficient space (<10 extents) on vgs, LVM detected, locked - cephstorage-2 /dev/nvme1n1 ssd SAMSUNG MZQLB960HAJR-00007_S437NA0N506009 960G Insufficient space (<10 extents) on vgs, LVM detected, locked - cephstorage-2 /dev/nvme2n1 ssd SAMSUNG MZQLB960HAJR-00007_S437NA0N506027 960G Yes - cephstorage-2 /dev/sdb ssd ATA_INTEL_SSDSC2KB960G8_PHYF039400YC960CGN 960G Yes - cephstorage-2 /dev/sdc ssd ATA_INTEL_SSDSC2KB960G8_PHYF039402F2960CGN 960G Yes - [ceph: root@controller-0 /]# - -The :doc:`node_specific_hieradata` feature is not supported by the -cephadm integration but the `Advanced OSD Service Specifications`_ has -a `host_pattern` parameter which specifies which host to target for -certain `data_devices` definitions, so the equivalent functionality is -available but with the new syntax. - -Service Placement Options -------------------------- - -The Ceph services defined in the roles_data.yaml file as described in -:doc:`composable_services` determine which baremetal node runs which -service. By default the Controller role has the CephMon and CephMgr -service while the CephStorage role has the CephOSD service. Most -composable services require Heat output in order to determine how -services are configured, but not the Ceph services. Thus, the -roles_data.yaml file remains authoritative for Ceph service placement -even though the "Deployed Ceph" process happens before Heat is run. - -It is only necessary to use the `--roles-file` option if the default -roles_data.yaml file is not being used. 
For example if you intend to -deploy hyperconverged nodes, then you want the predeployed compute -nodes to be in the ceph spec with the "osd" label and for the -`service_type` "osd" to have a placement list containing a list of the -compute nodes. To do this generate a custom roles file as described in -:doc:`composable_services` like this:: - - openstack overcloud roles generate Controller ComputeHCI > custom_roles.yaml - -and then pass that roles file like this:: - - openstack overcloud ceph deploy \ - deployed_metal.yaml \ - -o deployed_ceph.yaml \ - --roles-data custom_roles.yaml - -After running the above the compute nodes should have running OSD -containers and when the overcloud is deployed Nova compute services -will then be set up on the same hosts. - -If you wish to generate the ceph spec with the modified placement -described above before the ceph deployment, then the same roles -file may be passed to the 'openstack overcloud ceph spec' command:: - - openstack overcloud ceph spec \ - --stack overcloud \ - --roles-data custom_roles.yaml \ - --output ceph_spec.yaml \ - deployed_metal.yaml - -In the above example the `--stack` is used in order to find the -working directory containing the Ansible inventory which was created -when `openstack overcloud node provision` was run. - -Ceph VIP Options ----------------- - -The `--ceph-vip` option may be used to reserve a VIP for each Ceph service -specified by the 'service/network' mapping defined as input. -A generic ceph service mapping can be something like the following:: - - --- - ceph_services: - - service: ceph_nfs - network: storage_cloud_0 - - service: ceph_rgw - network: storage_cloud_0 - -For each service added to the list above, a virtual IP on the specified -network is created to be used as `frontend_vip` of the ingress daemon. -When no subnet is specified, a default `_subnet` pattern is used. -If the subnet does not follow the `_subnet` pattern, a subnet for -the VIP may be specified per service:: - - --- - ceph_services: - - service: ceph_nfs - network: storage_cloud_0 - - service: ceph_rgw - network: storage_cloud_0 - subnet: storage_leafX - -When the `subnet` parameter is provided, it will be used by the -`tripleo_service_vip` Ansible module, otherwise the default pattern is followed. -This feature also supports the fixed_ips mode. When fixed IPs are defined, the -module is able to use that input to reserve the VIP on that network. A valid -input can be something like the following:: - - --- - fixed: true - ceph_services: - - service: ceph_nfs - network: storage_cloud_0 - ip_address: 172.16.11.159 - - service: ceph_rgw - network: storage_cloud_0 - ip_address: 172.16.11.160 - -When the boolean `fixed` is set to True, the subnet pattern is ignored, and -a sanity check on the user input is performed, looking for the `ip_address` -keys associated to the specified services. If the `fixed` keyword is missing, -the subnet pattern is followed. When the environment file containing the -'ceph service/network' mapping described above is created, it can be passed -to the ceph deploy command via the `--ceph-vip` option:: - - openstack overcloud ceph deploy \ - deployed_metal.yaml \ - -o deployed_ceph.yaml \ - --ceph-vip ~/ceph_services.yaml - - -Deploy additional daemons -------------------------- -A new option `--daemons` for the `openstack overcloud ceph deploy` command has -been added and may be used to define additional Ceph daemons that are deployed -during the Ceph provisioning process. 
-This option requires a data structure which defines the services to be -deployed:: - - ceph_nfs: - cephfs_data: 'manila_data' - cephfs_metadata: 'manila_metadata' - ceph_rgw: {} - ceph_ingress: - tripleo_cephadm_haproxy_container_image: undercloud.ctlplane.mydomain.tld:8787/ceph/haproxy:2.3 - tripleo_cephadm_keepalived_container_image: undercloud.ctlplane.mydomain.tld:8787/ceph/keepalived:2.5.1 - -For each service added to the data structure above, additional options can be -defined and passed as extra_vars to the tripleo-ansible flow. If no option is -specified, the default values provided by the cephadm tripleo-ansible role will -be used. - - -Example: deploy HA Ceph NFS daemon ----------------------------------- -Cephadm is able to deploy and manage the lifecycle of a highly available -ceph-nfs daemon, called `CephIngress`_, which uses haproxy and keepalived. The -`--daemon` option described in the previous section, provides: - -#. a stable, VIP managed by keepalived used to access the NFS service -#. fail-over between hosts in case of failure -#. load distribution across multiple NFS gateways through Haproxy - -To deploy a cephadm managed ceph-nfs daemon with the related ingress service, -create a `ceph_daemons.yaml` spec file with the following definition:: - - ceph_nfs: - cephfs_data: 'manila_data' - cephfs_metadata: 'manila_metadata' - ceph_ingress: - tripleo_cephadm_haproxy_container_image: undercloud.ctlplane.mydomain.tld:8787/ceph/haproxy:2.3 - tripleo_cephadm_keepalived_container_image: undercloud.ctlplane.mydomain.tld:8787/ceph/keepalived:2.5.1 - -When the environment file containing the services definition described above is -created, it can be passed to the ceph deploy command via the `--daemon` -option:: - - openstack overcloud ceph deploy \ - deployed_metal.yaml \ - -o deployed_ceph.yaml \ - --ceph-vip ~/ceph_services.yaml \ - --daemon ~/ceph_daemons.yaml - -.. note:: - A VIP must be reserved for the ceph_nfs service and passed to the command - above. For further information on reserving a VIP for a Ceph service, see - `Ceph VIP Options`_. - - -Crush Hierarchy Options ------------------------ - -The `ceph_spec_bootstrap`_ Ansible module is used to generate the Ceph -related spec file which is applied using the Ceph orchestrator tool. -During the Ceph OSDs deployment, a custom crush hierarchy can be defined -and passed using the ``--crush-hierarchy`` option. -As per `Ceph Host Management`_, by doing this the `location` attribute is -added to the Hosts spec. -The location attribute will only affect the initial CRUSH location -Subsequent changes of the location property will be ignored. Also, removing -a host will not remove any CRUSH generated bucket. 
- -Example: Apply a custom crush hierarchy to the deployed OSDs -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -If the file ``crush_hierarchy.yaml`` contains something like the following:: - - --- - ceph-0: - root: default - rack: r0 - ceph-1: - root: default - rack: r1 - ceph-2: - root: default - rack: r2 - -and the following command is run:: - - openstack overcloud ceph deploy \ - deployed_metal.yaml \ - -o deployed_ceph.yaml \ - --osd-spec osd_spec.yaml \ - --crush-hierarchy crush_hierarchy.yaml - -Then the Ceph cluster will bootstrap with the following Ceph OSD layout:: - - - [ceph: root@ceph-0 /]# ceph osd tree - ID CLASS WEIGHT TYPE NAME STATUS REWEIGHT PRI-AFF - -1 0.02939 root default - -3 0.00980 rack r0 - -2 0.00980 host ceph-node-00 - 0 hdd 0.00980 osd.0 up 1.00000 1.00000 - -5 0.00980 rack r1 - -4 0.00980 host ceph-node-01 - 1 hdd 0.00980 osd.1 up 1.00000 1.00000 - -7 0.00980 rack r2 - -6 0.00980 host ceph-node-02 - 2 hdd 0.00980 osd.2 up 1.00000 1.00000 - - -.. note:: - - Device classes are automatically detected by Ceph, but crush rules - are associated to pools and they still be defined using the - CephCrushRules parameter during the overcloud deployment. Additional - details can be found in the "Overriding CRUSH rules" section below. - -TLD option ----------- -During ceph spec generation, if ``--tld`` is passed to `ceph_spec_bootstrap`_ -ansible module, generated spec will have the hostnames appended with tld. - -This ``--tld`` option is available in `openstack overcloud ceph deploy` and - `openstack overcloud ceph spec` commands. - -for example:: - - openstack overcloud ceph deploy \ - --tld "redhat.local" - -During `openstack overcloud ceph deploy` , even the hostnames of all overcloud -nodes are appended with ``--tld`` option, which makes it a Fully qualified -Domain name (canonical name) suitable for TLS-e configuration. - -Network Options ---------------- - -The storage networks defined in the network_data.yaml file as -described in :doc:`custom_networks` determine which networks -Ceph is configured to use. When using network isolation, the -standard is for TripleO to deploy two storage networks which -map to the two Ceph networks in the following way: - -* ``storage`` - Storage traffic, the Ceph ``public_network``, - e.g. Nova compute nodes use this network for RBD traffic to the Ceph - cluster. - -* ``storage_mgmt`` - Storage management traffic (such as replication - traffic between storage nodes), the Ceph ``cluster_network``, - e.g. Ceph OSDs use this network to replicate data. - -``openstack overcloud ceph deploy`` will use the network_data.yaml -file specified by the ``--network-data`` option to determine which -networks should be used for the ``public_network`` and -``cluster_network``. It assumes these networks are named ``storage`` -and ``storage_mgmt`` in the network_data.yaml file unless a different -name should be used as indicated by the ``--public-network-name`` and -``--cluster-network-name`` options. - -It is necessary to use the ``--network-data`` option when deploying -with network isolation. Otherwise the default network, i.e. the -ctlplane network on the undercloud (192.168.24.0/24), will be used for -both the ``public_network`` and ``cluster_network``. 
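For reference, a minimal network_data.yaml that uses the default network
names, so that no ``--public-network-name`` or ``--cluster-network-name``
override is needed, might look like the following sketch (the subnet
values are illustrative assumptions, not defaults)::

    - name: Storage
      name_lower: storage
      ip_subnet: '172.16.1.0/24'
    - name: StorageMgmt
      name_lower: storage_mgmt
      ip_subnet: '172.16.3.0/24'

With such a file, `openstack overcloud ceph deploy --network-data
network_data.yaml` would set the Ceph ``public_network`` to
172.16.1.0/24 and the ``cluster_network`` to 172.16.3.0/24.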
- - -Example: Multiple subnets with custom network names -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -If network_data.yaml contains the following:: - - - name: StorageMgmtCloud0 - name_lower: storage_mgmt_cloud_0 - service_net_map_replace: storage_mgmt - subnets: - storage_mgmt_cloud_0_subnet12: - ip_subnet: '172.16.12.0/24' - storage_mgmt_cloud_0_subnet13: - ip_subnet: '172.16.13.0/24' - - name: StorageCloud0 - name_lower: storage_cloud_0 - service_net_map_replace: storage - subnets: - storage_cloud_0_subnet14: - ip_subnet: '172.16.14.0/24' - storage_cloud_0_subnet15: - ip_subnet: '172.16.15.0/24' - -Then the Ceph cluster will have the following parameters set:: - - [global] - public_network = '172.16.14.0/24,172.16.15.0/24' - cluster_network = '172.16.12.0/24,172.16.13.0/24' - ms_bind_ipv4 = True - ms_bind_ipv6 = False - -This is because the TripleO client will see that though the -``name_lower`` value does not match ``storage`` or ``storage_mgmt`` -(they match the custom names ``storage_cloud_0`` and -``storage_mgmt_cloud_0`` instead), those names do match the -``service_net_map_replace`` values. If ``service_net_map_replace`` -is in the network_data.yaml, then it is not necessary to use the -``--public-network-name`` and ``--cluster-network-name`` -options. Alternatively the ``service_net_map_replace`` key could have -been left out and the ``--public-network-name`` and -``--cluster-network-name`` options could have been used instead. Also, -because multiple subnets are used they are concatenated and it is -assumed that there is routing between the subnets. If there was no -``subnets`` key, in the network_data.yaml file, then the client would -have looked instead for the single ``ip_subnet`` key for each network. - -By default the Ceph global `ms_bind_ipv4` is set `true` and -`ms_bind_ipv6` is set `false`. - -Example: IPv6 -^^^^^^^^^^^^^ - -If network_data.yaml contains the following:: - - - name: Storage - ipv6: true - ipv6_subnet: fd00:fd00:fd00:3000::/64 - name_lower: storage - - name: StorageMgmt - ipv6: true - ipv6_subnet: fd00:fd00:fd00:4000::/64 - name_lower: storage_mgmt - -Then the Ceph cluster will have the following parameters set:: - - [global] - public_network = fd00:fd00:fd00:3000::/64 - cluster_network = fd00:fd00:fd00:4000::/64 - ms_bind_ipv4 = False - ms_bind_ipv6 = True - -Because the storage networks in network_data.yaml contain `ipv6: -true`, the ipv6_subset values are extracted and the Ceph globals -`ms_bind_ipv4` is set `false` and `ms_bind_ipv6` is set `true`. -It is not supported to have the ``public_network`` use IPv4 and -the ``cluster_network`` use IPv6 or vice versa. - -Example: Directly setting network and ms_bind options -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -If the examples above are not sufficient for your Ceph network needs, -then it's possible to create an initial-ceph.conf with the four -parameters ``public_network``, ``cluster_network``, ``ms_bind_ipv4``, -and ``ms_bind_ipv6`` options set to whatever values are desired. - -When using the ``--config`` option it is still important to ensure the -TripleO ``storage`` and ``storage_mgmt`` network names map to the -correct ``public_network`` and ``cluster_network`` so that the rest of -the deployment is consistent. 
- -The four parameters, ``public_network``, ``cluster_network``, -``ms_bind_ipv4``, and ``ms_bind_ipv6``, are always set in the Ceph -cluster (with `ceph config set global`) from the ``--network-data`` -file unless those parameters are explicitly set in the ``--config`` -file. In that case the values in the ``--network-data`` file are not -set directly in the Ceph cluster though other aspects of the overcloud -deployment treat the ``--network-data`` file as authoritative -(e.g. when Ceph RGW is set) so both sources should be consistent if -the ``--config`` file has any of these four parameters. - -An example of setting the four parameters in the initial Ceph -configuration is below:: - - $ cat < initial-ceph.conf - [global] - public_network = 'fd00:fd00:fd00:3000::/64,172.16.14.0/24' - cluster_network = 'fd00:fd00:fd00:4000::/64,172.16.12.0/24' - ms_bind_ipv4 = true - ms_bind_ipv6 = true - EOF - $ openstack overcloud ceph deploy \ - --config initial-ceph.conf --network-data network_data.yaml - -The above assumes that network_data.yaml contains the following:: - - - name: Storage - ipv6_subnet: fd00:fd00:fd00:3000::/64 - ip_subnet: 172.16.14.0/24 - name_lower: storage - - name: StorageMgmt - ipv6_subnet: fd00:fd00:fd00:4000::/64 - ip_subnet: 172.16.12.0/24 - name_lower: storage_mgmt - -The above settings, which mix IPv4 and IPv6, are experimental and -untested. - -SSH User Options ----------------- - -Cephadm must use SSH to connect to all remote Ceph hosts that it -manages. The "Deployed Ceph" feature creates an account and SSH key -pair on all Ceph nodes in the overcloud and passes this information -to cephadm so that it uses this account instead of creating its own. -The `openstack overcloud ceph deploy` command will automatically -create this user and distribute their SSH keys. It's also possible -to create this user and distribute the associated keys in a separate -step by running `openstack overcloud ceph user enable` and then when -calling `openstack overcloud ceph deploy` with the -`--skip-user-create` option. By default the user is called -`ceph-admin` though both commands support the `--cephadm-ssh-user` -option to set a different name. If this option is used though, it must -be used consistently for every `openstack overcloud ceph` call. - -The `openstack overcloud ceph user disable --fsid ` command -may be run after `openstack overcloud ceph deploy` has been run -to disable cephadm so that it may not be used to administer the -Ceph cluster and no `ceph orch ...` CLI commands will function. -This will also prevent Ceph node overcloud scale operations though -the Ceph cluster will still be able to read and write data. This same -command will also remove the public and private SSH keys of the -cephadm SSH user on overclouds which host Ceph. The "ceph user enable" -option may then be used to re-distribute the public and private SSH -keys of the cephadm SSH user and re-enable the cephadm mgr module. -`openstack overcloud ceph user enable` will only re-enable the cephadm -mgr module if it is passed the FSID with the `--fsid ` option. -The FSID may be found in the deployed_ceph.yaml Heat environment file -which is generated by the `openstack overcloud ceph deploy -o -deployed_ceph.yaml` command. - -.. warning:: - Disabling cephadm will disable all Ceph management features - described in this document. The `openstack overcloud ceph user - disable` command is not recommended unless you have a good reason - to disable cephadm. 
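If cephadm was disabled with the command above and later needs to be
re-enabled, the re-enable step might look like the following sketch;
the spec file path is an illustrative assumption and the exact
arguments should be confirmed with `openstack overcloud ceph user
enable --help`::

    openstack overcloud ceph user enable \
        --fsid <FSID> \
        ~/ceph_spec.yaml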
- -Both the `openstack overcloud ceph user enable` and `openstack -overcloud ceph user disable` commands require the path to an existing -Ceph spec file to be passed as an argument. This is necessary in order -to determine which hosts require the cephadm SSH user and which of -those hosts require the private SSH key. Only hosts with the _admin -label get the private SSH since they need to be able to SSH into other -Ceph hosts. In the average deployment with three monitor nodes this is -three hosts. All other Ceph hosts only get the public key added to the -users authorized_keys file. - -See the "Ceph Spec Options" options of this document for where to find -this file or how to automatically generate one before Ceph deployment -if you plan to call `openstack overcloud ceph user enable` before -calling `openstack overcloud ceph deploy`. See `openstack overcloud -ceph user enable --help` and `openstack overcloud ceph user disable ---help` for more information. - -Container Options ------------------ - -As described in :doc:`../deployment/container_image_prepare` the -undercloud may be used as a container registry for ceph containers -and there is a supported syntax to download containers from -authenticated registries. - -By default `openstack overcloud ceph deploy` will pull the Ceph -container in the default ``container_image_prepare_defaults.yaml`` -file. If a `push_destination` is defined in this file, then the -overcloud will be configured so it can access the local registry in -order to download the Ceph container. This means that `openstack -overcloud ceph deploy` will modify the overcloud's ``/etc/hosts`` -and ``/etc/containers/registries.conf`` files; unless the -`--skip-hosts-config` and `--skip-container-registry-config` options -are used or a `push_destination` is not defined. - -The version of the Ceph used in each OpenStack release changes per -release and can be seen by running a command like this:: - - egrep "ceph_namespace|ceph_image|ceph_tag" \ - /usr/share/tripleo-common/container-images/container_image_prepare_defaults.yaml - -The `--container-image-prepare` option can be used to override which -``container_image_prepare_defaults.yaml`` file is used. If a version -of this file called ``custom_container_image_prepare.yaml`` is -modified to contain syntax like the following:: - - ContainerImageRegistryCredentials: - quay.io/ceph-ci: - quay_username: quay_password - -Then when a command like the following is run:: - - openstack overcloud ceph deploy \ - deployed_metal.yaml \ - -o deployed_ceph.yaml \ - --container-image-prepare custom_container_image_prepare.yaml - -The credentials will be extracted from the file and the tripleo -ansible role to bootstrap Ceph will be executed like this:: - - cephadm bootstrap - --registry-url quay.io/ceph-ci - --registry-username quay_username - --registry-password quay_password - ... - -The syntax of the container image prepare file can also be ignored and -instead the following command line options may be used instead:: - - --container-namespace CONTAINER_NAMESPACE - e.g. quay.io/ceph - --container-image CONTAINER_IMAGE - e.g. ceph - --container-tag CONTAINER_TAG - e.g. latest - --registry-url REGISTRY_URL - --registry-username REGISTRY_USERNAME - --registry-password REGISTRY_PASSWORD - -If a variable above is unused, then it defaults to the ones found in -the default ``container_image_prepare_defaults.yaml`` file. In other -words, the above options are overrides. 
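As an illustration of the override options listed above, the following
sketch pulls quay.io/ceph/ceph:latest and logs into the registry without
using a custom ``container_image_prepare_defaults.yaml`` file (the
registry values are placeholders, not defaults)::

    openstack overcloud ceph deploy \
        deployed_metal.yaml \
        -o deployed_ceph.yaml \
        --container-namespace quay.io/ceph \
        --container-image ceph \
        --container-tag latest \
        --registry-url quay.io \
        --registry-username <USERNAME> \
        --registry-password <PASSWORD>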
- - -NTP configuration ------------------ - -To help Ceph monitors to form a quorum, time synchronization is configured -before Ceph deployment. - -By default `openstack overcloud ceph deploy` installs and enables -time synchronization service (chrony) unless `--skip-ntp` is used. - -Chrony uses NTP servers defined in ansible-role-chrony role by default. - -NTP server/s can be configured explicitly by using either -`--ntp-server` or `--ntp-heat-env-file`. - -NTP servers can be passed as a comma-separated string to the deploy command, -for example:: - - openstack overcloud ceph deploy \ - --ntp-server "0.pool.ntp.org,1.pool.ntp.org" - -Alternatively, a heat env file which contains the list of NTP servers -can be used as shown here:: - - openstack overcloud ceph deploy \ - --ntp-heat-env-file "/home/stack/ntp-parameters.yaml" - -where ntp-parameter.yaml should have the NTP servers defined in the parameter -`NtpServer` as shown in the example:: - - parameter_defaults: - NtpServer: 0.pool.ntp.org,1.pool.ntp.org - -Creating Pools and CephX keys before overcloud deployment (Optional) --------------------------------------------------------------------- - -By default `openstack overcloud ceph deploy` does not create Ceph -pools or cephx keys to access those pools. Later during overcloud -deployment the pools and cephx keys are created based on which Heat -environment files are passed. For most cases only pools for Cinder -(volumes), Nova (vms), and Glance (images) are created but if the -Heat environment file to configure additional services are passed, -e.g. cinder-backup, then the required pools are created. This is -covered in more detail in the next section of this document. - -It is not necessary to create pools and cephx keys before overcloud -deployment but it is possible. The Ceph pools can be created when -`openstack overcloud ceph deploy` is run by using the option ---ansible-extra-vars to set the tripleo_cephadm_pools variable used -by tripleo-ansible's tripleo_cephadm role. - -Create an Ansible extra vars file defining the desired pools:: - - cat < tripleo_cephadm_ansible_extra_vars.yaml - --- - tripleo_cephadm_pools: - - name: vms - pg_autoscale_mode: True - target_size_ratio: 0.3 - application: rbd - - name: volumes - pg_autoscale_mode: True - target_size_ratio: 0.5 - application: rbd - - name: images - target_size_ratio: 0.2 - pg_autoscale_mode: True - application: rbd - tripleo_ceph_client_vars: /home/stack/overcloud-deploy/overcloud/cephadm/ceph_client.yml - EOF - -The pool names 'vms', 'volumes', and 'images' used above are -recommended since those are the default names that the overcloud -deployment will use when "openstack overcloud deploy" is run, unless -the Heat parameters NovaRbdPoolName, CinderRbdPoolName, and -GlanceRbdPoolName are overridden respectively. - -In the above example, tripleo_ceph_client_vars is used to direct Ansible -to save the generated ceph_client.yml file in a cephadm subdirectory of -the working directory. The tripleo_cephadm role will ensure this directory -exists before creating the file. If `openstack overcloud export ceph` is -going to be used, it will expect the Ceph client file to be in this location, -based on the stack name (e.g. overcloud). 
Deploy the Ceph cluster with Ansible extra vars::

    openstack overcloud ceph deploy \
        deployed-metal-overcloud.yaml \
        -y -o deployed-ceph-overcloud.yaml \
        --force \
        --ansible-extra-vars tripleo_cephadm_ansible_extra_vars.yaml

After Ceph is deployed, the pools should be created and an openstack cephx
key will also be created to access all of those pools. The contents of
deployed-ceph-overcloud.yaml will also have the pool and cephx key
Heat environment parameters set so the overcloud will use the same
values.

When the tripleo_cephadm_pools variable is set, the TripleO client will
create a tripleo_cephadm_keys tripleo-ansible variable structure with
the client name "openstack" and a generated cephx key like the following::

    tripleo_cephadm_keys:
      - name: client.openstack
        key: AQC+vYNXgDAgAhAAc8UoYt+OTz5uhV7ItLdwUw==
        mode: '0600'
        caps:
          mgr: allow *
          mon: profile rbd
          osd: profile rbd pool=vms, profile rbd pool=volumes, profile rbd pool=images

It is not recommended to define tripleo_cephadm_keys in the Ansible extra vars file.
If you prefer to set the key username to something other than "openstack" or prefer
to pass your own cephx client key (e.g. AQC+vYNXgDAgAhAAc8UoYt+OTz5uhV7ItLdwUw==),
then use the following parameters::

    --ceph-client-username (default: openstack)
    --ceph-client-key (default: auto generates a valid cephx key)

Both of the above parameters are ignored unless tripleo_cephadm_pools is set via
--ansible-extra-vars. If tripleo_cephadm_pools is set, then a cephx key to access
all of the pools will always be created unless --skip-cephx-keys is used.

If you wish to re-run 'openstack overcloud ceph deploy' for any
reason and have created cephx keys in previous runs, then you may use
the --ceph-client-key parameter from the previous run to prevent a new
key from being generated. The key value can be found in the file which
is output from the previous run (e.g. --output <file>).

If any of the above parameters are used, then the generated deployed Ceph output
file (e.g. --output <file>) will contain the values of the above
variables mapped to their TripleO Heat template environment variables to ensure a
consistent overcloud deployment::

    CephPools: {{ tripleo_cephadm_pools }}
    CephClientConfigVars: {{ tripleo_ceph_client_vars }}
    CephClientKey: {{ ceph_client_key }}
    CephClientUserName: {{ ceph_client_username }}

The CephPools Heat parameter above has always supported idempotent
updates. It will be pre-populated with the pools from
tripleo_cephadm_pools after Ceph is deployed. The deployed_ceph.yaml
which is output can also be updated so that additional pools can be
created when the overcloud is deployed. The Heat parameters above are
described in more detail in the rest of this document.
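For example, a run of the deploy command above which pins both the cephx
user name and key, rather than relying on the generated defaults, might
look like the following (the key shown is only the example value reused
from this document)::

    openstack overcloud ceph deploy \
        deployed-metal-overcloud.yaml \
        -y -o deployed-ceph-overcloud.yaml \
        --force \
        --ansible-extra-vars tripleo_cephadm_ansible_extra_vars.yaml \
        --ceph-client-username openstack \
        --ceph-client-key AQC+vYNXgDAgAhAAc8UoYt+OTz5uhV7ItLdwUw==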
Environment files to configure Ceph during Overcloud deployment
----------------------------------------------------------------

After `openstack overcloud ceph deploy` has run and output the
`deployed_ceph.yaml` file, this file and other Heat environment
files should be passed to the `openstack overcloud deploy`
command::

    openstack overcloud deploy --templates \
        -e /usr/share/openstack-tripleo-heat-templates/environments/cephadm/cephadm.yaml \
        -e deployed_ceph.yaml

The above will make the following modifications to the Ceph cluster
while the overcloud is being deployed:

- Execute cephadm to add the Ceph RADOS Gateway (RGW) service
- Configure HAProxy as a front end for RGW
- Configure Keystone so RGW behaves like the OpenStack object service
- Create Pools for both RGW and RBD services
- Create an openstack client cephx keyring for Nova, Cinder, and Glance to
  access RBD

The information necessary to configure Ceph clients will then
be extracted to `/home/stack/ceph_client.yml` on the undercloud and
passed as input to the tripleo-ansible role tripleo_ceph_client,
which will then configure the rest of the overcloud to use the new
Ceph cluster as described in the :doc:`ceph_external` documentation.

If you only wish to deploy Ceph RBD without RGW then use the following
variation of the above::

    openstack overcloud deploy --templates \
        -e /usr/share/openstack-tripleo-heat-templates/environments/cephadm/cephadm-rbd-only.yaml \
        -e deployed_ceph.yaml

Do not directly edit the `environments/cephadm/cephadm.yaml`
or `cephadm-rbd-only.yaml` file. If you wish to override the defaults,
as described below in the sections starting with "Overriding", then
place those overrides in a separate `ceph-overrides.yaml` file and
deploy like this::

    openstack overcloud deploy --templates \
        -e /usr/share/openstack-tripleo-heat-templates/environments/cephadm/cephadm.yaml \
        -e deployed_ceph.yaml \
        -e ceph-overrides.yaml

Applying Ceph server configuration during overcloud deployment
----------------------------------------------------------------

The `deployed_ceph.yaml` file output by `openstack overcloud ceph deploy`
has the parameter `ApplyCephConfigOverridesOnUpdate` set to true so
that Ceph services not deployed by `openstack overcloud ceph deploy`,
e.g. RGW, can be configured during initial overcloud deployment. After
both Ceph and the overcloud have been deployed, edit the
`deployed_ceph.yaml` file and set `ApplyCephConfigOverridesOnUpdate`
to false. All Ceph server configuration changes should then be made
using `Ceph Orchestrator`_.

It is technically possible to set `ApplyCephConfigOverridesOnUpdate`
to true and use `CephConfigOverrides` to override Ceph *server*
configurations during stack updates. When this happens, parameters in
`CephConfigOverrides` are put into a file, e.g. assimilate_ceph.conf,
and a command like `ceph config assimilate-conf -i
assimilate_ceph.conf` is run.

Regardless of the value of the `ApplyCephConfigOverridesOnUpdate`
boolean, if `openstack overcloud deploy` is re-run in order to update
the stack, the cephadm bootstrap process is not repeated because
that process is only run if `cephadm ls` returns an empty list.

Applying Ceph client configuration during overcloud deployment
----------------------------------------------------------------

To make a Ceph *client* configuration change, update the parameters in
`CephConfigOverrides` and run a stack update.
This will not -change the configuration for the Ceph servers unless -`ApplyCephConfigOverridesOnUpdate` is set to true (as described in the -section above). By default it should only change configurations for -the Ceph clients. Examples of Ceph clients include Nova compute -containers, Cinder volume containers, Glance image containers, etc. - -The `CephConfigOverrides` directive updates all Ceph client -configuration files on the overcloud in the `CephConfigPath` (which -defaults to /var/lib/tripleo-config/ceph). The `CephConfigPath` is -mounted on the client containers as `/etc/ceph`. The name of the -configuration file is `ceph.conf` because the `CephClusterName` -parameter defaults to "ceph". If `CephClusterName` was set to "foo", -then the file would be called `/etc/ceph/foo.conf`. - -Ceph Pool Options ------------------ - -When `openstack overcloud deploy` is run a pool is created for each -OpenStack service depending on if that service is enabled by including -its Heat environment. For example, a command like the following will -result in pools for Nova (vms), Cinder (volumes) and Glance (images) -being created:: - - openstack overcloud deploy --templates \ - -e /usr/share/openstack-tripleo-heat-templates/environments/cephadm/cephadm-rbd-only.yaml - -If `-e environments/cinder-backup.yaml` included in the above command -then a pool called backups would also be created. - -By default each pool will have Ceph`s pg_autoscale_mode enabled so it -is not necessary to directly set a PG number per pool. However, even -with this mode enabled it is recommended to set a `target_size_ratio` -(or pg_num) per pool in order to minimize data rebalancing. For more -information on pg_autoscale_mode see `Autoscaling Placement Groups`_. - -To control the target_size_ratio per pool, create a Heat environment -file like pools.yaml with the following content and include it in the -`openstack overcloud deploy` command with a `-e pools.yaml`:: - - CephPools: - - name: volumes - target_size_ratio: 0.4 - application: rbd - - name: images - target_size_ratio: 0.1 - application: rbd - - name: vms - target_size_ratio: 0.3 - application: rbd - -In the above example it is assumed that the percentage of data used -per service will be Cinder volumes 40%, Glance images 10% and Nova vms -30% (with 20% of space free for other pools). It is worthwhile to set -these values based on your expected usage (e.g. maybe 40% is not right -for your usecase). If you do not override the CephPools parameter, -then each pool will have Ceph's default PG number. Though the -autoscaler will adjust this number automatically over time based on -usage, the data will be moved within the cluster as a result which -will use computational resources. - -If you prefer to set a PG number instead of a target size ratio, then -replace `target_size_ratio` in the example above with ‘pg_num’ and -supply a different integer per pool (e.g. 512 for volumes, 128 for -images, etc.) based on your expected usage. 
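For example, a pg_num based variant of the pools.yaml file described
above might look like the following sketch; the volumes and images
values come from the example in this section, while the vms value is
only an illustrative assumption::

    parameter_defaults:
      CephPools:
        - name: volumes
          pg_num: 512
          application: rbd
        - name: images
          pg_num: 128
          application: rbd
        - name: vms
          pg_num: 256
          application: rbd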
- -Overriding CRUSH rules ----------------------- - -To deploy Ceph pools with custom CRUSH Map Rules use the -`CephCrushRules` parameter to define a list of named rules and then -associate the `rule_name` per pool with the `CephPools` parameter:: - - parameter_defaults: - CephCrushRules: - - name: HDD - root: default - type: host - class: hdd - default: true - - name: SSD - root: default - type: host - class: ssd - default: false - CephPools: - - {'name': 'slow_pool', 'rule_name': 'HDD', 'application': 'rbd'} - - {'name': 'fast_pool', 'rule_name': 'SSD', 'application': 'rbd'} - -CRUSH rules may be created during overcloud deployment as documented -above. CRUSH rules may also be created directly via the Ceph command -line tools. - -Overriding CephX Keys ---------------------- - -During overcloud deployment, TripleO will create a Ceph cluster with a -CephX key file for OpenStack RBD client connections that is shared by -the Nova, Cinder, and Glance services to read and write to their -pools. Not only will the keyfile be created but the Ceph cluster will -be configured to accept connections when the key file is used. The -file will be named `ceph.client.openstack.keyring` and it will be -stored in `/etc/ceph` within the containers, but on the container host -it will be stored in a location defined by a TripleO exposed parameter -which defaults to `/var/lib/tripleo-config/ceph`. - -The keyring file is created using the following defaults: - -* CephClusterName: 'ceph' -* CephClientUserName: 'openstack' -* CephClientKey: This value is randomly generated per Heat stack. If - it is overridden the recommendation is to set it to the output of - `ceph-authtool --gen-print-key`. - -If the above values are overridden, the keyring file will have a -different name and different content. E.g. if `CephClusterName` was -set to 'foo' and `CephClientUserName` was set to 'bar', then the -keyring file would be called `foo.client.bar.keyring` and it would -contain the line `[client.bar]`. - -The `CephExtraKeys` parameter may be used to generate additional key -files containing other key values and should contain a list of maps -where each map describes an additional key. The syntax of each -map must conform to what the `ceph-ansible/library/ceph_key.py` -Ansible module accepts. The `CephExtraKeys` parameter should be used -like this:: - - CephExtraKeys: - - name: "client.glance" - caps: - mgr: "allow *" - mon: "profile rbd" - osd: "profile rbd pool=images" - key: "AQBRgQ9eAAAAABAAv84zEilJYZPNuJ0Iwn9Ndg==" - mode: "0600" - -If the above is used, in addition to the -`ceph.client.openstack.keyring` file, an additional file called -`ceph.client.glance.keyring` will be created which contains:: - - [client.glance] - key = AQBRgQ9eAAAAABAAv84zEilJYZPNuJ0Iwn9Ndg== - caps mgr = "allow *" - caps mon = "profile rbd" - caps osd = "profile rbd pool=images" - -The Ceph cluster will also allow the above key file to be used to -connect to the images pool. Ceph RBD clients which are external to the -overcloud could then use this CephX key to connect to the images -pool used by Glance. The default Glance deployment defined in the Heat -stack will continue to use the `ceph.client.openstack.keyring` file -unless that Glance configuration itself is overridden. - -Add the Ceph Dashboard to a Overcloud deployment ------------------------------------------------- - -During the overcloud deployment most of the Ceph daemons can be added and -configured. 
-To deploy the ceph dashboard include the ceph-dashboard.yaml environment -file as in the following example:: - - openstack overcloud deploy --templates \ - -e /usr/share/openstack-tripleo-heat-templates/environments/cephadm/cephadm.yaml \ - -e /usr/share/openstack-tripleo-heat-templates/environments/cephadm/ceph-dashboard.yaml - -The command above will include the ceph dashboard related services and -generates all the `cephadm` required variables to render the monitoring -stack related spec that can be applied against the deployed Ceph cluster. -When the deployment has been completed the Ceph dashboard containers, -including prometheus and grafana, will be running on the controller nodes -and will be accessible using the port 3100 for grafana and 9092 for prometheus; -since this service is only internal and doesn’t listen on the public vip, users -can reach both grafana and the exposed ceph dashboard using the controller -provisioning network vip on the specified port (8444 is the default for a generic -overcloud deployment). - -The resulting deployment will be composed by an external stack made by grafana, -prometheus, alertmanager, node-exporter containers and the ceph dashboard mgr -module that acts as the backend for this external stack, embedding the grafana -layouts and showing the ceph cluster specific metrics coming from prometheus. -The Ceph Dashboard backend services run on the specified `CephDashboardNetwork` -and `CephGrafanaNetwork`, while the high availability is realized by haproxy and -Pacemaker. - -The Ceph Dashboard frontend is fully integrated with the tls-everywhere framework, -hence providing the tls environments files will trigger the certificate request for -both grafana and the ceph dashboard: the generated crt and key files are then -configured by cephadm, resulting in a key-value pair within the Ceph orchestrator, -which is able to mount the required files to the dashboard related containers. -The Ceph Dashboard admin user role is set to `read-only` mode by default for safe -monitoring of the Ceph cluster. To permit an admin user to have elevated privileges -to alter elements of the Ceph cluster with the Dashboard, the operator can change the -default. - -For this purpose, TripleO exposes a parameter that can be used to change the Ceph -Dashboard admin default mode. - -Log in to the undercloud as `stack` user and create the `ceph_dashboard_admin.yaml` -environment file with the following content:: - - parameter_defaults: - CephDashboardAdminRO: false - -Run the overcloud deploy command to update the existing stack and include the environment -file created with all other environment files that are already part of the existing -deployment:: - - openstack overcloud deploy --templates \ - -e \ - -e ceph_dashboard_admin.yml - -The ceph dashboard will also work with composable networks. -In order to isolate the monitoring access for security purposes, operators can -take advantage of composable networks and access the dashboard through a separate -network vip. By doing this, it's not necessary to access the provisioning network -and separate authorization profiles may be implemented. 
To deploy the overcloud with the ceph dashboard composable network we need first
to generate the controller specific role created for this scenario::

    openstack overcloud roles generate \
      -o /home/stack/roles_data.yaml \
      ControllerStorageDashboard Compute \
      BlockStorage ObjectStorage CephStorage

Finally, run the overcloud deploy command including the newly generated
`roles_data.yaml` and the `network_data_dashboard.yaml` file that will trigger
the generation of this new network.

The final overcloud command must look like the following::

    openstack overcloud deploy --templates \
      -r /home/stack/roles_data.yaml \
      -n /usr/share/openstack-tripleo-heat-templates/network_data_dashboard.yaml \
      -e /usr/share/openstack-tripleo-heat-templates/environments/cephadm/cephadm.yaml \
      -e ~/my-ceph-settings.yaml

Scenario: Deploy Ceph with TripleO and Metalsmith and then Scale Up
-------------------------------------------------------------------

Deploy the hardware as described in :doc:`../provisioning/baremetal_provision`
and include nodes in the `CephStorage` role. For example, the
following could be the content of ~/overcloud_baremetal_deploy.yaml::

    - name: Controller
      count: 3
      instances:
        - hostname: controller-0
          name: controller-0
        - hostname: controller-1
          name: controller-1
        - hostname: controller-2
          name: controller-2
    - name: CephStorage
      count: 3
      instances:
        - hostname: ceph-0
          name: ceph-0
        - hostname: ceph-1
          name: ceph-1
        - hostname: ceph-2
          name: ceph-2
    - name: Compute
      count: 1
      instances:
        - hostname: compute-0
          name: compute-0

which is passed to the following command::

    openstack overcloud node provision \
      --stack overcloud \
      --output ~/overcloud-baremetal-deployed.yaml \
      ~/overcloud_baremetal_deploy.yaml

Ceph may then be deployed with `openstack overcloud ceph deploy`.
As described in :doc:`../provisioning/baremetal_provision`, pass
~/overcloud-baremetal-deployed.yaml as input, along with
/usr/share/openstack-tripleo-heat-templates/environments/cephadm/cephadm.yaml
and any Ceph overrides described in the rest of this document, to the
`openstack overcloud deploy` command.

To scale up, modify the ~/overcloud_baremetal_deploy.yaml file
described above to add more CephStorage nodes. In the example below
the number of storage nodes is doubled::

    - name: CephStorage
      count: 6
      instances:
        - hostname: ceph-0
          name: ceph-0
        - hostname: ceph-1
          name: ceph-1
        - hostname: ceph-2
          name: ceph-2
        - hostname: ceph-3
          name: ceph-3
        - hostname: ceph-4
          name: ceph-4
        - hostname: ceph-5
          name: ceph-5

As described in :doc:`../provisioning/baremetal_provision`, re-run the
same `openstack overcloud node provision` command with the updated
~/overcloud_baremetal_deploy.yaml file. This will result in the three
new storage nodes being provisioned and output an updated copy of
~/overcloud-baremetal-deployed.yaml. The updated copy will have the
`CephStorageCount` changed from 3 to 6 and the `DeployedServerPortMap`
and `HostnameMap` will contain the new storage nodes.

After the three new storage nodes are deployed run the same
`openstack overcloud deploy` command as described in the previous
section with the updated copy of ~/overcloud-baremetal-deployed.yaml.
The additional Ceph Storage nodes will be added to the Ceph cluster and
the increased capacity will be available. It is not necessary to run
`openstack overcloud ceph deploy` to scale up.
- -In particular, the following will happen as a result of running -`openstack overcloud deploy`: - -- The storage networks and firewall rules will be appropriately - configured on the new CephStorage nodes -- The ceph-admin user will be created on the new CephStorage nodes -- The ceph-admin user's public SSH key will be distributed to the new - CephStorage nodes so that cephadm can use SSH to add extra nodes -- If a new host with the Ceph Mon or Ceph Mgr service is being added, - then the private SSH key will also be added to that node. -- An updated Ceph spec will be generated and installed on the - bootstrap node, i.e. /home/ceph-admin/specs/ceph_spec.yaml on the - bootstrap node will contain new entries for the new CephStorage - nodes. -- The cephadm bootstrap process will be skipped because `cephadm ls` - will indicate that Ceph containers are already running. -- The updated spec will be applied and cephadm will schedule the new - nodes to join the cluster. - -Scenario: Scale Down Ceph with TripleO and Metalsmith ------------------------------------------------------ - -.. warning:: This procedure is only possible if the Ceph cluster has - the capacity to lose OSDs. - -Before using TripleO to remove hardware which is part of a Ceph -cluster, use Ceph orchestrator to deprovision the hardware gracefully. -This example uses commands from the `OSD Service Documentation for -cephadm`_ to remove the OSDs, and their host, before using TripleO -to scale down the Ceph storage nodes. - -Start a Ceph shell and identify the OSDs to be removed by server. In -the following example we will identify the OSDs of the host ceph-2:: - - [root@oc0-controller-0 ~]# cephadm shell - ... - [ceph: root@oc0-controller-0 /]# ceph osd tree - ID CLASS WEIGHT TYPE NAME STATUS REWEIGHT PRI-AFF - -1 0.58557 root default - ... - -7 0.19519 host ceph-2 - 5 hdd 0.04880 osd.5 up 1.00000 1.00000 - 7 hdd 0.04880 osd.7 up 1.00000 1.00000 - 9 hdd 0.04880 osd.9 up 1.00000 1.00000 - 11 hdd 0.04880 osd.11 up 1.00000 1.00000 - ... - [ceph: root@oc0-controller-0 /]# - -First we need to update the Ceph spec defined in the server. Otherwise -the Ceph Manager will try to recreate the OSDs when we remove them -with `ceph orch osd rm`. Export the Ceph spec, edit it, and reapply -the modified Ceph spec:: - - [ceph: root@oc0-controller-0 /]# ceph orch ls --export > spec.yml - [ceph: root@oc0-controller-0 /]# vi spec.yml - [ceph: root@oc0-controller-0 /]# ceph orch apply -i spec.yml - [ceph: root@oc0-controller-0 /]# - -In the step where the spec is edited, update the `service_type: osd` -hosts list to remove the "ceph-2" host and remove the `service_type: -host` entry for the "ceph-2" host. - -As per `ceph osd tree`, the ceph-2 host has OSDs 5,7,9,11 which can -be removed by running `ceph orch osd rm 5 7 9 11`. For example:: - - [ceph: root@oc0-controller-0 /]# ceph orch osd rm --zap 5 7 9 11 - Scheduled OSD(s) for removal - [ceph: root@oc0-controller-0 /]# ceph orch osd rm status - OSD_ID HOST STATE PG_COUNT REPLACE FORCE DRAIN_STARTED_AT - 7 ceph-2 draining 27 False False 2021-04-23 21:35:51.215361 - 9 ceph-2 draining 8 False False 2021-04-23 21:35:49.111500 - 11 ceph-2 draining 14 False False 2021-04-23 21:35:50.243762 - [ceph: root@oc0-controller-0 /]# - -The `--zap` option is used to clean the disks so that they can be -easily added back to the ceph cluster if necessary. 
Use `ceph orch osd -rm status` to check the status:: - - [ceph: root@oc0-controller-0 /]# ceph orch osd rm status - OSD_ID HOST STATE PG_COUNT REPLACE FORCE DRAIN_STARTED_AT - 7 ceph-2 draining 34 False False 2021-04-23 21:35:51.215361 - 11 ceph-2 done, waiting for purge 0 False False 2021-04-23 21:35:50.243762 - [ceph: root@oc0-controller-0 /]# - -Only proceed if `ceph orch osd rm status` returns no output. - -Remove the host with `ceph orch host rm `. For example:: - - [ceph: root@oc0-controller-0 /]# ceph orch host rm ceph-2 - Removed host 'ceph-2' - [ceph: root@oc0-controller-0 /]# - -Now that the host and OSDs have been logically removed from the Ceph -cluster proceed to remove the host from the overcloud as described in -the "Scaling Down" section of :doc:`../provisioning/baremetal_provision`. - -Scenario: Deploy Hyperconverged Ceph ------------------------------------- - -Use a command like the following to create a `roles.yaml` file -containing a standard Controller role and a ComputeHCI role:: - - openstack overcloud roles generate Controller ComputeHCI -o ~/roles.yaml - -The ComputeHCI role is a Compute node which also runs co-located Ceph -OSD daemons. This kind of service co-location is referred to as HCI, -or hyperconverged infrastructure. See the :doc:`composable_services` -documentation for details on roles and services. - -When collocating Nova Compute and Ceph OSD services, boundaries can be -set to reduce contention for CPU and Memory between the two services. -To limit Ceph for HCI, create an initial Ceph configuration file with -the following:: - - $ cat < initial-ceph.conf - [osd] - osd_memory_target_autotune = true - osd_numa_auto_affinity = true - [mgr] - mgr/cephadm/autotune_memory_target_ratio = 0.2 - EOF - $ - -The `osd_memory_target_autotune`_ is set to true so that the OSD -daemons will adjust their memory consumption based on the -`osd_memory_target` config option. The `autotune_memory_target_ratio` -defaults to 0.7. So 70% of the total RAM in the system is the starting -point, from which any memory consumed by non-autotuned Ceph daemons -are subtracted, and then the remaining memory is divided by the OSDs -(assuming all OSDs have `osd_memory_target_autotune` true). For HCI -deployments the `mgr/cephadm/autotune_memory_target_ratio` can be set -to 0.2 so that more memory is available for the Nova Compute -service. This has the same effect as setting the ceph-ansible `is_hci` -parameter to true. - -A two NUMA node system can host a latency sensitive Nova workload on -one NUMA node and a Ceph OSD workload on the other NUMA node. To -configure Ceph OSDs to use a specific NUMA node (and not the one being -used by the Nova Compute workload) use either of the following Ceph -OSD configurations: - -- `osd_numa_node` sets affinity to a numa node (-1 for none) -- `osd_numa_auto_affinity` automatically sets affinity to the NUMA - node where storage and network match - -If there are network interfaces on both NUMA nodes and the disk -controllers are NUMA node 0, then use a network interface on NUMA node -0 for the storage network and host the Ceph OSD workload on NUMA -node 0. Then host the Nova workload on NUMA node 1 and have it use the -network interfaces on NUMA node 1. Setting `osd_numa_auto_affinity`, -to true, as in the `initial-ceph.conf` file above, should result in -this configuration. Alternatively, the `osd_numa_node` could be set -directly to 0 and `osd_numa_auto_affinity` could be unset so that it -will default to false. 
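-
-As an illustration only, a variant of the `initial-ceph.conf` above which pins
-the OSDs to NUMA node 0 explicitly (rather than relying on automatic affinity)
-could look like the following sketch; the correct NUMA node number depends on
-where the storage network interfaces and disk controllers actually reside::
-
-    [osd]
-    osd_memory_target_autotune = true
-    osd_numa_node = 0
-    [mgr]
-    mgr/cephadm/autotune_memory_target_ratio = 0.2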
-
-When a hyperconverged cluster backfills as a result of an OSD going
-offline, the backfill process can be slowed down. In exchange for a
-slower recovery, the backfill activity has less of an impact on
-the collocated Compute workload. Ceph Pacific has the following
-defaults to control the rate of backfill activity::
-
-    osd_recovery_op_priority = 3
-    osd_max_backfills = 1
-    osd_recovery_max_active_hdd = 3
-    osd_recovery_max_active_ssd = 10
-
-It is not necessary to pass the above in an initial ceph.conf as they
-are the default values, but if they need to be deployed with different
-values, modify an example like the above and add it to the initial Ceph
-configuration file before deployment. If the values need to be adjusted
-after the deployment, use `ceph config set osd
-`.
-
-To limit Nova resources, add parameters to `ceph-overrides.yaml`
-like the following, but modify the NovaReservedHostMemory to suit your
-environment::
-
-    parameter_defaults:
-      ComputeHCIParameters:
-        NovaReservedHostMemory: 75000
-
-The `NovaReservedHostMemory` above overrides the default value of
-`reserved_host_memory_mb` in /etc/nova/nova.conf. The value may be
-set so that the Nova scheduler does not give memory to a virtual
-machine that a Ceph OSD on the same server will need. The example
-above reserves 5 GB per OSD for 10 OSDs per host in addition to the
-default reserved memory for the hypervisor. In an IOPS-optimized
-cluster, performance can be improved by reserving more memory per OSD.
-The 5 GB number is provided as a starting point which can be further
-tuned if necessary.
-
-Deploy Ceph with `openstack overcloud ceph deploy` and be sure to
-pass the initial Ceph configuration file with the Ceph HCI tunings. Then
-deploy the overcloud with `openstack overcloud deploy` as described in
-"Scenario: Deploy Ceph with TripleO and Metalsmith", but use the `-r`
-option to include the generated `roles.yaml` file and the `-e` option
-with the `ceph-overrides.yaml` file containing the Nova HCI tunings
-described above.
-
-The examples above may be used to tune a hyperconverged system during
-deployment. If the values need to be changed after deployment, then
-use the `ceph orchestrator` command to set them directly.
-
-After deployment, start a Ceph shell and confirm the above values were
-applied. For example, to check the NUMA and memory target auto
-tuning, run commands like this::
-
-    [ceph: root@oc0-controller-0 /]# ceph config dump | grep numa
-      osd  advanced  osd_numa_auto_affinity  true
-    [ceph: root@oc0-controller-0 /]# ceph config dump | grep autotune
-      osd  advanced  osd_memory_target_autotune  true
-    [ceph: root@oc0-controller-0 /]# ceph config get mgr mgr/cephadm/autotune_memory_target_ratio
-    0.200000
-    [ceph: root@oc0-controller-0 /]#
-
-We can then confirm that a specific OSD, e.g.
osd.11, inherited those -values with commands like this:: - - [ceph: root@oc0-controller-0 /]# ceph config get osd.11 osd_memory_target - 4294967296 - [ceph: root@oc0-controller-0 /]# ceph config get osd.11 osd_memory_target_autotune - true - [ceph: root@oc0-controller-0 /]# ceph config get osd.11 osd_numa_auto_affinity - true - [ceph: root@oc0-controller-0 /]# - -To confirm that the default backfill values are set for the same -example OSD, use commands like this:: - - [ceph: root@oc0-controller-0 /]# ceph config get osd.11 osd_recovery_op_priority - 3 - [ceph: root@oc0-controller-0 /]# ceph config get osd.11 osd_max_backfills - 1 - [ceph: root@oc0-controller-0 /]# ceph config get osd.11 osd_recovery_max_active_hdd - 3 - [ceph: root@oc0-controller-0 /]# ceph config get osd.11 osd_recovery_max_active_ssd - 10 - [ceph: root@oc0-controller-0 /]# - -On the compute node, verify that the `reserved_host_memory_mb` was -applied:: - - $ sudo podman exec -ti nova_compute /bin/bash - # grep reserved_host_memory_mb /etc/nova/nova.conf - reserved_host_memory_mb=75000 - # - -.. _`cephadm`: https://docs.ceph.com/en/latest/cephadm/index.html -.. _`cleaning instructions in the Ironic documentation`: https://docs.openstack.org/ironic/latest/admin/cleaning.html -.. _`ceph config command`: https://docs.ceph.com/en/latest/man/8/ceph/#config -.. _`ceph_spec_bootstrap`: https://docs.openstack.org/tripleo-ansible/latest/modules/modules-ceph_spec_bootstrap.html -.. _`Ceph Service Specification`: https://docs.ceph.com/en/octopus/mgr/orchestrator/#orchestrator-cli-service-spec -.. _`Advanced OSD Service Specifications`: https://docs.ceph.com/en/octopus/cephadm/drivegroups/ -.. _`Ceph Host Management`: https://docs.ceph.com/en/latest/cephadm/host-management/#setting-the-initial-crush-location-of-host -.. _`Overriding crush rules`: https://docs.openstack.org/project-deploy-guide/tripleo-docs/latest/features/cephadm.html#overriding-crush-rules -.. _`CephIngress`: https://docs.ceph.com/en/pacific/cephadm/services/nfs/#high-availability-nfs -.. _`Ceph Orchestrator`: https://docs.ceph.com/en/latest/mgr/orchestrator/ -.. _`Autoscaling Placement Groups`: https://docs.ceph.com/en/latest/rados/operations/placement-groups/ -.. _`OSD Service Documentation for cephadm`: https://docs.ceph.com/en/latest/cephadm/services/osd/ -.. _`osd_memory_target_autotune`: https://docs.ceph.com/en/latest/cephadm/services/osd/#automatically-tuning-osd-memory diff --git a/deploy-guide/source/features/deployed_server.rst b/deploy-guide/source/features/deployed_server.rst deleted file mode 100644 index 79a0bdf1..00000000 --- a/deploy-guide/source/features/deployed_server.rst +++ /dev/null @@ -1,697 +0,0 @@ -.. _deployed_server: - -Using Already Deployed Servers -============================== - -TripleO can be used with servers that have already been deployed and -provisioned with a running operating system. - -In this deployment scenario, Ironic from the Undercloud is not used -to do any server deployment, installation, or power management. An external to -TripleO and already existing provisioning tool is expected to have already -installed an operating system on the servers that are intended to be used as -nodes in the Overcloud. - -Additionally, Neutron can be optionally used or not. - -.. note:: - It's an all or nothing approach when using already deployed servers. Mixing - using deployed servers with servers provisioned with Nova and Ironic is not - currently possible. 
- -Benefits to using this feature include not requiring a dedicated provisioning -network, and being able to use a custom partitioning scheme on the already -deployed servers. - -Deployed Server Requirements ----------------------------- - -Networking -^^^^^^^^^^ - -Network interfaces -__________________ - -It's recommended that each server have a dedicated management NIC with -externally configured connectivity so that the servers are reachable outside of -any networking configuration done by the OpenStack deployment. - -A separate interface, or set of interfaces should then be used for the -OpenStack deployment itself, configured in the typical fashion with a set of -NIC config templates during the Overcloud deployment. See -:doc:`../features/network_isolation` for more information on configuring networking. - -.. note:: - - When configuring network isolation be sure that the configuration does not - result in a loss of network connectivity from the deployed servers to the - undercloud. The interface(s) that are being used for this connectivity should - be excluded from the NIC config templates so that the configuration does not - unintentionally drop all networking access to the deployed servers. - - -Undercloud -__________ - -Neutron in the Undercloud is not used for providing DHCP services for the -Overcloud nodes, hence a dedicated provisioning network with L2 connectivity is -not a requirement in this scenario. Neutron is however still used for IPAM for -the purposes of assigning IP addresses to the port resources created by -tripleo-heat-templates. - -Network L3 connectivity is still a requirement between the Undercloud and -Overcloud nodes. The undercloud will need to be able to connect over a routable -IP to the overcloud nodes for software configuration with ansible. - -Overcloud -_________ - -Configure the deployed servers that will be used as nodes in the overcloud with -L3 connectivity from the Undercloud as needed. The configuration could be done -via static or DHCP IP assignment. - -Further networking configuration of Overcloud nodes is the same as in a typical -TripleO deployment, except for: - -* Initial configuration of L3 connectivity from the undercloud to the - overcloud. -* No requirement for dedicating a separate L2 network for provisioning - -Testing Connectivity -____________________ - -Test connectivity from the undercloud to the overcloud nodes using SSH over the configured IP -address on the deployed servers. This should be the IP address that is -configured on ``--overcloud-ssh-network`` as passed to the ``openstack overcloud -deploy`` command. The key and user to use with the test should be the same as -used with ``--overcloud-ssh-key`` and ``--overcloud-ssh-user`` with the -deployment command. - -Package repositories -^^^^^^^^^^^^^^^^^^^^ - -The servers will need to already have the appropriately enabled yum repositories -as packages will be installed on the servers during the Overcloud deployment. -The enabling of repositories on the Overcloud nodes is the same as it is for -other areas of TripleO, such as Undercloud installation. See -:doc:`../repositories` for the detailed steps on how to -enable the standard repositories for TripleO. 
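-
-Before moving on to the deployment, it can be helpful to confirm both the SSH
-connectivity described above and the repository setup in one pass. The
-following is only a sketch; the user, key and IP address are placeholders and
-should match what will later be passed via ``--overcloud-ssh-user`` and
-``--overcloud-ssh-key``::
-
-    # run from the undercloud against each deployed server
-    ssh -i ~/.ssh/id_rsa stack@192.168.100.2 'hostname && sudo dnf repolist'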
- -Deploying the Overcloud ------------------------ - -Provision networks and ports if using Neutron -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -If using Neutron for resource management, Network resources for the deployment -still must be provisioned with the ``openstack overcloud network provision`` -command as documented in :ref:`custom_networks`. - -Port resources for the deployment still must be provisioned with the -``openstack overcloud node provision`` command as documented in -:ref:`baremetal_provision`. - -Set the ``managed`` key to false in either the ``defaults`` dictionary for each -role, or on each instances dictionary in the baremetal provision configuration -file. - -The generated file must then be passed to the ``openstack overcloud deploy`` -command. - -Deployment Command -^^^^^^^^^^^^^^^^^^ - -With generated baremetal and network environments -_________________________________________________ -Include the generated environment files with the deployment command:: - - openstack overcloud deploy \ - --deployed-server \ - -e ~/overcloud-networks-deployed.yaml \ - -e ~/overcloud-baremetal-deployed.yaml \ - - -Without generated environments (no Neutron) -___________________________________________ -The following command would be used when the ``openstack overcloud network -provision`` and ``openstack overcloud node provision`` commands were not used. -Additional environment files need to be passed to the deployment command:: - - openstack overcloud deploy \ - --deployed-server \ - -e /usr/share/openstack-tripleo-heat-templates/environments/deployed-server-environment.yaml \ - -e /usr/share/openstack-tripleo-heat-templates/environments/deployed-networks.yaml \ - -e /usr/share/openstack-tripleo-heat-templates/environments/deployed-ports.yaml \ - -e ~/hostnamemap.yaml \ - -e ~/deployed-server-network-environment.yaml \ - - -The environment file ``deployed-server-environment.yaml`` contains the necessary -``resource_registry`` mappings to disable Nova management of overcloud servers -so that deployed servers are used instead. - -``deployed-networks.yaml`` and ``deployed-ports.yaml`` enable the necessary -mappings to disable the Neutron management of network resources. - -``hostnamemap.yaml`` is optional and should define the ``HostnameMap`` -parameter if the actual server hostnames do not match the default role hostname -format. For example:: - - parameter_defaults: - HostnameMap: - overcloud-controller-0: controller-00-rack01 - overcloud-controller-1: controller-01-rack02 - overcloud-controller-2: controller-02-rack03 - overcloud-novacompute-0: compute-00-rack01 - overcloud-novacompute-1: compute-01-rack01 - overcloud-novacompute-2: compute-02-rack01 - -``deployed-server-network-environment.yaml`` should define at a minimum the -following parameters:: - - NodePortMap - DeployedNetworkEnvironment - ControlPlaneVipData - VipPortMap - OVNDBsVirtualFixedIPs - RedisVirtualFixedIPs - EC2MetadataIp - ControlPlaneDefaultRoute - -The following is a sample environment file that shows setting these values - -.. 
code-block:: yaml - - parameter_defaults: - - NodePortMap: - controller0: - ctlplane: - ip_address: 192.168.100.2 - ip_address_uri: 192.168.100.2 - ip_subnet: 192.168.100.0/24 - external: - ip_address: 10.0.0.10 - ip_address_uri: 10.0.0.10 - ip_subnet: 10.0.0.10/24 - internal_api: - ip_address: 172.16.2.10 - ip_address_uri: 172.16.2.10 - ip_subnet: 172.16.2.10/24 - management: - ip_address: 192.168.1.10 - ip_address_uri: 192.168.1.10 - ip_subnet: 192.168.1.10/24 - storage: - ip_address: 172.16.1.10 - ip_address_uri: 172.16.1.10 - ip_subnet: 172.16.1.10/24 - storage_mgmt: - ip_address: 172.16.3.10 - ip_address_uri: 172.16.3.10 - ip_subnet: 172.16.3.10/24 - tenant: - ip_address: 172.16.0.10 - ip_address_uri: 172.16.0.10 - ip_subnet: 172.16.0.10/24 - - compute0: - ctlplane: - ip_address: 192.168.100.3 - ip_address_uri: 192.168.100.3 - ip_subnet: 192.168.100.0/24 - external: - ip_address: 10.0.0.110 - ip_address_uri: 10.0.0.110 - ip_subnet: 10.0.0.110/24 - internal_api: - ip_address: 172.16.2.110 - ip_address_uri: 172.16.2.110 - ip_subnet: 172.16.2.110/24 - management: - ip_address: 192.168.1.110 - ip_address_uri: 192.168.1.110 - ip_subnet: 192.168.1.110/24 - storage: - ip_address: 172.16.1.110 - ip_address_uri: 172.16.1.110 - ip_subnet: 172.16.1.110/24 - storage_mgmt: - ip_address: 172.16.3.110 - ip_address_uri: 172.16.3.110 - ip_subnet: 172.16.3.110/24 - tenant: - ip_address: 172.16.0.110 - ip_address_uri: 172.16.0.110 - ip_subnet: 172.16.0.110/24 - - ControlPlaneVipData: - fixed_ips: - - ip_address: 192.168.100.1 - name: control_virtual_ip - network: - tags: [] - subnets: - - ip_version: 4 - - VipPortMap: - external: - ip_address: 10.0.0.100 - ip_address_uri: 10.0.0.100 - ip_subnet: 10.0.0.100/24 - internal_api: - ip_address: 172.16.2.100 - ip_address_uri: 172.16.2.100 - ip_subnet: 172.16.2.100/24 - storage: - ip_address: 172.16.1.100 - ip_address_uri: 172.16.1.100 - ip_subnet: 172.16.1.100/24 - storage_mgmt: - ip_address: 172.16.3.100 - ip_address_uri: 172.16.3.100 - ip_subnet: 172.16.3.100/24 - - RedisVirtualFixedIPs: - - ip_address: 192.168.100.10 - use_neutron: false - OVNDBsVirtualFixedIPs: - - ip_address: 192.168.100.11 - use_neutron: false - - DeployedNetworkEnvironment: - net_attributes_map: - external: - network: - dns_domain: external.tripleodomain. - mtu: 1400 - name: external - tags: - - tripleo_network_name=External - - tripleo_net_idx=0 - - tripleo_vip=true - subnets: - external_subnet: - cidr: 10.0.0.0/24 - dns_nameservers: [] - gateway_ip: null - host_routes: [] - ip_version: 4 - name: external_subnet - tags: - - tripleo_vlan_id=10 - internal_api: - network: - dns_domain: internalapi.tripleodomain. - mtu: 1400 - name: internal_api - tags: - - tripleo_net_idx=1 - - tripleo_vip=true - - tripleo_network_name=InternalApi - subnets: - internal_api_subnet: - cidr: 172.16.2.0/24 - dns_nameservers: [] - gateway_ip: null - host_routes: [] - ip_version: 4 - name: internal_api_subnet - tags: - - tripleo_vlan_id=20 - management: - network: - dns_domain: management.tripleodomain. - mtu: 1400 - name: management - tags: - - tripleo_net_idx=5 - - tripleo_network_name=Management - subnets: - management_subnet: - cidr: 192.168.1.0/24 - dns_nameservers: [] - gateway_ip: 192.168.1.1 - host_routes: [] - ip_version: 4 - name: management_subnet - tags: - - tripleo_vlan_id=60 - storage: - network: - dns_domain: storage.tripleodomain. 
- mtu: 1400 - name: storage - tags: - - tripleo_net_idx=3 - - tripleo_vip=true - - tripleo_network_name=Storage - subnets: - storage_subnet: - cidr: 172.16.1.0/24 - dns_nameservers: [] - gateway_ip: null - host_routes: [] - ip_version: 4 - name: storage_subnet - tags: - - tripleo_vlan_id=30 - storage_mgmt: - network: - dns_domain: storagemgmt.tripleodomain. - mtu: 1400 - name: storage_mgmt - tags: - - tripleo_net_idx=4 - - tripleo_vip=true - - tripleo_network_name=StorageMgmt - subnets: - storage_mgmt_subnet: - cidr: 172.16.3.0/24 - dns_nameservers: [] - gateway_ip: null - host_routes: [] - ip_version: 4 - name: storage_mgmt_subnet - tags: - - tripleo_vlan_id=40 - tenant: - network: - dns_domain: tenant.tripleodomain. - mtu: 1400 - name: tenant - tags: - - tripleo_net_idx=2 - - tripleo_network_name=Tenant - subnets: - tenant_subnet: - cidr: 172.16.0.0/24 - dns_nameservers: [] - gateway_ip: null - host_routes: [] - ip_version: 4 - name: tenant_subnet - tags: - - tripleo_vlan_id=50 - net_cidr_map: - external: - - 10.0.0.0/24 - internal_api: - - 172.16.2.0/24 - management: - - 192.168.1.0/24 - storage: - - 172.16.1.0/24 - storage_mgmt: - - 172.16.3.0/24 - tenant: - - 172.16.0.0/24 - net_ip_version_map: - external: 4 - internal_api: 4 - management: 4 - storage: 4 - storage_mgmt: 4 - tenant: 4 - -.. note:: - - Beginning in Wallaby, the above parameter values from - ``deployed-server-network-environment.yaml`` and the - ``deployed-networks.yaml`` and ``deployed-ports.yaml`` environments replace the use of the - ``DeployedServerPortMap`` parameter, the - ``environments/deployed-server-deployed-neutron-ports.yaml`` environment, and - the ``deployed-neutron-port.yaml`` template. - - The previous parameters and environments can still be used with the - exception that no resources can be mapped to any Neutron native Heat - resources (resources starting with ``OS::Neutron::*``) when using - :doc:`ephemeral Heat <../deployment/ephemeral_heat>` as there is no Heat - and Neutron API communication. - - Note that the following resources may be mapped to ``OS::Neutron::*`` - resources in environment files used prior to Wallaby, and these mappings - should be removed from Wallaby onward:: - - OS::TripleO::Network::Ports::ControlPlaneVipPort - OS::TripleO::Network::Ports::RedisVipPort - OS::TripleO::Network::Ports::OVNDBsVipPort - - .. admonition:: Victoria and prior releases - - The ``DeployedServerPortMap`` parameter can be used to assign fixed IP's - from either the ctlplane network or the IP address range for the - overcloud. - - If the deployed servers were preconfigured with IP addresses from the ctlplane - network for the initial undercloud connectivity, then the same IP addresses can - be reused during the overcloud deployment. Add the following to a new - environment file and specify the environment file as part of the deployment - command:: - - resource_registry: - OS::TripleO::DeployedServer::ControlPlanePort: ../deployed-server/deployed-neutron-port.yaml - parameter_defaults: - DeployedServerPortMap: - controller0-ctlplane: - fixed_ips: - - ip_address: 192.168.24.9 - subnets: - - cidr: 192.168.24.0/24 - network: - tags: - - 192.168.24.0/24 - compute0-ctlplane: - fixed_ips: - - ip_address: 192.168.24.8 - subnets: - - cidr: 192.168.24..0/24 - network: - tags: - - 192.168.24.0/24 - - The value of the DeployedServerPortMap variable is a map. The keys correspond - to the ``-ctlplane`` of the deployed servers. Specify the ip - addresses and subnet CIDR to be assigned under ``fixed_ips``. 
- - In the case where the ctlplane is not routable from the deployed - servers, the virtual IPs on the ControlPlane, as well as the virtual IPs - for services (Redis and OVNDBs) must be statically assigned. - - Use ``DeployedServerPortMap`` to assign an IP address from any CIDR:: - - resource_registry: - OS::TripleO::DeployedServer::ControlPlanePort: /usr/share/openstack-tripleo-heat-templates/deployed-server/deployed-neutron-port.yaml - OS::TripleO::Network::Ports::ControlPlaneVipPort: /usr/share/openstack-tripleo-heat-templates/deployed-server/deployed-neutron-port.yaml - - # Set VIP's for redis and OVN to noop to default to the ctlplane VIP - # The ctlplane VIP is set with control_virtual_ip in - # DeployedServerPortMap below. - # - # Alternatively, these can be mapped to deployed-neutron-port.yaml as - # well and redis_virtual_ip and ovn_dbs_virtual_ip added to the - # DeployedServerPortMap value to set fixed IP's. - OS::TripleO::Network::Ports::RedisVipPort: /usr/share/openstack-tripleo-heat-templates/network/ports/noop.yaml - OS::TripleO::Network::Ports::OVNDBsVipPort: /usr/share/openstack-tripleo-heat-templates/network/ports/noop.yaml - - parameter_defaults: - NeutronPublicInterface: eth1 - EC2MetadataIp: 192.168.100.1 - ControlPlaneDefaultRoute: 192.168.100.1 - - DeployedServerPortMap: - control_virtual_ip: - fixed_ips: - - ip_address: 192.168.100.1 - subnets: - - cidr: 192.168.100.0/24 - network: - tags: - - 192.168.100.0/24 - controller0-ctlplane: - fixed_ips: - - ip_address: 192.168.100.2 - subnets: - - cidr: 192.168.100.0/24 - network: - tags: - - 192.168.100.0/24 - compute0-ctlplane: - fixed_ips: - - ip_address: 192.168.100.3 - subnets: - - cidr: 192.168.100.0/24 - network: - tags: - - 192.168.100.0/24 - - In the above example, notice how ``RedisVipPort`` and ``OVNDBsVipPort`` are - mapped to ``network/ports/noop.yaml``. This mapping is due to the fact that - these VIP IP addresses comes from the ctlplane by default, and they will use - the same VIP address that is used for ``ControlPlanePort``. Alternatively - these VIP's can be mapped to their own fixed IP's, in which case a VIP will - be created for each. 
In this case, the following mappings and values would be - added to the above example:: - - resource_registry: - OS::TripleO::Network::Ports::RedisVipPort: /usr/share/openstack-tripleo-heat-templates/deployed-server/deployed-neutron-port.yaml - OS::TripleO::Network::Ports::OVNDBsVipPort: /usr/share/openstack-tripleo-heat-templates/deployed-server/deployed-neutron-port.yaml - - parameter_defaults: - - DeployedServerPortMap: - redis_virtual_ip: - fixed_ips: - - ip_address: 192.168.100.10 - subnets: - - cidr: 192.168.100.0/24 - network: - tags: - - 192.168.100.0/24 - ovn_dbs_virtual_ip: - fixed_ips: - - ip_address: 192.168.100.11 - subnets: - - cidr: 192.168.100.0/24 - network: - tags: - - 192.168.100.0/24 - - - Use ``DeployedServerPortMap`` to assign an ControlPlane Virtual IP address from - any CIDR, and the ``RedisVirtualFixedIPs`` and ``OVNDBsVirtualFixedIPs`` - parameters to assign the ``RedisVip`` and ``OVNDBsVip``:: - - resource_registry: - OS::TripleO::DeployedServer::ControlPlanePort: /usr/share/openstack-tripleo-heat-templates/deployed-server/deployed-neutron-port.yaml - OS::TripleO::Network::Ports::ControlPlaneVipPort: /usr/share/openstack-tripleo-heat-templates/deployed-server/deployed-neutron-port.yaml - - parameter_defaults: - NeutronPublicInterface: eth1 - EC2MetadataIp: 192.168.100.1 - ControlPlaneDefaultRoute: 192.168.100.1 - - # Set VIP's for redis and OVN - RedisVirtualFixedIPs: - - ip_address: 192.168.100.10 - use_neutron: false - OVNDBsVirtualFixedIPs: - - ip_address: 192.168.100.11 - use_neutron: false - - DeployedServerPortMap: - control_virtual_ip: - fixed_ips: - - ip_address: 192.168.100.1 - subnets: - - cidr: 192.168.100.0/24 - network: - tags: - - 192.168.100.0/24 - controller0-ctlplane: - fixed_ips: - - ip_address: 192.168.100.2 - subnets: - - cidr: 192.168.100.0/24 - network: - tags: - - 192.168.100.0/24 - compute0-ctlplane: - fixed_ips: - - ip_address: 192.168.100.3 - subnets: - - cidr: 192.168.100.0/24 - network: - tags: - - 192.168.100.0/24 - -Scaling the Overcloud ---------------------- - -Scaling Up -^^^^^^^^^^ -When scaling out compute nodes, the steps to be completed by the -user are as follows: - -#. Prepare the new deployed server(s) as shown in `Deployed Server - Requirements`_. -#. Start the scale out command. See :doc:`../post_deployment/scale_roles` for reference. - -Scaling Down -^^^^^^^^^^^^ - - -Starting in Train and onward, `openstack overcloud node delete` can take -a list of server hostnames instead of instance ids. However they can't be -mixed while running the command. Example: if you use hostnames, it would -have to be for all the nodes to delete. - -.. admonition:: Victoria and prior releases - :class: victoria - - The following instructions should be used when the cloud is deployed on - Victoria or a prior release. - - When scaling down the Overcloud, follow the scale down instructions as normal - as shown in :doc:`../post_deployment/delete_nodes`, however use the following - command to get the uuid values to pass to `openstack overcloud node delete` - instead of using `nova list`:: - - openstack stack resource list overcloud -n5 --filter type=OS::TripleO::Server - - Replace `` in the above command with the actual name of the role that - you are scaling down. The `stack_name` column in the command output can be used - to identify the uuid associated with each node. The `stack_name` will include - the integer value of the index of the node in the Heat resource group. 
For - example, in the following sample output:: - - $ openstack stack resource list overcloud -n5 --filter type=OS::TripleO::ComputeDeployedServerServer - +-----------------------+--------------------------------------+------------------------------------------+-----------------+----------------------+-------------------------------------------------------------+ - | resource_name | physical_resource_id | resource_type | resource_status | updated_time | stack_name | - +-----------------------+--------------------------------------+------------------------------------------+-----------------+----------------------+-------------------------------------------------------------+ - | ComputeDeployedServer | 66b1487c-51ee-4fd0-8d8d-26e9383207f5 | OS::TripleO::ComputeDeployedServerServer | CREATE_COMPLETE | 2017-10-31T23:45:18Z | overcloud-ComputeDeployedServer-myztzg7pn54d-0-pixawichjjl3 | - | ComputeDeployedServer | 01cf59d7-c543-4f50-95df-6562fd2ed7fb | OS::TripleO::ComputeDeployedServerServer | CREATE_COMPLETE | 2017-10-31T23:45:18Z | overcloud-ComputeDeployedServer-myztzg7pn54d-1-ooCahg1vaequ | - | ComputeDeployedServer | 278af32c-c3a4-427e-96d2-3cda7e706c50 | OS::TripleO::ComputeDeployedServerServer | CREATE_COMPLETE | 2017-10-31T23:45:18Z | overcloud-ComputeDeployedServer-myztzg7pn54d-2-xooM5jai2ees | - +-----------------------+--------------------------------------+------------------------------------------+-----------------+----------------------+-------------------------------------------------------------+ - - The index 0, 1, or 2 can be seen in the `stack_name` column. These indices - correspond to the order of the nodes in the Heat resource group. Pass the - corresponding uuid value from the `physical_resource_id` column to `openstack - overcloud node delete` command. - -The physical deployed servers that have been removed from the deployment need -to be powered off. In a deployment not using deployed servers, this would -typically be done with Ironic. When using deployed servers, it must be done -manually, or by whatever existing power management solution is already in -place. If the nodes are not powered down, they will continue to be operational -and could remain functional as part of the deployment, since there are no steps -to unconfigure, uninstall software, or stop services on nodes when scaling -down. - -Once the nodes are powered down and all needed data has been saved from the -nodes, it is recommended that they be reprovisioned back to a base operating -system configuration so that they do not unintentionally join the deployment in -the future if they are powered back on. - -.. note:: - - Do not attempt to reuse nodes that were previously removed from the - deployment without first reprovisioning them using whatever provisioning tool - is in place. - -Deleting the Overcloud ----------------------- - -When deleting the Overcloud, the Overcloud nodes need to be manually powered -off, otherwise, the cloud will still be active and accepting any user requests. - -After archiving important data (log files, saved configurations, database -files), that needs to be saved from the deployment, it is recommended to -reprovision the nodes to a clean base operating system. The reprovision will -ensure that they do not start serving user requests, or interfere with future -deployments in the case where they are powered back on in the future. - -.. 
note:: - - As with scaling down, do not attempt to reuse nodes that were previously part - of a now deleted deployment in a new deployment without first reprovisioning - them using whatever provisioning tool is in place. diff --git a/deploy-guide/source/features/designate.rst b/deploy-guide/source/features/designate.rst deleted file mode 100644 index fa22bfcd..00000000 --- a/deploy-guide/source/features/designate.rst +++ /dev/null @@ -1,62 +0,0 @@ -Deploying DNSaaS (Designate) -============================ - -Because some aspects of a Designate deployment are specific to the environment -in which it is deployed, there is some additional configuration required -beyond just including an environment file. The following instructions will -explain this configuration. - -First, make a copy of the ``designate-config.yaml`` environment. - -.. note:: For HA deployments, there is a separate ``designate-config-ha.yaml`` - file that should be used instead. - -:: - - cp /usr/share/openstack-tripleo-heat-templates/environments/designate-config.yaml . - -This file contains a sample pool configuration which must be edited to match -the intended environment. Each section has comments that explain how to -configure it. - -.. TODO(bnemec): Include these notes in the sample environments, or figure - out how to pull these values from the Heat stack and populate - the file automatically. - -* ``ns_records``: There should be one of these for each node running designate, - and they should point at the public IP of the node. -* ``nameservers``: There should be one of these for each node running BIND. - The ``host`` value should be the public IP of the node. -* ``targets``: There should be one of these for each node running BIND. Each - target has the following attributes which need to be configured: - - * ``masters``: There should be one of these for each node running - designate-mdns. The ``host`` value should be the public IP of the node. - * ``options``: This specifies where the target BIND instance will be - listening. ``host`` should be the public IP of the node, and - ``rndc_host`` should be the internal_api IP of the node. - -Because this configuration requires the node IPs to be known ahead of time, it -is necessary to use predictable IPs. Full details on configuring those can be -found at :doc:`../provisioning/node_placement`. - -Only the external (public) and internal_api networks need to be predictable -for Designate. The following is an example of the addresses that need to be -set:: - - parameter_defaults: - ControllerIPs: - external: - - 10.0.0.51 - - 10.0.0.52 - - 10.0.0.53 - internal_api: - - 172.17.0.251 - - 172.17.0.252 - - 172.17.0.253 - -Include ``enable-designate.yaml``, ``ips-from-pool.yaml``, and either -``designate-config.yaml`` or ``designate-config-ha.yaml`` in the deploy -command:: - - openstack overcloud deploy --templates -e /usr/share/openstack-tripleo-heat-templates/environments/enable-designate.yaml -e ips-from-pool.yaml -e designate-config.yaml [...] diff --git a/deploy-guide/source/features/disable_telemetry.rst b/deploy-guide/source/features/disable_telemetry.rst deleted file mode 100644 index 22d5dea3..00000000 --- a/deploy-guide/source/features/disable_telemetry.rst +++ /dev/null @@ -1,28 +0,0 @@ -Disable Telemetry -================= - -This guide assumes that your undercloud is already installed and ready to -deploy an overcloud without Telemetry services. 
- -Deploy your overcloud without Telemetry services ------------------------------------------------- - -If you don't need or don't want Telemetry services (Ceilometer, Gnocchi, -Panko and Aodh), you can disable the services by adding this environment -file when deploying the overcloud:: - - openstack overcloud deploy --templates \ - -e /usr/share/openstack-tripleo-heat-templates/environments/disable-telemetry.yaml - -Disabling Notifications -~~~~~~~~~~~~~~~~~~~~~~~ - -When Telemetry is disabled, OpenStack Notifications will be disabled as well, and -the driver will be set to 'noop' for all OpenStack services. -If you would like to restore notifications, you would need to set NotificationDriver to -'messagingv2' in your environment. - -.. Warning:: - - NotificationDriver parameter can only support 'noop' and 'messagingv2' for now. - Also note that 'messaging' driver is obsolete and isn't supported by TripleO. diff --git a/deploy-guide/source/features/distributed_compute_node.rst b/deploy-guide/source/features/distributed_compute_node.rst deleted file mode 100644 index 6ce31001..00000000 --- a/deploy-guide/source/features/distributed_compute_node.rst +++ /dev/null @@ -1,1166 +0,0 @@ -.. _distributed_compute_node: - -Distributed Compute Node deployment -=================================== - -Introduction ------------- -Additional groups of compute nodes can be deployed and integrated with an -existing deployment of a control plane stack. These compute nodes are deployed -in separate stacks from the main control plane (overcloud) stack, and they -consume exported data from the overcloud stack to reuse as -configuration data. - -Deploying these additional nodes in separate stacks provides for separation of -management between the control plane stack and the stacks for additional compute -nodes. The stacks can be managed, scaled, and updated separately. - -Using separate stacks also creates smaller failure domains as there are less -baremetal nodes in each individual stack. A failure in one baremetal node only -requires that management operations to address that failure need only affect -the single stack that contains the failed node. - -A routed spine and leaf networking layout can be used to deploy these -additional groups of compute nodes in a distributed nature. Not all nodes need -to be co-located at the same physical location or datacenter. See -:ref:`routed_spine_leaf_network` for more details. - -Such an architecture is referred to as "Distributed Compute Node" or "DCN" for -short. - -Supported failure modes and High Availability recommendations -------------------------------------------------------------- - -Handling negative scenarios for DCN starts from the deployment planning, like -choosing some particular SDN solution over provider networks to meet the -expected SLA. - -Loss of control plane connectivity -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -A failure of the central control plane affects all DCN edge sites. There is no -autonomous control planes at the edge. No OpenStack control plane API or CLI -operations can be executed locally in that case. For example, you cannot create -a snapshot of a Nova VM, or issue an auth token, nor can you delete an image or -a VM. - -.. note:: A single Controller service failure normally induces - no downtime for edge sites and should be handled as for usual HA deployments. - -Loss of an edge site -^^^^^^^^^^^^^^^^^^^^ - -Running Nova VM instances will keep running. 
If stopped running, you need the -control plane back to recover the stopped or crashed workloads. If Neutron DHCP -agent is centralized, and we are forwarding DHCP requests to the central site, -any VMs that are trying to renew their IPs will eventually time out and lose -connectivity. - -.. note:: A single Compute service failure normally affects only its edge site - without additional downtime induced for neighbor edge sites or the central - control plane. - -OpenStack infrastructure services, like Nova Compute, will automatically -reconnect to MariaDB database cluster and RabbitMQ broker when the control -plane's uplink is back. No timed out operations can be resumed though and need -to be retried manually. - -It is recommended to maintain each DCN edge site as a separate Availability Zone -(AZ) for Nova/Neutron and Cinder services. - -Improving resiliency for N/S and E/W traffic -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Reliability of the central control plane may be enhanced with L3 HA network, -which only provides North-South routing. The East-West routing effectiveness of -edge networks may be improved by using DVR or highly available Open Virtual -Network (OVN). There is also BGPVPN and its backend specific choices. - -Network recommendations -^^^^^^^^^^^^^^^^^^^^^^^ - -Traditional or external provider networks with backbone routing at the edge may -fulfill or complement a custom distributed routing solution, like L3 Spine-Leaf -topology. - -.. note:: Neutron SDN backends that involve tunnelling may be sub-optimal for - Edge DCN cases because of the known issues 1808594_ and 1808062_. - - .. _1808594: https://bugs.launchpad.net/tripleo/+bug/1808594 - .. _1808062: https://bugs.launchpad.net/tripleo/+bug/1808062 - -For dynamic IPv4 and stateful IPv6 IPAM cases, you will also need DHCP on those -provider networks in order to assign IPs to VM instances. External provider -networks usually require no Neutron DHCP agents and handle IPAM (and -routing) on its own. While for traditional or -`Routed Provider Networks `_, -when there is no L2 connectivity to edge over WAN, and Neutron DHCP agents are -placed on controllers at the central site, you should have a DHCP relay on -every provider network. Alternatively, DHCP agents need to be moved to the edge. -Such setups also require highly reliable links between remote and central sites. - -.. note:: Neither of DHCP relays/agents at compute nodes, nor routed/external - provider networks are tested or automated via TripleO Heat Templates. You would - have to have those configured manually for your DCN environments. - -.. note:: OVN leverages DVR and does not require running Neutron DHCP/L3 agents, - which might as well simplify particular DCN setups. - -That said, when there is a network failure that disconnects the edge off the -central site, there is no SLA for recovery time but only what the provider -networks or a particular SDN choice can guarantee. For switched/routed/MPLS -provider networks, that may span from 10's of ms to a few seconds. With -the outage thresholds are typically considered to be a 15 seconds. These trace -back on various standards that are relevant here. - -Config-drive/cloud-init details -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Config-drive uses virtual media capabilities of the BMC controller, so that no -DHCP is required for VMs to obtain IP addresses at edge sites. This is -the most straightforward solution. 
This does require that the WAN between the -remote site and central site is live during deployment of a VM, but after that -the VM can run independently without a connection to the central site. - -.. note:: Config-drive may be a tricky for VMs that do not support - cloud-init, like some appliance VMs. It may be that such ones (or other VMs - that do not support config-drive) will have to be configured with a static IP - that matches the Neutron port. - -The simplest solution we recommend for DCN would involve only external provider -networks at the edge. For that case, it is also recommended to use either -config-drive, or IPv6 SLAAC, or another configuration mechanism other than -those requiring a `169.254.169.254/32` route for the provider routers to forward -data to the metadata service. - -IPv6 details -^^^^^^^^^^^^ - -IPv6 for tenants' workloads and infrastructure tunnels interconnecting -the central site and the edge is a viable option as well. IPv6 cannot be used for -provisioning networks though. Key benefits IPv6 may provide for DCN are: - -* SLAAC, which is a EUI-64 form of autoconfig that makes IPv6 addresses - calculated based on MAC addresses and requires no DHCP services placed on the - provider networks. -* Improved mobility for endpoints, like NFV APIs, to roam around different links - and edge sites without losing its connections and IP addresses. -* End-to-end IPv6 has been shown to have better performance by large content - networks. This is largely due to the presence of NAT in most end-to-end IPv4 - connections that slows them down. - -Storage recommendations -^^^^^^^^^^^^^^^^^^^^^^^ - -Prior to Ussuri, DCN was only available with ephemeral storage for -Nova Compute services. Enhanced data availability, locality awareness -and/or replication mechanisms had to be addressed only on the edge -cloud application layer. - -In Ussuri and newer, |project| is able to deploy -:doc:`distributed_multibackend_storage` which may be combined with the -example in this document to add distributed image management and -persistent storage at the edge. - - -Deploying DCN -------------- - -Deploying the DCN architecture requires consideration as it relates to the -undercloud, roles, networks, and availability zones configuration. This section -will document on how to approach the DCN deployment. - -The deployment will make use of specific undercloud configuration, and then -deploying multiple stacks, typically one stack per distributed location, -although this is not a strict requirement. - -At the central site, stack separation can still be used to deploy separate -stacks for control plane and compute services if compute services are desired -at the central site. See deploy_control_plane_ for more information. - -Each distributed site will be a separate stack as well. See deploy_dcn_ for -more information. - -.. _undercloud_dcn: - -Undercloud configuration -^^^^^^^^^^^^^^^^^^^^^^^^ -This section describes the steps required to configure the undercloud for DCN. - -Using direct deploy instead of iSCSI -____________________________________ - -In a default undercloud configuration, ironic deploys nodes using the ``iscsi`` -deploy interface. When using the ``iscsi`` deploy interface, the deploy ramdisk -publishes the node’s disk as an iSCSI target, and the ``ironic-conductor`` -service then copies the image to this target. - -For a DCN deployment, network latency is often a concern between the undercloud -and the distributed compute nodes. 
Considering the potential for latency, the distributed compute nodes should be
-configured to use the ``direct`` deploy interface in the undercloud. This
-process is described later in this guide under :ref:`configure-deploy-interface`.
-
-When using the ``direct`` deploy interface, the deploy ramdisk will download the
-image over HTTP from the undercloud's Swift service, and copy it to the node's
-disk. HTTP is more resilient when dealing with network latency than iSCSI, so
-using the ``direct`` deploy interface provides a more stable node deployment
-experience for distributed compute nodes.
-
-Configure the Swift temporary URL key
-_____________________________________
-
-Images used for overcloud deployment are served by Swift and are made
-available to nodes using an HTTP URL, over the ``direct`` deploy
-interface. To allow Swift to create temporary URLs, it must be
-configured with a temporary URL key. The key value is used for
-cryptographic signing and verification of the temporary URLs created
-by Swift.
-
-The following commands demonstrate how to configure the setting. In this
-example, ``uuidgen`` is used to randomly create a key value. You should choose
-a unique key value that is difficult to guess. For example::
-
-    source ~/stackrc
-    openstack role add --user admin --project service ResellerAdmin
-    openstack --os-project-name service object store account set --property Temp-URL-Key=$(uuidgen | sha1sum | awk '{print $1}')
-
-.. _configure-deploy-interface:
-
-Configure nodes to use the deploy interface
-___________________________________________
-
-This section describes how to configure the deploy interface for new and
-existing nodes.
-
-For new nodes, the deploy interface can be specified directly in the JSON
-structure for each node. For example, see the ``"deploy_interface": "direct"``
-setting below::
-
-    {
-        "nodes":[
-            {
-                "mac":[
-                    "bb:bb:bb:bb:bb:bb"
-                ],
-                "name":"node01",
-                "cpu":"4",
-                "memory":"6144",
-                "disk":"40",
-                "arch":"x86_64",
-                "pm_type":"ipmi",
-                "pm_user":"admin",
-                "pm_password":"p@55w0rd!",
-                "pm_addr":"192.168.24.205",
-                "deploy_interface": "direct"
-            },
-            {
-                "mac":[
-                    "cc:cc:cc:cc:cc:cc"
-                ],
-                "name":"node02",
-                "cpu":"4",
-                "memory":"6144",
-                "disk":"40",
-                "arch":"x86_64",
-                "pm_type":"ipmi",
-                "pm_user":"admin",
-                "pm_password":"p@55w0rd!",
-                "pm_addr":"192.168.24.206",
-                "deploy_interface": "direct"
-            }
-        ]
-    }
-
-Existing nodes can be updated to use the ``direct`` deploy interface. For
-example::
-
-    baremetal node set --deploy-interface direct 4b64a750-afe3-4236-88d1-7bb88c962666
-
-.. _deploy_control_plane:
-
-Deploying the control plane
-^^^^^^^^^^^^^^^^^^^^^^^^^^^
-The main overcloud control plane stack should be deployed as needed for the
-desired cloud architecture layout. This stack contains nodes running the
-control plane and infrastructure services needed for the cloud. For the
-purposes of this documentation, this stack is referred to as the
-``control-plane`` stack.
-
-No specific changes or deployment configuration is necessary to deploy just the
-control plane services.
-
-It's possible to configure the ``control-plane`` stack to contain
-only control plane services, and no compute or storage services. If
-compute and storage services are desired at the same geographical site
-as the ``control-plane`` stack, then they may be deployed in a
-separate stack just like an edge site specific stack, but using nodes
-at the same geographical location.
In such a scenario, the stack with -compute and storage services could be called ``central`` and deploying -it in a separate stack allows for separation of management and -operations. This scenario may also be implemented with an "external" -Ceph cluster for storage as described in :doc:`ceph_external`. If -however, Glance needs to be configured with multiple stores so that -images may be served to remote sites one ``control-plane`` stack may -be used as described in :doc:`distributed_multibackend_storage`. - -It is suggested to give each stack an explicit name. For example, the control -plane stack could be called ``control-plane`` and set by passing ``--stack -control-plane`` to the ``openstack overcloud deploy`` command. - -.. _deploy_dcn: - -Deploying a DCN site -^^^^^^^^^^^^^^^^^^^^ -Once the control plane is deployed, separate deployments can be done for each -DCN site. This section will document how to perform such a deployment. - -.. _export_dcn: - -Saving configuration from the overcloud -_______________________________________ -Once the overcloud control plane has been deployed, data needs to be retrieved -from the overcloud Heat stack and plan to pass as input values into the -separate DCN deployment. - -Beginning in Wallaby with :ref:`ephemeral_heat`, the export file is created -automatically under the default working directory which defaults to -``$HOME/overcloud-deploy/``. The working directory can also be set with -the ``--working-dir`` cli argument to the ``openstack overcloud deploy`` -command. - -The export file will be automatically created as -``$HOME/overcloud-deploy//-export.yaml``. - -.. admonition:: Victoria and prior releases - - In Victoria and prior releases, the export file must be created by running - the export command. - - Extract the needed data from the control plane stack: - - .. code-block:: bash - - # Pass --help to see a full list of options - openstack overcloud export \ - --stack control-plane \ - --output-file control-plane-export.yaml - -.. note:: - - The generated ``control-plane-export.yaml`` contains sensitive security data - such as passwords and TLS certificates that are used in the overcloud - deployment. Some passwords in the file may be removed if they are not needed - by DCN. For example, the passwords for RabbitMQ, MySQL, Keystone, Nova and - Neutron should be sufficient to launch an instance. When the export common - is run, the Ceph passwords are excluded so that DCN deployments which include - Ceph do not reuse the same Ceph password and instead new ones are generated - per DCN deployment. - - Care should be taken to keep the file as secured as possible. - -.. _reuse_networks_dcn: - -Network resource configuration -______________________________ -Beginning in Wallaby, the Network V2 model is used to provision and manage the -network related resources (networks, subnets, and ports). Specifying a -parameter value for ``ManageNetworks`` or using the external resource UUID's is -deprecated in Wallaby. - -The network resources can either be provisioned and managed with the separate -``openstack overcloud network`` command or as part of the ``openstack overcloud -deploy`` command using the cli args (``--networks-file``, ``--vip-file``, -``--baremetal-deployment``). - -See :ref:`network_v2` for the full details on managing the network resources. - -With Network v2, the Heat stack no longer manages any of the network resources. 
-As such with using DCN with multi-stack, it is no longer necessary to first -update the central stack to provision new network resources when deploying a -new site. Instead, it is all handled as part of the network provisioning -commands or the overcloud deploy command. - -The same files used with the cli args (``--networks-file``, ``--vip-file``, -``--baremetal-deployment``), should be the same files used across all stacks in -a DCN deployment. - -.. admonition:: Victoria and prior releases - - When deploying separate stacks it may be necessary to reuse networks, subnets, - and VIP resources between stacks if desired. Only a single Heat stack can own a - resource and be responsible for its creation and deletion, however the - resources can be reused in other stacks. - - **ManageNetworks** - - The ``ManageNetworks`` parameter can be set to ``false`` so that the same - ``network_data.yaml`` file can be used across all the stacks. When - ``ManageNetworks`` is set to false, ports will be created for the nodes in the - separate stacks on the existing networks that were already created in the - ``control-plane`` stack. - - When ``ManageNetworks`` is used, it's a global option for the whole stack and - applies to all of the network, subnet, and segment resources. - - To use ``ManageNetworks``, create an environment file which sets the parameter - value to ``false``:: - - parameter_defaults: - ManageNetworks: false - - When using ``ManageNetworks``, all network resources (except for ports) - are managed in the central stack. When the central stack is deployed, - ``ManageNetworks`` should be left unset (or set to True). When a child stack - is deployed, it is then set to false so that the child stack does not attempt - to manage the already existing network resources. - - Additionally, when adding new network resources, such as entire new leaves when - deploying spine/leaf, the central stack must first be updated with the new - ``network_data.yaml`` that contains the new leaf definitions. Even though the - central stack is not directly using the new network resources, it still is - responsible for creating and managing them. Once the new network resources are - made available in the central stack, a child stack (such as a new edge site) - could be deployed using the new networks. - - **External UUID's** - - If more fine grained control over which networks should be reused from the - ``control-plane`` stack is needed, then various ``external_resource_*`` fields - can be added to ``network_data.yaml``. When these fields are present on - network, subnet, segment, or vip resources, Heat will mark the resources in the - separate stack as being externally managed, and it won't try to any create, - update, or delete operations on those resources. - - ``ManageNetworks`` should not be set when when the ``external_resource_*`` - fields are used. - - The external resource fields that can be used in ``network_data.yaml`` are as - follows:: - - external_resource_network_id: Existing Network UUID - external_resource_subnet_id: Existing Subnet UUID - external_resource_segment_id: Existing Segment UUID - external_resource_vip_id: Existing VIP UUID - - These fields can be set on each network definition in the - `network_data.yaml`` file used for the deployment of the separate stack. - - Not all networks need to be reused or shared across stacks. The - `external_resource_*` fields can be set for only the networks that are - meant to be shared, while the other networks can be newly created and managed. 
- - For example, to reuse the ``internal_api`` network from the control plane stack - in a separate stack, run the following commands to show the UUIDs for the - related network resources: - - .. code-block:: bash - - openstack network show internal_api -c id -f value - openstack subnet show internal_api_subnet -c id -f value - openstack port show internal_api_virtual_ip -c id -f value - - Save the values shown in the output of the above commands and add them to the - network definition for the ``internal_api`` network in the - ``network_data.yaml`` file for the separate stack. An example network - definition would look like: - - .. code-block:: bash - - - name: InternalApi - external_resource_network_id: 93861871-7814-4dbc-9e6c-7f51496b43af - external_resource_subnet_id: c85c8670-51c1-4b17-a580-1cfb4344de27 - external_resource_vip_id: 8bb9d96f-72bf-4964-a05c-5d3fed203eb7 - name_lower: internal_api - vip: true - ip_subnet: '172.16.2.0/24' - allocation_pools: [{'start': '172.16.2.4', 'end': '172.16.2.250'}] - ipv6_subnet: 'fd00:fd00:fd00:2000::/64' - ipv6_allocation_pools: [{'start': 'fd00:fd00:fd00:2000::10', 'end': 'fd00:fd00:fd00:2000:ffff:ffff:ffff:fffe'}] - mtu: 1400 - - .. note:: - - When *not* sharing networks between stacks, each network defined in - ``network_data.yaml`` must have a unique name across all deployed stacks. - This requirement is necessary since regardless of the stack, all networks are - created in the same tenant in Neutron on the undercloud. - - For example, the network name ``internal_api`` can't be reused between - stacks, unless the intent is to share the network between the stacks. - The network would need to be given a different ``name`` and - ``name_lower`` property such as ``InternalApiCompute0`` and - ``internal_api_compute_0``. - - If separate storage and storage management networks are used with - multiple Ceph clusters and Glance servers per site, then a routed - storage network should be shared between sites for image transfer. - The storage management network, which Ceph uses to keep OSDs balanced, - does not need to be shared between sites. - -DCN related roles -_________________ -Different roles are provided within ``tripleo-heat-templates``, depending on the -configuration and desired services to be deployed at each distributed site. - -The default compute role at ``roles/Compute.yaml`` can be used if that is -sufficient for the use case. - -Three additional roles are also available for deploying compute nodes -with co-located persistent storage at the distributed site. - -The first is ``roles/DistributedCompute.yaml``. This role includes the default -compute services, but also includes the cinder volume service. The cinder -volume service would be configured to talk to storage that is local to the -distributed site for persistent storage. - -The second is ``roles/DistributedComputeHCI.yaml``. This role includes the -default computes services, the cinder volume service, and also includes the -Ceph Mon, Mgr, and OSD services for deploying a Ceph cluster at the -distributed site. Using this role, both the compute services and Ceph -services are deployed on the same nodes, enabling a hyperconverged -infrastructure for persistent storage at the distributed site. When -Ceph is used, there must be a minimum of three `DistributedComputeHCI` -nodes. This role also includes a Glance server, provided by the -`GlanceApiEdge` service with in the `DistributedComputeHCI` role. 
The -Nova compute service of each node in the `DistributedComputeHCI` role -is configured by default to use its local Glance server. - -The third is ``roles/DistributedComputeHCIScaleOut.yaml``. This role is -like the DistributedComputeHCI role but does not run the Ceph Mon and -Mgr service. It offers the Ceph OSD service however, so it may be used -to scale up storage and compute services at each DCN site after the -minimum of three DistributedComputeHCI nodes have been deployed. There -is no `GlanceApiEdge` service in the `DistributedComputeHCIScaleOut` -role but in its place the Nova compute service of the role is -configured by default to connect to a local `HaProxyEdge` service -which in turn proxies image requests to the Glance servers running on -the `DistributedComputeHCI` roles. - -For information on configuring the distributed Glance services see -:doc:`distributed_multibackend_storage`. - -Configuring Availability Zones (AZ) -___________________________________ -Each edge site must be configured as a separate availability zone (AZ). When -you deploy instances to this AZ, you can expect it to run on the remote Compute -node. In addition, the central site must also be within a specific AZ (or -multiple AZs), rather than the default AZ. - -When also deploying persistent storage at each site, the storage backend -availability zone must match the compute availability zone name. - -AZs are configured differently for compute (Nova) and storage (Cinder). -Configuring AZs are documented in the next sections. - -Configuring AZs for Nova (compute) -################################## -The Nova AZ configuration for compute nodes in the stack can be set with the -``NovaComputeAvailabilityZone`` parameter during the deployment. - -The value of the parameter is the name of the AZ where compute nodes in that -stack will be added. - -For example, the following environment file would be used to add compute nodes -in the ``edge-0`` stack to the ``edge-0`` AZ:: - - parameter_defaults: - NovaComputeAvailabilityZone: edge-0 - -Additionally, the ``OS::TripleO::NovaAZConfig`` service must be enabled by -including the following ``resource_registry`` mapping:: - - resource_registry: - OS::TripleO::Services::NovaAZConfig: tripleo-heat-templates/deployment/nova/nova-az-config.yaml - -Or, the following environment can be included which sets the above mapping:: - - environments/nova-az-config.yaml - -It's also possible to configure the AZ for a compute node by adding it to a -host aggregate after the deployment is completed. The following commands show -creating a host aggregate, an associated AZ, and adding compute nodes to a -``edge-0`` AZ:: - - openstack aggregate create edge-0 --zone edge-0 - openstack aggregate add host edge-0 hostA - openstack aggregate add host edge-0 hostB - -.. note:: - - The above commands are run against the deployed overcloud, not the - undercloud. Make sure the correct rc file for the control plane stack of - the overcloud is sourced for the shell before running the commands. - - -Configuring AZs for Cinder (storage) -#################################### -Each site that uses consistent storage is configured with its own cinder -backend(s). Cinder backends are not shared between sites. Each backend is also -configured with an AZ that should match the configured Nova AZ for the compute -nodes that will make use of the storage provided by that backend. - -The ``CinderStorageAvailabilityZone`` parameter can be used to configure the AZ -for a given backend. 
Parameters are also available for different backend types, -such as ``CinderISCSIAvailabilityZone``, ``CinderRbdAvailabilityZone``, and -``CinderNfsAvailabilityZone``. When set, the backend type specific parameter -will take precedence over ``CinderStorageAvailabilityZone``. - -This example shows an environment file setting the AZ for the backend in the -``central`` site:: - - parameter_defaults: - CinderStorageAvailabilityZone: central - -This example shows an environment file setting the AZ for the backend in the -``edge0`` site:: - - parameter_defaults: - CinderStorageAvailabilityZone: edge0 - -Deploying Ceph with HCI -####################### -When deploying Ceph while using the ``DistributedComputeHCI`` and -``DistributedComputeHCIScaleOut`` roles, the following environment file -should be used to enable Ceph:: - - environments/ceph-ansible/ceph-ansible.yaml - -Sample environments -################### - -There are sample environments that are included in ``tripleo-heat-templates`` -for setting many of the parameter values and ``resource_registry`` mappings. These -environments are located within the ``tripleo-heat-templates`` directory at:: - - environments/dcn.yaml - environments/dcn-storage.yaml - -The environments are not all-inclusive and do not set all needed values and -mappings, but can be used as a guide when deploying DCN. - -Example: DCN deployment with pre-provisioned nodes, shared networks, and multiple stacks -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -This example shows the deployment commands and associated environment files -of a example of a DCN deployment. The deployment uses pre-provisioned nodes. -All networks are shared between the multiple stacks. The example illustrates -the deployment workflow of deploying multiple stacks for a real -world DCN deployment. - -Four stacks are deployed: - -control-plane - All control plane services. Shares the same geographical location as the - central stack. -central - Compute, Cinder, Ceph deployment. Shares the same geographical location as - the control-plane stack. -edge0 - Compute, Cinder, Ceph deployment. Separate geographical location from any - other stack. -edge1 - Compute, Cinder, Ceph deployment. Separate geographical location from any - other stack. - -Notice how the ``central`` stack will contain only compute and storage -services. It is really just another instance of an edge site, but just happens -to be deployed at the same geographical location as the ``control-plane`` -stack. ``control-plane`` and ``central`` could instead be deployed in the same -stack, however for easier manageability and separation, they are deployed in -separate stacks. - -This example also uses pre-provisioned nodes as documented at -:ref:`deployed_server`. - -Undercloud -__________ -Since this example uses pre-provisioned nodes, no additional undercloud -configuration is needed. The steps in undercloud_dcn_ are not specifically -applicable when using pre-provisioned nodes. 
- -Deploy the control-plane stack -______________________________ -The ``control-plane`` stack is deployed with the following command:: - - openstack overcloud deploy \ - --verbose \ - --stack control-plane \ - --disable-validations \ - --templates /home/centos/tripleo-heat-templates \ - -r roles-data.yaml \ - -e role-counts.yaml \ - --networks-file network_data_v2.yaml \ - --vip-file vip_data.yaml \ - --baremetal-deployment baremetal_deployment.yaml \ - -e /home/centos/tripleo-heat-templates/environments/docker-ha.yaml \ - -e /home/centos/tripleo/environments/containers-prepare-parameter.yaml \ - -e /home/centos/tripleo-heat-templates/environments/deployed-server-environment.yaml \ - -e /home/centos/tripleo-heat-templates/environments/deployed-server-bootstrap-environment-centos.yaml \ - -e /home/centos/tripleo-heat-templates/environments/network-isolation.yaml \ - -e /home/centos/tripleo-heat-templates/environments/net-multiple-nics.yaml \ - -e hostnamemap.yaml \ - -e network-environment.yaml \ - -e deployed-server-port-map.yaml \ - -e az.yaml - - -Many of the specified environments and options are not specific to DCN. The -ones that relate to DCN are as follows. - -``--stack control-plane`` sets the stack name to ``control-plane``. - -The ``roles-data.yaml`` file contains only the Controller role from the -templates directory at ``roles/Controller.yaml``. - -``role-counts.yaml`` contains:: - - parameter_defaults: - ControllerCount: 1 - -.. warning:: - Only one `Controller` node is deployed for example purposes but - three are recommended in order to have a highly available control - plane. - -``network_data_v2.yaml``, ``vip_data.yaml``, and ``baremetal_deployment.yaml`` -contain the definitions to manage the network resources. See :ref:`network_v2` -for creating these files. - -``az.yaml`` contains:: - - parameter_defaults: - CinderStorageAvailabilityZone: 'central' - NovaComputeAvailabilityZone: 'central' - -When the deployment completes, a single stack is deployed:: - - (undercloud) [centos@scale ~]$ openstack stack list - +--------------------------------------+---------------+----------------------------------+-----------------+----------------------+----------------------+ - | ID | Stack Name | Project | Stack Status | Creation Time | Updated Time | - +--------------------------------------+---------------+----------------------------------+-----------------+----------------------+----------------------+ - | 5f172fd8-97a5-4b9b-8d4c-2c931fd048e7 | control-plane | c117a9b489384603b2f45185215e9728 | CREATE_COMPLETE | 2019-03-13T18:51:08Z | 2019-03-13T19:44:27Z | - +--------------------------------------+---------------+----------------------------------+-----------------+----------------------+----------------------+ - -.. _example_export_dcn: - -Exported configuration from the ``control-plane`` stack -_______________________________________________________ -As documented in export_dcn_, the export file is created automatically by the -``openstack overcloud deploy`` command. - -.. admonition:: Victoria and prior releases - - For Victoria and prior releases, the following command must be run to export - the configuration data from the ``control-plane`` stack:: - - openstack overcloud export \ - --stack control-plane \ - --output-file control-plane-export.yaml - -Deploy the central stack -________________________ -The ``central`` stack deploys compute and storage services to be co-located -at the same site where the ``control-plane`` stack was deployed. 
- -The ``central`` stack is deployed with the following command:: - - openstack overcloud deploy \ - --verbose \ - --stack central \ - --templates /home/centos/tripleo-heat-templates \ - -r distributed-roles-data.yaml \ - -n site_network_data.yaml \ - --disable-validations \ - --networks-file network_data_v2.yaml \ - --vip-file vip_data.yaml \ - --baremetal-deployment baremetal_deployment.yaml \ - -e /home/centos/tripleo-heat-templates/environments/docker-ha.yaml \ - -e /home/centos/tripleo/environments/containers-prepare-parameter.yaml \ - -e /home/centos/tripleo-heat-templates/environments/deployed-server-environment.yaml \ - -e /home/centos/tripleo-heat-templates/environments/deployed-server-bootstrap-environment-centos.yaml \ - -e /home/centos/tripleo-heat-templates/environments/network-isolation.yaml \ - -e /home/centos/tripleo-heat-templates/environments/net-multiple-nics.yaml \ - -e /home/centos/tripleo-heat-templates/environments/ceph-ansible/ceph-ansible.yaml \ - -e /home/centos/tripleo-heat-templates/environments/low-memory-usage.yaml \ - -e role-counts.yaml \ - -e hostnamemap.yaml \ - -e network-environment.yaml \ - -e deployed-server-port-map.yaml \ - -e ceph-environment.yaml \ - -e az.yaml \ - -e /home/centos/overcloud-deploy/control-plane/control-plane-export.yaml - -``--stack central`` sets the stack name to ``central``. - -``distributed-roles-data.yaml`` contains a single role called ``DistributedComputeHCI`` -which contains Nova, Cinder, and Ceph services. The example role is from the -templates directory at ``roles/DistributedComputeHCI.yaml``. - -``role-counts.yaml`` contains:: - - parameter_defaults: - DistributedComputeHCICount: 1 - -.. warning:: - Only one `DistributedComputeHCI` is deployed for example - purposes but three are recommended in order to have a highly - available Ceph cluster. If more than three such nodes of that role - are necessary for additional compute and storage resources, then - use additional nodes from the `DistributedComputeHCIScaleOut` role. - -``network_data_v2.yaml``, ``vip_data.yaml``, and ``baremetal_deployment.yaml`` -are the same files used with the ``control-plane`` stack. - -``az.yaml`` contains the same content as was used in the ``control-plane`` -stack:: - - parameter_defaults: - CinderStorageAvailabilityZone: 'central' - NovaComputeAvailabilityZone: 'central' - -The ``control-plane-export.yaml`` file was generated by the ``openstack -overcloud deploy`` command when deploying the ``control-plane`` stack or from -the command from example_export_dcn_ (Victoria or prior releases). 
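-
-Before deploying the ``central`` stack it can be useful to confirm that the
-export file exists under the default working directory. A simple check,
-assuming the default paths used in this example::
-
-    stat ~/overcloud-deploy/control-plane/control-plane-export.yaml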
- -When the deployment completes, 2 stacks are deployed:: - - +--------------------------------------+---------------+----------------------------------+-----------------+----------------------+----------------------+ - | ID | Stack Name | Project | Stack Status | Creation Time | Updated Time | - +--------------------------------------+---------------+----------------------------------+-----------------+----------------------+----------------------+ - | 0bdade63-4645-4490-a540-24be48527e10 | central | c117a9b489384603b2f45185215e9728 | CREATE_COMPLETE | 2019-03-25T21:35:49Z | None | - | 5f172fd8-97a5-4b9b-8d4c-2c931fd048e7 | control-plane | c117a9b489384603b2f45185215e9728 | CREATE_COMPLETE | 2019-03-13T18:51:08Z | None | - +--------------------------------------+---------------+----------------------------------+-----------------+----------------------+----------------------+ - -The AZ and aggregate configuration for Nova can be checked and verified with -these commands. Note that the ``rc`` file for the ``control-plane`` stack must be -sourced as these commands talk to overcloud APIs:: - - (undercloud) [centos@scale ~]$ source control-planerc - (control-plane) [centos@scale ~]$ openstack aggregate list - +----+---------+-------------------+ - | ID | Name | Availability Zone | - +----+---------+-------------------+ - | 9 | central | central | - +----+---------+-------------------+ - (control-plane) [centos@scale ~]$ openstack aggregate show central - +-------------------+----------------------------+ - | Field | Value | - +-------------------+----------------------------+ - | availability_zone | central | - | created_at | 2019-03-25T22:23:25.000000 | - | deleted | False | - | deleted_at | None | - | hosts | [u'compute-0.localdomain'] | - | id | 9 | - | name | central | - | properties | | - | updated_at | None | - +-------------------+----------------------------+ - (control-plane) [centos@scale ~]$ nova availability-zone-list - +----------------------------+----------------------------------------+ - | Name | Status | - +----------------------------+----------------------------------------+ - | internal | available | - | |- openstack-0.localdomain | | - | | |- nova-conductor | enabled :-) 2019-03-27T18:21:29.000000 | - | | |- nova-scheduler | enabled :-) 2019-03-27T18:21:31.000000 | - | | |- nova-consoleauth | enabled :-) 2019-03-27T18:21:34.000000 | - | central | available | - | |- compute-0.localdomain | | - | | |- nova-compute | enabled :-) 2019-03-27T18:21:32.000000 | - +----------------------------+----------------------------------------+ - (control-plane) [centos@scale ~]$ openstack compute service list - +----+------------------+-------------------------+----------+---------+-------+----------------------------+ - | ID | Binary | Host | Zone | Status | State | Updated At | - +----+------------------+-------------------------+----------+---------+-------+----------------------------+ - | 1 | nova-scheduler | openstack-0.localdomain | internal | enabled | up | 2019-03-27T18:23:31.000000 | - | 2 | nova-consoleauth | openstack-0.localdomain | internal | enabled | up | 2019-03-27T18:23:34.000000 | - | 3 | nova-conductor | openstack-0.localdomain | internal | enabled | up | 2019-03-27T18:23:29.000000 | - | 7 | nova-compute | compute-0.localdomain | central | enabled | up | 2019-03-27T18:23:32.000000 | - +----+------------------+-------------------------+----------+---------+-------+----------------------------+ - - -Note how a new aggregate and AZ called ``central`` has been 
automatically -created. The newly deployed ``nova-compute`` service from the ``compute-0`` host in -the ``central`` stack is automatically added to this aggregate and zone. - -The AZ configuration for Cinder can be checked and verified with these -commands:: - - (control-plane) [centos@scale ~]$ openstack volume service list - +------------------+-------------------------+---------+---------+-------+----------------------------+ - | Binary | Host | Zone | Status | State | Updated At | - +------------------+-------------------------+---------+---------+-------+----------------------------+ - | cinder-scheduler | openstack-0.rdocloud | central | enabled | up | 2019-03-27T21:17:44.000000 | - | cinder-volume | compute-0@tripleo_ceph | central | enabled | up | 2019-03-27T21:17:44.000000 | - +------------------+-------------------------+---------+---------+-------+----------------------------+ - -The Cinder AZ configuration shows the ceph backend in the ``central`` zone -which was deployed by the ``central`` stack. - -Deploy the edge-0 and edge-1 stacks -___________________________________ -Now that the ``control-plane`` and ``central`` stacks are deployed, we'll deploy an -``edge-0`` and ``edge-1`` stack. These stacks are similar to the ``central`` stack in that they -deploy the same roles with the same services. It differs in that the nodes -will be managed in a separate stack and it illustrates the separation of -deployment and management between edge sites. - -The AZs will be configured differently in these stacks as the nova and -cinder services will belong to an AZ unique to each the site. - -The ``edge-0`` stack is deployed with the following command:: - - openstack overcloud deploy \ - --verbose \ - --stack edge-0 \ - --templates /home/centos/tripleo-heat-templates \ - -r distributed-roles-data.yaml \ - -n site_network_data.yaml \ - --disable-validations \ - --networks-file network_data_v2.yaml \ - --vip-file vip_data.yaml \ - --baremetal-deployment baremetal_deployment.yaml \ - -e /home/centos/tripleo-heat-templates/environments/docker-ha.yaml \ - -e /home/centos/tripleo/environments/containers-prepare-parameter.yaml \ - -e /home/centos/tripleo-heat-templates/environments/deployed-server-environment.yaml \ - -e /home/centos/tripleo-heat-templates/environments/deployed-server-bootstrap-environment-centos.yaml \ - -e /home/centos/tripleo-heat-templates/environments/network-isolation.yaml \ - -e /home/centos/tripleo-heat-templates/environments/net-multiple-nics.yaml \ - -e /home/centos/tripleo-heat-templates/environments/ceph-ansible/ceph-ansible.yaml \ - -e /home/centos/tripleo-heat-templates/environments/low-memory-usage.yaml \ - -e role-counts.yaml \ - -e hostnamemap.yaml \ - -e network-environment.yaml \ - -e deployed-server-port-map.yaml \ - -e ceph-environment.yaml \ - -e az.yaml \ - -e /home/centos/overcloud-deploy/control-plane/control-plane-export.yaml - -``--stack edge-0`` sets the stack name to ``edge-0``. - -``distributed-roles-data.yaml`` contains a single role called ``DistributedComputeHCI`` -which contains Nova, Cinder, and Ceph services. The example role is from the -templates directory at ``roles/DistributedComputeHCI.yaml``. This file is the -same as was used in the ``central`` stack. - -``role-counts.yaml`` contains:: - - parameter_defaults: - DistributedComputeHCICount: 1 - -.. warning:: - Only one `DistributedComputeHCI` is deployed for example - purposes but three are recommended in order to have a highly - available Ceph cluster. 
If more than three such nodes of that role - are necessary for additional compute and storage resources, then - use additional nodes from the `DistributedComputeHCIScaleOut` role. - -``az.yaml`` contains specific content for the ``edge-0`` stack:: - - parameter_defaults: - CinderStorageAvailabilityZone: 'edge-0' - NovaComputeAvailabilityZone: 'edge-0' - -The ``CinderStorageAvailabilityZone`` and ``NovaDefaultAvailabilityZone`` -parameters are set to ``edge-0`` to match the stack name. - -The ``control-plane-export.yaml`` file was generated by the ``openstack -overcloud deploy`` command when deploying the ``control-plane`` stack or from -the command from example_export_dcn_ (Victoria or prior releases). - -``network_data_v2.yaml``, ``vip_data.yaml``, and ``baremetal_deployment.yaml`` -are the same files used with the ``control-plane`` stack. However, the files -will need to be modified if the ``edge-0`` or ``edge-1`` stacks require -additional provisioning of network resources for new subnets. Update the files -as needed and continue to use the same files for all ``overcloud deploy`` -commands across all stacks. - -The ``edge-1`` stack is deployed with a similar command. The stack is given a -different name with ``--stack edge-1`` and ``az.yaml`` contains:: - - parameter_defaults: - CinderStorageAvailabilityZone: 'edge-1' - NovaComputeAvailabilityZone: 'edge-1' - -When the deployment completes, there are now 4 stacks are deployed:: - - (undercloud) [centos@scale ~]$ openstack stack list - +--------------------------------------+---------------+----------------------------------+-----------------+----------------------+----------------------+ - | ID | Stack Name | Project | Stack Status | Creation Time | Updated Time | - +--------------------------------------+---------------+----------------------------------+-----------------+----------------------+----------------------+ - | 203dc480-3b0b-4cd9-9f70-f79898461c17 | edge-0 | c117a9b489384603b2f45185215e9728 | CREATE_COMPLETE | 2019-03-29T17:30:15Z | None | - | 0bdade63-4645-4490-a540-24be48527e10 | central | c117a9b489384603b2f45185215e9728 | CREATE_COMPLETE | 2019-03-25T21:35:49Z | None | - | 5f172fd8-97a5-4b9b-8d4c-2c931fd048e7 | control-plane | c117a9b489384603b2f45185215e9728 | UPDATE_COMPLETE | 2019-03-13T18:51:08Z | 2019-03-13T19:44:27Z | - +--------------------------------------+---------------+----------------------------------+-----------------+----------------------+----------------------+ - -Repeating the same commands that were run after the ``central`` stack was -deployed to show the AZ configuration, shows that the new AZs for ``edge-0`` -and ``edge-1`` are created and available:: - - (undercloud) [centos@scale ~]$ source control-planerc - (control-plane) [centos@scale ~]$ openstack aggregate list - +----+---------+-------------------+ - | ID | Name | Availability Zone | - +----+---------+-------------------+ - | 9 | central | central | - | 10 | edge-0 | edge-0 | - | 11 | edge-1 | edge-1 | - +----+---------+-------------------+ - (control-plane) [centos@scale ~]$ openstack aggregate show edge-0 - +-------------------+----------------------------+ - | Field | Value | - +-------------------+----------------------------+ - | availability_zone | edge-0 | - | created_at | 2019-03-29T19:01:59.000000 | - | deleted | False | - | deleted_at | None | - | hosts | [u'compute-1.localdomain'] | - | id | 10 | - | name | edge-0 | - | properties | | - | updated_at | None | - +-------------------+----------------------------+ - (control-plane) 
[centos@scale ~]$ nova availability-zone-list - +----------------------------+----------------------------------------+ - | Name | Status | - +----------------------------+----------------------------------------+ - | internal | available | - | |- openstack-0.localdomain | | - | | |- nova-conductor | enabled :-) 2019-04-01T17:38:06.000000 | - | | |- nova-scheduler | enabled :-) 2019-04-01T17:38:13.000000 | - | | |- nova-consoleauth | enabled :-) 2019-04-01T17:38:09.000000 | - | central | available | - | |- compute-0.localdomain | | - | | |- nova-compute | enabled :-) 2019-04-01T17:38:07.000000 | - | edge-0 | available | - | |- compute-1.localdomain | | - | | |- nova-compute | enabled :-) 2019-04-01T17:38:07.000000 | - | edge-1 | available | - | |- compute-2.localdomain | | - | | |- nova-compute | enabled :-) 2019-04-01T17:38:06.000000 | - +----------------------------+----------------------------------------+ - (control-plane) [centos@scale ~]$ openstack compute service list - +----+------------------+-------------------------+----------+---------+-------+----------------------------+ - | ID | Binary | Host | Zone | Status | State | Updated At | - +----+------------------+-------------------------+----------+---------+-------+----------------------------+ - | 1 | nova-scheduler | openstack-0.localdomain | internal | enabled | up | 2019-04-01T17:38:23.000000 | - | 2 | nova-consoleauth | openstack-0.localdomain | internal | enabled | up | 2019-04-01T17:38:19.000000 | - | 3 | nova-conductor | openstack-0.localdomain | internal | enabled | up | 2019-04-01T17:38:26.000000 | - | 7 | nova-compute | compute-0.localdomain | central | enabled | up | 2019-04-01T17:38:27.000000 | - | 16 | nova-compute | compute-1.localdomain | edge-0 | enabled | up | 2019-04-01T17:38:27.000000 | - | 17 | nova-compute | compute-2.localdomain | edge-1 | enabled | up | 2019-04-01T17:38:26.000000 | - +----+------------------+-------------------------+----------+---------+-------+----------------------------+ - (control-plane) [centos@scale ~]$ openstack volume service list - +------------------+-------------------------+---------+---------+-------+----------------------------+ - | Binary | Host | Zone | Status | State | Updated At | - +------------------+-------------------------+---------+---------+-------+----------------------------+ - | cinder-scheduler | openstack-0.rdocloud | central | enabled | up | 2019-04-01T17:38:27.000000 | - | cinder-volume | hostgroup@tripleo_iscsi | central | enabled | up | 2019-04-01T17:38:27.000000 | - | cinder-volume | compute-0@tripleo_ceph | central | enabled | up | 2019-04-01T17:38:30.000000 | - | cinder-volume | compute-1@tripleo_ceph | edge-0 | enabled | up | 2019-04-01T17:38:32.000000 | - | cinder-volume | compute-2@tripleo_ceph | edge-1 | enabled | up | 2019-04-01T17:38:28.000000 | - +------------------+-------------------------+---------+---------+-------+----------------------------+ - (control-plane) [centos@scale ~]$ - -For information on extending this example with distributed image -management for image sharing between DCN site Ceph clusters see -:doc:`distributed_multibackend_storage`. - -Updating DCN ------------- - -Each stack in a multi-stack DCN deployment must be updated to perform a full -minor update across the entire deployment. - -The minor update procedure as detailed in :ref:`package_update` be run for -each stack in the deployment. - -The control-plane or central stack should be updated first by completing all -the steps from the minor update procedure. - -.. 
admonition:: Victoria and prior releases - - Once the central stack is updated, re-run the export command from - :ref:`export_dcn` to recreate the required input file for each separate - DCN stack. - - .. note:: - - When re-running the export command, save the generated file in a new - directory so that the previous version is not overwritten. In the event - that a separate DCN stack needs a stack update operation performed prior to - the minor update procedure, the previous version of the exported file - should be used. - -Each separate DCN stack can then be updated individually as required. There is -no requirement as to the order of which DCN stack is updated first. - -Running Ansible across multiple DCN stacks ------------------------------------------- - -.. warning:: - This currently is only supported in Train or newer versions. - -Each DCN stack should usually be updated individually. However if you -need to run Ansible on nodes deployed from more than one DCN stack, -then the ``tripleo-ansible-inventory`` command's ``--stack`` option -supports being passed more than one stack. If more than one stack is -passed, then a single merged inventory will be generated which -contains the union of the nodes in those stacks. For example, if you -were to run the following:: - - tripleo-ansible-inventory --static-yaml-inventory inventory.yaml --stack central,edge0 - -then you could use the generated inventory.yaml as follows:: - - (undercloud) [stack@undercloud ~]$ ansible -i inventory.yaml -m ping central - central-controller-0 | SUCCESS => { - "ansible_facts": { - "discovered_interpreter_python": "/usr/bin/python" - }, - "changed": false, - "ping": "pong" - } - (undercloud) [stack@undercloud ~]$ ansible -i inventory.yaml -m ping edge0 - edge0-distributedcomputehci-0 | SUCCESS => { - "ansible_facts": { - "discovered_interpreter_python": "/usr/bin/python" - }, - "changed": false, - "ping": "pong" - } - (undercloud) [stack@undercloud ~]$ ansible -i inventory.yaml -m ping all - undercloud | SUCCESS => { - "changed": false, - "ping": "pong" - } - edge0-distributedcomputehci-0 | SUCCESS => { - "ansible_facts": { - "discovered_interpreter_python": "/usr/bin/python" - }, - "changed": false, - "ping": "pong" - } - central-controller-0 | SUCCESS => { - "ansible_facts": { - "discovered_interpreter_python": "/usr/bin/python" - }, - "changed": false, - "ping": "pong" - } - (undercloud) [stack@undercloud ~]$ - -When multiple stacks are passed as input a host group is created -after each stack which refers to all of the nodes in that stack. -In the example above, edge0 has only one node from the -DistributedComputeHci role and central has only one node from the -Controller role. - -The inventory will also have a host group created for every item in -the cross product of stacks and roles. For example, -central_Controller, edge0_Compute, edge1_Compute, etc. This is done -in order to avoid name collisions, e.g. Compute would refer to all -nodes in the Compute role, but when there's more than one stack -edge0_Compute and edge1_Compute refer to different Compute nodes -based on the stack from which they were deployed. 
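-
-The per-stack and per-stack-role host groups described above can be targeted
-with Ansible directly. For example, using the same ``inventory.yaml``
-generated earlier (the group names below follow the stack_Role convention and
-depend on the stack and role names in use)::
-
-    ansible -i inventory.yaml --list-hosts central_Controller
-    ansible -i inventory.yaml -m ping central_Controller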
diff --git a/deploy-guide/source/features/distributed_multibackend_storage.rst b/deploy-guide/source/features/distributed_multibackend_storage.rst deleted file mode 100644 index 0334a80d..00000000 --- a/deploy-guide/source/features/distributed_multibackend_storage.rst +++ /dev/null @@ -1,1331 +0,0 @@ -Distributed Multibackend Storage -================================ - -|project| is able to extend :doc:`distributed_compute_node` to include -distributed image management and persistent storage with the benefits -of using OpenStack and Ceph. - -Features --------- - -This Distributed Multibackend Storage design extends the architecture -described in :doc:`distributed_compute_node` to support the following -workflow. - -- Upload an image to the Central site using `glance image-create` - command with `--file` and `--store central` parameters. -- Move a copy of the same image to DCN sites using a command like - `glance image-import --stores dcn1,dcn2 --import-method - copy-image`. -- The image's unique ID will be shared consistently across sites -- The image may be copy-on-write booted on any DCN site as the RBD - pools for Glance and Nova will use the same local Ceph cluster. -- If the Glance server at each DCN site was configured with write - access to the Central Ceph cluster as an additional store, then an - image generated from making a snapshot of an instance running at a - DCN site may be copied back to the central site and then copied to - additional DCN sites. -- The same Ceph cluster per site may also be used by Cinder as an RBD - store to offer local volumes in active/active mode. - -In the above workflow the only time RBD traffic crosses the WAN is -when an image is imported or copied between sites. Otherwise all RBD -traffic is local to each site for fast COW boots, and performant IO -to the local Cinder and Nova Ceph pools. - -Architecture ------------- - -The architecture to support the above features has the following -properties. - -- A separate Ceph cluster at each availability zone or geographic - location -- Glance servers at each availability zone or geographic location -- The containers implementing the Ceph clusters may be collocated on - the same hardware providing compute services, i.e. the compute nodes - may be hyper-converged, though it is not necessary that they be - hyper-converged -- It is not necessary to deploy Glance and Ceph at each DCN site, if - storage services are not needed at that DCN site - -In this scenario the Glance service at the central site is configured -with multiple stores such that. - -- The central Glance server's default store is the central Ceph - cluster using the RBD driver -- The central Glance server has additional RBD stores; one per DCN - site running Ceph - -Similarly the Glance server at each DCN site is configured with -multiple stores such that. - -- Each DCN Glance server's default store is the DCN Ceph - cluster that is in the same geographic location. -- Each DCN Glance server is configured with one additional store which - is the Central RBD Ceph cluster. - -Though there are Glance services distributed to multiple sites, the -Glance client for overcloud users should use the public Glance -endpoints at the central site. These endpoints may be determined by -querying the Keystone service, which only runs at the central site, -with `openstack endpoint list`. Ideally all images should reside in -the central Glance and be copied to DCN sites before instances of -those images are booted on DCN sites. 
If an image is not copied to a -DCN site before it is booted, then the image will be streamed to the -DCN site and then the image will boot as an instance. This happens -because Glance at the DCN site has access to the images store at the -Central ceph cluster. Though the booting of the image will take time -because it has not been copied in advance, this is still preferable -to failing to boot the image. - -Stacks ------- - -In the example deployment three stacks are deployed: - -control-plane - All control plane services including Glance. Includes a Ceph - cluster named central which is hypercoverged with compute nodes and - runs Cinder in active/passive mode managed by pacemaker. -dcn0 - Runs Compute, Glance and Ceph services. The Cinder volume service - is configured in active/active mode and not managed by pacemaker. - The Compute and Cinder services are deployed in a separate - availability zone and may also be in a separate geographic - location. -dcn1 - Deploys the same services as dcn0 but in a different availability - zone and also in a separate geographic location. - -Note how the above differs from the :doc:`distributed_compute_node` -example which splits services at the primary location into two stacks -called `control-plane` and `central`. This example combines the two -into one stack. - -During the deployment steps all templates used to deploy the -control-plane stack will be kept on the undercloud in -`/home/stack/control-plane`, all templates used to deploy the dcn0 -stack will be kept on the undercloud in `/home/stack/dcn0` and dcn1 -will follow the same pattern as dcn0. The sites dcn2, dcn3 and so on -may be created, based on need, by following the same pattern. - -Ceph Deployment Types ---------------------- - -|project| supports two types of Ceph deployments. An "internal" Ceph -deployment is one where a Ceph cluster is deployed as part of the -overcloud as described in :doc:`deployed_ceph`. An "external" Ceph -deployment is one where a Ceph cluster already exists and an overcloud -is configured to be a client of that Ceph cluster as described in -:doc:`ceph_external`. Ceph external deployments have special meaning -to |project| in the following ways: - -- The Ceph cluster was not deployed by |project| -- The OpenStack Ceph client is configured by |project| - -The deployment example in this document uses the "external" term to -focus on the second of the above because the client configuration is -important. This example differs from the first of the above because -Ceph was deployed by |project|, however relative to other stacks, it -is an external Ceph cluster because, for the stacks which configure -the Ceph clients, it doesn't matter that the Ceph server came from a -different stack. In this sense, the example in this document uses both -types of deployments as described in the following sequence: - -- The central site deploys an internal Ceph cluster called central - with a cephx keyring which may be used to access the central ceph - pools. -- The dcn0 site deploys an internal Ceph cluster called dcn0 with a - cephx keyring which may be used to access the dcn0 Ceph pools. - During the same deployment the dcn0 site is also configured - with the cephx keyring from the previous step so that it is also a - client of the external Ceph cluster, relative to dcn0, called - central from the previous step. 
The `GlanceMultistoreConfig` - parameter is also used during this step so that Glance will use the - dcn0 Ceph cluster as an RBD store by default but it will also be - configured to use the central Ceph cluster as an additional RBD - backend. -- The dcn1 site is deployed the same way as the dcn0 site and the - pattern may be continued for as many DCN sites as necessary. -- The central site is then updated so that in addition to having an - internal Ceph deployment for the cluster called central, it is also - configured with multiple external ceph clusters, relative to the - central site, for each DCN site. This is accomplished by passing - the cephx keys which were created during each DCN site deployment - as input to the stack update. During the stack update the - `GlanceMultistoreConfig` parameter is added so that Glance will - continue to use the central Ceph cluster as an RBD store by - default but it will also be configured to use each DCN Ceph cluster - as an additional RBD backend. - -The above sequence is possible by using the `CephExtraKeys` parameter -as described in :doc:`deployed_ceph` and the `CephExternalMultiConfig` -parameter described in :doc:`ceph_external`. - -Decide which cephx key will be used to access remote Ceph clusters -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -When |project| deploys Ceph it creates a cephx key called openstack and -configures Cinder, Glance, and Nova to use this key. When |project| creates -multiple Ceph clusters, as described in this document, a unique version of -this key is automatically created for each site, -e.g. central.client.openstack.keyring, dcn0.client.openstack.keyring, -and dcn1.client.openstack.keyring. Each site also needs a cephx key to -access the Ceph cluster at another site, and there are two options. - -1. Each site shares a copy of its openstack cephx key with the other site. -2. Each site shares a separately created external cephx key with the other - site, and does not share its own openstack key. - -Option 1 allows certain Cinder volume operations to function correctly across -sites. For example, Cinder can back up volumes at DCN sites to the central -site, and restore volume backups to other sites. Offline volume migration can -be used to move volumes from DCN sites to the central site, and from the -central site to DCN sites. Note that online volume migration between sites, -and migrating volumes directly from one DCN site to another DCN site are not -supported. - -Option 2 does not support backing up or restoring cinder volumes between the -central and DCN sites, nor does it support offline volume migration between -the sites. However, if a shared external key is ever compromised, it can be -rescinded without affecting the site's own openstack key. - -Deployment Steps ----------------- - -This section shows the deployment commands and associated environment -files of an example DCN deployment with distributed image -management. It is based on the :doc:`distributed_compute_node` -example and does not cover redundant aspects of it such as networking. - -This example assumes that the VIPs and Networks have already been -provisioned as described in :doc:`../deployment/network_v2`. We assume -that ``~/deployed-vips-control-plane.yaml`` was created by the output -of `openstack overcloud network vip provision` and that -``~/deployed-network-control-plane.yaml`` was created by the output of -`openstack overcloud network provision`. 
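-
-For reference, a minimal sketch of those two provisioning commands (the input
-file names are illustrative)::
-
-    openstack overcloud network provision \
-      --output ~/deployed-network-control-plane.yaml \
-      ~/network-data.yaml
-
-    openstack overcloud network vip provision \
-      --stack control-plane \
-      --output ~/deployed-vips-control-plane.yaml \
-      ~/vip-data.yaml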
-
-Create a separate external Cephx key (optional)
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-If you do not wish to distribute the default cephx key called
-openstack, and instead create a cephx key used at external sites, then
-follow this section. Otherwise proceed to the next section.
-Some cinder volume operations only work when sites are using a common
-'openstack' cephx key name. Cross-AZ backups and offline volume
-migration are not supported when using a separate external cephx key.
-
-Create ``/home/stack/control-plane/ceph_keys.yaml`` with contents like
-the following::
-
-  parameter_defaults:
-    CephExtraKeys:
-      - name: "client.external"
-        caps:
-          mgr: "allow *"
-          mon: "profile rbd"
-          osd: "profile rbd pool=vms, profile rbd pool=volumes, profile rbd pool=images"
-        key: "AQD29WteAAAAABAAphgOjFD7nyjdYe8Lz0mQ5Q=="
-        mode: "0600"
-
-The key should be considered sensitive and may be randomly generated
-with the following command::
-
-  python3 -c 'import os,struct,time,base64; key = os.urandom(16); header = struct.pack("<hiih", 1, int(time.time()), 0, len(key)); print(base64.b64encode(header + key).decode())'
-
-Deploy the central Ceph cluster
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-Use the `openstack overcloud ceph deploy` command as described in
-:doc:`deployed_ceph` to deploy the central Ceph cluster::
-
-  openstack overcloud ceph deploy \
-    ~/deployed-metal-control-plane.yaml \
-    --output ~/control-plane/deployed-ceph-control-plane.yaml \
-    --config ~/control-plane/initial-ceph.conf \
-    --container-image-prepare ~/containers-prepare-parameter.yaml \
-    --network-data ~/network-data.yaml \
-    --cluster central \
-    --stack control-plane
-
-The output of the above command,
-``--output ~/control-plane/deployed-ceph-control-plane.yaml``, will be
-used when deploying the overcloud in the next section.
-
-The ``--config ~/control-plane/initial-ceph.conf`` is optional and
-may be used for initial Ceph configuration. If the Ceph cluster
-will be hyper-converged with compute services then create this file
-like the following so Ceph will not consume memory that Nova compute
-instances will need::
-
-  $ cat <<EOF > ~/control-plane/initial-ceph.conf
-  [osd]
-  osd_memory_target_autotune = true
-  osd_numa_auto_affinity = true
-  [mgr]
-  mgr/cephadm/autotune_memory_target_ratio = 0.2
-  EOF
-  $
-
-The ``--container-image-prepare`` and ``--network-data`` options are
-included to make the example complete but are not displayed in this
-document. Both are necessary so that ``cephadm`` can download the Ceph
-container from the undercloud and so that the correct storage networks
-are used.
-
-Passing ``--stack control-plane`` directs the above command to use the
-working directory (e.g. ``$HOME/overcloud-deploy/<stack>``) which was
-created by `openstack overcloud node provision`. This directory
-contains the Ansible inventory and is where generated files from the
-Ceph deployment will be stored.
-
-Passing ``--cluster central`` changes the name of the Ceph cluster. As
-multiple Ceph clusters will be deployed, each is given a separate
-name. This name is inherited in the cephx key and configuration files.
-
-After Ceph is deployed, confirm that the central admin cephx key and
-Ceph configuration file have been configured on one of the
-controllers::
-
-  [root@oc0-controller-0 ~]# ls -l /etc/ceph/
-  -rw-------. 1 root root 63 Mar 26 21:49 central.client.admin.keyring
-  -rw-r--r--. 
1 root root 177 Mar 26 21:49 central.conf - [root@oc0-controller-0 ~]# - -From one of the controller nodes confirm that the `cephadm shell` -functions when passed these files:: - - cephadm shell --config /etc/ceph/central.conf \ - --keyring /etc/ceph/central.client.admin.keyring - -Deploy the control-plane stack -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Deploy the control-plane stack:: - - openstack overcloud deploy \ - --stack control-plane \ - --templates /usr/share/openstack-tripleo-heat-templates/ \ - -r ~/control-plane/control_plane_roles.yaml \ - -n ~/network-data.yaml \ - -e /usr/share/openstack-tripleo-heat-templates/environments/network-environment.yaml \ - -e /usr/share/openstack-tripleo-heat-templates/environments/podman.yaml \ - -e /usr/share/openstack-tripleo-heat-templates/environments/cephadm/cephadm-rbd-only.yaml \ - -e /usr/share/openstack-tripleo-heat-templates/environments/cinder-backup.yaml \ - -e ~/control-plane/deployed-ceph-control-plane.yaml \ - -e ~/control-plane/ceph_keys.yaml \ - -e ~/deployed-vips-control-plane.yaml \ - -e ~/deployed-network-control-plane.yaml \ - -e ~/deployed-metal-control-plane.yaml \ - -e ~/control-plane/glance.yaml - - -Passing ``-e ~/control-plane/ceph_keys.yaml`` is only required if you -followed the optional section called "Create a separate external Cephx -key (optional)". If you are using the openstack keyring, then you may -pass the ``environments/cinder-backup.yaml`` to deploy the -cinder-backup service at the central site. The cinder-backup service -running in the central site will be able to back up volumes located at -DCN sites as long as all sites use the default 'openstack' cephx key -name. DCN volumes cannot be backed up to the central site if the -deployment uses a separate 'external' cephx key. - -The network related files are included to make the example complete -but are not displayed in this document. For more information on -configuring networks with distributed compute nodes see -:doc:`distributed_compute_node`. - -The ``environments/cephadm/cephadm-rbd-only.yaml`` results in -additional configuration of ceph for the ``control-plane`` stack. It -creates the pools for the OpenStack services being deployed and -creates the cephx keyring for the `openstack` cephx user and -distributes the keys and conf files so OpenStack can be a client of -the Ceph cluster. RGW is not deployed simply because an object storage -system is not needed for this example. However, if an object storage -system is desired at the Central site, substitute -``environments/cephadm/cephadm.yaml`` for -``environments/cephadm/cephadm-rbd-only.yaml`` and Ceph RGW will also -be configured at the central site. - -This file also contains both `NovaEnableRbdBackend: true` and -`GlanceBackend: rbd`. When both of these settings are used, the Glance -`image_import_plugins` setting will contain `image_conversion`. With -this setting enabled commands like `glance image-create-via-import` -with `--disk-format qcow2` will result in the image being converted -into a raw format, which is optimal for the Ceph RBD driver. If -you need to disable image conversion you may override the -`GlanceImageImportPlugin` parameter. 
For example::
-
-    parameter_defaults:
-      GlanceImageImportPlugin: []
-
-The ``glance.yaml`` file sets the following to configure the local Glance backend::
-
-  parameter_defaults:
-    GlanceShowMultipleLocations: true
-    GlanceEnabledImportMethods: web-download,copy-image
-    GlanceBackend: rbd
-    GlanceBackendID: central
-    GlanceStoreDescription: 'central rbd glance store'
-
-The ``environments/cinder-backup.yaml`` file is not used in this
-deployment. It's possible to enable the Cinder-backup service by using
-this file but it will only write to the backups pool of the central
-Ceph cluster.
-
-All files matching ``deployed-*.yaml`` should have been created in the
-previous sections.
-
-The optional ``~/control-plane/ceph_keys.yaml`` file was created in
-the previous sections.
-
-Extract overcloud control-plane and Ceph configuration
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-Once the overcloud control plane has been deployed, data needs to be
-retrieved from it to pass as input values into the separate DCN
-deployment.
-
-The Heat export file is created automatically within the working
-directory as described in :doc:`distributed_compute_node`. Confirm
-this file was created for the control-plane as it will be used in the
-next section::
-
-  stat ~/overcloud-deploy/control-plane/control-plane-export.yaml
-
-Use the `openstack overcloud export ceph` command to create
-``~/central_ceph_external.yaml``::
-
-  openstack overcloud export ceph \
-    --stack control-plane \
-    --output-file ~/central_ceph_external.yaml
-
-By default the ``~/central_ceph_external.yaml`` file created from the
-command above will contain the contents of the cephx file
-central.client.openstack.keyring. This document uses the convention of
-calling the file "external" because it's for connecting to a Ceph
-cluster (central) which is external to dcn0 and deployed before it,
-whereas the dcn0 Ceph cluster is internal to dcn0 and deployed during
-the dcn0 deployment.
-If you do not wish to distribute central.client.openstack.keyring
-and choose to create an external cephx keyring called "external" as
-described in the optional cephx section above, then use the following
-command instead to create ``~/central_ceph_external.yaml``::
-
-  openstack overcloud export ceph \
-    --stack control-plane \
-    --cephx-key-client-name external \
-    --output-file ~/central_ceph_external.yaml
-
-The ``--cephx-key-client-name external`` option passed to the
-``openstack overcloud export ceph`` command results in the external
-key, created during deployment and defined in
-`/home/stack/control-plane/ceph_keys.yaml`, being extracted from
-config-download. If the ``--cephx-key-client-name`` is not passed,
-then the default cephx client key called `openstack` will be
-extracted.
-
-The generated ``~/central_ceph_external.yaml`` should look something
-like the following::
-
-  parameter_defaults:
-    CephExternalMultiConfig:
-      - cluster: "central"
-        fsid: "3161a3b4-e5ff-42a0-9f53-860403b29a33"
-        external_cluster_mon_ips: "172.16.11.84, 172.16.11.87, 172.16.11.92"
-        keys:
-          - name: "client.external"
-            caps:
-              mgr: "allow *"
-              mon: "profile rbd"
-              osd: "profile rbd pool=vms, profile rbd pool=volumes, profile rbd pool=images"
-            key: "AQD29WteAAAAABAAphgOjFD7nyjdYe8Lz0mQ5Q=="
-            mode: "0600"
-        dashboard_enabled: false
-        ceph_conf_overrides:
-          client:
-            keyring: /etc/ceph/central.client.external.keyring
-
-The `CephExternalMultiConfig` section of the above is used to
-configure any DCN node as a Ceph client of the central Ceph
-cluster. 
-
-The ``openstack overcloud export ceph`` command will obtain all of the
-values from the config-download directory of the stack specified by
-the `--stack` option. All values are extracted from the
-``cephadm/ceph_client.yml`` file. This file is generated when
-config-download executes the export tasks from the tripleo-ansible
-role `tripleo_cephadm`. It should not be necessary to extract these
-values manually as the ``openstack overcloud export ceph`` command
-will generate a valid YAML file with `CephExternalMultiConfig`
-populated for all stacks passed with the `--stack` option.
-
-The `ceph_conf_overrides` section of the file generated by ``openstack
-overcloud export ceph`` should look like the following::
-
-  ceph_conf_overrides:
-    client:
-      keyring: /etc/ceph/central.client.external.keyring
-
-The above will result in the following lines in
-``/etc/ceph/central.conf`` on all DCN nodes which interact with
-the central Ceph cluster::
-
-  [client]
-  keyring = /etc/ceph/central.client.external.keyring
-
-The name of the external Ceph cluster, relative to the DCN nodes,
-is `central` so the relevant Ceph configuration file is called
-``/etc/ceph/central.conf``. This directive is necessary so that the
-Glance clients called by Nova on all DCN nodes, which will be deployed
-in the next section, know which keyring to use so they may connect to
-the central Ceph cluster.
-
-It is necessary to always pass `dashboard_enabled: false` when using
-`CephExternalMultiConfig` as the Ceph dashboard cannot be deployed
-when configuring an overcloud as a client of an external Ceph cluster.
-Thus the ``openstack overcloud export ceph`` command adds this option.
-
-For more information on the `CephExternalMultiConfig` parameter see
-:doc:`ceph_external`.
-
-Create extra Ceph key for dcn0 (optional)
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-If you do not wish for the central site to use the openstack keyring
-generated for the dcn0 site, then create ``~/dcn0/ceph_keys.yaml``
-with content like the following::
-
-  parameter_defaults:
-    CephExtraKeys:
-      - name: "client.external"
-        caps:
-          mgr: "allow *"
-          mon: "profile rbd"
-          osd: "profile rbd pool=vms, profile rbd pool=volumes, profile rbd pool=images"
-        key: "AQBO/mteAAAAABAAc4mVMTpq7OFtrPlRFqN+FQ=="
-        mode: "0600"
-
-The `CephExtraKeys` section of the above should follow the same
-pattern as the first step of this procedure. It should use a
-new key, which should be considered sensitive and can be randomly
-generated with the same Python command from the first step. This same
-key will be used later when Glance on the central site needs to
-connect to the dcn0 images pool.
-
-Override Glance defaults for dcn0
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-Create ``~/dcn0/glance.yaml`` with content like the following::
-
-  parameter_defaults:
-    GlanceShowMultipleLocations: true
-    GlanceEnabledImportMethods: web-download,copy-image
-    GlanceBackend: rbd
-    GlanceBackendID: dcn0
-    GlanceStoreDescription: 'dcn0 rbd glance store'
-    GlanceMultistoreConfig:
-      central:
-        GlanceBackend: rbd
-        GlanceStoreDescription: 'central rbd glance store'
-        CephClusterName: central
-
-In the above example the `CephClientUserName` is not set because it
-uses the default of 'openstack' and thus the openstack cephx key is
-used. 
If you choose to create and distribute separate cephx keys as -described in the optional cephx section, then add this line to this -file so that it looks like the following:: - - parameter_defaults: - GlanceShowMultipleLocations: true - GlanceEnabledImportMethods: web-download,copy-image - GlanceBackend: rbd - GlanceStoreDescription: 'dcn0 rbd glance store' - GlanceMultistoreConfig: - central: - GlanceBackend: rbd - GlanceStoreDescription: 'central rbd glance store' - CephClusterName: central - CephClientUserName: 'external' - -The `CephClientUserName` should only be set to "external" if an -additional key which was passed with `CephExtraKeys` to the -control-plane stack had a name of "client.external". - -The `GlanceEnabledImportMethods` parameter is used to override the -default of 'web-download' to also include 'copy-image', which is -necessary to support the workflow described earlier. - -By default Glance on the dcn0 node will use the RBD store of the -dcn0 Ceph cluster. The `GlanceMultistoreConfig` parameter is then used -to add an additional store of type RBD called `central` which uses -the Ceph cluster deployed by the control-plane stack so the -`CephClusterName` is set to "central". - -Create DCN roles for dcn0 -^^^^^^^^^^^^^^^^^^^^^^^^^ - -Generate the roles used for the deployment:: - - openstack overcloud roles generate DistributedComputeHCI DistributedComputeHCIScaleOut -o ~/dcn0/dcn_roles.yaml - -The `DistributedComputeHCI` role includes the default compute -services, the cinder volume service, and also includes the Ceph Mon, -Mgr, and OSD services for deploying a Ceph cluster at the distributed -site. Using this role, both the compute services and Ceph services are -deployed on the same nodes, enabling a hyper-converged infrastructure -for persistent storage at the distributed site. When Ceph is used, -there must be a minimum of three `DistributedComputeHCI` nodes. This -role also includes a Glance server, provided by the `GlanceApiEdge` -service with in the `DistributedComputeHCI` role. The Nova compute -service of each node in the `DistributedComputeHCI` role is configured -by default to use its local Glance server. - -`DistributedComputeHCIScaleOut` role is like the `DistributedComputeHCI` -role but does not run the Ceph Mon and Mgr service. It offers the Ceph -OSD service however, so it may be used to scale up storage and compute -services at each DCN site after the minimum of three -`DistributedComputeHCI` nodes have been deployed. There is no -`GlanceApiEdge` service in the `DistributedComputeHCIScaleOut` role but -in its place the Nova compute service of the role is configured by -default to connect to a local `HaProxyEdge` service which in turn -proxies image requests to the Glance servers running on the -`DistributedComputeHCI` roles. - -If you do not wish to hyper-converge the compute nodes with Ceph OSD -services, then substitute `DistributedCompute` for -`DistributedComputeHCI` and `DistributedComputeScaleOut` for -`DistributedComputeHCIScaleOut`, and add `CephAll` nodes (which host -both the Mon, Mgr and OSD services). - -Both the `DistributedCompute` and `DistributedComputeHCI` roles -contain `CinderVolumeEdge` and `Etcd` service for running Cinder -in active/active mode but this service will not be enabled unless -the `environments/dcn-storage.yaml` environment file is included in the -deploy command. If the `environments/dcn.yaml` is used in its place, -then the CinderVolumeEdge service will remain disabled. 
-

The `DistributedCompute` role contains the `GlanceApiEdge` service so
that the Compute service uses its local Glance and local Ceph
server at the dcn0 site. The `DistributedComputeScaleOut` role contains
the `HAproxyEdge` service so that any compute instances booting on the
`DistributedComputeScaleOut` node proxy their requests for images to the
Glance services running on the `DistributedCompute` nodes. It is only
necessary to deploy the `ScaleOut` roles if more than three
`DistributedComputeHCI` or `DistributedCompute` nodes are needed.
Three are needed for the Cinder active/active service and, if
applicable, the Ceph Monitor and Manager services.

The roles should align to hosts which are deployed as described in
:doc:`../provisioning/baremetal_provision`. Since each site should
use a separate stack, this example assumes that ``--stack
dcn0`` was passed to the `openstack overcloud node provision`
command and that ``~/deployed-metal-dcn0.yaml`` was the
output of the same command. We also assume that the
``--network-config`` option was used to configure the network when the
hosts were provisioned.

Deploy the dcn0 Ceph cluster
^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Use the `openstack overcloud ceph deploy` command as described in
:doc:`deployed_ceph` to deploy the first DCN Ceph cluster::

  openstack overcloud ceph deploy \
      ~/deployed-metal-dcn0.yaml \
      --output ~/dcn0/deployed-ceph-dcn0.yaml \
      --config ~/dcn0/initial-ceph.conf \
      --container-image-prepare ~/containers-prepare-parameter.yaml \
      --network-data ~/network-data.yaml \
      --roles-data ~/dcn0/dcn_roles.yaml \
      --cluster dcn0 \
      --stack dcn0

The output of the above command,
``--output ~/dcn0/deployed-ceph-dcn0.yaml``, will be
used when deploying the overcloud in the next section.

Passing ``--config ~/dcn0/initial-ceph.conf`` is optional and
may be used for initial Ceph configuration. If the Ceph cluster
will be hyper-converged with compute services then create this file
like the following so Ceph will not consume memory that Nova compute
instances will need::

  $ cat <<EOF > ~/dcn0/initial-ceph.conf
  [osd]
  osd_memory_target_autotune = true
  osd_numa_auto_affinity = true
  [mgr]
  mgr/cephadm/autotune_memory_target_ratio = 0.2
  EOF
  $

The ``--container-image-prepare`` and ``--network-data`` options are
included to make the example complete but are not displayed in this
document. Both are necessary so that ``cephadm`` can download the Ceph
container from the undercloud and so that the correct storage networks
are used.

Passing ``--stack dcn0`` directs the above command to use the
working directory (e.g. ``$HOME/overcloud-deploy/``) which was
created by `openstack overcloud node provision`. This directory
contains the Ansible inventory and is where generated files from the
Ceph deployment will be stored.

Passing ``--cluster dcn0`` changes the name of the Ceph cluster. As
multiple Ceph clusters will be deployed, each is given a separate
name. This name is inherited in the cephx key and configuration files.

After Ceph is deployed, confirm that the dcn0 admin cephx key and
Ceph configuration file have been configured in ``/etc/ceph``.
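For example, assuming the default file names derived from the
``--cluster dcn0`` option, the following command may be used to confirm
that both files are present::

  ls -l /etc/ceph/dcn0.conf /etc/ceph/dcn0.client.admin.keyring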
-Ensure the `cephadm shell` functions when passed these files:: - - cephadm shell --config /etc/ceph/dcn0.conf \ - --keyring /etc/ceph/dcn0.client.admin.keyring - - -Deploy the dcn0 stack -^^^^^^^^^^^^^^^^^^^^^ - -Deploy the dcn0 stack:: - - openstack overcloud deploy \ - --stack dcn0 \ - --templates /usr/share/openstack-tripleo-heat-templates/ \ - -r ~/dcn0/dcn_roles.yaml \ - -n ~/network-data.yaml \ - -e /usr/share/openstack-tripleo-heat-templates/environments/network-environment.yaml \ - -e /usr/share/openstack-tripleo-heat-templates/environments/podman.yaml \ - -e /usr/share/openstack-tripleo-heat-templates/environments/cephadm/cephadm-rbd-only.yaml \ - -e /usr/share/openstack-tripleo-heat-templates/environments/dcn-storage.yaml \ - -e ~/overcloud-deploy/control-plane/control-plane-export.yaml \ - -e ~/central_ceph_external.yaml \ - -e ~/dcn0/deployed-ceph-dcn0.yaml \ - -e ~/dcn0/dcn_ceph_keys.yaml \ - -e deployed-vips-dcn0.yaml \ - -e deployed-network-dcn0.yaml \ - -e deployed-metal-dcn0.yaml \ - -e ~/dcn0/az.yaml \ - -e ~/dcn0/glance.yaml - -Passing ``-e ~/dcn0/dcn_ceph_keys.yaml`` is only required if you -followed the optional section called "Create extra Ceph key for dcn0 -(optional)". - -The network related files are included to make the example complete -but are not displayed in this document. For more information on -configuring networks with distributed compute nodes see -:doc:`distributed_compute_node`. - -The ``environments/cinder-volume-active-active.yaml`` file is NOT used -to configure Cinder active/active on the DCN site because -``environments/dcn-storage.yaml`` contains the same parameters. The -``environments/dcn-storage.yaml`` file is also used to configure the -`GlanceApiEdge` and `HAproxyEdge` edge services. If you are not using -hyper-converged Ceph, then use ``environments/dcn.yaml`` instead. -Both ``environments/dcn-storage.yaml`` and ``environments/dcn.yaml`` use -`NovaCrossAZAttach: False` to override the Nova configuration `[cinder]` -`cross_az_attach` setting from its default of `true`. This setting -should be `false` for all nodes in the dcn0 stack so that volumes -attached to an instance must be in the same availability zone in -Cinder as the instance availability zone in Nova. This is useful when -booting an instance from a volume on DCN nodes because Nova will -attempt to create a volume using the same availability zone as what is -assigned to the instance. - -The ``~/dcn0/az.yaml`` file contains the following:: - - parameter_defaults: - ManageNetworks: false - NovaComputeAvailabilityZone: dcn0 - CinderStorageAvailabilityZone: dcn0 - CinderVolumeCluster: dcn0 - -`CinderVolumeCluster` is the name of the Cinder active/active cluster -which is deployed per DCN site. The above setting overrides the -default of "dcn" to "dcn0" found in `environments/dcn-storage.yaml`. See -:doc:`distributed_compute_node` for details on the other parameters -above. - -The ``~/overcloud-deploy/control-plane/control-plane-export.yaml``, -``~/dcn0/dcn_ceph_keys.yaml``, ``~/dcn0/glance.yaml``, and -``role-counts.yaml`` files were created in the previous steps. The -``~/central_ceph_external.yaml`` file should also have been created in -a previous step. Deployment with this file is only necessary if images -on DCN sites will be pushed back to the central site so that they may -then be shared with other DCN sites. This may be useful for sharing -snapshots between sites. - -All files matching ``deployed-*.yaml`` should have been created in the -previous sections. 
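After the dcn0 stack has been deployed, an optional sanity check is to
list the compute and volume services and availability zones as seen
from the control plane. The following is only a sketch; it assumes the
control-plane credentials have been sourced (for example from the
"control-planerc" file mentioned later in this document) and the exact
output will depend on your environment::

  openstack availability zone list --compute
  openstack volume service list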
- -Deploy additional DCN sites -^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -All of the previous sections which were done for dcn0 may be repeated -verbatim except with "dcn1" substituted for "dcn0" and a new cephx key -should be generated for each DCN site as described under `Create extra -Ceph key`. Other than that, the same process may be continued to -deploy as many DCN sites as needed. Once all of the desired DCN sites -have been deployed proceed to the next section. The -``~/overcloud-deploy/control-plane/control-plane-export.yaml`` and ``~/central_ceph_external.yaml`` -which were created earlier may be reused for each DCN deployment and -do not need to be recreated. The roles in the previous section were -created specifically for dcn0 to allow for variations between DCN -sites. - -Update central site to use additional Ceph clusters as Glance stores -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Once all of the desired DCN sites are deployed the central site needs -to be updated so that the central Glance service may push images to -the DCN sites. - -In this example only one additional DCN site, dcn1, has been deployed -as indicated by the list of undercloud Heat stacks:: - - $ openstack stack list -c "Stack Name" -c "Stack Status" - +---------------+-----------------+ - | Stack Name | Stack Status | - +---------------+-----------------+ - | dcn1 | CREATE_COMPLETE | - | dcn0 | CREATE_COMPLETE | - | control-plane | CREATE_COMPLETE | - +---------------+-----------------+ - $ - -Create ``~/control-plane/glance-dcn-stores.yaml`` with content like the -following:: - - parameter_defaults: - GlanceMultistoreConfig: - dcn0: - GlanceBackend: rbd - GlanceStoreDescription: 'dcn0 rbd glance store' - CephClusterName: dcn0 - dcn1: - GlanceBackend: rbd - GlanceStoreDescription: 'dcn1 rbd glance store' - CephClusterName: dcn1 - -In the above example the `CephClientUserName` is not set because it -uses the default of 'openstack' and thus the openstack cephx key is -used. If you choose to create and distribute separate cephx keys as -described in the optional cephx section, then add this line to this -file per DCN site so that it looks like the following:: - - parameter_defaults: - GlanceShowMultipleLocations: true - GlanceEnabledImportMethods: web-download,copy-image - GlanceBackend: rbd - GlanceStoreDescription: 'central rbd glance store' - CephClusterName: central - GlanceMultistoreConfig: - dcn0: - GlanceBackend: rbd - GlanceStoreDescription: 'dcn0 rbd glance store' - CephClientUserName: 'external' - CephClusterName: dcn0 - dcn1: - GlanceBackend: rbd - GlanceStoreDescription: 'dcn1 rbd glance store' - CephClientUserName: 'external' - CephClusterName: dcn1 - -The `CephClientUserName` should only be set to "external" if an -additional key which was passed with `CephExtraKeys` to the -DCN stacks had a name of "client.external". The above will configure -the Glance service running on the Controllers to use two additional -stores called "dcn0" and "dcn1". - -Use the `openstack overcloud export ceph` command to create -``~/control-plane/dcn_ceph_external.yaml``:: - - openstack overcloud export ceph \ - --stack dcn0,dcn1 \ - --output-file ~/control-plane/dcn_ceph_external.yaml - -In the above example a coma-delimited list of Heat stack names is -provided to the ``--stack`` option. Pass as many stacks as necessary -for all deployed DCN sites so that the configuration data to connect -to every DCN Ceph cluster is extracted into the single generated -``dcn_ceph_external.yaml`` file. 
-

If you created a separate cephx key called external on each DCN Ceph
cluster with ``CephExtraKeys``, then use the following variation of
the above command instead::

  openstack overcloud export ceph \
      --stack dcn0,dcn1 \
      --cephx-key-client-name external \
      --output-file ~/control-plane/dcn_ceph_external.yaml

The created ``~/control-plane/dcn_ceph_external.yaml`` file should have
content like the following::

  parameter_defaults:
    CephExternalMultiConfig:
      - cluster: "dcn0"
        fsid: "539e2b96-316e-4c23-b7df-035a3037ddd1"
        external_cluster_mon_ips: "172.16.11.61, 172.16.11.64, 172.16.11.66"
        keys:
          - name: "client.external"
            caps:
              mgr: "allow *"
              mon: "profile rbd"
              osd: "profile rbd pool=vms, profile rbd pool=volumes, profile rbd pool=images"
            key: "AQBO/mteAAAAABAAc4mVMTpq7OFtrPlRFqN+FQ=="
            mode: "0600"
        dashboard_enabled: false
        ceph_conf_overrides:
          client:
            keyring: /etc/ceph/dcn0.client.external.keyring
      - cluster: "dcn1"
        fsid: "7504a91e-5a0f-4408-bb55-33c3ee2c67e9"
        external_cluster_mon_ips: "172.16.11.182, 172.16.11.185, 172.16.11.187"
        keys:
          - name: "client.external"
            caps:
              mgr: "allow *"
              mon: "profile rbd"
              osd: "profile rbd pool=vms, profile rbd pool=volumes, profile rbd pool=images"
            key: "AQACCGxeAAAAABAAHocX/cnygrVnLBrKiZHJfw=="
            mode: "0600"
        dashboard_enabled: false
        ceph_conf_overrides:
          client:
            keyring: /etc/ceph/dcn1.client.external.keyring

The `CephExternalMultiConfig` section of the above is used to
configure the Glance service at the central site as a Ceph client of
all of the Ceph clusters of the DCN sites; that is "dcn0" and "dcn1"
in this example. This will be possible because the central nodes will
have the following files created:

- /etc/ceph/dcn0.conf
- /etc/ceph/dcn0.client.external.keyring
- /etc/ceph/dcn1.conf
- /etc/ceph/dcn1.client.external.keyring

For more information on the `CephExternalMultiConfig` parameter see
:doc:`ceph_external`.

The number of lines in the ``~/control-plane/glance-dcn-stores.yaml`` and
``~/control-plane/dcn_ceph_external.yaml`` files will be proportional to
the number of DCN sites deployed.

Run the same `openstack overcloud deploy --stack control-plane ...`
command which was run in the previous section but also include
the ``~/control-plane/glance-dcn-stores.yaml`` and
``~/control-plane/dcn_ceph_external.yaml`` files with a `-e`. When the
stack update is complete, proceed to the next section.

DCN using only External Ceph Clusters (optional)
------------------------------------------------

A possible variation of the deployment described above is one in which
Ceph is not deployed by director but is external to director as
described in :doc:`ceph_external`. Each site must still use a Ceph
cluster which is in the same physical location in order to address
latency requirements but that Ceph cluster does not need to be
deployed by director as in the examples above. In this configuration
Ceph services may not be hyperconverged with the Compute and
Controller nodes.
The example in this section makes the following -assumptions: - -- A separate Ceph cluster at the central site called central -- A separate Ceph cluster at the dcn0 site called dcn0 -- A separate Ceph cluster at each dcnN site called dcnN for any other - DCN sites - -For each Ceph cluster listed above the following command has been -run:: - - ceph auth add client.openstack mon 'allow r' osd 'allow class-read object_prefix rbd_children, allow rwx pool=volumes, allow rwx pool=vms, allow rwx pool=images' - -For the central site you may optionally append `, allow rwx -pool=backups, allow rwx pool=metrics` to the above command if you will -be using the Cinder backup or Telemetry services. Either way, the -above command will return a Ceph client key which should be saved in -an environment file to set the value of `CephClientKey`. The -environment file should be named something like -external-ceph-.yaml (e.g. external-ceph-central.yaml, -external-ceph-dcn0.yaml, external-ceph-dcn1.yaml, etc.) and should -contain values like the following:: - - parameter_defaults: - # The cluster FSID - CephClusterFSID: '4b5c8c0a-ff60-454b-a1b4-9747aa737d19' - # The CephX user auth key - CephClientKey: 'AQDLOh1VgEp6FRAAFzT7Zw+Y9V6JJExQAsRnRQ==' - # The list of IPs or hostnames of the Ceph monitors - CephExternalMonHost: '172.16.1.7, 172.16.1.8, 172.16.1.9' - # The desired name of the generated key and conf files - CephClusterName: central - -The above will not result in creating a new Ceph cluster but in -configuring a client to connect to an existing one, though the -`CephClusterName` variable should still be set so that the -configuration files are named based on the variable's value, -e.g. /etc/ceph/central.conf. The above example might be used for -the central site but for the dcn1 site, `CephClusterName` should be -set to "dcn1". Naming the cluster after its planned availability zone -is a strategy to keep the names consistent. Whatever name is supplied -will result in the Ceph configuration file in /etc/ceph/ having that -name, e.g. /etc/ceph/central.conf, /etc/ceph/dcn0.conf, -/etc/ceph/dcn1.conf, etc. and central.client.openstack.keyring, -dcn0.client.openstack.keyring, etc. The name should be unique so as to -avoid file overwrites. If the name is not set it will default to -"ceph". - -In each `openstack overcloud deploy` command in the previous sections -replace ``environments/cephadm/cephadm-rbd-only.yaml`` with -``environments/external-ceph.yaml`` and replace the -``deployed-ceph-.yaml`` with ``external-ceph-.yaml`` as -described above. - -Thus, for a three stack deployment the following will be the case. - -- The initial deployment of the central stack is configured with one - external Ceph cluster called central, which is the default store for - Cinder, Glance, and Nova. We will refer to this as the central - site's "primary external Ceph cluster". - -- The initial deployment of the dcn0 stack is configured - with its own primary external Ceph cluster called dcn0 which is the - default store for the Cinder, Glance, and Nova services at the dcn0 - site. It is also configured with the secondary external Ceph cluster - central. - -- Each subsequent dcnN stack has its own primary external Ceph cluster - and a secondary Ceph cluster which is central. - -- After every DCN site is deployed, the central stack is updated so - that in addition to its primary external Ceph cluster, "central", it - has multiple secondary external Ceph clusters. 
This stack update - will also configure Glance to use the additional secondary external - Ceph clusters as additional stores. - -In the example above, each site must have a primary external Ceph -cluster and each secondary external Ceph cluster is configured by -using the `CephExternalMultiConfig` parameter described in -:doc:`ceph_external`. - -The `CephExternalMultiConfig` parameter must be manually configured -because the `openstack overcloud export ceph` command can only export -Ceph configuration information from clusters which it has deployed. -However, the `ceph auth add` command and `external-ceph-.yaml` -site file described above contain all of the information necessary -to populate the `CephExternalMultiConfig` parameter. - -If the external Ceph cluster at each DCN site has the default name of -"ceph", then you should still define a unique cluster name within the -`CephExternalMultiConfig` parameter like the following:: - - parameter_defaults: - CephExternalMultiConfig: - - cluster: dcn1 - ... - - cluster: dcn2 - ... - -The above will result in dcn1.conf, dcn2.conf, etc, being created in -/etc/ceph on the control-plane nodes so that Glance is able to use -the correct Ceph configuration file per image store. If each -`cluster:` parameter above were set to "ceph", then the configuration -for each cluster would overwrite the file defined in the previous -configuration, so be sure to use a unique cluster name matching the -planned name of the availability zone. - -Confirm images may be copied between sites ------------------------------------------- - -Ensure you have Glance 3.0.0 or newer as provided by the -`python3-glanceclient` RPM: - -.. code-block:: bash - - $ glance --version - 3.0.0 - -Authenticate to the control-plane using the RC file generated -by the stack from the first deployment which contains Keystone. -In this example the stack was called "control-plane" so the file -to source before running Glance commands will be called -"control-planerc". - -Confirm the expected stores are available: - -.. code-block:: bash - - $ glance stores-info - +----------+----------------------------------------------------------------------------------+ - | Property | Value | - +----------+----------------------------------------------------------------------------------+ - | stores | [{"default": "true", "id": "central", "description": "central rbd glance | - | | store"}, {"id": "http", "read-only": "true"}, {"id": "dcn0", "description": | - | | "dcn0 rbd glance store"}, {"id": "dcn1", "description": "dcn1 rbd glance | - | | store"}] | - +----------+----------------------------------------------------------------------------------+ - -Assuming an image like `cirros-0.4.0-x86_64-disk.img` is in the -current directory, convert the image from QCOW2 format to RAW format -using a command like the following: - -.. code-block:: bash - - qemu-img convert -f qcow2 -O raw cirros-0.4.0-x86_64-disk.img cirros-0.4.0-x86_64-disk.raw - -Create an image in Glance default store at the central site as seen -in the following example: - -.. code-block:: bash - - glance image-create \ - --disk-format raw --container-format bare \ - --name cirros --file cirros-0.4.0-x86_64-disk.raw \ - --store central - -Alternatively, if the image is not in the current directory but in -qcow2 format on a web server, then it may be imported and converted in -one command by running the following: - -.. 
code-block:: bash - - glance --verbose image-create-via-import --disk-format qcow2 --container-format bare --name cirros --uri http://download.cirros-cloud.net/0.4.0/cirros-0.4.0-x86_64-disk.img --import-method web-download --stores central - -.. note:: The example above assumes that Glance image format - conversion is enabled. Thus, even though `--disk-format` is - set to `qcow2`, which is the format of the image file, Glance - will convert and store the image in raw format after it's - uploaded because the raw format is the optimal setting for - Ceph RBD. The conversion may be confirmed by running - `glance image-show | grep disk_format` after the image - is uploaded. - -Set an environment variable to the ID of the newly created image: - -.. code-block:: bash - - ID=$(openstack image show cirros -c id -f value) - -Copy the image from the default store to the dcn0 and dcn1 stores: - -.. code-block:: bash - - glance image-import $ID --stores dcn0,dcn1 --import-method copy-image - -Confirm a copy of the image is in each store by looking at the image properties: - -.. code-block:: bash - - $ openstack image show $ID | grep properties - | properties | direct_url='rbd://d25504ce-459f-432d-b6fa-79854d786f2b/images/8083c7e7-32d8-4f7a-b1da-0ed7884f1076/snap', locations='[{u'url': u'rbd://d25504ce-459f-432d-b6fa-79854d786f2b/images/8083c7e7-32d8-4f7a-b1da-0ed7884f1076/snap', u'metadata': {u'store': u'central'}}, {u'url': u'rbd://0c10d6b5-a455-4c4d-bd53-8f2b9357c3c7/images/8083c7e7-32d8-4f7a-b1da-0ed7884f1076/snap', u'metadata': {u'store': u'dcn0'}}, {u'url': u'rbd://8649d6c3-dcb3-4aae-8c19-8c2fe5a853ac/images/8083c7e7-32d8-4f7a-b1da-0ed7884f1076/snap', u'metadata': {u'store': u'dcn1'}}]', os_glance_failed_import='', os_glance_importing_to_stores='', os_hash_algo='sha512', os_hash_value='b795f047a1b10ba0b7c95b43b2a481a59289dc4cf2e49845e60b194a911819d3ada03767bbba4143b44c93fd7f66c96c5a621e28dff51d1196dae64974ce240e', os_hidden='False', stores='central,dcn0,dcn1' | - -The `stores` key, which is the last item in the properties map is set -to 'central,dcn0,dcn1'. - -On further inspection the `direct_url` key is set to:: - - rbd://d25504ce-459f-432d-b6fa-79854d786f2b/images/8083c7e7-32d8-4f7a-b1da-0ed7884f1076/snap - -Which contains 'd25504ce-459f-432d-b6fa-79854d786f2b', the FSID of the -central Ceph cluster, the name of the pool, 'images', followed by -'8083c7e7-32d8-4f7a-b1da-0ed7884f1076', the Glance image ID and name -of the Ceph object. - -The properties map also contains `locations` which is set to similar -RBD paths for the dcn0 and dcn1 cluster with their respective FSIDs -and pool names. Note that the Glance image ID is consistent in all RBD -paths. - -If the image were deleted with `glance image-delete`, then the image -would be removed from all three RBD stores to ensure consistency. -However, if the glanceclient is >3.1.0, then an image may be deleted -from a specific store only by using a syntax like `glance -stores-delete --store `. - -Optionally, run the following on any Controller node from the -control-plane stack: - -.. code-block:: bash - - sudo podman exec ceph-mon-$(hostname) rbd --cluster central -p images ls -l - -Run the following on any DistributedComputeHCI node from the dcn0 stack: - -.. code-block:: bash - - sudo podman exec ceph-mon-$(hostname) rbd --id external --keyring /etc/ceph/dcn0.client.external.keyring --conf /etc/ceph/dcn0.conf -p images ls -l - -Run the following on any DistributedComputeHCI node from the dcn1 stack: - -.. 
code-block:: bash

   sudo podman exec ceph-mon-$(hostname) rbd --id external --keyring /etc/ceph/dcn1.client.external.keyring --conf /etc/ceph/dcn1.conf -p images ls -l

The results in all cases should produce output like the following::

  NAME                                        SIZE    PARENT  FMT  PROT  LOCK
  8083c7e7-32d8-4f7a-b1da-0ed7884f1076        44 MiB          2
  8083c7e7-32d8-4f7a-b1da-0ed7884f1076@snap   44 MiB          2    yes

When an ephemeral instance is COW booted from the image, a similar
command in the vms pool should show the same parent image:

.. code-block:: bash

   $ sudo podman exec ceph-mon-$(hostname) rbd --id external --keyring /etc/ceph/dcn1.client.external.keyring --conf /etc/ceph/dcn1.conf -p vms ls -l
   NAME                                       SIZE   PARENT                                             FMT  PROT  LOCK
   2b431c77-93b8-4edf-88d9-1fd518d987c2_disk  1 GiB  images/8083c7e7-32d8-4f7a-b1da-0ed7884f1076@snap   2          excl
   $


Confirm image-based volumes may be booted as DCN instances
----------------------------------------------------------

An instance with a persistent root volume may be created on a DCN
site by using the active/active Cinder service at the DCN site.
Assuming the Glance image created in the previous step is available,
identify the image ID and pass it to `openstack volume create` with
the `--image` option to create a volume based on that image.

.. code-block:: bash

   IMG_ID=$(openstack image show cirros -c id -f value)
   openstack volume create --size 8 --availability-zone dcn0 pet-volume-dcn0 --image $IMG_ID

Once the volume is created, identify its volume ID and pass it to
`openstack server create` with the `--volume` option. This example
assumes a flavor, key, security group and network have already been
created.

.. code-block:: bash

   VOL_ID=$(openstack volume show -f value -c id pet-volume-dcn0)
   openstack server create --flavor tiny --key-name dcn0-key --network dcn0-network --security-group basic --availability-zone dcn0 --volume $VOL_ID pet-server-dcn0

It is also possible to issue one command to have Nova ask Cinder
to create the volume before it boots the instance by passing the
`--image` and `--boot-from-volume` options as shown in the
example below:

.. code-block:: bash

   openstack server create --flavor tiny --image $IMG_ID --key-name dcn0-key --network dcn0-network --security-group basic --availability-zone dcn0 --boot-from-volume 4 pet-server-dcn0

The above will only work if the Nova `cross_az_attach` setting
of the relevant compute node is set to `false`. This is automatically
configured by deploying with `environments/dcn-storage.yaml`. If the
`cross_az_attach` setting is `true` (the default), then the volume
will be created from the image not in the dcn0 site, but on the
default central site (as verified with the `rbd` command on the
central Ceph cluster) and then the instance will fail to boot on the
dcn0 site. Even if `cross_az_attach` is `true`, it's still possible to
create an instance from a volume by using `openstack volume create`
and then `openstack server create` as shown earlier.

Optionally, after creating the volume from the image at the dcn0
site and then creating an instance from the existing volume, verify
that the volume is based on the image by running the `rbd` command
within a ceph-mon container on the dcn0 site to list the volumes pool.

..
code-block:: bash - - $ sudo podman exec ceph-mon-$HOSTNAME rbd --cluster dcn0 -p volumes ls -l - NAME SIZE PARENT FMT PROT LOCK - volume-28c6fc32-047b-4306-ad2d-de2be02716b7 8 GiB images/8083c7e7-32d8-4f7a-b1da-0ed7884f1076@snap 2 excl - $ - -The following commands may be used to create a Cinder snapshot of the -root volume of the instance. - -.. code-block:: bash - - openstack server stop pet-server-dcn0 - openstack volume snapshot create pet-volume-dcn0-snap --volume $VOL_ID --force - openstack server start pet-server-dcn0 - -In the above example the server is stopped to quiesce data for clean -a snapshot. The `--force` option is necessary when creating the -snapshot because the volume status will remain "in-use" even when the -server is shut down. When the snapshot is completed start the -server. Listing the contents of the volumes pool on the dcn0 Ceph -cluster should show the snapshot which was created and how it is -connected to the original volume and original image. - -.. code-block:: bash - - $ sudo podman exec ceph-mon-$HOSTNAME rbd --cluster dcn0 -p volumes ls -l - NAME SIZE PARENT FMT PROT LOCK - volume-28c6fc32-047b-4306-ad2d-de2be02716b7 8 GiB images/8083c7e7-32d8-4f7a-b1da-0ed7884f1076@snap 2 excl - volume-28c6fc32-047b-4306-ad2d-de2be02716b7@snapshot-a1ca8602-6819-45b4-a228-b4cd3e5adf60 8 GiB images/8083c7e7-32d8-4f7a-b1da-0ed7884f1076@snap 2 yes - $ - -Confirm image snapshots may be created and copied between sites ---------------------------------------------------------------- - -A new image called "cirros-snapshot" may be created at the dcn0 site -from the instance created in the previous section by running the -following commands. - -.. code-block:: bash - - NOVA_ID=$(openstack server show pet-server-dcn0 -f value -c id) - openstack server stop $NOVA_ID - openstack server image create --name cirros-snapshot $NOVA_ID - openstack server start $NOVA_ID - -In the above example the instance is stopped to quiesce data for clean -a snapshot image and is then restarted after the image has been -created. The output of `openstack image show $IMAGE_ID -f value -c -properties` should contain a JSON data structure whose key called -`stores` should only contain "dcn0" as that is the only store -which has a copy of the new cirros-snapshot image. - -The new image may then by copied from the dcn0 site to the central -site, which is the default backend for Glance. - -.. code-block:: bash - - IMAGE_ID=$(openstack image show cirros-snapshot -f value -c id) - glance image-import $IMAGE_ID --stores central --import-method copy-image - -After the above is run the output of `openstack image show -$IMAGE_ID -f value -c properties` should contain a JSON data structure -whose key called `stores` should look like "dcn0,central" as -the image will also exist in the "central" backend which stores its -data on the central Ceph cluster. The same image at the Central site -may then be copied to other DCN sites, booted in the vms or volumes -pool, and snapshotted so that the same process may repeat. diff --git a/deploy-guide/source/features/domain_specific_ldap_backends.rst b/deploy-guide/source/features/domain_specific_ldap_backends.rst deleted file mode 100644 index bd068f59..00000000 --- a/deploy-guide/source/features/domain_specific_ldap_backends.rst +++ /dev/null @@ -1,295 +0,0 @@ -Domain-specific LDAP Backends -============================= - -It is possible to configure keystone to use one or more LDAP backends for the -identity resources as described in the `OpenStack Identity documentation`_. 
-This will result in an LDAP backend per keystone domain. - -Setup ------ - -To configure LDAP backends, set the ``KeystoneLDAPDomainEnable`` flag to -``true``. Enabling this will set the ``domain_specific_drivers_enabled`` option -in keystone in the ``identity`` configuration group. By default the domain -configurations are stored in the **/etc/keystone/domains** directory on the -controller nodes. You can override this directory by setting the -``keystone::domain_config_directory`` hiera key, and setting that via the -``ExtraConfig`` parameter in an environment file. For instance, to set this in -the controller nodes, one would do the following:: - - parameter_defaults: - ControllerExtraConfig: - keystone::domain_config_directory: /etc/another/directory - -The LDAP backend configuration should be provided via the -``KeystoneLDAPBackendConfigs`` parameter in tripleo-heat-templates. It's a -dictionary mapping the LDAP domain names to options that take the following -keys: - -* **identity_driver**: Identity backend driver. Defaults to 'ldap' - -* **url**: URL for connecting to the LDAP server. - -* **user**: User BindDN to query the LDAP server. - -* **password**: Password for the BindDN to query the LDAP server. - -* **suffix**: LDAP server suffix - -* **query_scope**: The LDAP scope for queries, this can be either "one" - (onelevel/singleLevel which is the default in keystone) or "sub" - (subtree/wholeSubtree). - -* **page_size**: Maximum results per page; a value of zero ("0") disables - paging. (integer value) - -* **user_tree_dn**: Search base for users. - -* **user_filter**: LDAP search filter for users. - -* **user_objectclass**: LDAP objectclass for users. - -* **user_id_attribute**: LDAP attribute mapped to user id. **WARNING**: must - not be a multivalued attribute. (string value) - -* **user_name_attribute**: LDAP attribute mapped to user name. - -* **user_mail_attribute**: LDAP attribute mapped to user email. - -* **user_enabled_attribute**: LDAP attribute mapped to user enabled flag. - -* **user_enabled_mask**: Bitmask integer to indicate the bit that the enabled - value is stored in if the LDAP server represents "enabled" as a bit on an - integer rather than a boolean. A value of "0" indicates the mask is not used. - If this is not set to "0" the typical value is "2". This is typically used - when "user_enabled_attribute = userAccountControl". (integer value) - -* **user_enabled_default**: Default value to enable users. This should match an - appropriate int value if the LDAP server uses non-boolean (bitmask) values - to indicate if a user is enabled or disabled. If this is not set to "True" - the typical value is "512". This is typically used when - "user_enabled_attribute = userAccountControl". - -* **user_enabled_invert**: Invert the meaning of the boolean enabled values. - Some LDAP servers use a boolean lock attribute where "true" means an account - is disabled. Setting "user_enabled_invert = true" will allow these lock - attributes to be used. This setting will have no effect if - "user_enabled_mask" or "user_enabled_emulation" settings are in use. - (boolean value) - -* **user_attribute_ignore**: List of attributes stripped off the user on - update. (list value) - -* **user_default_project_id_attribute**: LDAP attribute mapped to - default_project_id for users. - -* **user_pass_attribute**: LDAP attribute mapped to password. 
- -* **user_enabled_emulation**: If true, Keystone uses an alternative method to - determine if a user is enabled or not by checking if they are a member of - the "user_enabled_emulation_dn" group. (boolean value) - -* **user_enabled_emulation_dn**: DN of the group entry to hold enabled users - when using enabled emulation. - -* **user_additional_attribute_mapping**: List of additional LDAP attributes - used for mapping additional attribute mappings for users. Attribute mapping - format is :, where ldap_attr is the attribute in the - LDAP entry and user_attr is the Identity API attribute. (list value) - -* **group_tree_dn**: Search base for groups. - -* **group_filter**: LDAP search filter for groups. - -* **group_objectclass**: LDAP objectclass for groups. - -* **group_id_attribute**: LDAP attribute mapped to group id. - -* **group_name_attribute**: LDAP attribute mapped to group name. - -* **group_member_attribute**: LDAP attribute mapped to show group membership. - -* **group_desc_attribute**: LDAP attribute mapped to group description. - -* **group_attribute_ignore**: List of attributes stripped off the group on - update. (list value) - -* **group_additional_attribute_mapping**: Additional attribute mappings for - groups. Attribute mapping format is :, where ldap_attr - is the attribute in the LDAP entry and user_attr is the Identity API - attribute. (list value) - -* **chase_referrals**: Whether or not to chase returned referrals. Note that - it's possible that your client or even your backend do this for you already. - All this does is try to override the client configuration. If your client - doesn't support this, you might want to enable *chaining* on your LDAP server - side. (boolean value) - -* **use_tls**: Enable TLS for communicating with LDAP servers. Note that you - might also enable this by using a TLS-enabled scheme in the URL (e.g. - "ldaps"). However, if you configure this via the URL, this option is not - needed. (boolean value) - -* **tls_cacertfile**: CA certificate file path for communicating with LDAP - servers. - -* **tls_cacertdir**: CA certificate directory path for communicating with LDAP - servers. - -* **tls_req_cert**: Valid options for tls_req_cert are demand, never, and allow. - -* **use_pool**: Enable LDAP connection pooling. (boolean value and defaults to - true) - -* **pool_size**: Connection pool size. (integer value and defaults to '10') - -* **pool_retry_max**: Maximum count of reconnect trials. (integer value and - defaults to '3' - -* **pool_retry_delay**: Time span in seconds to wait between two reconnect - trials. (floating point value and defaults to '0.1') - -* **pool_connection_timeout**: Connector timeout in seconds. Value -1 - indicates indefinite wait for response. (integer value and defaults to '-1') - -* **pool_connection_lifetime**: Connection lifetime in seconds. (integer value - and defaults to '600') - -* **use_auth_pool**: Enable LDAP connection pooling for end user authentication. - If use_pool is disabled, then this setting is meaningless and is not used at - all. (boolean value and defaults to true) - -* **auth_pool_size**: End user auth connection pool size. (integer value and - defaults to '100') - -* **auth_pool_connection_lifetime**: End user auth connection lifetime in - seconds. 
(integer value and defaults to '60') - -An example of an environment file with LDAP configuration for the keystone -domain called ``tripleodomain`` would look as follows:: - - parameter_defaults: - KeystoneLDAPDomainEnable: true - KeystoneLDAPBackendConfigs: - tripleodomain: - url: ldap://192.0.2.250 - user: cn=openstack,ou=Users,dc=tripleo,dc=example,dc=com - password: Secrete - suffix: dc=tripleo,dc=example,dc=com - user_tree_dn: ou=Users,dc=tripleo,dc=example,dc=com - user_filter: "(memberOf=cn=OSuser,ou=Groups,dc=tripleo,dc=example,dc=com)" - user_objectclass: person - user_id_attribute: cn - -This will create a file in the default domain directory -**/etc/keystone/domains** with the name **keystone.tripleodomain.conf**. And -will use the attributes to create such a configuration. - -Please note that both the ``KeystoneLDAPDomainEnable`` flag and the -configuration ``KeystoneLDAPBackendConfigs`` must be set. - -One can also specify several domains. For instance:: - - KeystoneLDAPBackendConfigs: - tripleodomain1: - url: ldap://tripleodomain1.example.com - user: cn=openstack,ou=Users,dc=tripleo,dc=example,dc=com - password: Secrete1 - ... - tripleodomain2: - url: ldaps://tripleodomain2.example.com - user: cn=openstack,ou=Users,dc=tripleo,dc=example,dc=com - password: Secrete2 - ... - -This will add two domains, called ``tripleodomain1`` and ``tripleodomain2``, -with their own configurations. - -Post-deployment setup ---------------------- - -After the overcloud deployment is done, you'll need to give the admin user a -role in the newly created domain. - -1. Source the overcloudrc.v3 file:: - - source overcloudrc.v3 - -2. Grant admin user on your domain:: - - openstack role add --domain $(openstack domain show tripleodomain -f value -c id)\ - --user $(openstack user show admin --domain default -f value -c id) \ - $(openstack role show admin -c id -f value) - -3. Test LDAP domain in listing users:: - - openstack user list --domain tripleodomain - -FreeIPA as an LDAP backend --------------------------- - -Before configuring the domain, there needs to be a user that will query -FreeIPA. In this case, we'll create an account called ``keystone`` in FreeIPA, -and we'll use it's credentials on our configuration. On the FreeIPA side and -with proper credentials loaded, we'll do the following:: - - ipa user-add keystone --cn="keystone user" --first="keystone" \ - --last="user" --password - -This will create the user and we'll be prompted to write the password for it. - -Configuring FreeIPA as an LDAP backend for a domain can be done by using the -following template as a configuration:: - - parameter_defaults: - KeystoneLDAPDomainEnable: true - KeystoneLDAPBackendConfigs: - freeipadomain: - url: ldaps://$FREEIPA_SERVER - user: uid=keystone,cn=users,cn=accounts,$SUFFIX - password: $SOME_PASSWORD - suffix: $SUFFIX - user_tree_dn: cn=users,cn=accounts,$SUFFIX - user_objectclass: inetOrgPerson - user_id_attribute: uid - user_name_attribute: uid - user_mail_attribute: mail - group_tree_dn: cn=groups,cn=accounts,$SUFFIX - group_objectclass: groupOfNames - group_id_attribute: cn - group_name_attribute: cn - group_member_attribute: member - group_desc_attribute: description - user_enabled_attribute: nsAccountLock - user_enabled_default: False - user_enabled_invert: true - -* $FREEIPA_SERVER will contain the FQDN that points to your FreeIPA server. 
- Remember that it needs to be available from some network (most likely the - ctlplane network) in TripleO - -* You should also make sure that the ldap ports need to be accessible. In this - case, we need port 636 available since we're using the ``ldaps`` scheme. - However, if you would be using the ``use_tls`` configuration option or if you - are not using TLS at all (not recommended), you might also need port 389. - -* To use TLS, the FreeIPA server's certificate must also be trusted by the - openldap client libraries. If you're using novajoin (and - :doc:`tls-everywhere`) this is easily achieved since all the nodes in your - overcloud are enrolled in FreeIPA. If you're not using this setup, you should - then follow the 'Getting the overcloud to trust CAs' section in the - :doc:`ssl` document. - -* $SUFFIX will be the domain for your users. Given a domain, the suffix DN can - be created with the following snippet:: - - suffix=`echo $DOMAIN | sed -e 's/^/dc=/' -e 's/\./,dc=/g'` - - Given the domain ``example.com`` the suffix will be ``dc=example,dc=com``. - -* In this configuration, we configure this backend as read-only. So you'll need - to create your OpenStack users on the FreeIPA side. - -.. References - -.. _`OpenStack Identity documentation`: https://docs.openstack.org/admin-guide/identity-integrate-with-ldap.html diff --git a/deploy-guide/source/features/extra_config.rst b/deploy-guide/source/features/extra_config.rst deleted file mode 100644 index bc840e35..00000000 --- a/deploy-guide/source/features/extra_config.rst +++ /dev/null @@ -1,279 +0,0 @@ -Node customization and Third-Party Integration -============================================== - -It is possible to enable additional configuration during one of the -following deployment phases: - -* firstboot - run once config (performed on each node by cloud-init) -* per-node - run after the node is initially created but before services are deployed and configured (e.g by puppet) -* post-deploy - run after the services have been deployed and configured (e.g by puppet) - -.. note:: - - This documentation assumes some knowledge of heat HOT_ template - syntax, and makes use of heat environment_ files. See the upstream - heat documentation_ for further information. - -.. _HOT: https://docs.openstack.org/heat/template_guide/hot_guide.html -.. _environment: https://docs.openstack.org/heat/template_guide/environment.html -.. _documentation: https://docs.openstack.org/heat/template_guide/index.html - -Firstboot extra configuration ------------------------------ - -Firstboot configuration is optional, and is performed on *all* nodes on initial -deployment. - -Any configuration possible via cloud-init may be performed at this point, -either by applying cloud-config yaml or running arbitrary additional -scripts. - -The heat templates used for deployment provide the `OS::TripleO::NodeUserData` -resource as the interface to enable this configuration. A basic example of its -usage is provided below, followed by some notes related to real world -usage. 
- -The script snippet below shows how to create a simple example containing two -scripts, combined via the MultipartMime_ resource:: - - mkdir firstboot - cat > firstboot/one_two.yaml << EOF - heat_template_version: 2014-10-16 - - resources: - userdata: - type: OS::Heat::MultipartMime - properties: - parts: - - config: {get_resource: one_config} - - config: {get_resource: two_config} - - one_config: - type: OS::Heat::SoftwareConfig - properties: - config: | - #!/bin/bash - echo "one" > /tmp/one - - two_config: - type: OS::Heat::SoftwareConfig - properties: - config: | - #!/bin/bash - echo "two" > /tmp/two - - outputs: - OS::stack_id: - value: {get_resource: userdata} - EOF - -.. _MultipartMime: https://docs.openstack.org/heat/template_guide/openstack.html#OS::Heat::MultipartMime - -.. note:: - - The stack must expose an `OS::stack_id` output which references an - OS::Heat::MultipartMime resource. - -This template is then mapped to the `OS::TripleO::NodeUserData` resource type -via a heat environment file:: - - cat > userdata_env.yaml << EOF - resource_registry: - OS::TripleO::NodeUserData: firstboot/one_two.yaml - EOF - -You may then deploy your overcloud referencing the additional environment file:: - - openstack overcloud deploy --templates \ - -e -e userdata_env.yaml - -.. note:: - - Make sure you pass the same environment parameters that were used at - deployment time in addition to your customization environments at the - end (`userdata_env.yaml`). - -.. note:: - - The userdata is applied to *all* nodes in the deployment. If you need role - specific logic, the userdata scripts can contain conditionals which use - e.g the node hostname to determine the role. - -.. note:: - - OS::TripleO::NodeUserData is only applied on initial node deployment, - not on any subsequent stack update, because cloud-init only processes the - nova user-data once, on first boot. If you need to add custom configuration - that runs on all stack creates and updates, see the - `Post-Deploy extra configuration`_ section below. - -For a more complete example, which creates an additional user and configures -SSH keys by accessing the nova metadata server, see -`/usr/share/openstack-tripleo-heat-templates/firstboot/userdata_example.yaml` -on the undercloud node or the tripleo-heat-templates_ repo. - -.. _tripleo-heat-templates: https://opendev.org/openstack/tripleo-heat-templates - -Per-node extra configuration ----------------------------- - -This configuration happens after any "firstboot" configuration is applied, -but before any Post-Deploy configuration takes place. - -Typically these interfaces are suitable for preparing each node for service -deployment, such as registering nodes with a content repository, or creating -additional data to be consumed by the post-deploy phase. They may also be suitable -integration points for additional third-party services, drivers or plugins. - - -.. note:: - If you only need to provide some additional data to the existing service - configuration, see :ref:`node_config` as this may provide a simpler solution. - -.. note:: - The per-node interface only enable *individual* nodes to be configured, - if cluster-wide configuration is required, the Post-Deploy interfaces should be - used instead. 
- -The following interfaces are available: - -* `OS::TripleO::ControllerExtraConfigPre`: Controller node additional configuration -* `OS::TripleO::ComputeExtraConfigPre`: Compute node additional configuration -* `OS::TripleO::CephStorageExtraConfigPre` : CephStorage node additional configuration -* `OS::TripleO::NodeExtraConfig`: additional configuration applied to all nodes (all roles). - -Below is an example of a per-node configuration template that shows additional node configuration -via standard heat SoftwareConfig_ resources:: - - mkdir -p extraconfig/per-node - cat > extraconfig/per-node/example.yaml << EOF - - heat_template_version: 2014-10-16 - - parameters: - server: - description: ID of the controller node to apply this config to - type: string - - resources: - NodeConfig: - type: OS::Heat::SoftwareConfig - properties: - group: script - config: | - #!/bin/sh - echo "Node configured" > /root/per-node - - NodeDeployment: - type: OS::Heat::SoftwareDeployment - properties: - config: {get_resource: NodeConfig} - server: {get_param: server} - outputs: - deploy_stdout: - description: Deployment reference, used to trigger post-deploy on changes - value: {get_attr: [NodeDeployment, deploy_stdout]} - - EOF - -The "server" parameter must be specified in all per-node ExtraConfig templates, -this is the server to apply the configuration to, and is provided by the parent -template. Optionally additional implementation specific parameters may also be -provided by parameter_defaults, see below for more details. - -Any resources may be defined in the template, but the outputs must define a "deploy_stdout" -value, which is an identifier used to detect if the configuration applied has changed, -hence when any post-deploy actions (such as re-applying puppet manifests on update) -may need to be performed. - -For a more complete example showing how to apply a personalized map of per-node configuration -to each node, see `/usr/share/openstack-tripleo-heat-templates/puppet/extraconfig/pre_deploy/per_node.yaml` -or the tripleo-heat-templates_ repo. - -.. _SoftwareConfig: https://docs.openstack.org/heat/template_guide/software_deployment.html - - -Post-Deploy extra configuration -------------------------------- - -Post-deploy additional configuration is possible via the -`OS::TripleO::NodeExtraConfigPost` interface, which is applied after any -per-node configuration has completed. - -.. note:: - - The `OS::TripleO::NodeExtraConfigPost` applies configuration to *all* nodes, - there is currently no per-role NodeExtraConfigPost interface. - -Below is an example of a post-deployment configuration template:: - - mkdir -p extraconfig/post-deploy/ - cat > extraconfig/post-deploy/example.yaml << EOF - heat_template_version: 2014-10-16 - - parameters: - servers: - type: json - EndpointMap: - default: {} - type: json - - # Optional implementation specific parameters - some_extraparam: - type: string - - resources: - - ExtraConfig: - type: OS::Heat::SoftwareConfig - properties: - group: script - config: - str_replace: - template: | - #!/bin/sh - echo "extra _APARAM_" > /root/extra - params: - _APARAM_: {get_param: some_extraparam} - - ExtraDeployments: - type: OS::Heat::SoftwareDeploymentGroup - properties: - servers: {get_param: servers} - config: {get_resource: ExtraConfig} - actions: ['CREATE'] # Only do this on CREATE - EOF - -The "servers" parameter must be specified in all NodeExtraConfigPost -templates, this is the server list to apply the configuration to, -and is provided by the parent template. 
- -Optionally, you may define additional parameters which are consumed by the -implementation. These may then be provided via parameter_defaults in the -environment which enables the configuration. - -.. note:: - - If the parameter_defaults approach is used, care must be used to avoid - unintended reuse of parameter names between multiple templates, because - parameter_defaults is applied globally. - -The "actions" property of the `OS::Heat::SoftwareDeploymentGroup` resource may be -used to specify when the configuration should be applied, e.g only on CREATE, -only on DELETE etc. If this is omitted, the heat default is to apply the -config on CREATE and UPDATE, e.g on initial deployment and every subsequent -update. - -The extra config may be enabled via an environment file:: - - cat > post_config_env.yaml << EOF - resource_registry: - OS::TripleO::NodeExtraConfigPost: extraconfig/post-deploy/example.yaml - parameter_defaults: - some_extraparam: avalue123 - EOF - -You may then deploy your overcloud referencing the additional environment file:: - - openstack overcloud deploy --templates \ - -e -e post_config_env.yaml diff --git a/deploy-guide/source/features/high_availability.rst b/deploy-guide/source/features/high_availability.rst deleted file mode 100644 index aed87701..00000000 --- a/deploy-guide/source/features/high_availability.rst +++ /dev/null @@ -1,18 +0,0 @@ -Configuring High Availability -============================= - -|project| supports high availability of the controller services using -Pacemaker. To enable this feature, you need at least three controller -nodes, enable Pacemaker as the resource manager and specify an NTP -server. - -Create the following environment file:: - - $ cat ~/environment.yaml - parameter_defaults: - ControllerCount: 3 - -And add the following arguments to your `openstack overcloud deploy` -command to deploy with HA:: - - -e environment.yaml -e /usr/share/openstack-tripleo-heat-templates/environments/docker-ha.yaml --ntp-server pool.ntp.org diff --git a/deploy-guide/source/features/index.rst b/deploy-guide/source/features/index.rst deleted file mode 100644 index fe77bb19..00000000 --- a/deploy-guide/source/features/index.rst +++ /dev/null @@ -1,51 +0,0 @@ -Feature Configurations -====================== - -Documentation on additional features for |project|. - -.. 
toctree:: - - api_policies - backends - baremetal_overcloud - composable_services - custom_networks - custom_roles - compute_nvdimm - deploy_cellv2 - deploy_swift - deployed_server - designate - disable_telemetry - distributed_compute_node - distributed_multibackend_storage - extra_config - tolerated_failure - high_availability - instance_ha - ipsec - keystone_security_compliance - lvmfilter - multiple_overclouds - network_isolation - network_isolation_virt - node_config - node_specific_hieradata - octavia - ops_tools - oslo_messaging_config - ovs_dpdk_config - sriov_deployment - rhsm - role_specific_parameters - routed_spine_leaf_network - server_blacklist - security_hardening - split_stack - ssl - tls-introduction - tls-everywhere - tuned - undercloud_minion - vdpa_deployment - pre_network_config diff --git a/deploy-guide/source/features/instance_ha.rst b/deploy-guide/source/features/instance_ha.rst deleted file mode 100644 index 709ad746..00000000 --- a/deploy-guide/source/features/instance_ha.rst +++ /dev/null @@ -1,81 +0,0 @@ -Configuring Instance High Availability -====================================== - -|project|, starting with the Queens release, supports a form of instance -high availability when the overcloud is deployed in a specific way. - -In order to activate instance high-availability (also called ``IHA``) -the following steps are needed: - -1. Add the following environment file to your overcloud deployment command. Make sure you are deploying an HA overcloud:: - - -e /usr/share/openstack-tripleo-heat-templates/environments/compute-instanceha.yaml - -2. Instead of using the ``Compute`` role use the ``ComputeInstanceHA`` role for your compute plane. The ``ComputeInstanceHA`` role has the following additional services when compared to the ``Compute`` role:: - - - OS::TripleO::Services::ComputeInstanceHA - - OS::TripleO::Services::PacemakerRemote - -3. Make sure that fencing is configured for the whole overcloud (controllers and computes). You can do so by adding an environment file to your deployment command that contains the necessary fencing information. For example:: - - parameter_defaults: - EnableFencing: true - FencingConfig: - devices: - - agent: fence_ipmilan - host_mac: 00:ec:ad:cb:3c:c7 - params: - login: admin - ipaddr: 192.168.24.1 - ipport: 6230 - passwd: password - lanplus: 1 - - agent: fence_ipmilan - host_mac: 00:ec:ad:cb:3c:cb - params: - login: admin - ipaddr: 192.168.24.1 - ipport: 6231 - passwd: password - lanplus: 1 - - agent: fence_ipmilan - host_mac: 00:ec:ad:cb:3c:cf - params: - login: admin - ipaddr: 192.168.24.1 - ipport: 6232 - passwd: password - lanplus: 1 - - agent: fence_ipmilan - host_mac: 00:ec:ad:cb:3c:d3 - params: - login: admin - ipaddr: 192.168.24.1 - ipport: 6233 - passwd: password - lanplus: 1 - - agent: fence_ipmilan - host_mac: 00:ec:ad:cb:3c:d7 - params: - login: admin - ipaddr: 192.168.24.1 - ipport: 6234 - passwd: password - lanplus: 1 - - -Once the deployment is completed, the overcloud should show a stonith device for each compute node and one for each controller node and a GuestNode for every compute node. The expected behavior is that if a compute node dies, it will be fenced and the VMs that were running on it will be evacuated (i.e. restarted) on another compute node. 
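As an optional check, the Pacemaker status output on any controller can
be inspected for the stonith devices and GuestNode entries mentioned
above. This is only a sketch; the exact resource names vary per
deployment::

  sudo pcs status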
- -In case it is necessary to limit which VMs are to be resuscitated on another compute node it is possible to tag with ``evacuable`` either the image:: - - openstack image set --tag evacuable 0c305437-89eb-48bc-9997-e4e4ea77e449 - -the flavor:: - - nova flavor-key bb31d84a-72b3-4425-90f7-25fea81e012f set evacuable=true - -or the VM:: - - nova server-tag-add 89b70b07-8199-46f4-9b2d-849e5cdda3c2 evacuable - -At the moment this last method should be avoided because of a significant reason: setting the tag on a single VM means that just *that* instance will be evacuated, tagging no VM implies that *all* the servers on the compute node will resuscitate. In a partial tagging situation, if a compute node runs only untagged VMs, the cluster will evacuate all of them, ignoring the overall tag status. diff --git a/deploy-guide/source/features/ipsec.rst b/deploy-guide/source/features/ipsec.rst deleted file mode 100644 index 22bcd069..00000000 --- a/deploy-guide/source/features/ipsec.rst +++ /dev/null @@ -1,170 +0,0 @@ -.. _ipsec: - -Deploying with IPSec -==================== - -Since the Queens release, it is possible to encrypt communications within the -internal network by setting up IPSec tunnels configured by TripleO. - -There are several options that TripleO provides deployers whose requirements call -for encrypting everything in the network. For example, TLS Everywhere has been -supported since the Pike release. This method requires the deployer -to procure a CA server on a separate node. FreeIPA is recommended for this. - -However, there are cases where a deployers authorized CA does not have an -interface that can automatically request certificates. Furthermore, it may -not be possible to add another node to the network for various other reasons. -For these cases, IPSec is a viable, alternative solution. - -.. note:: For more information on TLS Everywhere, please see - :doc:`tls-everywhere`. - -IPSec thus, provides an alternative to TLS Everywhere. With IPSec the encryption -happens on the IP layer, and not over TCP (as happens in TLS). As a result, the -services will communicate with each other over standard 'http', and not -actually know that the underlying traffic is being encrypted. This means that -the services do not require any extra configuration. - -Solution Overview ------------------ - -The current IPSec solution relies on `Libreswan`_, which is already available -in RHEL and CentOS, and is driven and configured via Ansible. - -There are two types of tunnels configured in the overcloud: - -* **node-to-node tunnels**: These tunnels are a simple 1-to-1 tunnel between the ip - addresses of two nodes on the same network. This results in a tunnel to each node - in the overcloud for each network that the node is connected to. - -* **Virtual IP tunnels**: These are tunnels from each Virtual IP address and - each node that can contact to them. The node hosting the VIP will open a tunnel - for any host in the specific network that can properly authenticate. This - makes the configuration simpler, allows for easier scaling, and assists - deployers to securely communicate with the Virtual IP from hosts - or services that are not necessarily managed by TripleO. - -Authentication is currently done via a Pre-Shared Key (PSK) which all the nodes -share. However, future iterations will add more authentication methods to the -deployment. - -Currently, the default encryption method is AES using GCM with a block size of -128 bits. Changing this default will be talked about in a further section. 
- -To handle the moving of a Virtual IP from one node to another (VIP failover), -we also deploy a pacemaker resource agent per VIP. This resource agent is in -charge of creating the tunnel when the VIP is set in a certain node, and -removing the tunnel when it moves to another node. - -.. note:: One important thing to note is that we set tunnels for every network - except the control plane network. The reason for this is that in our - testing, setting up tunnels for this network cuts of the - communication between the overcloud nodes and the undercloud. We thus - rely on the fact that Ansible uses SSH to communicate with the - overcloud nodes, thus, still giving the deployment secure - communications. - -Deployment ----------- - -.. note:: Please note that the IPSec deployment depends on Ansible being used - for the overcloud deployment. For more information on this, please - see :doc:`../deployment/ansible_config_download` - -.. note:: Also note that the IPSec deployment assumes that you're using network - isolation. For more information on this, please see - :doc:`network_isolation` - -To enable IPSec tunnels for the overcloud, you need to use the following -environment file:: - - /usr/share/openstack-tripleo-heat-templates/environments/ipsec.yaml - -With this, your deployment command will be similar to this:: - - openstack overcloud deploy \ - ... - -e /usr/share/openstack-tripleo-heat-templates/environments/network-isolation.yaml \ - -e /home/stack/templates/network-environment.yaml \ - -e /usr/share/openstack-tripleo-heat-templates/environments/ipsec.yaml - -To change the default encryption algorithm, you can use an environment file -that looks as follows:: - - parameter_defaults: - IpsecVars: - ipsec_algorithm: 'aes_gcm256-null' - -The ``IpsecVars`` option is able to change any parameter in the tripleo-ipsec -ansible role. - -.. note:: For more information on the algorithms that Libreswan supports, - please check the `Libreswan documentation`_ - -.. note:: For more information on the available parameters, check the README - file in the `tripleo-ipsec repository`_. - - -Verification ------------- - -To verify that the IPSec tunnels were setup correctly after the overcloud -deployment is done, you'll need to do several things: - -* Log into each node - -* In each node, check the output of ``ipsec status`` with sudo or root - privileges. This will show you the status of all the tunnels that are set up - in the node. - - - The line starting with "Total IPsec connections" should show - that there are active connections. - - The Security Associations should be all authenticated:: - - 000 IKE SAs: total(23), half-open(0), open(0), authenticated(23), anonymous(0) - 000 IPsec SAs: total(37), authenticated(37), anonymous(0) - - Note that this number will vary depending on the number of networks and - nodes you have. - -* The configuration files generated can be found in the ``/etc/ipsec.d`` - directory. - - - They conveniently all start with the prefix **overcloud-** and - you could list them with the following command:: - - ls /etc/ipsec.d/overcloud-*.conf - - - The PSKs can be found with the following command:: - - ls /etc/ipsec.d/overcloud-*.secrets - - - You can find the connection names from the ``*.conf`` files. - - - To view the status of a certain connection, you can use the aforementioned - ``ipsec status`` command, and filter the result, searching for the specific - connection name. 
For instance, in the node that's hosting the Internal API - VIP, you can view the status of the tunnels for that VIP with the following - command:: - - ipsec status | grep overcloud-internal_api-vip-tunnel - -* To view the status of the resource agents, you can use ``pcs status``. - - - The IPSEC-related agents will have a name with the **tripleo-ipsec** - prefix. - - - Note that the resource agents for the tunnels are collocated with the IP - resource agents. This is enforced through a collocation rule in pacemaker. - You can verify this by running the ``pcs constraint`` command. - -.. note:: To get further explanations for understanding the output of the - ``ipsec status`` command, you can read the `Libreswan wiki entry on - the subject`_. - -.. References - -.. _Libreswan: https://libreswan.org/ -.. _Libreswan documentation: https://libreswan.org/man/ipsec.conf.5.html -.. _Libreswan wiki entry on the subject: https://libreswan.org/wiki/How_to_read_status_output -.. _tripleo-ipsec repository: https://github.com/openstack/tripleo-ipsec/blob/master/README.md diff --git a/deploy-guide/source/features/keystone_security_compliance.rst b/deploy-guide/source/features/keystone_security_compliance.rst deleted file mode 100644 index 90c6f057..00000000 --- a/deploy-guide/source/features/keystone_security_compliance.rst +++ /dev/null @@ -1,50 +0,0 @@ -Keystone Security Compliance -============================ - -Keystone has several configuration options available in order to comply with -standards such as Payment Card Industry - Data Security Standard (PCI-DSS) -v3.1. - -TripleO exposes these features via Heat parameters. They will be listed below: - -* ``KeystoneChangePasswordUponFirstUse``: Enabling this option requires users - to change their password when the user is created, or upon administrative - reset. - -* ``KeystoneDisableUserAccountDaysInactive``: The maximum number of days a user - can go without authenticating before being considered "inactive" and - automatically disabled (locked). - -* ``KeystoneLockoutDuration``: The number of seconds a user account will be - locked when the maximum number of failed authentication attempts (as - specified by ``KeystoneLockoutFailureAttempts``) is exceeded. - -* ``KeystoneLockoutFailureAttempts``: The maximum number of times that a user - can fail to authenticate before the user account is locked for the number of - seconds specified by ``KeystoneLockoutDuration``. - -* ``KeystoneMinimumPasswordAge``: The number of days that a password must be - used before the user can change it. This prevents users from changing their - passwords immediately in order to wipe out their password history and reuse - an old password. - -* ``KeystonePasswordExpiresDays``: The number of days for which a password will - be considered valid before requiring it to be changed. - -* ``KeystonePasswordRegex``: The regular expression used to validate password - strength requirements. - -* ``KeystonePasswordRegexDescription``: Describe your password regular - expression here in language for humans. - -* ``KeystoneUniqueLastPasswordCount``: This controls the number of previous - user password iterations to keep in history, in order to enforce that newly - created passwords are unique. - -.. note:: All of the aforementioned options only apply to the SQL backend. For - other identity backends like LDAP, these configuration settings - should be applied on that backend's side. - -.. note:: All of these parameters are defined as type ``string`` in heat. 
As - per the implementation, if left unset, they will not be configured at - all in the keystone configuration. diff --git a/deploy-guide/source/features/lvmfilter.rst b/deploy-guide/source/features/lvmfilter.rst deleted file mode 100644 index 19c07bbf..00000000 --- a/deploy-guide/source/features/lvmfilter.rst +++ /dev/null @@ -1,38 +0,0 @@ -Enable LVM2 filtering on overcloud nodes -======================================== - -While by default the overcloud image will not use LVM2 volumes, it is -possible that with some Cinder backends, for example remote iSCSI or FC, -the remote LUNs hosting OpenStack volumes will be visible on the nodes -hosting cinder-volume or nova-compute containers. - -In that case, should the OpenStack guest create LVM2 volumes inside its -additional disks, those volumes will be scanned by the LVM2 tools -installed on the hosting node. - -To prevent that, it is possible to configure an LVM2 global_filter when -deploying or updating the overcloud. The feature is, by default, disabled -and can be enabled passing `LVMFilterEnabled: true` in a Heat environment -file. - -When enabled, a global_filter will be computed from the list of physical -devices hosting active LVM2 volumes. This list can be extended further, -manually, listing any additional block device via `LVMFilterAllowlist` -parameter, which supports regexp. A deny list can be configured as well, -via `LVMFilterDenylist` parameter; it defaults to ['.*'] so that any -block device which isn't in the allow list will be ignored by the LVM2 -tools by default. - -Any of the template parameters can be set per-role; for example, to enable -the feature only on Compute nodes and add `/dev/sdb` to the deny list use:: - - $ cat ~/environment.yaml - parameter_defaults: - ComputeParameters: - LVMFilterEnabled: true - LVMFilterDenylist: - - /dev/sdb - -Then add the following argument to your `openstack overcloud deploy` command:: - - -e environment.yaml diff --git a/deploy-guide/source/features/multiple_overclouds.rst b/deploy-guide/source/features/multiple_overclouds.rst deleted file mode 100644 index 0229e9aa..00000000 --- a/deploy-guide/source/features/multiple_overclouds.rst +++ /dev/null @@ -1,132 +0,0 @@ -Multiple Overclouds from a Single Undercloud -============================================ - -TripleO can be used to deploy multiple Overclouds from a single Undercloud -node. - -In this scenario, a single Undercloud deploys and manages multiple Overclouds -as unique Heat stacks, with no stack resources shared between them. This can -be useful for environments where having a 1:1 ratio of Underclouds and -Overclouds creates an unmanageable amount of overhead, such as edge cloud -deployments. - -Requirements ------------- - -All Overclouds must be deployed in the same tenant (admin) on the Undercloud. -If using Ironic for baremetal provisioning, all Overclouds must be on the same -provisioning network. - - -Undercloud Deployment ---------------------- - -Deploy the Undercloud :doc:`as usual <../deployment/install_undercloud>`. - -First Overcloud ---------------- - -The first Overcloud can be deployed as usual using the :doc:`cli <../deployment/install_overcloud>`. - -Deploying Additional Overclouds -------------------------------- - -Additional Overclouds can be deployed by specifying a new stack name and any -necessary parameters in a new deployment plan. 
Networks for additional -overclouds must be defined as :doc:`custom networks <./custom_networks>` -with ``name_lower`` and ``service_net_map_replace`` directives for each -overcloud to have unique networks in the resulting stack. - -If your first cloud was named ``overcloud`` and had the following -``network_data.yaml``:: - - cat overcloud/network_data.yaml - - name: InternalApi - name_lower: internal_api_cloud_1 - service_net_map_replace: internal_api - vip: true - vlan: 201 - ip_subnet: '172.17.0.0/24' - allocation_pools: [{'start': '172.17.0.4', 'end': '172.17.0.250'}] - -You would create a new ``network_data.yaml`` with unique ``name_lower`` values -and VLANs for each network, making sure to specify ``service_net_map_replace``:: - - cat overcloud-two/network_data.yaml - - name: InternalApi - name_lower: internal_api_cloud_2 - service_net_map_replace: internal_api - vip: true - vlan: 301 - ip_subnet: '172.21.0.0/24' - allocation_pools: [{'start': '172.21.0.4', 'end': '172.21.0.250'}] - -Then deploy the second Overcloud as:: - - openstack overcloud deploy --templates ~/overcloud-two/templates/ \ - --stack overcloud-two \ - -n ~/overcloud-two/network_data.yaml - - -Managing Heat Templates ------------------------ - -If the Heat templates will be customized for any of the deployed clouds -(undercloud, or any overclouds) they should be copied from -/usr/share/openstack-tripleo-heat-templates to a new location before being -modified. Then the location would be specified to the deploy command using -the --templates flag. - -The templates could be managed using separate directories for each deployed -cloud:: - - ~stack/undercloud-templates - ~stack/overcloud-templates - ~stack/overcloud-two-templates - -Or by creating a repository in a version control system for the templates -and making a branch for each deployment. For example, using git:: - - ~stack/tripleo-heat-templates $ git branch - * master - undercloud - overcloud - overcloud-two - -To deploy to a specific cloud, ensure you are using the correct branch first:: - - cd ~stack/tripleo-heat-templates ;\ - git checkout overcloud-two ;\ - openstack overcloud deploy --templates ~stack/tripleo-heat-templates --stack overcloud-two -e $ENV_FILES - -Using Pre-Provisioned Nodes ---------------------------- - -Deploying multiple overclouds with the Ironic baremetal installer currently -requires a shared provisioning network. If this is not possible, you may use -the :ref:`Deployed Servers ` method with routed networks. Ensure that the values -in the ``HostnameMap`` match the stack name being used for each Overcloud. 
- -For example: -``hostnamemap.yaml`` for stack ``overcloud``:: - - parameter_defaults: - HostnameMap: - overcloud-controller-0: controller-00-rack01 - overcloud-controller-1: controller-01-rack02 - overcloud-controller-2: controller-02-rack03 - overcloud-novacompute-0: compute-00-rack01 - overcloud-novacompute-1: compute-01-rack01 - overcloud-novacompute-2: compute-02-rack01 - - -``hostnamemap.yaml`` for stack ``overcloud-two``:: - - parameter_defaults: - HostnameMap: - overcloud-two-controller-0: controller-00-rack01 - overcloud-two-controller-1: controller-01-rack02 - overcloud-two-controller-2: controller-02-rack03 - overcloud-two-novacompute-0: compute-00-rack01 - overcloud-two-novacompute-1: compute-01-rack01 - overcloud-two-novacompute-2: compute-02-rack01 diff --git a/deploy-guide/source/features/network_isolation.rst b/deploy-guide/source/features/network_isolation.rst deleted file mode 100644 index 9a6c5a40..00000000 --- a/deploy-guide/source/features/network_isolation.rst +++ /dev/null @@ -1,1085 +0,0 @@ -.. _network_isolation: - -Configuring Network Isolation -============================= - -Introduction ------------- - -|project| provides configuration of isolated overcloud networks. Using -this approach it is possible to host traffic for specific types of network -traffic (tenants, storage, API/RPC, etc.) in isolated networks. This allows -for assigning network traffic to specific network interfaces or bonds. Using -bonds provides fault tolerance, and may provide load sharing, depending on the -bonding protocols used. When isolated networks are configured, the OpenStack -services will be configured to use the isolated networks. If no isolated -networks are configured, all services run on the provisioning network. - -There are two parts to the network configuration: the parameters that apply -to the network as a whole, and the templates which configure the network -interfaces on the deployed hosts. - -Architecture ------------- - -The following VLANs will be used in the final deployment: - -* IPMI* (IPMI System controller, iLO, DRAC) -* Provisioning* (Undercloud control plane for deployment and management) -* Internal API (OpenStack internal API, RPC, and DB) -* Tenant (Tenant tunneling network for GRE/VXLAN networks) -* Storage (Access to storage resources from Compute and Controller nodes) -* Storage Management (Replication, Ceph back-end services) -* External (Public OpenStack APIs, Horizon dashboard, optionally floating IPs) -* Floating IP (Optional, can be combined with External) - -.. note:: - Networks marked with '*' are usually native VLANs, others may be trunked. - -The External network should have a gateway router address. This will be used -in the subnet configuration of the network environment. - -If floating IPs will be hosted on a separate VLAN from External, that VLAN will -need to be trunked to the controller hosts. It will not be included in the -network configuration steps for the deployment, the VLAN will be added via -Neutron and Open vSwitch. There can be multiple floating IP networks, and they -can be attached to multiple bridges. The VLANs will be trunked, but not -configured as interfaces. Instead, Neutron will create an OVS port with the -VLAN segmentation ID on the chosen bridge for each floating IP network. - -The Provisioning network will usually be delivered on a dedicated interface. -DHCP+PXE is used to initially deploy, then the IP will be converted to static. 
-By default, PXE boot must occur on the native VLAN, although some system -controllers will allow booting from a VLAN. The Provisioning interface is -also used by the Compute and Storage nodes as their default gateway, in order -to contact DNS, NTP, and for system maintenance. The Undercloud can be used -as a default gateway, but in that case all traffic will be behind an IP -masquerade NAT, and will not be reachable from the rest of the network. The -Undercloud is also a single point of failure for the overcloud default route. -If there is an external gateway on a router device on the Provisioning network, -the Undercloud Neutron DHCP server can offer that instead. If the -``network_gateway`` was not set properly in undercloud.conf, it can be set -manually after installing the Undercloud:: - - neutron subnet-show # Copy the UUID from the provisioning subnet - neutron subnet-update --gateway_ip - -Often, the number of VLANs will exceed the number of physical Ethernet ports, -so some VLANs are delivered with VLAN tagging to separate the traffic. On an -Ethernet bond, typically all VLANs are trunked, and there is no traffic on the -native VLAN (native VLANs on bonds are supported, but will require customizing -the NIC templates). - -.. note:: - It is recommended to deploy a Tenant VLAN (which is used for tunneling GRE - and/or VXLAN) even if Neutron VLAN mode is chosen and tunneling is disabled - at deployment time. This requires the least customization at deployment time, - and leaves the option available to use tunnel networks as utility networks, - or for network function virtualization in the future. Tenant networks will - still be created using VLANs, but the operator can create VXLAN tunnels for - special use networks without consuming tenant VLANs. It is possible to add - VXLAN capability to a deployment with a Tenant VLAN, but it is not possible - to add a Tenant VLAN to an already deployed set of hosts without disruption. - -The networks are connected to the roles as follows: - -Controller: - -* Provisioning -* Internal API -* Storage -* Storage Management -* Tenant -* External - -Compute: - -* Provisioning -* Internal API -* Storage -* Tenant - -Ceph Storage: - -* Provisioning -* Storage -* Storage Management - -Cinder Storage: - -* Provisioning -* Internal API -* Storage -* Storage Management - -Swift Storage: - -* Provisioning -* Internal API -* Storage -* Storage Management - -Workflow --------- - -The procedure for enabling network isolation is this: - -#. Create and edit network data YAML definition file for the cluster -#. Use the network data YAML definition file as input to create network - resources and generate the networks-deployed-environment.yaml - environment file -#. Generate templates from Jinja2 -#. Create network environment overrides file (e.g. ~/network-environment-overrides.yaml) -#. Make a copy of the appropriate sample network interface configurations -#. Edit the network interface configurations to match local environment -#. Deploy overcloud with the proper parameters to include network isolation - -.. admonition:: Victoria and prior releases - - For Victoria and earlier releases the procedure is: - - #. Create and edit network data YAML definition file for the cluster - #. Generate templates from Jinja2 - #. Create network environment overrides file (e.g. ~/network-environment-overrides.yaml) - #. Make a copy of the appropriate sample network interface configurations - #. Edit the network interface configurations to match local environment - #. 
Deploy overcloud with the proper parameters to include network isolation - -The next section will walk through the elements that need to be added to -the network-environment.yaml to enable network isolation. The sections -after that deal with configuring the network interface templates. The final step -will deploy the overcloud with network isolation and a custom environment. - -Create and Edit network data YAML definition file -------------------------------------------------- - -Use the network-data-samples_ in tripleo-heat-templates_ as a reference and -customize the networks, IP addressing, VLANs, etc., as per the cluster -requirements. - -Please refer to the :ref:`network_definition_opts` reference section on the -:ref:`custom_networks` document page for a reference on available options in -the network data YAML schema. - -.. admonition:: Victoria and prior releases - - Copy the default ``network_data.yaml`` file and customize the networks, IP - subnets, VLANs, etc., as per the cluster requirements: - -.. code-block:: bash - - $ cp /usr/share/openstack-tripleo-heat-templates/network_data.yaml ~/templates/network_data.yaml - -Create the networks, segments and subnet resources on the Undercloud --------------------------------------------------------------------- - -.. admonition:: Victoria and prior releases - - For Victoria and prior releases the network resources are created as part of - the overcloud heat stack. This step is not valid for these releases. - -Run the "openstack overcloud network provision" command to create/update the -networks on the Undercloud. This command will also generate the -``networks-deployed-environment.yaml`` environment file which must be used -when deploying the overcloud. - -.. code-block:: bash - - $ openstack overcloud network provision \ - --output ~/templates/networks-deployed-environment.yaml \ - ~/templates/custom_network_data.yaml - -.. note:: This step is optional when using the ``--baremetal-deployment`` and - ``--vip-data`` options with the ``overcloud deploy`` command. The - deploy command will detect the new format of the network data YAML - definition, run the workflow to create the networks and include the - ``networks-deployed-environment.yaml`` automatically. - -Create and Edit network Virtual IPs YAML definition file --------------------------------------------------------- - -.. admonition:: Victoria and prior releases - - For Victoria and prior releases the Virtual IP resources are created as part - of the overcloud heat stack. This step is not valid for these releases. - -Use the vip-data-samples_ in tripleo-heat-templates_ as a reference and -customize the networks, subnet, fixed_ips, dns_names etc., as per the cluster -requirements. - -Please refer to the :ref:`virtual_ips_definition_opts` reference section on the -:ref:`custom_networks` document page for a reference on available options in -the network Virtual IPs data YAML schema. - -The below example show a Virtual IPs definition for the default -network-isolation isolation scenario. - -.. code-block:: yaml - - - network: ctlplane - dns_name: overcloud - - network: external - dns_name: overcloud - - network: internal_api - dns_name: overcloud - - network: storage - dns_name: overcloud - - network: storage_mgmt - dns_name: overcloud - -Create the overcloud network Virtual IPs on the Undercloud ----------------------------------------------------------- - -.. 
admonition:: Victoria and prior releases - - For Victoria and prior releases the Virtual IP resources are created as part - of the overcloud heat stack. This step is not valid for these releases. - -Run the "openstack overcloud network vip provision" command to create/update -the network Virtual IPs on the Undercloud. This command will also generate the -``vips-deployed-environment.yaml`` environment file which must be used when -deploying the overcloud. - -.. code-block:: bash - - $ openstack overcloud network vip provision \ - --output ~/templates/vips-deployed-environment.yaml \ - ~/templates/custom_vip_data.yaml - -.. note:: This step is optional if using the ``--vip-data`` options with the - ``overcloud deploy`` command. In that case workflow to create the - Virtual IPs and including the environment is automated. - -Generate Templates from Jinja2 ------------------------------- - -With the Queens cycle, the network configuration templates have been converted to -Jinja2 templates, so that templates can be generated for each role with -customized network data. A utility script is available to generate the -templates based on the provided ``roles_data.yaml`` and ``network_data.yaml`` -inputs. - -Before generating the templates, ensure that the ``roles_data.yaml`` is -generated as per the cluster requirements using the command:: - - $ openstack overcloud roles generate -o ~/templates/roles_data.yaml Controller Compute \ - BlockStorage ObjectStorage CephStorage - -.. note:: - If the default ``roles_data.yaml`` or ``network_data.yaml`` file suits the - needs of the cluster, then there is no need to generate/customize the files, - the default files can be used as is for generating the templates. - -To generate the templates, run:: - - $ /usr/share/openstack-tripleo-heat-templates/tools/process-templates.py \ - -p /usr/share/openstack-tripleo-heat-templates \ - -r ~/templates/roles_data.yaml \ - -n ~/templates/network_data.yaml \ - -o ~/generated-openstack-tripleo-heat-templates --safe - -Now the temporary directory ``~/generated-openstack-tripleo-heat-templates`` -contains the generated template files according to provided role and network -data. Copy the required templates to a user specific template directory -``~/templates`` to modify the content to suit the cluster needs. Some of the -specific use of generated templates are explained by some of the below -sections. - -Create Network Environment Overrides File ------------------------------------------ - -The environment file will describe the network environment and will point to -the network interface configuration files to use for the overcloud nodes. - -Earlier method of generating network interface configurations with heat has -been deprecated since victoria. To use a custom network configuration copy -an appropriate sample network interface configuration file from -`tripleo-ansible `_ and make necessary changes. - -Then copy the generated -``net-single-nic-with-vlans.yaml`` file to apply the required cluster specific -changes, which overrides the defaults:: - - $ cp ~/generated-openstack-tripleo-heat-templates/environments/net-single-nic-with-vlans.yaml \ - ~/templates/network-environment-overrides.yaml - -Add any other parameters which should be overridden from the defaults to this -environment file. It is important for the ``ExternalInterfaceDefaultRoute`` to -be reachable on the subnet that is used for ``ExternalNetCidr``. This will -allow the OpenStack Public APIs and the Horizon Dashboard to be reachable. 
-Without a valid default route, the post-deployment steps cannot be performed. - -.. note:: - - The ``parameter_defaults`` section of the ``network-environment-overrides.yaml`` - contains pointers to the network interface configuration files for the deployed - roles. These files must exist at the path referenced here. - -Example:: - - parameter_defaults: - ControllerNetworkConfigTemplate: 'templates/single_nic_vlans/single_nic_vlans.j2' - ComputeNetworkConfigTemplate: 'templates/single_nic_vlans/single_nic_vlans.j2' - BlockStorageNetworkConfigTemplate: 'templates/single_nic_vlans/single_nic_vlans_storage.j2' - - # May set to br-ex if using floating IPs only on native VLAN on bridge br-ex - NeutronExternalNetworkBridge: "''" - NeutronNetworkType: 'vxlan,vlan' - NeutronTunnelTypes: 'vxlan' - # Customize bonding options if required (ignored if bonds are not used) - BondInterfaceOvsOptions: "lacp=active other-config:lacp-fallback-ab=true" - - -Users can still use the old network interface configuration heat templates -for custom network configuration. Set ``NetworkConfigWithAnsible`` parameter -to ``false`` to use them:: - - parameter_defaults: - NetworkConfigWithAnsible: false - - -Configure IP Subnets --------------------- -Each environment will have its own IP subnets for each network. This will vary -by deployment, and should be tailored to the environment. We will set the -subnet information for all the networks inside our environment file. Each -subnet will have a range of IP addresses that will be used for assigning IP -addresses to hosts and virtual IPs. - -In the example above, the Allocation Pool for the Internal API network starts -at .10 and continues to .200. This results in the static IPs and virtual IPs -that are assigned starting at .10, and will be assigned upwards with .200 being -the highest assigned IP. The External network hosts the Horizon dashboard and -the OpenStack public API. If the External network will be used for both cloud -administration and floating IPs, we need to make sure there is room for a pool -of IPs to use as floating IPs for VM instances. Alternately, the floating IPs -can be placed on a separate VLAN (which is configured by the operator -post-deployment). - -Configure Bonding Options ------------------------------------ - -The example bonding options will try to negotiate LACP, but will fallback to -active-backup if LACP cannot be established:: - - BondInterfaceOvsOptions: - "lacp=active other-config:lacp-fallback-ab=true" - -The BondInterfaceOvsOptions parameter will pass the options to Open vSwitch -when setting up bonding (if used in the environment). The value above will -enable fault-tolerance and load balancing if the switch supports (and is -configured to use) LACP bonding. If LACP cannot be established, the bond will -fallback to active/backup mode, with fault tolerance, but where only one link -in the bond will be used at a time. - -If the switches do not support LACP, then do not configure a bond on the -upstream switch. Instead, OVS can use ``balance-slb`` mode to enable using -two interfaces on the same VLAN as a bond:: - - # Use balance-slb for bonds configured on a switch without LACP support - "bond_mode=balance-slb lacp=off" - -Bonding with balance-slb allows a limited form of load balancing without the -remote switch's knowledge or cooperation. The basics of SLB are simple. SLB -assigns each source MAC+VLAN pair to a link and transmits all packets -from that MAC+VLAN through that link. 
Learning in the remote switch causes it -to send packets to that MAC+VLAN through the same link. - -OVS will balance traffic based on source MAC and destination VLAN. The -switch will only see a given MAC address on one link in the bond at a time, and -OVS will use special filtering to prevent packet duplication across the links. - -In addition, the following options may be added to the options string to tune -the bond:: - - # Force bond to use active-backup, e.g. for connecting to 2 different switches - "bond_mode=active-backup" - - # Set the LACP heartbeat to 1 second or 30 seconds (default) - "other_config:lacp-time=[fast|slow]" - - # Set the link detection to use miimon heartbeats or monitor carrier (default) - "other_config:bond-detect-mode=[miimon|carrier]" - - # If using miimon, heartbeat interval in milliseconds (100 is usually good) - "other_config:bond-miimon-interval=100" - - # Number of milliseconds a link must be up to be activated (to prevent flapping) - "other_config:bond_updelay=1000" - - # Milliseconds between rebalancing flows between bond members, zero to disable - "other_config:bond-rebalance-interval=10000" - -.. _creating_custom_interface_templates: - -Creating Custom Interface Templates ------------------------------------ - -In order to configure the network interfaces on each node, the network -interface templates may need to be customized. - -Start by copying the existing templates in `tripleo-ansible `_. -The first example copies the templates which include network bonding. The second -example copies the templates which use a single network interface with multiple -VLANs (this configuration is mostly intended for testing). - -.. _tripleo_ansible: https://opendev.org/openstack/tripleo-ansible/src/branch/master/tripleo_ansible/roles/tripleo_network_config/templates - -To copy the bonded example interface configurations, run:: - - $ cp /usr/share/ansible/roles/tripleo_network_config/templates/bonds_vlans/* \ - ~/templates/nic-configs - -To copy the single NIC with VLANs example interface configurations, run:: - - $ cp /usr/share/ansible/roles/tripleo_network_config/templates/single_nic_vlans/* \ - ~/templates/nic-configs - -Or, if you have custom NIC templates from another source, copy them to the -location referenced in the ``parameter_defaults`` section of the environment -file. - -Customizing the Interface Templates ------------------------------------ - -The following example configures a bond on interfaces 3 and 4 of a system -with 4 interfaces. This example is based on the controller template from the -bond-with-vlans sample templates, but the bond has been placed on nic3 and nic4 -instead of nic2 and nic3. The other roles will have a similar configuration, -but will have only a subset of the networks attached. - -.. note:: - The nic1, nic2... abstraction considers only network interfaces which are - connected to an Ethernet switch. If interfaces 1 and 4 are the only - interfaces which are plugged in, they will be referred to as nic1 and nic2. 
- -Example:: - - --- - {% set mtu_list = [ctlplane_mtu] %} - {% for network in role_networks %} - {{ mtu_list.append(lookup('vars', networks_lower[network] ~ '_mtu')) }} - {%- endfor %} - {% set min_viable_mtu = mtu_list | max %} - network_config: - - type: interface - name: nic1 - mtu: {{ ctlplane_mtu }} - use_dhcp: false - addresses: - - ip_netmask: {{ ctlplane_ip }}/{{ ctlplane_subnet_cidr }} - routes: {{ ctlplane_host_routes }} - - type: ovs_bridge - name: {{ neutron_physical_bridge_name }} - dns_servers: {{ ctlplane_dns_nameservers }} - domain: {{ dns_search_domains }} - members: - - type: ovs_bond - name: bond1 - mtu: {{ min_viable_mtu }} - ovs_options: {{ bond_interface_ovs_options }} - members: - - type: interface - name: nic3 - mtu: {{ min_viable_mtu }} - primary: true - - type: interface - name: nic4 - mtu: {{ min_viable_mtu }} - {% for network in role_networks %} - - type: vlan - mtu: {{ lookup('vars', networks_lower[network] ~ '_mtu') }} - vlan_id: {{ lookup('vars', networks_lower[network] ~ '_vlan_id') }} - addresses: - - ip_netmask: {{ lookup('vars', networks_lower[network] ~ '_ip') }}/{{ lookup('vars', networks_lower[network] ~ '_cidr') }} - routes: {{ lookup('vars', networks_lower[network] ~ '_host_routes') }} - {%- endfor %} - -.. note:: - If you are using old heat network interface configuration templates from - versions prior to Victoria, either migrate them to new format and update - the environments accordingly, for them to be used with ansible interface - or update them to use ``OS::Heat::Value`` resource. - -.. _migrating_existing_network_interface_templates: - -Migrating existing Network Interface Configuration Templates ------------------------------------------------------------- - -Below outlines some guidelines on how to migrate the old heat net configs -to the new format consumed by ansible. - -#. Create a Jinja2 template using `os-net-config `_ - schema. You can also use one of the in-tree `examples `_ - or a copy of `/etc/os-net-config/config.json` (converted to yaml) from - an existing node (per role) as a start. -#. Use `role_networks` and `networks_lower` ansible vars to loop through - available networks for a role and their lowercase names. -#. Use Jinja2 filters to replace heat intrinsic functions. For example - `min_viable_mtu` can be calculated with:: - - {% set mtu_list = [ctlplane_mtu] %} - {% for network in role_networks %} - {{ mtu_list.append(lookup('vars', networks_lower[network] ~ '_mtu')) }} - {%- endfor %} - {% set min_viable_mtu = mtu_list | max %} - -#. Heat parameters used with `get_param` can be mapped to ansible vars as per - below mapping. Host routes are pre-merged and are available as - `ctlplane_host_routes` and `networks_lower[network] ~ '_host_routes'` - ansible vars and can be used directly. - -#. Any custom heat parameter used, already not available as ansible var has to - be passed using `{{role.name}}ExtraGroupVars` THT interface and can then be - used in the templates. For example, `StorageSupernet` parameter has to be - passed as below:: - - parameter_defaults: - ControllerExtraGroupVars: - storage_supernet: 172.16.0.0/16 - -.. 
table:: **Heat parameters to Ansible vars Mapping** - - ======================================= ================================================================================================================ - Heat Parameters Ansible Vars - ======================================= ================================================================================================================ - BondInterfaceOvsOptions {{ bond_interface_ovs_options }} - ControlPlaneIp {{ ctlplane_ip }} - ControlPlaneSubnetCidr {{ ctlplane_subnet_cidr }} - ControlPlaneDefaultRoute {{ ctlplane_gateway_ip }} - ControlPlaneStaticRoutes {{ ctlplane_host_routes }} - ControlPlaneMtu {{ ctlplane_mtu }} - DnsServers {{ ctlplane_dns_nameservers }} - DnsSearchDomains {{ dns_search_domains }} - NumDpdkInterfaceRxQueues {{ num_dpdk_interface_rx_queues }} - {{network.name}}IpSubnet {{ lookup('vars', networks_lower[network] ~ '_ip') }}/{{ lookup('vars', networks_lower[network] ~ '_cidr') }} - {{network.name}}NetworkVlanID {{ lookup('vars', networks_lower[network] ~ '_vlan_id') }} - {{network.name}}Mtu {{ lookup('vars', networks_lower[network] ~ '_mtu') }} - {{network.name}}InterfaceDefaultRoute {{ lookup('vars', networks_lower[network] ~ '_gateway_ip') }} - {{network.name}}InterfaceRoutes {{ lookup('vars', networks_lower[network] ~ '_host_routes') }} - ======================================= ================================================================================================================ - - -.. _os_net_config_schema: https://opendev.org/openstack/os-net-config/src/branch/master/os_net_config/schema.yaml -.. _config_ex: https://opendev.org/openstack/tripleo-ansible/src/branch/master/tripleo_ansible/roles/tripleo_network_config/templates - -Updating Existing Network Interface Configuration Templates ------------------------------------------------------------ - -Prior to Victoria release the network interface configuration files -used ``OS::Heat::SoftwareConfig`` resource to configure interfaces:: - - resources: - OsNetConfigImpl: - type: OS::Heat::SoftwareConfig - properties: - group: script - config: - str_replace: - template: - get_file: /usr/share/openstack-tripleo-heat-templates/network/scripts/run-os-net-config.sh - params: - $network_config: - network_config: - [NETWORK INTERFACE CONFIGURATION HERE] - -These templates are now expected to use ``OS::Heat::Value`` resource:: - - resources: - OsNetConfigImpl: - type: OS::Heat::Value - properties: - value: - network_config: - [NETWORK INTERFACE CONFIGURATION HERE] - outputs: - config: - value: get_attr[OsNetConfigImpl, value] - - - -Old network interface configuration heat templates can be converted using -the provided conversion `convert-nic-config.py `_ script. - -.. _convert_nic_config: https://opendev.org/openstack/tripleo-heat-templates/src/branch/master/tools/convert_nic_config.py - - -Prior to the Ocata release, the network interface configuration files used -a different mechanism for running os-net-config. Ocata introduced the -run-os-net-config.sh script, and the old mechanism was deprecated. The -deprecated mechanism was removed in Queens, so older templates must be -updated. The resource definition must be changed, and {get_input: bridge_name} is -replaced with the special token "bridge_name", which will be replaced with -the value of the NeutronPhysicalBridge. 
- -Old Header:: - - resources: - OsNetConfigImpl: - type: OS::Heat::StructuredConfig - properties: - group: os-apply-config - config: - os_net_config: - network_config: - [NETWORK INTERFACE CONFIGURATION HERE] - -New Header:: - - resources: - OsNetConfigImpl: - type: OS::Heat::Value - properties: - value: - network_config: - [NETWORK INTERFACE CONFIGURATION HERE] - -Old Bridge Definition:: - - - type: ovs_bridge - name: {get_input: bridge_name} - -New Bridge Definition:: - - - type: ovs_bridge - name: bridge_name - -Configuring Interfaces ----------------------- -The individual interfaces may need to be modified. As an example, below are -the modifications that would be required to use the second NIC to connect to -an infrastructure network with DHCP addresses, and to use the third and fourth -NICs for the bond: - -Example:: - - network_config: - - type: interface - name: nic2 - mtu: {{ ctlplane_mtu }} - use_dhcp: true - defroute: no - - type: ovs_bridge - name: {{ neutron_physical_bridge_name }} - members: - - type: ovs_bond - name: bond1 - mtu: {{ min_viable_mtu }} - ovs_options: {{ bound_interface_ovs_options }} - members: - - type: interface - name: nic3 - mtu: {{ min_viable_mtu }} - primary: true - - type: interface - name: nic4 - mtu: {{ min_viable_mtu }} - -When using numbered interfaces ("nic1", "nic2", etc.) instead of named -interfaces ("eth0", "eno2", etc.), the network interfaces of hosts within -a role do not have to be exactly the same. For instance, one host may have -interfaces em1 and em2, while another has eno1 and eno2, but both hosts' NICs -can be referred to as nic1 and nic2. - -The numbered NIC scheme only takes into account the interfaces that are live -(have a cable attached to the switch). So if you have some hosts with 4 -interfaces, and some with 6, you should use nic1-nic4 and only plug in 4 -cables on each host. - -Configuring Routes and Default Routes -------------------------------------- -There are two ways that a host may have its default routes set. If the interface -is using DHCP, and the DHCP server offers a gateway address, the system will -install a default route for that gateway. Otherwise, a default route may be set -manually on an interface with a static IP. - -Although the Linux kernel supports multiple default gateways, it will only use -the one with the lowest metric. If there are multiple DHCP interfaces, this can -result in an unpredictable default gateway. In this case, it is recommended that -defroute=no be set for the interfaces other than the one where we want the -default route. In this case, we want a DHCP interface (NIC 2) to be the default -route (rather than the Provisioning interface), so we disable the default route -on the provisioning interface (note that the defroute parameter only applies -to routes learned via DHCP): - -Example:: - - network_config: - - type: interface - name: nic1 - use_dhcp: true - defroute: no - - type: interface - name: nic2 - use_dhcp: true - -To set a static route on an interface with a static IP, specify a route to the -subnet. 
For instance, here is a hypothetical route to the 10.1.2.0/24 subnet -via the gateway at 172.17.0.1 on the Internal API network: - -Example:: - - - type: vlan - device: bond1 - vlan_id: {{ internal_api_vlan_id }} - addresses: - - ip_netmask: {{ internal_api_ip ~ '/' ~ internal_api_cidr }} - routes: - - ip_netmask: 10.1.2.0/24 - next_hop: 172.17.0.1 - - -Using a Dedicated Interface For Tenant VLANs --------------------------------------------- -When using a dedicated interface or bond for tenant VLANs, a bridge must be -created. Neutron will create OVS ports on that bridge with the VLAN tags for the -provider VLANs. For example, to use NIC 4 as a dedicated interface for tenant -VLANs, you would add the following to the Controller and Compute templates: - -Example:: - - - type: ovs_bridge - name: br-vlan - members: - - type: interface - name: nic4 - primary: true - -A similar configuration may be used to define an interface or a bridge that -will be used for Provider VLANs. Provider VLANs are external networks which -are connected directly to the Compute hosts. VMs may be attached directly to -Provider networks to provide access to datacenter resources outside the cloud. - -Using the Native VLAN for Floating IPs --------------------------------------- -By default, Neutron is configured with an empty string for the Neutron external -bridge mapping. This results in the physical interface being patched to br-int, -rather than using br-ex directly (as in previous versions). This model allows -for multiple floating IP networks, using either VLANs or multiple physical -connections. - -Example:: - - parameter_defaults: - # May set to br-ex if using floating IPs only on native VLAN on bridge br-ex - NeutronExternalNetworkBridge: "''" - -When using only one floating IP network on the native VLAN of a bridge, -then you can optionally set the Neutron external bridge to e.g. "br-ex". This -results in the packets only having to traverse one bridge (instead of two), -and may result in slightly lower CPU when passing traffic over the floating -IP network. - -The next section contains the changes to the NIC config that need to happen -to put the External network on the native VLAN (if the External network is on -br-ex, then that bridge may be used for floating IPs in addition to the Horizon -dashboard and Public APIs). - -Using the Native VLAN on a Trunked Interface --------------------------------------------- -If a trunked interface or bond has a network on the native VLAN, then the IP -address will be assigned directly to the bridge and there will be no VLAN -interface. - -For example, if the external network is on the native VLAN, the bond -configuration would look like this: - -Example:: - - - type: ovs_bridge - name: bridge_name - dns_servers: {{ ctlplane_dns_nameservers }} - addresses: - - ip_netmask: {{ external_ip ~ '/' ~ external_cidr }} - routes: {{ external_host_routes }} - members: - - type: ovs_bond - name: bond1 - ovs_options: {{ bond_interface_ovs_options }} - members: - - type: interface - name: nic3 - primary: true - - type: interface - name: nic4 - -.. note:: - When moving the address (and possibly route) statements onto the bridge, be - sure to remove the corresponding VLAN interface from the bridge. Make sure to - make the changes to all applicable roles. The External network is only on the - controllers, so only the controller template needs to be changed. 
The Storage - network on the other hand is attached to all roles, so if the storage network - were on the default VLAN, all roles would need to be edited. - -Configuring Jumbo Frames ------------------------- -The Maximum Transmission Unit (MTU) setting determines the maximum amount of -data that can be transmitted by a single Ethernet frame. Using a larger value -can result in less overhead, since each frame adds data in the form of a -header. The default value is 1500, and using a value higher than that will -require the switch port to be configured to support jumbo frames. Most switches -support an MTU of at least 9000, but many are configured for 1500 by default. - -The MTU of a VLAN cannot exceed the MTU of the physical interface. Make sure to -include the MTU value on the bond and/or interface. - -Storage, Storage Management, Internal API, and Tenant networking can all -benefit from jumbo frames. In testing, tenant networking throughput was -over 300% greater when using jumbo frames in conjunction with VXLAN tunnels. - -.. note:: - It is recommended that the Provisioning interface, External interface, and - any floating IP interfaces be left at the default MTU of 1500. Connectivity - problems are likely to occur otherwise. This is because routers typically - cannot forward jumbo frames across L3 boundaries. - -Example:: - - - type: ovs_bond - name: bond1 - mtu: 9000 - ovs_options: {{ bond_interface_ovs_options }} - members: - - type: interface - name: nic3 - mtu: 9000 - primary: true - - type: interface - name: nic4 - mtu: 9000 - - type: vlan - device: bond1 - vlan_id: {{ external_vlan_id }} - addresses: - - ip_netmask: {{ external_ip ~ '/' ~ external_cidr }} - routes: {{ external_host_routes }} - - type: vlan - device: bond1 - mtu: 9000 - vlan_id: {{ internal_api_vlan_id }} - addresses: - - ip_netmask: {{ internal_api_ip ~ '/' ~ internal_api_cidr }} - -Assigning OpenStack Services to Isolated Networks -------------------------------------------------- -Each OpenStack service is assigned to a network using a default mapping. The -service will be bound to the host IP within the named network on each host. - -.. note:: - The services will be assigned to the networks according to the - ``ServiceNetMap`` in ``network/service_net_map.j2.yaml``. Unless these - defaults need to be overridden, the ServiceNetMap does not need to be defined - in the environment file. - -A service can be assigned to an alternate network by overriding the service to -network map in an environment file. The defaults should generally work, but -can be overridden. To override these values, add the ServiceNetMap to the -``parameter_defaults`` section of the network environment. 
- -Example:: - - parameter_defaults: - - ServiceNetMap: - NeutronTenantNetwork: tenant - CeilometerApiNetwork: internal_api - MongoDbNetwork: internal_api - CinderApiNetwork: internal_api - CinderIscsiNetwork: storage - GlanceApiNetwork: storage - GlanceRegistryNetwork: internal_api - KeystoneAdminApiNetwork: internal_api - KeystonePublicApiNetwork: internal_api - NeutronApiNetwork: internal_api - HeatApiNetwork: internal_api - NovaApiNetwork: internal_api - NovaMetadataNetwork: internal_api - NovaVncProxyNetwork: internal_api - SwiftMgmtNetwork: storage_mgmt - SwiftProxyNetwork: storage - HorizonNetwork: internal_api - MemcachedNetwork: internal_api - RabbitMqNetwork: internal_api - RedisNetwork: internal_api - MysqlNetwork: internal_api - CephClusterNetwork: storage_mgmt - CephPublicNetwork: storage - # Define which network will be used for hostname resolution - ControllerHostnameResolveNetwork: internal_api - ComputeHostnameResolveNetwork: internal_api - BlockStorageHostnameResolveNetwork: internal_api - ObjectStorageHostnameResolveNetwork: internal_api - CephStorageHostnameResolveNetwork: storage - -.. note:: - If an entry in the ServiceNetMap points to a network which does not exist, - that service will be placed on the Provisioning network. To avoid that, - make sure that each entry points to a valid network. - -Deploying the Overcloud With Network Isolation ----------------------------------------------- - -When deploying with network isolation, you should specify the NTP server for the -overcloud nodes. If the clocks are not synchronized, some OpenStack services may -be unable to start, especially when using HA. The NTP server should be reachable -from both the External and Provisioning subnets. The neutron network type should -be specified, along with the tunneling or VLAN parameters. Specify the libvirt -type if on bare metal, so that hardware virtualization will be used. - -To deploy with network isolation and include the network environment file, use -the ``-e`` and ``--networks-file`` parameters with the -``openstack overcloud deploy`` command. The following deploy command should -work for all of the subsequent examples: - -.. code-block:: bash - - openstack overcloud deploy \ - --templates \ - --networks-file ~/templates/custom_network_data.yaml \ - -e ~/templates/networks-deployed-environment.yaml \ - -e ~/templates/vips-deployed-environment.yaml \ - -e baremetal-deployed-environment.yaml \ - -e /usr/share/openstack-tripleo-heat-templates/environments/network-environment.yaml \ - -e ~/templates/network-environment-overrides.yaml \ - --ntp-server pool.ntp.org - -Alternatively include the network, Virtual IPs and baremetal provisioning -in the ``overcloud deploy`` command to do it all in one: - -.. code-block:: bash - - openstack overcloud deploy \ - --templates \ - --networks-file custom_network_data.yaml \ - --vip-file custom_vip_data.yaml \ - --baremetal-deployment baremetal_deployment.yaml \ - --network-config \ - -e /usr/share/openstack-tripleo-heat-templates/environments/network-environment.yaml \ - -e ~/templates/network-environment-overrides.yaml \ - --ntp-server pool.ntp.org - -.. note:: Please refer to :doc:`../provisioning/baremetal_provision` - document page for a reference on the ``baremetal_deployment.yaml`` - used in the above example. - -.. 
admonition:: Victoria and prior releases - - openstack overcloud deploy --templates \ - -e /usr/share/openstack-tripleo-heat-templates/environments/network-isolation.yaml \ - -e /usr/share/openstack-tripleo-heat-templates/environments/network-environment.yaml \ - -e ~/templates/network-environment-overrides.yaml \ - --ntp-server pool.ntp.org - -To deploy VXLAN mode ``network-environment-overrides.yaml`` should contain the -following parameter values:: - - NeutronNetworkType: vxlan - NeutronTunnelTypes: vxlan - -To deploy with VLAN mode, you should specify the range of VLANs that will be -used for tenant networks. ``network-environment.yaml`` might contain the -following parameter values:: - - NeutronNetworkType: vlan - NeutronBridgeMappings: 'datacentre:br-ex' - NeutronNetworkVLANRanges: 'datacentre:100:199' - -If a dedicated interface or bridge is used for tenant VLANs or provider -networks, it should be included in the bridge mappings. For instance, if the -tenant VLANs were on a bridge named ``br-vlan``, then use these values in -``network-environment.yaml``:: - - NeutronBridgeMappings: 'datacentre:br-ex,tenant:br-vlan' - NeutronNetworkVLANRanges: 'tenant:200:299' - -.. note:: - - You must also pass the environment files (again using the ``-e`` or - ``--environment-file`` option) whenever you make subsequent changes to the - overcloud, such as :doc:`../post_deployment/scale_roles`, - :doc:`../post_deployment/delete_nodes` or - :doc:`../post_deployment/upgrade/minor_update`. - -Creating Floating IP Networks ------------------------------ - -In order to provide external connectivity and floating IPs to the VMs, an -external network must be created. The physical network is referred to by the -name used in the Neutron bridge mappings when deployed. The default bridge -mapping is ``datacentre:br-ex``, which maps the physical network name -``datacentre`` to the bridge ``br-ex`` which includes the physical network -link. For instance, to create a floating IP network on the br-ex bridge on -VLAN 104, this command is used:: - - neutron net-create ext-net --router:external \ - --provider:physical_network datacentre \ - --provider:network_type vlan \ - --provider:segmentation_id 104 - -If the floating IP network is on the native VLAN of br-ex, then a different -command is used to create the external network:: - - neutron net-create ext-net --router:external \ - --provider:physical_network datacentre \ - --provider:network_type flat - -Floating IP networks do not have to use br-ex, they can use any bridge as -long as the NeutronExternalNetworkBridge is set to "''". If the floating IP -network were going to be placed on a bridge named "br-floating", and the -deployment command included the bridge mapping of -``datacenter:br-ex,floating:br-floating``, then following command would be used -to create a floating IP network on VLAN 105:: - - neutron net-create ext-net --router:external \ - --provider:physical_network floating \ - --provider:network_type vlan \ - --provider:segmentation_id 105 - -Then a range of IP addresses must be assigned in the floating IP subnet and -assigned to the physical network. 
The Subnet will be associated with the network -name that was created in the previous step (``ext-net``):: - - neutron subnet-create --name ext-subnet \ - --enable_dhcp=False \ - --allocation-pool start=10.0.2.50,end=10.0.2.100 \ - --gateway 10.0.2.254 \ - ext-net 10.0.2.0/24 - -Creating Provider Networks --------------------------- - -A Provider Network is a network which is attached physically to a datacenter -network that exists outside of the deployed overcloud. This can be an existing -infrastructure network, or a network which provides external access directly to -VMs via routing instead of floating IPs. - -When a provider network is created, it is associated with a physical network -with a bridge mapping, similar to how floating IP networks are created. The -provider network being added must be attached to both the controller and the -compute nodes, since the compute node will attach a VM virtual network -interface directly to an attached network interface. - -For instance, if the provider network being added is a VLAN on the br-ex -bridge, then this command would add a provider network on VLAN 201:: - - neutron net-create --provider:physical_network datacentre \ - --provider:network_type vlan --provider:segmentation_id 201 \ - --shared provider_network - -This command would create a shared network, but it is also possible to -specify a tenant instead of specifying ``--shared``, and then that network will -only be available to that tenant. If a provider network is marked as external, -then only the operator may create ports on that network. A subnet can be added -to a provider network if Neutron is to provide DHCP services to tenant VMs:: - - neutron subnet-create --name provider-subnet \ - --enable_dhcp=True \ - --allocation-pool start=10.0.3.50,end=10.0.3.100 \ - --gateway 10.0.3.254 \ - provider_network 10.0.3.0/24 - - -.. _tripleo-heat-templates: https://opendev.org/openstack/tripleo-heat-templates -.. _default-network-isolation: https://opendev.org/openstack/tripleo-heat-templates/src/branch/master/network-data-samples/default-network-isolation.yaml -.. _network-data-samples: https://opendev.org/openstack/tripleo-heat-templates/src/branch/master/network-data-samples/ -.. _vip-data-samples: https://opendev.org/openstack/tripleo-heat-templates/src/branch/master/network-data-samples/ diff --git a/deploy-guide/source/features/network_isolation_virt.rst b/deploy-guide/source/features/network_isolation_virt.rst deleted file mode 100644 index cc8a8035..00000000 --- a/deploy-guide/source/features/network_isolation_virt.rst +++ /dev/null @@ -1,73 +0,0 @@ -Configuring Network Isolation in Virtualized Environments -========================================================= - -Introduction ------------- - -This document describes how to configure a virtualized development -environment for use with network isolation. To make things as easy as -possible we will use the ``single-nic-with-vlans`` network isolation -templates to create isolated VLANs on top of the single NIC already -used for the provisioning/``ctlplane``. - -The ``single_nic_vlans.j2`` template work well for many virtualized environments -because they do not require adding any extra NICs. Additionally, Open vSwitch -automatically trunks VLANs for us, so there is no extra switch configuration -required. - -Create an External VLAN on Your Undercloud ------------------------------------------- - -By default all instack undercloud machines have a ``br-ctlplane`` which -is used as the provisioning network. 
We want to add an interface -on the 10.0.0.0/24 network which is used as the default "external" -(public) network for the overcloud. The default VLAN for the external -network is ``vlan10`` so we create an interface file to do this. Create -the following file ``/etc/sysconfig/network-scripts/ifcfg-vlan10``:: - - DEVICE=vlan10 - ONBOOT=yes - HOTPLUG=no - TYPE=OVSIntPort - OVS_BRIDGE=br-ctlplane - OVS_OPTIONS="tag=10" - BOOTPROTO=static - IPADDR=10.0.0.1 - PREFIX=24 - NM_CONTROLLED=no - -And then run ``ifup vlan10`` on your undercloud. - -Create a Custom Environment File --------------------------------- - -When using network isolation most of the network/config templates configure -static IPs for the ``ctlplane``. To ensure connectivity with Heat and Ec2 -metadata, we need to specify a couple of extra Heat parameters. Create a file -called ``/home/stack/custom.yaml`` with the following contents:: - - parameter_defaults: - EC2MetadataIp: 192.168.24.1 - ControlPlaneDefaultRoute: 192.168.24.1 - -Note that the specified IP addresses ``192.168.24.1`` are the same as the -undercloud IP address. - -Modify Your Overcloud Deploy to Enable Network Isolation --------------------------------------------------------- - -At this point we are ready to create the overcloud using the network -isolation defaults. The example command below demonstrates how to enable -network isolation by using Heat templates for network isolation, a -custom set of network config templates (single NIC VLANs), and our -``custom.yaml`` config file from above:: - - TEMPLATES=/path/to/openstack-tripleo-heat-templates - openstack overcloud deploy \ - --templates=$TEMPLATES \ - -e $TEMPLATES/environments/network-isolation.yaml \ - -e $TEMPLATES/environments/net-single-nic-with-vlans.yaml \ - -e /home/stack/custom.yaml - -After creating the stack you should now have a working virtualized -development environment with network isolation enabled. diff --git a/deploy-guide/source/features/node_config.rst b/deploy-guide/source/features/node_config.rst deleted file mode 100644 index 6abf45b4..00000000 --- a/deploy-guide/source/features/node_config.rst +++ /dev/null @@ -1,99 +0,0 @@ -.. _node_config: - -Modifying default node configuration -==================================== - -Many service configuration options are already exposed via parameters in the -top-level `overcloud.yaml` template, and these options should -be used wherever available to influence overcloud configuration. - -However in the event the service configuration required is not exposed -as a top-level parameter, there are flexible interfaces which enable passing -arbitrary additional configuration to the nodes on deployment. - -Making ansible variable changes -------------------------------- - -Since the Train release, it is now possible to change any Ansible variable -via group vars overriding. -For example, to override the `chrony_role_action` variable used in -ansible-role-chrony for all the Compute roles, we would do the following:: - - cat > compute_params.yaml << EOF - parameter_defaults: - ComputeExtraGroupVars: - chrony_role_action: config - EOF - - openstack overcloud deploy -e compute_params.yaml - -Any variable can be set in that interface and it will take precedence if the -variable was already set somewhere else (e.g. in the composable service). - -For any custom roles (defined via roles_data.yaml) the parameter name will -be RoleNameExtraGroupVars where RoleName is the name specified in -roles_data.yaml. 
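
As a minimal sketch (assuming a custom role named ``ComputeHCI`` defined in
``roles_data.yaml``), the same chrony override would be scoped to that role
like this::

    cat > computehci_params.yaml << EOF
    parameter_defaults:
      ComputeHCIExtraGroupVars:
        chrony_role_action: config
    EOF

    openstack overcloud deploy -e computehci_params.yaml
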
- -Making puppet configuration changes ------------------------------------ - -If you want to make a configuration change, either prior to initial deployment, -or subsequently via an update, you can pass additional data to puppet via hiera -data, using either the global "ExtraConfig" parameter, or one of the role-specific -parameters, e.g using `ComputeExtraConfig` to set the reserved_host_memory -value for compute nodes:: - - - cat > compute_params.yaml << EOF - parameter_defaults: - ComputeExtraConfig: - nova::compute::reserved_host_memory: some_value - EOF - - openstack overcloud deploy -e compute_params.yaml - -The parameters available are: - -* `ExtraConfig`: Apply the data to all nodes, e.g all roles -* `ComputeExtraConfig`: Apply the data only to Compute nodes -* `ControllerExtraConfig`: Apply the data only to Controller nodes -* `BlockStorageExtraConfig`: Apply the data only to BlockStorage nodes -* `ObjectStorageExtraConfig`: Apply the data only to ObjectStorage nodes -* `CephStorageExtraConfig`: Apply the data only to CephStorage nodes - -For any custom roles (defined via roles_data.yaml) the parameter name will -be RoleNameExtraConfig where RoleName is the name specified in roles_data.yaml. - -.. note:: - - Previously the parameter for Controller nodes was named - `controllerExtraConfig` (note the inconsistent capitalization). If - you are updating a deployment which used the old parameter, all - values previously passed to `controllerExtraConfig` should be - passed to `ControllerExtraConfig` instead, and - `controllerExtraConfig: {}` should be explicitly set in - `parameter_defaults`, to ensure that values from the old parameter - will not be used anymore. Also ComputeExtraConfig was previously - named NovaComputeExtraConfig, so a similar update should be performed - where the old naming is used. - -.. note:: - - Passing data via the ExtraConfig parameters will override any statically - defined values in the Hiera data files included as part of tripleo-heat-templates, - e.g those located in `puppet/hieradata` directory. - -.. note:: - - If you set a configuration of a puppet class which is not being included - yet, make sure you include it in the ExtraConfig definition, for example - if you want to change the Max IOPS per host setting:: - - parameter_defaults: - ComputeExtraConfig: - 'nova::scheduler::filter::max_io_ops_per_host': '4.0' - Compute_classes: - - '::nova::scheduler::filter' - - The Compute_classes data is included via the hiera_include in the - overcloud_common.pp puppet manifest. diff --git a/deploy-guide/source/features/node_specific_hieradata.rst b/deploy-guide/source/features/node_specific_hieradata.rst deleted file mode 100644 index 2a8b3f35..00000000 --- a/deploy-guide/source/features/node_specific_hieradata.rst +++ /dev/null @@ -1,113 +0,0 @@ -Provisioning of node-specific Hieradata -======================================= - -This guide assumes that your undercloud is already installed and ready to -deploy an overcloud. - -It is possible to provide some node-specific hieradata via Heat environment -files and as such customize one or more settings for a specific node, -regardless of the Heat `ResourceGroup` to which it belongs. - -As a sample use case, we will distribute a node-specific disks configuration -for a particular CephStorage node, which by default runs the `ceph-osd` service. 
- -Collecting the node UUID ------------------------- - -The node-specific hieradata is provisioned based on the node UUID, which is -hardware dependent and immutable across reboots/reinstalls. - -First make sure the introspection data is available for the target node, if it -isn't one may run introspection for a particular node as described in: -:doc:`../provisioning/introspect_single_node`. If the `undercloud.conf` does not have -`inspection_extras = true` prior to undercloud installation/upgrade -and introspection, then the machine unique UUID will not be in the -Ironic database. - -Then extract the machine unique UUID for the target node with a command like:: - - baremetal introspection data save NODE-ID | jq .extra.system.product.uuid | tr '[:upper:]' '[:lower:]' - -where `NODE-ID` is the target node Ironic UUID. The value returned by the above -command will be a unique and immutable machine UUID which isn't related to the -Ironic node UUID. For the next step, we'll assume the output was -`32e87b4c-c4a7-41be-865b-191684a6883b`. - -Creating the Heat environment file ----------------------------------- - -Assuming we want to use `/dev/sdc` as a data disk for `ceph-osd` on our target -node, we'll create a yaml file, e.g. `my-node-settings.yaml`, with the -following content depending on if either ceph-ansible (Pike and newer) -or puppet-ceph (Ocata and older). - -For ceph-ansible use:: - - parameter_defaults: - NodeDataLookup: {"32e87b4c-c4a7-41be-865b-191684a6883b": {"devices": ["/dev/sdc"]}} - -For puppet-ceph use:: - - resource_registry: - OS::TripleO::CephStorageExtraConfigPre: /path/to/tripleo-heat-templates/puppet/extraconfig/pre_deploy/per_node.yaml - - parameter_defaults: - NodeDataLookup: {"32e87b4c-c4a7-41be-865b-191684a6883b": {"ceph::profile::params::osds": {"/dev/sdc": {}}}} - -In the above example we're customizing only a single key for a single node, but -the structure is that of a UUID-mapped hash so it is possible to customize -multiple and different keys for multiple nodes. - -Generating the Heat environment file for Ceph devices ------------------------------------------------------ - -The tools directory of tripleo-heat-templates -(`/usr/share/openstack-tripleo-heat-templates/tools/`) contains a -utility called `make_ceph_disk_list.py` which can be used to create -a valid JSON Heat environment file automatically from Ironic's -introspection data. - -Export the introspection data from Ironic for the Ceph nodes to be -deployed:: - - baremetal introspection data save oc0-ceph-0 > ceph0.json - baremetal introspection data save oc0-ceph-1 > ceph1.json - ... - -Copy the utility to the stack user's home directory on the undercloud -and then use it to generate a `node_data_lookup.json` file which may -be passed during openstack overcloud deployment:: - - ./make_ceph_disk_list.py -i ceph*.json -o node_data_lookup.json -k by_path - -Pass the introspection data file from `baremetal introspection data save` for -all nodes hosting Ceph OSDs to the utility as you may only define -`NodeDataLookup` once during a deployment. The `-i` option can take an -expression like `*.json` or a list of files as input. - -The `-k` option defines the key of ironic disk data structure to use -to identify the disk to be used as an OSD. Using `name` is not -recommended as it will produce a file of devices like `/dev/sdd` which -may not always point to the same device on reboot. Thus, `by_path` is -recommended and is the default if `-k` is not specified. 
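
For illustration only (the machine UUID and ``by_path`` device string below
are placeholders), the generated file is a Heat environment in JSON form with
the same ``NodeDataLookup`` structure shown earlier::

    {
      "parameter_defaults": {
        "NodeDataLookup": {
          "32e87b4c-c4a7-41be-865b-191684a6883b": {
            "devices": ["/dev/disk/by-path/pci-0000:03:00.0-scsi-0:0:2:0"]
          }
        }
      }
    }

Because the file is plain JSON, it can be syntax-checked before deployment,
for example with ``jq . node_data_lookup.json``.
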
- -Ironic will have one of the available disks on the system reserved as -the root disk. The utility will always exclude the root disk from the -list of devices generated. - -Use `./make_ceph_disk_list.py --help` to see other available options. - -Deploying with NodeDataLookup ------------------------------ - -Add the environment file described in the previous section to the -deploy commandline:: - - openstack overcloud deploy [other overcloud deploy options] -e ~/my-node-settings.yaml - -or:: - - openstack overcloud deploy [other overcloud deploy options] -e ~/node_data_lookup.json - -JSON is the recommended format (instead of JSON embedded in YAML) -because you may use `jq` to validate the entire file before deployment. diff --git a/deploy-guide/source/features/octavia.rst b/deploy-guide/source/features/octavia.rst deleted file mode 100644 index ede8177f..00000000 --- a/deploy-guide/source/features/octavia.rst +++ /dev/null @@ -1,77 +0,0 @@ -.. _deploy-octavia: - -Deploying Octavia in the Overcloud -================================== - -This guide assumes that your undercloud is already installed and ready to -deploy an overcloud with Octavia enabled. Please note that only container -deployments are supported. - -Preparing to deploy -------------------- - -TripleO can upload an Octavia Amphora image to the overcloud if one is -available when deploying. - -Configuring the amphora image ------------------------------ - -If the Octavia Amphora image is available when deploying it should be placed -in a readable path with the default location being a good choice. On CentOS, -the default location is:: - - /usr/share/openstack-octavia-amphora-images/amphora-x64-haproxy.qcow2 - -If deploying on Red Hat Enterprise Linux, the default location is:: - - /usr/share/openstack-octavia-amphora-images/octavia-amphora.qcow2 - -On Red Hat Enterprise Linux, downloading an image may be unnecessary as the -amphora image may already be installed. - -If using a non-default location, make sure to specify the location through the -``OctaviaAmphoraImageFilename`` variable in an environment file. For example:: - - parameter_defaults: - OctaviaAmphoraImageFilename: /usr/share/openstack-images/amphora-image.qcow2 - -.. warning:: Home directories are typically not readable by the workflow - tasks that upload the file image to Glance. Please use a generally - accessible path. - -Deploying the overcloud with the octavia services -------------------------------------------------- - -To deploy Octavia services in the overcloud, include the sample environment -file provided. For example:: - - openstack overcloud deploy --templates \ - -e /usr/share/openstack-tripleo-heat-templates/environments/services/octavia.yaml \ - -e ~/containers-default-parameters.yaml - -.. note:: Don't forget to include any additional environment files containing - parameters such as those for the amphora image file. - -Uploading/Updating the amphora image after deployment ------------------------------------------------------ - -Uploading a new amphora image to Glance in the overcloud can be done after -deployment. This may be required if the amphora image was not available at the -time of deployment or the image needs to be updated. 
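
Before uploading, it can be useful to check (with overcloud credentials
loaded) whether a tagged amphora image is already registered in Glance,
assuming the default tag::

    openstack image list --tag amphora-image
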
- -There are two Octavia specific requirements:: - - - The image must be tagged in Glance (default value 'amphora-image') - - - The image must belong the 'service' project - -To upload an amphora image into glance:: - - openstack image create --disk-format qcow2 --container-format bare \ - --tag 'amphora-image' --file [amphora image filename] \ - --project service new-amphora-image - -.. note:: The amphora image tag name can be customized by setting the - ``OctaviaAmphoraImageTag`` variable. Note that if this is changed - after deployment, Octavia will not be able to use any previously - uploaded images until they are retagged. diff --git a/deploy-guide/source/features/ops_tools.rst b/deploy-guide/source/features/ops_tools.rst deleted file mode 100644 index 366477d4..00000000 --- a/deploy-guide/source/features/ops_tools.rst +++ /dev/null @@ -1,173 +0,0 @@ -Deploying Operational Tools -=========================== - -TripleO comes with an optional suite of tools designed to help operators -maintain an OpenStack environment. The tools perform the following functions: - -- Availability Monitoring -- Centralized Logging -- Performance Monitoring - -This document will go through the presentation and installation of these tools. - -Architecture ------------- - -#. Operational Tool Server: - - - Monitoring Relay/proxy (RabbitMQ_) - - Monitoring Controller/Server (Sensu_) - - Data Store (Redis_) - - API/Presentation Layer (Uchiwa_) - - Log relay/transformer (Fluentd_) - - Data store (Elastic_) - - API/Presentation Layer (Kibana_) - - Performance receptor (Collectd_) - - Aggregator/Relay (Graphite_) - - An API/Presentation Layer (Grafana_) - -#. Undercloud: - - - There is no operational tools installed by default on the undercloud - -#. Overcloud: - - - Monitoring Agent (Sensu_) - - Log Collection Agent (Fluentd_) - - Performance Collector Agent (Collectd_) - -.. _RabbitMQ: https://www.rabbitmq.com -.. _Sensu: http://sensuapp.org -.. _Redis: https://redis.io -.. _Uchiwa: https://uchiwa.io -.. _Fluentd: http://www.fluentd.org -.. _Elastic: https://www.elastic.co -.. _Kibana: https://www.elastic.co/products/kibana -.. _Collectd: https://collectd.org -.. _Graphite: https://graphiteapp.org -.. _Grafana: https://grafana.com - -Deploying the Operational Tool Server -------------------------------------- - -There is an ansible project called opstools-ansible (OpsTools_) on github that helps to install the Operator Server, further documentation of the operational tool server installation can be founded at (OpsToolsDoc_). - -.. _OpsTools: https://github.com/centos-opstools/opstools-ansible -.. _OpsToolsDoc: https://github.com/centos-opstools/opstools-doc - -Deploying the Undercloud ------------------------- - -As there is nothing to install on the undercloud nothing needs to be done. - -Before deploying the Overcloud ------------------------------- - -.. note:: - - The :doc:`../deployment/template_deploy` document has a more detailed explanation of the - following steps. - - -1. 
Install client packages on overcloud-full image: - - - Mount the image and create a chroot:: - - temp_dir=$(mktemp -d) - sudo tripleo-mount-image -a /path/to/overcloud-full.qcow2 -m $temp_dir - sudo mount -o bind /dev $temp_dir/dev/ - sudo cp /etc/resolv.conf $temp_dir/etc/resolv.conf - sudo chroot $temp_dir /bin/bash - - - Install the packages inside the chroot:: - - dnf install -y centos-release-opstools - dnf install -y sensu fluentd collectd - exit - - - Unmount the image:: - - sudo rm $temp_dir/etc/resolv.conf - sudo umount $temp_dir/dev - sudo tripleo-unmount-image -m $temp_dir - - - Upload new image to undercloud image registry:: - - openstack overcloud image upload --update-existing - -2. Operational tools configuration files: - - The files have some documentation about the parameters that need to be configured - - - Availability Monitoring:: - - /usr/share/openstack-tripleo-heat-templates/environments/monitoring-environment.yaml - - - Centralized Logging:: - - /usr/share/openstack-tripleo-heat-templates/environments/logging-environment.yaml - - - Performance Monitoring:: - - /usr/share/openstack-tripleo-heat-templates/environments/collectd-environment.yaml - -3. Configure the environment - - The easiest way to configure our environment will be to create a parameter file, let's called parameters.yaml with all the parameters defined. - - - Availability Monitoring:: - - MonitoringRabbitHost: server_ip # Server were the rabbitmq was installed - MonitoringRabbitPort: 5672 # Rabbitmq port - MonitoringRabbitUserName: sensu_user # the rabbitmq user to be used by sensu - MonitoringRabbitPassword: sensu_password # The password of the sensu user - MonitoringRabbitUseSSL: false # Set to false - MonitoringRabbitVhost: "/sensu_vhost" # The virtual host of the rabbitmq - - - Centralized Logging:: - - LoggingServers: # The servers - - host: server_ip # The ip of the server - port: 24224 # Port to send the logs [ 24224 plain & 24284 SSL ] - LoggingUsesSSL: false # Plain or SSL connections - # If LoggingUsesSSL is set to false the following lines can - # be deleted - LoggingSharedKey: secret # The key - LoggingSSLCertificate: | # The content of the SSL Certificate - -----BEGIN CERTIFICATE----- - ...contents of server.pem here... - -----END CERTIFICATE----- - - - Performance Monitoring:: - - CollectdServer: collectd0.example.com # Collectd server, where the data is going to be sent - CollectdServerPort: 25826 # Collectd port - # CollectdSecurityLevel: None # Security by default None the other values are - # Encrypt & Sign, but the two following parameters - # need to be set too - # CollectdUsername: user # User to connect to the server - # CollectdPassword: password # Password to connect to the server - - # Collectd, by default, comes with several plugins - # extra plugins can added on this parameter - CollectdExtraPlugins: - - disk # disk plugin - - df # df plugin - ExtraConfig: # If the plugins need to be set, this is the location - collectd::plugin::disk::disks: - - "/^[vhs]d[a-f][0-9]?$/" - collectd::plugin::df::mountpoints: - - "/" - collectd::plugin::df::ignoreselected: false - - -4. 
Continue following the TripleO instructions for deploying an overcloud:: - - openstack overcloud deploy --templates \ - [-e /usr/share/openstack-tripleo-heat-templates/environments/monitoring-environment.yaml] \ - [-e /usr/share/openstack-tripleo-heat-templates/environments/logging-environment.yaml] \ - [-e /usr/share/openstack-tripleo-heat-templates/environments/collectd-environment.yaml] \ - -e parameters.yaml - - -5. Wait for the completion of the overcloud deployment process. diff --git a/deploy-guide/source/features/oslo_messaging_config.rst b/deploy-guide/source/features/oslo_messaging_config.rst deleted file mode 100644 index 20b1a66f..00000000 --- a/deploy-guide/source/features/oslo_messaging_config.rst +++ /dev/null @@ -1,107 +0,0 @@ -Configuring Messaging RPC and Notifications -=========================================== - -TripleO can configure oslo.messaging RPC and Notification services and -deploy the corresponding messaging backends for the undercloud and -overcloud. The roles OsloMessagingRPC and OsloMessagingNotify have been -added in place of the RabbitMQ Server. Having independent roles for RPC -and Notify allows for the separation of messaging backends as well as -the deployment of different messaging backend intermediaries that are -supported by oslo.messaging drivers:: - - +----------------+-----------+-----------+-----+--------+-----------+ - | Oslo.Messaging | Transport | Backend | RPC | Notify | Messaging | - | Driver | Protocol | Server | | | Type | - +================+===========+===========+=====+========+===========+ - | rabbit | AMQP V0.9 | rabbitmq | yes | yes | queue | - +----------------+-----------+-----------+-----+--------+-----------+ - | amqp | AMQP V1.0 | qdrouterd | yes | | direct | - +----------------+-----------+-----------+-----+--------+-----------+ - | kafka | kafka | kafka | | yes | queue | - | (experimental) | binary | | | | (stream) | - +----------------+-----------+-----------+-----+--------+-----------+ - -Standard Deployment of RabbitMQ Server Backend ----------------------------------------------- - -A single RabbitMQ backend (e.g. server or cluster) is the default -deployment for TripleO. This messaging backend provides the services -for both RPC and Notification communications through its integration -with the oslo.messaging rabbit driver. - -The example `standard messaging`_ environment file depicts the -resource association for this defacto deployment configuration:: - - # ******************************************************************* - # This file was created automatically by the sample environment - # generator. Developers should use `tox -e genconfig` to update it. - # Users are recommended to make changes to a copy of the file instead - # of the original, if any customizations are needed. 
- # ******************************************************************* - # title: Share single rabbitmq backend for rpc and notify messaging backend - # description: | - # Include this environment to enable a shared rabbitmq backend for - # oslo.messaging rpc and notification services - parameter_defaults: - # The network port for messaging backend - # Type: number - RpcPort: 5672 - - resource_registry: - OS::TripleO::Services::OsloMessagingNotify: ../../deployment/rabbitmq/rabbitmq-messaging-notify-shared-puppet.yaml - OS::TripleO::Services::OsloMessagingRpc: ../../deployment/rabbitmq/rabbitmq-messaging-rpc-container-puppet.yaml - -The `rabbitmq-messaging-rpc-container-puppet.yaml`_ instantiates the rabbitmq server backend -while `rabbitmq-messaging-notify-container-puppet.yaml`_ sets up the notification -transport configuration to use the same shared rabbitmq server. - -Deployment of Separate RPC and Notify Messaging Backends --------------------------------------------------------- - -Separate messaging backends can be deployed for RPC and Notification -communications. For this TripleO deployment, the apache dispatch -router (qdrouterd) can be deployed for the RPC messaging backend using -the oslo.messaging AMQP 1.0 driver. - -The example `hybrid messaging`_ environment file can be used for an -overcloud deployment:: - - # ******************************************************************* - # This file was created automatically by the sample environment - # generator. Developers should use `tox -e genconfig` to update it. - # Users are recommended to make changes to a copy of the file instead - # of the original, if any customizations are needed. - # ******************************************************************* - # title: Hybrid qdrouterd for rpc and rabbitmq for notify messaging backend - # description: | - # Include this environment to enable hybrid messaging backends for - # oslo.messaging rpc and notification services - parameter_defaults: - # The network port for messaging Notify backend - # Type: number - NotifyPort: 5672 - - # The network port for messaging backend - # Type: number - RpcPort: 31459 - - resource_registry: - OS::TripleO::Services::OsloMessagingNotify: ../../deployment/rabbitmq/rabbitmq-messaging-notify-container-puppet.yaml - OS::TripleO::Services::OsloMessagingRpc: ../../deployment/messaging/rpc-qdrouterd-container-puppet.yaml - -The above will instantiate qdrouterd server(s) and configure them for -use as the RPC transport and will also instantiate the rabbitmq backend -and configure it for use as the Notification transport. It should -be noted that the RPC and Notify ports must be distinct to prevent the -qdrouterd and rabbitmq servers from simultaneously using the amqp -standard port (5672). - -Add the following arguments to your `openstack overcloud deploy` -command to deploy with separate messaging backends:: - - openstack overcloud deploy --templates -e /usr/share/openstack-tripleo-heat-templates/environments/messaging/rpc-qdrouterd-notify-rabbitmq-hybrid.yaml - -.. _`standard messaging`: https://github.com/openstack/tripleo-heat-templates/blob/master/environments/messaging/rpc-rabbitmq-notify-rabbitmq-shared.yaml -.. _`rabbitmq-messaging-rpc-container-puppet.yaml`: https://github.com/openstack/tripleo-heat-templates/blob/master/deployment/rabbitmq/rabbitmq-messaging-rpc-container-puppet.yaml -.. 
_`rabbitmq-messaging-notify-container-puppet.yaml`: https://github.com/openstack/tripleo-heat-templates/blob/master/deployment/rabbitmq/rabbitmq-messaging-notify-container-puppet.yaml -.. _`hybrid messaging`: https://github.com/openstack/tripleo-heat-templates/blob/master/environments/messaging/rpc-qdrouterd-notify-rabbitmq-hybrid.yaml diff --git a/deploy-guide/source/features/ovs_dpdk_config.rst b/deploy-guide/source/features/ovs_dpdk_config.rst deleted file mode 100644 index 851adb0c..00000000 --- a/deploy-guide/source/features/ovs_dpdk_config.rst +++ /dev/null @@ -1,98 +0,0 @@ -Deploying with OVS DPDK Support -=============================== - -TripleO can deploy Overcloud nodes with OVS DPDK support. A new role -``ComputeOvsDpdk`` has been added to create a custom ``roles_data.yaml`` with -composable OVS DPDK role. - -Execute below command to create the ``roles_data.yaml``:: - - openstack overcloud roles generate -o roles_data.yaml Controller ComputeOvsDpdk - -Once a roles file is created, the following changes are required: - -- Deploy Command -- Parameters -- Network Config - -Deploy Command ----------------- -Deploy command should include the generated roles data file from the above -command. - -Deploy command should also include the OVS DPDK environment file to override the -default neutron-ovs-agent service with neutron-ovs-dpdk-agent service. All the -required parameters are specified in this environment file as commented. The -parameters has to be configured according to the baremetal on which OVS DPDK -is enabled. - -Also, OVS-DPDK requires mandatory kernel parameters to be set before -configuring the DPDK driver, like ``intel_iommu=on`` on Intel machines. In -order to enable the configuration of kernel parameters to the host, host- -config-pre-network environment file has to be added for the deploy command. - -Adding the following arguments to the ``openstack overcloud deploy`` command -will do the trick:: - - openstack overcloud deploy --templates \ - -r roles_data.yaml \ - -e /usr/share/openstack-tripleo-heat-templates/environments/services/neutron-ovs-dpdk.yaml \ - ... - -Parameters ----------- -Following are the list of parameters which need to be provided for deploying -with OVS DPDK support. - -* OvsPmdCoreList: List of Logical CPUs to be allocated for Poll Mode Driver -* OvsDpdkCoreList: List of Logical CPUs to be allocated for the openvswitch - host process (lcore list) -* OvsDpdkMemoryChannels: Number of memory channels -* OvsDpdkSocketMemory: Socket memory list per NUMA node - - -Example:: - - parameter_defaults: - OvsPmdCoreList: "2,3,18,19" - OvsDpdkCoreList: "0,1,16,17" - OvsDpdkMemoryChannels: "4" - OvsDpdkSocketMemory: "1024,1024" - - -The parameter ``KernelArgs`` should be provided in the deployment environment -file, with the set of kernel boot parameters to be applied on the -``ComputeOvsDpdk`` role where OVS DPDK is enabled:: - - parameter_defaults: - ComputeOvsDpdkParameters: - KernelArgs: "default_hugepagesz=1GB hugepagesz=1G hugepages=64 intel_iommu=on iommu=pt" - - -Network Config --------------- -DPDK supported network interfaces should be specified in the network config -templates to configure OVS DPDK on the node. The following new network config -types have been added to support DPDK. 
- -- ovs_user_bridge -- ovs_dpdk_port -- ovs_dpdk_bond - -Example:: - - network_config: - - - type: ovs_user_bridge - name: br-link - use_dhcp: false - members: - - - type: ovs_dpdk_port - name: dpdk0 - mtu: 2000 - rx_queue: 2 - members: - - - type: interface - name: nic3 diff --git a/deploy-guide/source/features/pre_network_config.rst b/deploy-guide/source/features/pre_network_config.rst deleted file mode 100644 index f21e14a0..00000000 --- a/deploy-guide/source/features/pre_network_config.rst +++ /dev/null @@ -1,116 +0,0 @@ -Configure node before Network Config -==================================== - -In specific deployments, it is required to perform additional configurations -on the overcloud node before network deployment, but after applying kernel -args. For example, OvS-DPDK deployment requires DPDK to be enabled in -OpenvSwitch before network deployment (os-net-config), but after the -hugepages are created (hugepages are created using kernel args). This -requirement is also valid for some 3rd party SDN integration. This kind of -configuration requires additional TripleO service definitions. This document -explains how to achieve such deployments on and after `train` release. - -.. note:: - - In `queens` release, the resource `PreNetworkConfig` can be overridden to - achieve the required behavior, which has been deprecated from `train` - onwards. The implementations based on `PreNetworkConfig` should be - moved to other available alternates. - -The TripleO service `OS::TripleO::BootParams` configures the parameter -`KernelArgs` and reboots the node using the `tripleo-ansible` role -`tripleo_kernel`. Some points to consider on `KernelArgs`: - -* `BootParams` service is enabled by default on all the roles. -* The node will be restarted only when kernel args are applied for the first - time (fresh node configuration). -* In case of adding `KernelArgs` during update/upgrade/scale operations, when - a particular role does not have `KernelArgs`, it results in node reboot. - Such scenarios should be treated as role migration instead adding only - `KernelArgs`. -* `KernelArgs` can be updated from `wallaby` release onwards (where the role - already has `KernelArgs` but requires modification). In such cases, the - node reboot has to be planned by the user manually, after the TripleO - deployment is completed. For example, increasing the hugepages count post - deployment. - - -The firstboot_ scripts provide a mechanism to apply the custom node -configuration which is independent of kernel args. - -.. _firstboot: https://github.com/openstack/tripleo-heat-templates/tree/master/firstboot - -Custom Service --------------- - -When a configuration needs to be applied on the node after reboot and before -the network config, then a custom service template should be added that -includes the `BootParams` resource (example below) and any other required -configuration. It is important to allow the default implementation -of `BootParams` service to be included as it is, because any improvements -or fixes will be automatically included in the deployment. - -Here is an example OvS-DPDK_ has been configured after `BootParams` but before -network config:: - - heat_template_version: wallaby - - description: > - Open vSwitch Configuration - - parameters: - ServiceData: - default: {} - description: Dictionary packing service data - type: json - ServiceNetMap: - default: {} - description: Mapping of service_name -> network name. Typically set - via parameter_defaults in the resource registry. 
Use parameter_merge_strategies to merge it with the defaults.
        type: json
      RoleName:
        default: ''
        description: Role name on which the service is applied
        type: string
      RoleParameters:
        default: {}
        description: Parameters specific to the role
        type: json
      EndpointMap:
        default: {}
        description: Mapping of service endpoint -> protocol. Typically set
                     via parameter_defaults in the resource registry.
        type: json

    resources:
      BootParams:
        type: /usr/share/openstack-tripleo-heat-templates/deployments/kernel/kernel-boot-params-baremetal-ansible.yaml
        properties:
          ServiceData: {get_param: ServiceData}
          ServiceNetMap: {get_param: ServiceNetMap}
          EndpointMap: {get_param: EndpointMap}
          RoleName: {get_param: RoleName}
          RoleParameters: {get_param: RoleParameters}

    outputs:
      role_data:
        description: Role data for the Open vSwitch service.
        value:
          service_name: openvswitch
          deploy_steps_tasks:
            - get_attr: [BootParams, role_data, deploy_steps_tasks]
            - - name: Run ovs-dpdk role
                when: step|int == 0
                include_role:
                  name: tripleo_ovs_dpdk

.. _OvS-DPDK: https://github.com/openstack/tripleo-heat-templates/blob/master/deployment/openvswitch/openvswitch-dpdk-baremetal-ansible.yaml

.. note::
   In the above sample service definition, the condition `step|int == 0` in
   the `deploy_steps_tasks` section forces the associated steps to run
   before starting any other node configuration (including network deployment).

Add this service to the roles definition of the required roles so that the
configuration can be applied after reboot but before network deployment.
diff --git a/deploy-guide/source/features/rhsm.rst b/deploy-guide/source/features/rhsm.rst
deleted file mode 100644
index 5b20258d..00000000
--- a/deploy-guide/source/features/rhsm.rst
+++ /dev/null
@@ -1,139 +0,0 @@
Deploying with RHSM
===================

Summary
-------

Starting in the Queens release, it is possible to use Ansible to apply the
RHSM (Red Hat Subscription Management) configuration.

Instead of the pre_deploy rhel-registration script, the new RHSM service
allows operators to:

#. deploy advanced RHSM configurations, where each role can have its own
   repositories, for example.

#. use the config-download mechanism so operators can run the playbooks at any
   time after the deployment, in case RHSM parameters have changed.
- - -Using RHSM ----------- -To enable deployment with Ansible and config-download pass the additional arg -to the deployment command:: - - openstack overcloud deploy \ - \ - -e ~/rhsm.yaml - -The ``rhsm.yaml`` environment enables mapping the OS::TripleO::Services::Rhsm to -the extraconfig service:: - - resource_registry: - OS::TripleO::Services::Rhsm: /usr/share/openstack-tripleo-heat-templates/deployment/rhsm/rhsm-baremetal-ansible.yaml - parameter_defaults: - RhsmVars: - rhsm_activation_key: "secrete_key" - rhsm_org_id: "Default_Organization" - rhsm_server_hostname: "mysatserver.com" - rhsm_baseurl: "https://mysatserver.com/pulp/repos" - rhsm_method: satellite - rhsm_insecure: yes - rhsm_release: 8.1 - -In some advanced use cases, you might want to configure RHSM for a specific role:: - - parameter_defaults: - ComputeHCIParameters: - RhsmVars: - rhsm_activation_key: "secrete_key" - rhsm_org_id: "Default_Organization" - rhsm_server_hostname: "mysatserver.com" - rhsm_baseurl: "https://mysatserver.com/pulp/repos" - rhsm_method: satellite - rhsm_insecure: yes - rhsm_release: 8.1 - -In that case, all nodes deployed with ComputeHCI will be configured with these RHSM parameters. - -Scale-down the Overcloud ------------------------- -The automatic unsubscription isn't currently supported and before scaling down the Overcloud, -the operator will have to run this playbook against the host(s) that will be removed. -Example when we want to remove 2 compute nodes:: - - - hosts: - - overcloud-compute47 - - overcloud-compute72 - vars: - rhsm_username: bob.smith@acme.com - rhsm_password: my_secret - rhsm_state: absent - roles: - - openstack.redhat-subscription - -The playbook needs to be executed prior to the actual scale-down. - -Transition from previous method -------------------------------- - -The previous method ran a script called rhel-registration during -pre_deploy step, which is located in the ``extraconfig/pre_deploy/rhel-registration`` -folder. 
While the script is still working, you can perform a -migration to the new service by replacing the parameters used in -rhel-registration with RhsmVars and switching the resource_registry -from:: - - resource_registry: - OS::TripleO::NodeExtraConfig: rhel-registration.yaml - -To:: - - resource_registry: - # Before Train cycle, the file is in /usr/share/openstack-tripleo-heat-templates/extraconfig/services/rhsm.yaml - OS::TripleO::Services::Rhsm: /usr/share/openstack-tripleo-heat-templates/deployment/rhsm/rhsm-baremetal-ansible.yaml - -The following table shows a migration path from the old -rhe-registration parameters to the new RhsmVars: - -+------------------------------+------------------------------+ -| rhel-registration script | rhsm with Ansible (RhsmVars) | -+==============================+==============================+ -| rhel_reg_activation_key | rhsm_activation_key | -+------------------------------+------------------------------+ -| rhel_reg_auto_attach | rhsm_autosubscribe | -+------------------------------+------------------------------+ -| rhel_reg_sat_url | rhsm_satellite_url | -+------------------------------+------------------------------+ -| rhel_reg_org | rhsm_org_id | -+------------------------------+------------------------------+ -| rhel_reg_password | rhsm_password | -+------------------------------+------------------------------+ -| rhel_reg_repos | rhsm_repos | -+------------------------------+------------------------------+ -| rhel_reg_pool_id | rhsm_pool_ids | -+------------------------------+------------------------------+ -| rhel_reg_user | rhsm_username | -+------------------------------+------------------------------+ -| rhel_reg_method | rhsm_method | -+------------------------------+------------------------------+ -| rhel_reg_http_proxy_host | rhsm_rhsm_proxy_hostname | -+------------------------------+------------------------------+ -| rhel_reg_http_proxy_port | rhsm_rhsm_proxy_port | -+------------------------------+------------------------------+ -| rhel_reg_http_proxy_username | rhsm_rhsm_proxy_user | -+------------------------------+------------------------------+ -| rhel_reg_http_proxy_password | rhsm_rhsm_proxy_password | -+------------------------------+------------------------------+ - - -More about the Ansible role ---------------------------- - -TripleO is using the Ansible role_ for Red Hat Subscription. - -.. _role: https://github.com/openstack/ansible-role-redhat-subscription - -The role parameters aren't documented here to avoid duplication but it is -recommended to take a look at them in the repository when using this feature -in TripleO. diff --git a/deploy-guide/source/features/role_specific_parameters.rst b/deploy-guide/source/features/role_specific_parameters.rst deleted file mode 100644 index ffff5d9b..00000000 --- a/deploy-guide/source/features/role_specific_parameters.rst +++ /dev/null @@ -1,108 +0,0 @@ -Role-Specific Parameters -======================== - -A service can be associated with multiple roles, like ``nova-compute`` -service can be associated with **ComputeRole1** and **ComputeRole2**. The -``nova-compute`` service takes multiple parameters like ``NovaVcpuPinSet``, -``NovaReservedHostMemory``, etc. 
It is possible to provide separate values
specific to a role with the following changes in the user environment file::

    parameter_defaults:
      NovaReservedHostMemory: 512
      ComputeRole1Parameters:
        NovaReservedHostMemory: 2048
      ComputeRole2Parameters:
        NovaReservedHostMemory: 1024

The format to provide role-specific parameters is ``<RoleName>Parameters``,
where ``<RoleName>`` is the name of the role as defined in the
``roles_data.yaml`` template.

In the example above, the value "512" will be applied to all roles which have
the ``nova-compute`` service, whereas the value "2048" will be applied only to
the **ComputeRole1** role and the value "1024" only to the **ComputeRole2**
role.

With this approach, the service implementation has to merge the role-specific
parameters with the global parameters in its definition template. The
role-specific parameters take precedence over the global parameters.

For any custom service which needs to use role-specific parameters, the same
parameter merging should be done. Here is a sample of the parameter merging
done by the service implementation::

    RoleParametersValue:
      type: OS::Heat::Value
      properties:
        type: json
        value:
          map_replace:
            - map_replace:
              - neutron::agents::ml2::ovs::datapath_type: NeutronDatapathType
                neutron::agents::ml2::ovs::vhostuser_socket_dir: NeutronVhostuserSocketDir
                vswitch::dpdk::driver_type: NeutronDpdkDriverType
                vswitch::dpdk::host_core_list: HostCpusList
                vswitch::dpdk::pmd_core_list: NeutronDpdkCoreList
                vswitch::dpdk::memory_channels: NeutronDpdkMemoryChannels
                vswitch::dpdk::socket_mem: NeutronDpdkSocketMemory
              - values: {get_param: [RoleParameters]}
            - values:
                NeutronDatapathType: {get_param: NeutronDatapathType}
                NeutronVhostuserSocketDir: {get_param: NeutronVhostuserSocketDir}
                NeutronDpdkDriverType: {get_param: NeutronDpdkDriverType}
                HostCpusList: {get_param: HostCpusList}
                NeutronDpdkCoreList: {get_param: NeutronDpdkCoreList}
                NeutronDpdkMemoryChannels: {get_param: NeutronDpdkMemoryChannels}
                NeutronDpdkSocketMemory: {get_param: NeutronDpdkSocketMemory}

A service can have a unique variable name that is different from the
role-specific one. The example below shows how to define the service variable
``KeystoneWSGITimeout``, override it with the role-specific variable
``WSGITimeout`` if it is found, and create a new alias variable named
``wsgi_timeout`` to store the value. Later on, that value can be retrieved by
using ``{get_attr: [RoleParametersValue, value, wsgi_timeout]}``::

    parameters:

      KeystoneWSGITimeout:
        description: The timeout for the Apache virtual host created for the API endpoint.
        type: string
        default: '60'
        tags:
          - role_specific

    resources:

      RoleParametersValue:
        type: OS::Heat::Value
        properties:
          type: json
          value:
            map_replace:
              - map_replace:
                - wsgi_timeout: WSGITimeout
                - values: {get_param: [RoleParameters]}
              - values:
                  WSGITimeout: {get_param: KeystoneWSGITimeout}

    outputs:
      role_data:
        description: Role data for the Keystone API role.
        value:
          config_settings:
            map_merge:
              - keystone::wsgi::apache::vhost_custom_fragment:
                  list_join: [' ', ['Timeout', {get_attr: [RoleParametersValue, value, wsgi_timeout]}]]

Now the variable can optionally have a default set at the composable roles
data level::

    - name: Undercloud
      RoleParametersDefault:
        WSGITimeout: '600'

..
note:: - As of now, not all parameters can be set per role, it is based on the - service or template implementation. Each service should have the - implementation to merge the global parameters and role-specific - parameters, as explained in the above examples. A warning will be shown - during the deployment, if an invalid parameter (which does not support - role-specific implementation) is provided as role-specific input. diff --git a/deploy-guide/source/features/routed_spine_leaf_network.rst b/deploy-guide/source/features/routed_spine_leaf_network.rst deleted file mode 100644 index a7294440..00000000 --- a/deploy-guide/source/features/routed_spine_leaf_network.rst +++ /dev/null @@ -1,621 +0,0 @@ -.. _routed_spine_leaf_network: - -Deploying Overcloud with L3 routed networking -============================================= - -Layer 3 Routed spine and leaf architectures is gaining in popularity due to the -benefits, such as high-performance, increased scalability and reduced failure -domains. - -The below diagram is an example L3 routed -`Clos `_ architecture. In this -example each server is connected to top-of-rack leaf switches. Each leaf switch -is attached to each spine switch. Within each rack, all servers share a layer 2 -domain. The layer 2 network segments are local to the rack. Layer 3 routing via -the spine switches permits East-West traffic between the racks: - -.. image:: ../_images/spine_and_leaf.svg - -.. Note:: Typically Dynamic Routing is implemented in such an architecture. - Often also - `ECMP `_ - (Equal-cost multi-path routing) and - `BFD `_ - (Bidirectional Forwarding Detection) are used to provide non-blocking - forwarding and fast convergence times in case of failures. - Configuration of the underlying network architecture is not in the - scope of this document. - -Layer 3 routed Requirements ---------------------------- - -For TripleO to deploy the ``overcloud`` on a network with a layer 3 routed -architecture the following requirements must be met: - -* **Layer 3 routing**: - The network infrastructure must have *routing* configured to enable traffic - between the different layer 2 segments. This can be statically or dynamically - configured. - -* **DHCP-Relay**: - Each layer 2 segment that is not local to the ``undercloud`` must provide - *dhcp-relay*. DHCP requests must be forwarded to the Undercloud on the - provisioning network segment where the ``undercloud`` is connected. - - .. Note:: The ``undercloud`` uses two DHCP servers. One for baremetal node - introspection, and another for deploying overcloud nodes. - - Make sure to read `DHCP relay configuration`_ to understand the - requirements when configuring *dhcp-relay*. - -Layer 3 routed Limitations --------------------------- - -* Some roles, such as the Controller role, use virtual IP addresses and - clustering. The mechanism behind this functionality requires layer-2 network - connectivity between these nodes. These nodes must all be placed within the - same leaf. - -* Similar restrictions apply to networker nodes. The Network service implements - highly-available default paths in the network using Virtual Router Redundancy - Protocol (VRRP). Since VRRP uses a virtual router ip address, master and - backup nodes must be connected to the same L2 network segment. - -* When using tenant or provider networks with VLAN segmentation, the particular - VLANs used must be shared between all networker and compute nodes. - - .. 
Note:: It is possible to configure the Network service with multiple sets - of networker nodes. Each set would share routes for their networks, - and VRRP would be used within each set of networker nodes to - provide highly-available default paths. In such configuration all - networker nodes sharing networks must be on the same L2 network - segment. - -Create undercloud configuration -------------------------------- - -To deploy the ``overcloud`` on a L3 routed architecture the ``undercloud`` -needs to be configured with multiple neutron network segments and subnets on -the ``ctlplane`` network. - -#. In the ``[DEFAULT]`` section of ``undercloud.conf`` enable the routed - networks feature by setting ``enable_routed_networks`` to ``true``. For - example:: - - enable_routed_networks = true - -#. In the ``[DEFAULT]`` section of ``undercloud.conf`` add a comma separated - list of control plane subnets. Define one subnet for each layer 2 segment in - the routed spine and leaf. For example:: - - subnets = leaf0,leaf1,leaf2 - -#. In the ``[DEFAULT]`` section of ``undercloud.conf`` specify the subnet that - is associated with the physical layer 2 segment that is *local* to the - ``undercloud``. For example:: - - local_subnet = leaf0 - -#. For each of the control plane subnets specified in ``[DEFAULT]\subnets`` - add an additional section in ``undercloud.conf``, for example:: - - [leaf0] - cidr = 192.168.10.0/24 - dhcp_start = 192.168.10.10 - dhcp_end = 192.168.10.90 - inspection_iprange = 192.168.10.100,192.168.10.190 - gateway = 192.168.10.1 - masquerade = False - - [leaf1] - cidr = 192.168.11.0/24 - dhcp_start = 192.168.11.10 - dhcp_end = 192.168.11.90 - inspection_iprange = 192.168.11.100,192.168.11.190 - gateway = 192.168.11.1 - masquerade = False - - [leaf2] - cidr = 192.168.12.0/24 - dhcp_start = 192.168.12.10 - dhcp_end = 192.168.12.90 - inspection_iprange = 192.168.12.100,192.168.12.190 - gateway = 192.168.12.1 - masquerade = False - -Install the undercloud ----------------------- - -Once the ``undercloud.conf`` is updated with the desired configuration, install -the undercloud by running the following command:: - - $ openstack undercloud install - -Once the ``undercloud`` is installed complete the post-install tasks such as -uploading images and registering baremetal nodes. (For addition details -regarding the post-install tasks, see -:doc:`../deployment/install_overcloud`.) - -DHCP relay configuration ------------------------- - -The TripleO Undercloud uses two DHCP servers on the provisioning network, one -for ``introspection`` and another one for ``provisioning``. When configuring -*dhcp-relay* make sure that DHCP requests are forwarded to both DHCP servers on -the Undercloud. - -For devices that support it, UDP *broadcast* can be used to relay DHCP requests -to the L2 network segment where the Undercloud provisioning network is -connected. Alternatively UDP *unicast* can be can be used, in this case DHCP -requests are relayed to specific ip addresses. - -.. Note:: Configuration of *dhcp-relay* on specific devices types is beyond the - scope of this document. As a reference - `DHCP relay configuration (Example)`_ using the implementation in - `ISC DHCP software `_ is - available below. (Please refer to manual page - `dhcrelay(8) `_ for further - details on how to use this implementation.) - - -Broadcast DHCP relay -~~~~~~~~~~~~~~~~~~~~ - -DHCP requests are relayed onto the L2 network segment where the DHCP server(s) -reside using UDP *broadcast* traffic. 
All devices on the network segment will -receive the broadcast traffic. When using UDP *broadcast* both DHCP servers on -the Undercloud will receive the relayed DHCP request. - -Depending on implementation this is typically configured by specifying either -*interface* or *ip network address*: - -* **Interface**: - Specifying an interface connected to the L2 network segment where the DHCP - requests will be relayed. -* **IP network address**: - Specifying the network address of the IP network where the DHCP request will - be relayed. - -Unicast DHCP relay -~~~~~~~~~~~~~~~~~~ - -DHCP requests are relayed to specific DHCP servers using UDP *unicast* traffic. -When using UDP *unicast* the device configured to provide *dhcp-relay* must be -configured to relay DHCP requests to both the IP address assigned to the -interface used for *introspection* on the Undercloud and the IP address of the -network namespace created by the Network service to host the DHCP service for -the ``ctlplane`` network. - -The interface used for *introspection* is the one defined as -``inspection_interface`` in ``undercloud.conf``. - -.. Note:: It is common to use the ``br-ctlplane`` interface for introspection, - the IP address defined as ``local_ip`` in ``undercloud.conf`` will be - on the ``br-ctlplane`` interface. - -The IP address allocated to the neutron DHCP namespace will typically be the -first address available in the IP range configured for the ``local_subnet`` in -``undercloud.conf``. (The first address in the IP range is the one defined as -``dhcp_start`` in the configuration.) For example: ``172.20.0.10`` would be the -IP address when the following configuration is used:: - - [DEFAULT] - local_subnet = leaf0 - subnets = leaf0,leaf1,leaf2 - - [leaf0] - cidr = 172.20.0.0/26 - dhcp_start = 172.20.0.10 - dhcp_end = 172.20.0.19 - inspection_iprange = 172.20.0.20,172.20.0.29 - gateway = 172.20.0.62 - masquerade = False - -.. Warning:: The IP address for the DHCP namespace is automatically allocated, - it will in most cases be the first address in the IP range, but - do make sure to verify that this is the case by running the - following commands on the Undercloud:: - - $ openstack port list --device-owner network:dhcp -c "Fixed IP Addresses" - +----------------------------------------------------------------------------+ - | Fixed IP Addresses | - +----------------------------------------------------------------------------+ - | ip_address='172.20.0.10', subnet_id='7526fbe3-f52a-4b39-a828-ec59f4ed12b2' | - +----------------------------------------------------------------------------+ - $ openstack subnet show 7526fbe3-f52a-4b39-a828-ec59f4ed12b2 -c name - +-------+--------+ - | Field | Value | - +-------+--------+ - | name | leaf0 | - +-------+--------+ - -DHCP relay configuration (Example) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -In the following examples ``dhcrelay`` from -`ISC DHCP software `_ is started using -configuration parameters to relay incoming DHCP request on interfaces: -``eth1``, ``eth2`` and ``eth3``. The undercloud DHCP servers are on the network -segment connected to the ``eth0`` interface. The DHCP server used for -``introspection`` is listening on ip address: ``172.20.0.1`` and the DHCP -server used for ``provisioning`` is listening on ip address: ``172.20.0.10``. 
- -Example, dhcrelay version 4.2.5 (in CentOS 7):: - - dhcrelay -d --no-pid 172.20.0.10 172.20.0.1 \ - -i eth0 -i eth1 -i eth2 -i eth3 - -Example, dhcrelay version 4.3.6 (in Fedora 28):: - - dhcrelay -d --no-pid 172.20.0.10 172.20.0.1 \ - -iu eth0 -id eth1 -id eth2 -id eth3 - -Map bare metal node ports to control plane network segments ------------------------------------------------------------ - -To enable deployment onto a L3 routed network the baremetal ports must have -its ``physical_network`` field configured. Each baremetal port is associated -with a baremetal node in the Bare Metal service. The physical network names are -the ones used in the ``subnets`` option in the undercloud configuration. - -.. Note:: The physical network name of the subnet specified as ``local_subnet`` - in ``undercloud.conf`` is special. It is **always** named - ``ctlplane``. - -#. Make sure the baremetal nodes are in one of the following states: *enroll*, - or *manageable*. If the baremetal node is not in one of these states the - command used to set the ``physical_network`` property on the baremetal port - will fail. (For additional details regarding node states see - :doc:`../provisioning/node_states`.) - - To set all nodes to ``manageable`` state run the following command:: - - for node in $(baremetal node list -f value -c Name); do \ - baremetal node manage $node --wait; done - -#. Use ``baremetal port list --node `` command to find out - which baremetal ports are associated with which baremetal node. Then set the - ``physical-network`` for the ports. - - In the example below three subnets where defined in the configuration, - *leaf0*, *leaf1* and *leaf2*. Notice that the ``local_subnet`` is ``leaf0``, - since the physical network for the ``local_subnet`` is always ``ctlplane`` - the baremetal port connected to ``leaf0`` use ``ctlplane``. The remaining - ports use the ``leafX`` names:: - - $ baremetal port set --physical-network ctlplane - - $ baremetal port set --physical-network leaf1 - $ baremetal port set --physical-network leaf2 - $ baremetal port set --physical-network leaf2 - -#. Make sure the nodes are in ``available`` state before deploying the - overcloud:: - - $ openstack overcloud node provide --all-manageable - -Create network data with multi-subnet networks ----------------------------------------------- - -Network data (``network_data.yaml``) is used to define the networks in the -deployment. Each network has a base subnet defined by the network's -properties: ``ip_subnet``, ``allocation_pools``, ``gateway_ip``, ``vlan`` etc. - -With support for routed networks (multiple subnets per network) the schema for -network's was extended with the ``subnets`` property, a map of one or more -additional subnets associated with the network. ``subnets`` property example:: - - subnets: - : - vlan: '' - ip_subnet: '/' - allocation_pools: [{'start': '', 'end': ''}] - gateway_ip: '' - -.. Note:: The name of the base subnet is ``name_lower`` with the suffix - ``_subnet`` appended. For example, the base subnet on the - ``InternalApi`` network will be named ``internal_api_subnet``. This - name is used when setting the subnet for a role to use the base - subnet. 
(See - `Create roles specific to each leaf (layer 2 segment)`_) - -Full networks data example:: - - - name: External - vip: true - name_lower: external - vlan: 100 - ip_subnet: '10.0.0.0/24' - allocation_pools: [{'start': '10.0.0.4', 'end': '10.0.0.99'}] - gateway_ip: '10.0.0.254' - - name: InternalApi - name_lower: internal_api - vip: true - vlan: 10 - ip_subnet: '172.17.0.0/24' - allocation_pools: [{'start': '172.17.0.10', 'end': '172.17.0.250'}] - gateway_ip: '172.17.0.254' - subnets: - internal_api_leaf1: - vlan: 11 - ip_subnet: '172.17.1.0/24' - allocation_pools: [{'start': '172.17.1.10', 'end': '172.17.1.250'}] - gateway_ip: '172.17.1.254' - - name: Storage - vip: true - vlan: 20 - name_lower: storage - ip_subnet: '172.18.0.0/24' - allocation_pools: [{'start': '172.18.0.10', 'end': '172.18.0.250'}] - gateway_ip: '172.18.0.254' - subnets: - storage_leaf1: - vlan: 21 - ip_subnet: '172.18.1.0/24' - allocation_pools: [{'start': '172.18.1.10', 'end': '172.18.1.250'}] - gateway_ip: '172.18.1.254' - - name: StorageMgmt - name_lower: storage_mgmt - vip: true - vlan: 30 - ip_subnet: '172.19.0.0/24' - allocation_pools: [{'start': '172.19.0.10', 'end': '172.19.0.250'}] - gateway_ip: '172.19.0.254' - subnets: - storage_mgmt_leaf1: - vlan: 31 - ip_subnet: '172.19.1.0/24' - allocation_pools: [{'start': '172.19.1.10', 'end': '172.19.1.250'}] - gateway_ip: '172.19.1.254' - - name: Tenant - vip: false # Tenant network does not use VIPs - name_lower: tenant - vlan: 40 - ip_subnet: '172.16.0.0/24' - allocation_pools: [{'start': '172.16.0.10', 'end': '172.16.0.250'}] - gateway_ip: '172.16.0.254' - subnets: - tenant_leaf1: - vlan: 41 - ip_subnet: '172.16.1.0/24' - allocation_pools: [{'start': '172.16.1.10', 'end': '172.16.1.250'}] - gateway_ip: '172.16.1.254' - -Create roles specific to each leaf (layer 2 segment) ----------------------------------------------------- - -To aid in scheduling and to allow override of leaf specific parameters in -``tripleo-heat-templates`` create new roles for each l2 leaf. In the -``networks`` property for each role, add the networks and associated subnet. - -The following is an example with one controller role, and two compute roles. -Please refer to :doc:`custom_roles` for details on configuring custom roles. - -Example ``roles_data`` below. (The list of default services has been left out.) - -:: - - ############################################################################# - # Role: Controller # - ############################################################################# - - name: Controller - description: | - Controller role that has all the controller services loaded and handles - Database, Messaging and Network functions. - CountDefault: 1 - tags: - - primary - - controller - networks: - External: - subnet: external_subnet - InternalApi: - subnet: internal_api_subnet - Storage: - subnet: storage_subnet - StorageMgmt: - subnet: storage_mgmt_subnet - Tenant: - subnet: tenant_subnet - HostnameFormatDefault: '%stackname%-controller-%index%' - ServicesDefault: - - OS::TripleO::Services::AodhApi - - OS::TripleO::Services:: [...] 
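-  # NOTE: The two compute roles below are identical apart from the subnet
-  # assigned to each network: ComputeLeaf0 keeps the base subnets, while
-  # ComputeLeaf1 references the leaf1 subnets defined in the network data
-  # (internal_api_leaf1, tenant_leaf1 and storage_leaf1).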
- ############################################################################# - # Role: ComputeLeaf0 # - ############################################################################# - - name: ComputeLeaf0 - description: | - Basic Compute Node role - CountDefault: 1 - networks: - InternalApi: - subnet: internal_api_subnet - Tenant: - subnet: tenant_subnet - Storage: - subnet: storage_subnet - HostnameFormatDefault: '%stackname%-compute-leaf0-%index%' - disable_upgrade_deployment: True - ServicesDefault: - - OS::TripleO::Services::AuditD - - OS::TripleO::Services:: [...] - ############################################################################# - # Role: ComputeLeaf1 # - ############################################################################# - - name: ComputeLeaf1 - description: | - Basic Compute Node role - CountDefault: 1 - networks: - InternalApi: - subnet: internal_api_leaf1 - Tenant: - subnet: tenant_leaf1 - Storage: - subnet: storage_leaf1 - HostnameFormatDefault: '%stackname%-compute-leaf1-%index%' - disable_upgrade_deployment: True - ServicesDefault: - - OS::TripleO::Services::AuditD - - OS::TripleO::Services:: [...] - -Configure node placement ------------------------- - -Use node placement to map the baremetal nodes to roles, with each role using a -different set of local layer 2 segments. Please refer to :doc:`../provisioning/node_placement` -for details on how to configure node placement. - -Add role specific configuration to ``parameter_defaults`` ---------------------------------------------------------- - -In TripleO templates role specific parameters are defined using variables. One -of the variables used is ``{{role.name}}``. The templates have parameters such -as ``{{role.name}}Count``, ``Overcloud{{role.name}}Flavor``, -``{{role.name}}ControlPlaneSubnet`` and many more. This enables per-role values -for these parameters. - -Before deploying the ``overcloud`` create an environment file (The examples in -this document uses ``node_data.yaml`` for this.) that contains the required -overrides. In the example below there are parameter overrides to specify the -*Count*, *Flavor* and *ControlPlaneSubnet* to use for the following roles: - -* Controller -* ComputeLeaf0 -* ComputeLeaf1 - -Parameter override example:: - - parameter_defaults: - OvercloudComputeLeaf0Flavor: compute-leaf0 - OvercloudComputeLeaf1Flavor: compute-leaf1 - ControllerCount: 3 - ComputeLeaf0Count: 5 - ComputeLeaf1Count: 5 - ControllerControlPlaneSubnet: leaf0 - ComputeLeaf0ControlPlaneSubnet: leaf0 - ComputeLeaf1ControlPlaneSubnet: leaf1 - -Network configuration templates -------------------------------- - -Network configuration templates are dynamically generated, but depending on the -hardware configuration, the sample configurations might not be an option. If -this is the case, the dynamically generated network configuration templates can -be generated manually providing a good starting point for manual customization. - -Use the ``process-templates.py`` tool to generate network config templates for -all roles. 
For example::
-
-  $ /usr/share/openstack-tripleo-heat-templates/tools/process-templates.py \
-    -p /usr/share/openstack-tripleo-heat-templates \
-    -r /home/stack/roles_data.yaml \
-    -n /home/stack/network_data_subnets_routed.yaml \
-    -o /home/stack/processed_templates
-
-The generated example templates for each role can now be found under the
-``/home/stack/processed_templates/network/config/`` directory::
-
-  /home/stack/processed_templates/network/config/
-  ├── bond-with-vlans
-  │   ├── computeleaf0.yaml
-  │   ├── computeleaf1.yaml
-  │   ├── controller-no-external.yaml
-  │   ├── controller-v6.yaml
-  │   ├── controller.yaml
-  │   └── README.md
-  ├── multiple-nics
-  │   ├── compute-dvr.yaml
-  │   ├── computeleaf0.yaml
-  │   ├── computeleaf1.yaml
-  │   ├── controller-v6.yaml
-  │   ├── controller.yaml
-  │   └── README.md
-  ├── single-nic-linux-bridge-vlans
-  │   ├── computeleaf0.yaml
-  │   ├── computeleaf1.yaml
-  │   ├── controller-v6.yaml
-  │   ├── controller.yaml
-  │   └── README.md
-  └── single-nic-vlans
-      ├── computeleaf0.yaml
-      ├── computeleaf1.yaml
-      ├── controller-no-external.yaml
-      ├── controller-v6.yaml
-      ├── controller.yaml
-      └── README.md
-
-Inspect the generated template files to find out which sample is most similar
-to the specific deployment's hardware configuration. Make copies, and edit the
-network configuration templates as needed.
-
-.. Note:: If compute nodes (or some other roles) in different leafs have the
-          same hardware configuration and network needs, a single network
-          configuration template can be used for both roles. For example, the
-          ``computeleaf0.yaml`` template could be copied as ``compute.yaml``
-          and used for both compute roles (``computeleaf0`` and
-          ``computeleaf1``).
-
-Create an environment file (``network-environment-overrides.yaml``) with
-``resource_registry`` overrides to specify the network configuration templates
-to use. For example::
-
-  resource_registry:
-    # Port assignments for the Controller
-    OS::TripleO::Controller::Net::SoftwareConfig:
-      /home/stack/templates/controller.yaml
-    # Port assignments for the ComputeLeaf0
-    OS::TripleO::ComputeLeaf0::Net::SoftwareConfig:
-      /home/stack/templates/compute.yaml
-    # Port assignments for the ComputeLeaf1
-    OS::TripleO::ComputeLeaf1::Net::SoftwareConfig:
-      /home/stack/templates/compute.yaml
-
-
-Virtual IP addresses (VIPs)
----------------------------
-
-If a controller role hosting VIPs (Virtual IP addresses) is not using the base
-subnet of one or more networks, additional overrides to the ``VipSubnetMap``
-parameter are required to ensure VIPs are created on the subnet associated
-with the L2 network segment the controller nodes are connected to.
-
-Example, specifying which subnets to use when creating VIPs for the different
-networks::
-
-  parameter_defaults:
-    VipSubnetMap:
-      ctlplane: leaf1
-      redis: internal_api_leaf1
-      InternalApi: internal_api_leaf1
-      Storage: storage_leaf1
-      StorageMgmt: storage_mgmt_leaf1
-
-In this document the ctlplane subnet for the Controller is ``leaf0``. To set
-which subnet on the ctlplane network will be used for cluster VIPs (Virtual IP
-addresses), the ``VipSubnetMap`` parameter must be overridden in an
-environment file. For example, add the following to
-``network-environment-overrides.yaml``::
-
-  parameter_defaults:
-    VipSubnetMap:
-      ctlplane: leaf0
-
-
-Deploy the overcloud
---------------------
-
-To deploy the overcloud, run the ``openstack overcloud deploy`` command,
-specifying the roles data file, the network data file and environment files.
-For example::
-
-  $ openstack overcloud deploy --templates \
-    -n /home/stack/templates/network_data_subnets_routed.yaml \
-    -r /home/stack/templates/roles_data.yaml \
-    -e /home/stack/environments/node_data.yaml \
-    -e /usr/share/openstack-tripleo-heat-templates/environments/network-isolation.yaml \
-    -e /usr/share/openstack-tripleo-heat-templates/environments/network-environment.yaml \
-    -e /home/stack/environments/network-environment-overrides.yaml
-
-.. Note:: Remember to include other environment files that you might want for
-          configuration of the overcloud.
diff --git a/deploy-guide/source/features/security_hardening.rst b/deploy-guide/source/features/security_hardening.rst
deleted file mode 100644
index 956210ea..00000000
--- a/deploy-guide/source/features/security_hardening.rst
+++ /dev/null
@@ -1,408 +0,0 @@
-Security Hardening
-==================
-
-TripleO can deploy Overcloud nodes with various Security Hardening values
-passed in as environment files to the ``openstack overcloud deploy`` command.
-
-.. note::
-   It is especially important to remember that you **must** include all
-   environment files needed to deploy the overcloud. Make sure
-   you pass the full environment in addition to your customization environments
-   at the end of each ``openstack overcloud deploy`` command.
-
-Horizon Password Validation
----------------------------
-
-Horizon provides a password validation check which OpenStack cloud operators
-can use to enforce password complexity.
-
-A regular expression can be used for password validation, together with help
-text to display if the user's password does not adhere to the validation
-checks.
-
-The following example will require users to create a password between 8 and 18
-characters in length::
-
-    parameter_defaults:
-      HorizonPasswordValidator: '^.{8,18}$'
-      HorizonPasswordValidatorHelp: 'Password must be between 8 and 18 characters.'
-
-If the above YAML were saved as ``horizon_password.yaml``, we can then pass it
-to the overcloud deploy command as follows::
-
-    openstack overcloud deploy --templates \
-      -e <full environment> -e horizon_password.yaml
-
-Default Security Values in Horizon
-----------------------------------
-
-The following config directives are set to ``True`` as a secure default, however
-if a reason exists for an operator to disable one of the following values, they
-can do so using an environment file.
-
-.. note:: The following directives should only be set to ``False`` once the
-          potential security impacts are fully understood.
-
-Enforce Password Check
-~~~~~~~~~~~~~~~~~~~~~~
-
-By setting ``ENFORCE_PASSWORD_CHECK`` to ``True`` within Horizon's
-``local_settings.py``, an ‘Admin Password’ field is displayed on the
-“Change Password” form to verify that it is the logged-in admin who wants to
-perform the password change.
-
-If a need is present to disable ``ENFORCE_PASSWORD_CHECK`` then this can be
-achieved using an environment file containing the following parameter::
-
-    parameter_defaults:
-      ControllerExtraConfig:
-        horizon::enforce_password_check: false
-
-Disallow Iframe Embed
-~~~~~~~~~~~~~~~~~~~~~
-
-DISALLOW_IFRAME_EMBED can be used to prevent Horizon from being embedded within
-an iframe. Legacy browsers are still vulnerable to a Cross-Frame Scripting (XFS)
-vulnerability, so this option allows extra security hardening where iframes are
-not used in the deployment.
- -If however a reason exists to allow Iframe embedding, then the following -parameter can be set within an environment file:: - - parameter_defaults: - ControllerExtraConfig: - horizon::disallow_iframe_embed: false - -Disable Password Reveal -~~~~~~~~~~~~~~~~~~~~~~~ - -In the same way as ``ENFORCE_PASSWORD_CHECK`` and ``DISALLOW_IFRAME_EMBED`` the -``DISABLE_PASSWORD_REVEAL`` value to be toggled as a parameter:: - - parameter_defaults: - ControllerExtraConfig: - horizon::disable_password_reveal: false - -SSH Banner Text ---------------- - -SSH ``/etc/issue`` Banner text can be set using the following parameters in an -environment file:: - - resource_registry: - OS::TripleO::Services::Sshd: ../deployment/sshd/sshd-baremetal-ansible.yaml - - parameter_defaults: - BannerText: | - ****************************************************************** - * This system is for the use of authorized users only. Usage of * - * this system may be monitored and recorded by system personnel. * - * Anyone using this system expressly consents to such monitoring * - * and is advised that if such monitoring reveals possible * - * evidence of criminal activity, system personnel may provide * - * the evidence from such monitoring to law enforcement officials.* - ****************************************************************** - -As with the previous Horizon Password Validation example, saving the above into -a yaml file, will allow passing the aforementioned parameters into the overcloud -deploy command:: - - openstack overcloud deploy --templates \ - -e -e ssh_banner.yaml - -Audit ------ - -Having a system capable of recording all audit events is key for troubleshooting -and performing analysis of events that led to a certain outcome. The audit system -is capable of logging many events such as someone changing the system time, -changes to Mandatory / Discretionary Access Control, creating / destroying users -or groups. - -Rules can be declared using an environment file and injected into -``/etc/audit/audit.rules``:: - - parameter_defaults: - AuditdRules: - 'Record Events that Modify User/Group Information': - content: '-w /etc/group -p wa -k audit_rules_usergroup_modification' - order : 1 - 'Collects System Administrator Actions': - content: '-w /etc/sudoers -p wa -k actions' - order : 2 - 'Record Events that Modify the Systems Mandatory Access Controls': - content: '-w /etc/selinux/ -p wa -k MAC-policy' - order : 3 - -Firewall Management -------------------- - -Iptables rules are automatically deployed on overcloud nodes to open only the -ports which are needed to get OpenStack working. Rules can be added during the -deployment when needed. For example, for Zabbix monitoring system. - -.. code-block:: yaml - - parameter_defaults: - ExtraFirewallRules: - '301 allow zabbix': - dport: 10050 - proto: tcp - source: 10.0.0.8 - jump: accept - -Rules can also be used to restrict access. The number used at definition of a -rule will determine where the nftables rule will be inserted. For example, -rabbitmq rule number is 109 by default. If you want to restrain it, you could -do. - -.. code-block:: yaml - - parameter_defaults: - ExtraFirewallRules: - '098 allow rabbit from internalapi network': - dport: - - 4369 - - 5672 - - 25672 - proto: tcp - source: 10.0.0.0/24 - jump: accept - '099 drop other rabbit access': - dport: - - 4369 - - 5672 - - 25672 - proto: tcp - jump: drop - -In this example, 098 and 099 are arbitrarily numbers that are smaller than the -default rabbitmq rule number. 
To know the number of a rule, inspect the active
-nftables rules on an appropriate node (a controller, in the case of rabbitmq):
-
-.. code-block:: shell
-
-    nft list chain inet filter TRIPLEO_INPUT
-    [...]
-    tcp dport { 4369, 5672, 25672-25683 } ct state new counter packets 0 bytes 0 accept comment "109 rabbitmq"
-
-Alternatively, it is possible to get this information from the TripleO service
-definition, in our case in `deployment/rabbitmq/rabbitmq-container-puppet.yaml`.
-
-.. code-block:: yaml
-
-    firewall_rules:
-      '109 rabbitmq':
-        dport:
-          - 4369
-          - 5672
-          - 25672
-          - 25673-25683
-
-Additional information regarding the available interface options, the role, and
-some of the implementation details can be reviewed `here `_.
-
-VXLAN and nftables
-~~~~~~~~~~~~~~~~~~
-
-In order to properly get VXLAN support, you have to add a couple of rules to
-the Undercloud firewall. This is especially true for a lab environment, or on
-the upstream CI infrastructure. Here is an example of the custom rules for
-the CI; feel free to adapt them. Note that the network is the one used on the
-eth0 interface, aka the "public" one of the Undercloud.
-
-.. code-block:: yaml
-
-    parameter_defaults:
-      ExtraFirewallRules:
-        '020 Allow VXLan from CI infra network':
-          proto: "udp"
-          dport: 4789
-          source: "PUBLIC_NETWORK_CIDR"
-          state: []
-        '021 Allow OTV for vxlan from CI infra network':
-          proto: "udp"
-          dport: 8472
-          source: "PUBLIC_NETWORK_CIDR"
-          state: []
-
-.. note:: The ``state: []`` is mandatory so that the rule does not only catch
-          NEW connections (the default with the nftables and iptables modules).
-
-AIDE - Intrusion Detection
---------------------------
-
-AIDE (Advanced Intrusion Detection Environment) is a file and directory
-integrity checker. It is used as a medium to reveal possible unauthorized file
-tampering / changes.
-
-AIDE creates an integrity database of file hashes, which can then be used as a
-comparison point to verify the integrity of the files and directories.
-
-The TripleO AIDE service allows an operator to populate entries into an AIDE
-configuration, which is then used by the AIDE service to create an integrity
-database. This can be achieved using an environment file with the following
-example structure:
-
-.. code-block:: yaml
-
-    resource_registry:
-      OS::TripleO::Services::Aide: /usr/share/openstack-tripleo-heat-templates/deployment/aide/aide-baremetal-ansible.yaml
-
-    parameter_defaults:
-      AideRules:
-        'TripleORules':
-          content: 'TripleORules = p+sha256'
-          order: 1
-        'etc':
-          content: '/etc/ TripleORules'
-          order: 2
-        'boot':
-          content: '/boot/ TripleORules'
-          order: 3
-        'sbin':
-          content: '/sbin/ TripleORules'
-          order: 4
-        'var':
-          content: '/var/ TripleORules'
-          order: 5
-        'not var/log':
-          content: '!/var/log.*'
-          order: 6
-        'not var/spool':
-          content: '!/var/spool.*'
-          order: 7
-        'not /var/adm/utmp':
-          content: '!/var/adm/utmp$'
-          order: 8
-        'not nova instances':
-          content: '!/var/lib/nova/instances.*'
-          order: 9
-
-.. note::
-    Operators should select their own required AIDE values, as the example list
-    above is not actively maintained or benchmarked. It only seeks to provide
-    and document the YAML structure required.
-
-If the above environment file were saved as `aide.yaml`, it could then be
-passed to the `overcloud deploy` command as follows::
-
-    openstack overcloud deploy --templates -e aide.yaml
-
-Let's walk through the different values used here.
-
-First an 'alias' name `TripleORules` is declared to save us repeatedly typing
-out the same attributes each time. To the alias we apply attributes of
-`p+sha256`. In AIDE terms this reads as monitor all file permissions `p` with an
-integrity checksum of `sha256`. For a complete list of attributes that can be
-used in AIDE's config files, refer to the `AIDE MAN page `_.
-
-Complex rules can be created using this format, such as the following::
-
-    MyAlias = p+i+n+u+g+s+b+m+c+sha512
-
-The above would translate as monitor permissions, inodes, number of links, user,
-group, size, block count, mtime, ctime, using sha512 for checksum generation.
-
-Note that the alias should always have an order position of `1`, which means
-that it is positioned at the top of the AIDE rules and is applied recursively to
-all values below.
-
-Following after the alias are the directories to monitor. Note that regular
-expressions can be used. For example, we set monitoring for the `var` directory,
-but override it with a not clause using `!`, as in `'!/var/log.*'` and
-`'!/var/spool.*'`.
-
-Further AIDE values
-~~~~~~~~~~~~~~~~~~~
-
-The following AIDE values can also be set.
-
-`AideConfPath`: The full POSIX path to the aide configuration file; this
-defaults to `/etc/aide.conf`. If no requirement is in place to change the file
-location, it is recommended to stick with the default path.
-
-`AideDBPath`: The full POSIX path to the AIDE integrity database. This value is
-configurable to allow operators to declare their own full path, as AIDE
-database files are often stored off-node, perhaps on a read-only file mount.
-
-`AideDBTempPath`: The full POSIX path to the AIDE integrity temporary database.
-This temporary file is created when AIDE initializes a new database.
-
-`AideHour`: This value sets the hour attribute of the AIDE cron configuration.
-
-`AideMinute`: This value sets the minute attribute of the AIDE cron
-configuration.
-
-`AideCronUser`: This value sets the Linux user used for the AIDE cron
-configuration.
-
-`AideEmail`: This value sets the email address that receives AIDE reports each
-time a cron run is made.
-
-`AideMuaPath`: This value sets the path to the Mail User Agent that is used to
-send AIDE reports to the email address set within `AideEmail`.
-
-Cron configuration
-~~~~~~~~~~~~~~~~~~
-
-The AIDE TripleO service allows configuration of a cron job. By default it will
-send reports to `/var/log/audit/`, unless `AideEmail` is set, in which case it
-will instead email the reports to the declared email address.
-
-AIDE and Upgrades
-~~~~~~~~~~~~~~~~~
-
-When an upgrade is performed, the AIDE service will automatically regenerate
-a new integrity database to ensure all upgraded files are correctly recomputed
-and possess an updated checksum.
-
-If `openstack overcloud deploy` is called as a subsequent run to an initial
-deployment *and* the AIDE configuration rules are changed, the TripleO AIDE
-service will rebuild the database to ensure the new config attributes are
-encapsulated in the integrity database.
-
-SecureTTY
----------
-
-SecureTTY allows disabling root access via any console device (tty) by means of
-entries in the `/etc/securetty` file.
- -An environment file can be used to set `/etc/securetty` entries as follows:: - - resource_registry: - OS::TripleO::Services::Securetty: ../deployment/securetty/securetty-baremetal-puppet.yaml - - parameter_defaults: - TtyValues: - - console - - tty1 - - tty2 - - tty3 - - tty4 - - tty5 - - tty6 - -Keystone CADF auditing ----------------------- - -Keystone CADF auditing can be enabled by setting `KeystoneNotificationFormat`:: - - parameter_defaults: - KeystoneNotificationFormat: cadf - -login.defs values ------------------ - -Entries can be made to `/etc/login.defs` to enforce password characteristics -for new users added to the system, for example:: - - resource_registry: - OS::TripleO::Services::LoginDefs: ../deployment/login-defs/login-defs-baremetal-puppet.yaml - - parameter_defaults: - PasswordMaxDays: 60 - PasswordMinDays: 1 - PasswordMinLen: 5 - PasswordWarnAge: 7 - FailDelay: 4 diff --git a/deploy-guide/source/features/server_blacklist.rst b/deploy-guide/source/features/server_blacklist.rst deleted file mode 100644 index 1183aef6..00000000 --- a/deploy-guide/source/features/server_blacklist.rst +++ /dev/null @@ -1,87 +0,0 @@ -Disabling updates to certain nodes -================================== - -Server blacklist ----------------- -Servers can be excluded from getting any updated Heat deployments by adding -them to a blacklist parameter called ``DeploymentServerBlacklist``. - - -Setting the blacklist -_____________________ -The ``DeploymentServerBlacklist`` parameter is a list of Heat server names. - -Write a new environment file, or add the parameter value to an existing -custom environment file and pass the file to the deployment command:: - - parameter_defaults: - DeploymentServerBlacklist: - - overcloud-compute-0 - - overcloud-compute-1 - - overcloud-compute-2 - -.. note:: - The server names in the parameter value are the names according to Heat, not - the actual server hostnames. - -Any servers in the list will be blacklisted by Heat from getting any updated -triggered deployments from Heat. After the stack operation completes, any -blacklisted servers will be unchanged. The blacklisted servers also could have -been powered off, or had their ``os-collect-config`` agents stopped during the -stack operation. - -The blacklist can be used during scale out operations or for isolating changes -to certain servers only. - -.. warning:: - Blacklisting servers disables **all** updates to the blacklisted nodes, even - for those deployments that could be considered critical. - -.. warning:: - Blacklisting servers should be done with caution, and only when the operator - understands that the requested change can be applied with a blacklist in - effect. - - It would be possible to blacklist servers in ways to create a hung stack in - Heat, or a misconfigured overcloud. For example, cluster configuration - changes that would need to be applied to all members of a pacemaker cluster - would not support blacklisting certain cluster members since it - could result is a misconfigured cluster. - -.. warning:: - The blacklist should not be used during the update or upgrade procedures. - Those procedures have their own methods for isolating changes to particular - servers. See the documentation for updates/upgrades for more information. - -.. warning:: - In cases where servers are added to the blacklist, further changes to those - nodes are not supported until the server is removed from the blacklist. This - includes updates/upgrades/scale up/scale down/node replacement. 
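-
-As a sketch of how the parameter is applied, assuming the example above were
-saved to a hypothetical ``blacklist.yaml`` environment file, it would simply be
-passed to the deployment command along with the rest of the environment
-files::
-
-    openstack overcloud deploy --templates \
-      -e blacklist.yaml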
- -Clearing the blacklist -______________________ -When clearing the blacklist for subsequent stack operations, an empty parameter -value must be sent with the deploy command. It is not sufficient to simply omit -the parameter since Heat will use the previously saved value. - -Send an empty list value to force Heat to clear the blacklist:: - - parameter_defaults: - DeploymentServerBlacklist: [] - -Skip deploy identifier ----------------------- -The default behavior during a stack update operation is to force puppet to -reapply all manifests. This can be a time consuming operation and is not always -required if not changing any configuration date such as in the case of only -scaling out certain roles. - -The behavior can be overridden by passing ``--skip-deploy-identifier`` to the -``openstack overcloud deploy`` command. - -Similar to the server blacklist feature, this feature should be used only when -the operator is sure that puppet can be safely skipped on the stack update. - -.. note:: - In some cases, puppet will still run even when ``--skip-deploy-identifier`` - is specified. These cases include changes to puppet manifests or hieradata. diff --git a/deploy-guide/source/features/split_stack.rst b/deploy-guide/source/features/split_stack.rst deleted file mode 100644 index bd5cfc35..00000000 --- a/deploy-guide/source/features/split_stack.rst +++ /dev/null @@ -1,129 +0,0 @@ -Splitting the Overcloud stack into multiple independent Heat stacks -=================================================================== - -.. note:: Since victoria TripleO provisions baremetal using a separate - workflow :doc:`../provisioning/baremetal_provision` that does not - involve Heat stack, making this feature irrelevant. - -split-stack is a feature in TripleO that splits the overcloud stack into -multiple independent stacks in Heat. - -The ``overcloud`` stack is split into an ``overcloud-baremetal`` and -``overcloud-services`` stack. This allows for independent and isolated -management of the baremetal and services part of the Overcloud deployment. It -is a more modular design than deploying a single ``overcloud`` stack in that it -allows either the baremetal or services stack to be replaced by tooling that is -external to TripleO if desired. - -The ``overcloud-services`` stack makes extensive use of the deployed-server -feature, documented at :doc:`deployed_server` in order to orchestrate the -deployment and configuration of the services separate from the baremetal -deployment. - - -split-stack allows for mixing baremetal systems deployed by TripleO and those -deployed by external tooling when creating the services stack. Since the -baremetal resources are completely abstracted behind the deployed-server -interface when deploying the services stack, it does not matter whether the -servers were actually created with TripleO or not. - - -split-stack Requirements ------------------------- - -A default split-stack deployment (detailed in the later steps) can be deployed -without any special requirements. - -More advanced deployments where baremetal servers provisioned by TripleO will -be mixed with those not provisioned by TripleO will want to pay attention to -the requirements around using already deployed servers from -:doc:`deployed_server`. The requirements for using deployed servers will apply -when not using servers provisioned by TripleO. 
- -Default split-stack deployment ------------------------------- - -split-stack will be deployed by running 2 separate ``openstack overcloud -deploy`` commands to deploy the separate stacks. - -If applicable, prepare the custom roles files and any custom environments -initially. The custom roles file and an environment setting the role counts -should be passed to both deployment commands so that enough baremetal nodes are -deployed per what the ``overcloud-services`` stack expects. - -Baremetal Deployment Command -^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Run the deployment command to deploy the ``overcloud-baremetal`` stack. -An additional environment file, ``overcloud-baremetal.yaml``, is passed to the -deployment to enable deploying just the baremetal stack. - -Enough baremetal nodes should be deployed to match how many nodes per role will -be needed when the services stack is deployed later. Be sure that the -environment file being used to set the role counts is passed to the baremetal -deployment command:: - - openstack overcloud deploy \ - \ - --stack overcloud-baremetal \ - -r roles-data.yaml \ - -e /usr/share/openstack-tripleo-heat-templates/environments/overcloud-baremetal.yaml \ - -e /usr/share/openstack-tripleo-heat-templates/environments/split-stack-consistent-hostname-format.yaml - -The ``--stack`` argument sets the name of the Heat stack to -``overcloud-baremetal``. This will also be the name of the Swift container that -stores the stack's plan (templates) and of the Mistral environment. - -The ``roles-data.yaml`` roles file illustrates passing a custom roles file to -the deployment command. It is not necessary to use custom roles when using -split stack, however if custom roles are used, the same roles file should be -used for both stacks. - -The ``overcloud-baremetal.yaml`` environment will set the parameters for the -deployment such that no services will be deployed. - -The ``split-stack-consistent-hostname-format.yaml`` environment will set the -respective ``HostnameFormat`` parameters for each role defined in -the role files used. The server hostnames for the 2 stacks must be the same, -otherwise the servers will not be able to pull their deployment metadata from -Heat. - -.. warning:: - - Do not pass any network isolation templates or NIC config templates to the - ``overcloud-baremetal`` stack deployment command. These will only be passed - to the ``overcloud-services`` stack deployment command. - -Services Deployment Command -^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -The services stack, ``overcloud-services`` will now be deployed with a separate -deployment command:: - - openstack overcloud deploy \ - \ - --stack overcloud-services \ - --disable-validations \ - -r roles-data.yaml \ - -e /usr/share/openstack-tripleo-heat-templates/environments/deployed-server-environment.yaml \ - -e /usr/share/openstack-tripleo-heat-templates/environments/deployed-server-deployed-neutron-ports.yaml \ - -e /usr/share/openstack-tripleo-heat-templates/environments/deployed-server-bootstrap-environment-centos.yaml \ - -e /usr/share/openstack-tripleo-heat-templates/environments/split-stack-consistent-hostname-format.yaml - -The ``overcloud-services`` stack makes use of the "deployed-server" feature. -The additional environments needed are shown in the above command. See -:doc:`deployed_server` for more information on how to fully configure the -feature. - -The roles file, ``roles-data.yaml`` is again passed to the services stack as -the same roles file should be used for both stacks. 
- -The ``split-stack-consistent-hostname-format.yaml`` environment is again -passed, so that the hostnames used for the server resources created by Heat are -the same as were created in the previous baremetal stack. - -During this deployment, any network isolation environments and/or NIC config -templates should be passed for the desired network configuration. - -The stack should complete and the generated ``overcloudrc`` can be used to -interact with the Overcloud. diff --git a/deploy-guide/source/features/sriov_deployment.rst b/deploy-guide/source/features/sriov_deployment.rst deleted file mode 100644 index a06226bb..00000000 --- a/deploy-guide/source/features/sriov_deployment.rst +++ /dev/null @@ -1,88 +0,0 @@ -Deploying with SR-IOV Support -=============================== - -TripleO can deploy Overcloud nodes with SR-IOV support. A new role ``ComputeSriov`` -has been added to create a custom ``roles_data.yaml`` with composable SR-IOV role. - -Execute below command to create the ``roles_data.yaml``:: - - openstack overcloud roles generate -o roles_data.yaml Controller ComputeSriov - -Once a roles file is created, the following changes are required: - -- Deploy Command -- Parameters -- Network Config - -Deploy Command ----------------- -Deploy command should include the generated roles data file from the above -command. - -Deploy command should also include the SR-IOV environment file to include the -neutron-sriov-agent service. All the required parameters are also specified in -this environment file. The parameters has to be configured according to the -baremetal on which SR-IOV needs to be enabled. - -Also, SR-IOV requires mandatory kernel parameters to be set, like -``intel_iommu=on iommu=pt`` on Intel machines. In order to enable the -configuration of kernel parameters to the host, host-config-pre-network -environment file has to be added for the deploy command. - -Adding the following arguments to the ``openstack overcloud deploy`` command -will do the trick:: - - openstack overcloud deploy --templates \ - -r roles_data.yaml \ - -e /usr/share/openstack-tripleo-heat-templates/environments/services/neutron-sriov.yaml \ - -e /usr/share/openstack-tripleo-heat-templates/environments/host-config-and-reboot.yaml \ - ... - -Parameters ----------- -Following are the list of parameters which need to be provided for deploying -with SR-IOV support. - -* NovaPCIPassthrough: Provide the list of SR-IOV device names, the logical network, - PCI addresses etc. The PF/VF devices matching the criteria would be available for - guests. -* NeutronPhysicalDevMappings: The map of logical network name and the physical interface. - - -Example:: - - parameter_defaults: - NovaPCIPassthrough: - - devname: "p7p1" - physical_network: "sriov1_net" - - devname: "p7p2" - physical_network: "sriov2_net" - NeutronPhysicalDevMappings: "sriov1_net:p7p1,sriov2_net:p7p2" - - -The parameter ``KernelArgs`` should be provided in the deployment environment -file, with the set of kernel boot parameters to be applied on the -``ComputeSriov`` role where SR-IOV is enabled:: - - parameter_defaults: - ComputeSriovParameters: - KernelArgs: "intel_iommu=on iommu=pt" - - -Network Config --------------- -SR-IOV supported network interfaces should be specified in the network config -templates as sriov_pf type. This mechanism of configuring numvfs for SR-IOV -device is recommended and NeutronSriovNumVFs shall be avoided. 
- -Example:: - - network_config: - - type: sriov_pf - name: p7p2 - mtu: 9000 - numvfs: 10 - use_dhcp: false - defroute: false - nm_controlled: true - promisc: false diff --git a/deploy-guide/source/features/ssl.rst b/deploy-guide/source/features/ssl.rst deleted file mode 100644 index 2374ce34..00000000 --- a/deploy-guide/source/features/ssl.rst +++ /dev/null @@ -1,399 +0,0 @@ -.. _ssl: - -Deploying with SSL -================== - -TripleO supports deploying with SSL on the public OpenStack endpoints as well -as deploying SSL in the internal network for most services. - -This document will focus on deployments using network isolation. For more -details on deploying that way, see -:doc:`network_isolation` - -Undercloud SSL --------------- - -To enable SSL with an automatically generated certificate, you must set -the ``generate_service_certificate`` option in ``undercloud.conf`` to -``True``. This will generate a certificate in ``/etc/pki/tls/certs`` with -a file name that follows the following pattern:: - - undercloud-[undercloud_public_vip].pem - -This will be a PEM file in a format that HAProxy can understand (see the -HAProxy documentation for more information on this). - -.. admonition:: Stable Branch - :class: stable - - As of the Rocky release, the default is to have TLS enabled through - this option. - -This option for auto-generating certificates uses Certmonger to request -and keep track of the certificate. So you will see a certificate with the -ID of ``undercloud-haproxy-public-cert`` in certmonger (you can check this -by using the ``sudo getcert list`` command). Note that this also implies -that certmonger will manage the certificate's lifecycle, so when it needs -renewing, certmonger will do that for you. - -The default is to use Certmonger's ``local`` CA. So using this option has -the side-effect of extracting Certmonger's local CA to a PEM file that is -located in the following path:: - - /etc/pki/ca-trust/source/anchors/cm-local-ca.pem - -This certificate will then be added to the trusted CA chain, since this is -needed to be able to use the undercloud's endpoints with that certificate. - -.. admonition:: Stable Branch - :class: stable - - As of the Rocky release, the default is for TripleO pass this CA - certificate to overcloud nodes so it'll be trusted. - -.. note:: If you need to access the undercloud from outside the node, the - aforementioned file is the one you need to add to your trust store. - So for RHEL-based systems you need to copy ``cm-local-ca.pem`` into - ``/etc/pki/ca-trust/source/anchors/`` and subsequently run the - command ``update-ca-trust extract``. This will add that CA to your - trust store. - -However, it is possible to not use certmonger's ``local`` CA. For -instance, one can use FreeIPA as the CA by setting the option -``certificate_generation_ca`` in ``undercloud.conf`` to have 'IPA' as the -value. This requires the undercloud host to be enrolled as a FreeIPA -client, and to define a ``haproxy/@`` -service in FreeIPA. We also need to set the option ``service_principal`` -to the relevant value in ``undercloud.conf``. Finally, we need to set the -public endpoints to use FQDNs instead of IP addresses, which will also -then use an FQDN for the certificate. - -To enable an FQDN for the certificate we set the ``undercloud_public_vip`` -to the desired hostname in ``undercloud.conf``. This will in turn also set -the keystone endpoints to relevant values. 
- -Note that the ``generate_service_certificate`` option doesn't take into -account the ``undercloud_service_certificate`` option and will have -precedence over it. - -To enable SSL on the undercloud with a pre-created certificate, you must -set the ``undercloud_service_certificate`` option in ``undercloud.conf`` -to an appropriate certificate file. Important: -The certificate file's Common Name *must* be set to the value of -``undercloud_public_vip`` in undercloud.conf. - -If you do not have a trusted CA signed certificate file, you can alternatively -generate a self-signed certificate file using the following command:: - - openssl genrsa -out privkey.pem 2048 - -The next command will prompt for some identification details. Most of these don't -matter, but make sure the ``Common Name`` entered matches the value of -``undercloud_public_vip`` in undercloud.conf:: - - openssl req -new -x509 -key privkey.pem -out cacert.pem -days 365 - -Combine the two files into one for HAProxy to use. The order of the -files in this command matters, so do not change it:: - - cat cacert.pem privkey.pem > undercloud.pem - -Move the file to a more appropriate location and set the SELinux context:: - - sudo mkdir /etc/pki/instack-certs - sudo cp undercloud.pem /etc/pki/instack-certs - sudo semanage fcontext -a -t etc_t "/etc/pki/instack-certs(/.*)?" - sudo restorecon -R /etc/pki/instack-certs - -``undercloud_service_certificate`` should then be set to -``/etc/pki/instack-certs/undercloud.pem``. - -Add the self-signed CA certificate to the undercloud system's trusted -certificate store:: - - sudo cp cacert.pem /etc/pki/ca-trust/source/anchors/ - sudo update-ca-trust extract - -.. note:: If you're using a self-signed or autogenerated certificate for the - undercloud, the overcloud nodes will need to trust it. So the - contents of the certificate need to be set in the CAMap as described - in ":ref:`ca-trust`" section. - -Overcloud SSL -------------- - -Certificate and Public VIP Configuration -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The public VIP of the deployed overcloud needs to be predictable in order for -the SSL certificate to be configured properly. There are two options for -configuring the certificate: - -#. The certificate's Common Name can be set to the IP of the public - VIP. In this case, the Common Name must match *exactly*. If the public - VIP is ``10.0.0.1``, the certificate's Common Name must also be ``10.0.0.1``. - Wild cards will not work. - -#. The overcloud endpoints can be configured to point at - a DNS name. In this case, the certificate's Common Name must be valid - for the FQDN of the overcloud endpoints. Wild cards should work fine. - Note that this option also requires pre-configuration of the specified - DNS server with the appropriate FQDN and public VIP. - -In either case, the public VIP must be explicitly specified as part of the -deployment configuration. This can be done by passing an environment file -like the following:: - - parameter_defaults: - PublicVirtualFixedIPs: [{'ip_address':'10.0.0.1'}] - -.. note:: If network isolation is not in use, the ControlFixedIPs parameter - should be set instead. - -The selected IP should fall in the specified allocation range for the public -network. - -Certificate Details -~~~~~~~~~~~~~~~~~~~ - -.. This admonition is intentionally left class-less because it is only used - on the SSL page. -.. 
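-
-As a rough sketch, the FreeIPA-backed configuration described above combines
-the following ``undercloud.conf`` settings (the hostname, realm and principal
-below are placeholders, not defaults)::
-
-  [DEFAULT]
-  generate_service_certificate = True
-  certificate_generation_ca = IPA
-  service_principal = haproxy/undercloud.example.com@EXAMPLE.COM
-  undercloud_public_vip = undercloud.example.com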
admonition:: Self-Signed SSL - - It is not recommended that the self-signed certificate is trusted; So for - this purpose, having a self-signed CA certificate is a better choice. In - this case we will trust the self-signed CA certificate, and not the leaf - certificate that will be used for the public VIP; This leaf certificate, - however, will be signed by the self-signed CA. - - For the self-signed case, just the predictable public VIP method will - be documented, as DNS configuration is outside the scope of this document. - - Generate a private key:: - - openssl genrsa -out overcloud-ca-privkey.pem 2048 - - Generate a self-signed CA certificate. This command will prompt for some - identifying information. Most of the fields don't matter, and the CN should - not be the same as the one we'll give the leaf certificate. You can choose a - CN for this such as "TripleO CA":: - - openssl req -new -x509 -key overcloud-ca-privkey.pem \ - -out overcloud-cacert.pem -days 365 - - Add the self-signed CA certificate to the undercloud's trusted certificate - store. Adding this file to the overcloud nodes will be discussed later:: - - sudo cp overcloud-cacert.pem /etc/pki/ca-trust/source/anchors/ - sudo update-ca-trust extract - - This certificate location needs to be added to the ``enabled-tls.yaml`` file - with the parameter ``PublicTLSCAFile`` like so:: - - parameter_defaults: - PublicTLSCAFile: '/etc/pki/ca-trust/source/anchors/overcloud-cacert.pem' - - ``PublicTLSCAFile`` ensures the CA Certificate will be added to the ``clouds.yaml`` - file for the ``cacert`` parameter. - - Generate the leaf certificate request and key that will be used for the - public VIP. To do this, we will create two files for the certificate - request. First, we create the server.csr.cnf:: - - [req] - default_bits = 2048 - prompt = no - default_md = sha256 - distinguished_name = dn - [dn] - C=AU - ST=Queensland - L=Brisbane - O=your-org - OU=admin - emailAddress=me@example.com - CN=openstack.example.com - - Create v3.ext:: - - authorityKeyIdentifier=keyid,issuer - basicConstraints=CA:FALSE - keyUsage = digitalSignature, nonRepudiation, keyEncipherment, dataEncipherment - subjectAltName = @alt_names - [alt_names] - DNS.1=openstack.example.com - - Create the Key:: - - openssl req -new -sha256 -nodes -out server.csr \ - -newkey rsa:2048 -keyout server-key.pem \ - -config <( cat server.csr.cnf ) - - Create the certificate:: - - openssl x509 -req -in server.csr \ - -CA overcloud-cacert.pem \ - -CAkey overcloud-ca-privkey.pem \ - -CAcreateserial -out server-cert.pem \ - -days 500 -sha256 -extfile v3.ext - - The following is a list of which files generated in the previous steps - map to which parameters in the SSL environment files:: - - overcloud-cacert.pem: SSLRootCertificate - server-key.pem: SSLKey - server-cert.pem: SSLCertificate - -The contents of the private key and certificate files must be provided -to Heat as part of the deployment command. To do this, there is a sample -environment file in tripleo-heat-templates with fields for the file contents. - -It is generally recommended that the original copy of tripleo-heat-templates -in ``/usr/share/openstack-tripleo-heat-templates`` not be altered, since it -could be overwritten by a package update at any time. Instead, make a copy -of the templates:: - - cp -r /usr/share/openstack-tripleo-heat-templates ~/ssl-heat-templates - -Then edit the enable-tls.yaml environment file. 
If using the location from the -previous command, the correct file would be in -``~/ssl-heat-templates/environments/ssl/enable-tls.yaml``. Insert the contents of -the private key and certificate files in their respective locations. - -.. admonition:: Stable Branch - :class: stable - - In the Pike release the SSL environment files in the top-level environments - directory were deprecated and moved to the ``ssl`` subdirectory as - shown in the example paths. For Ocata and older the paths will still need - to refer to the top-level environments. The filenames are all the same, but - the ``ssl`` directory must be removed from the path. - -.. note:: The certificate and key will be multi-line values, and all of the lines - must be indented to the same level. - -An abbreviated version of how the file should look:: - - parameter_defaults: - SSLCertificate: | - -----BEGIN CERTIFICATE----- - MIIDgzCCAmugAwIBAgIJAKk46qw6ncJaMA0GCSqGSIb3DQEBCwUAMFgxCzAJBgNV - [snip] - sFW3S2roS4X0Af/kSSD8mlBBTFTCMBAj6rtLBKLaQbIxEpIzrgvp - -----END CERTIFICATE----- - [rest of file snipped] - -``SSLKey`` should look similar, except with the value of the private key. - -``SSLIntermediateCertificate`` can be set in the same way if the certificate -signer uses an intermediate certificate. Note that the ``|`` character must -be added as in the other values to indicate that this is a multi-line value. - -When using a self-signed certificate or a signer whose certificate is -not in the default trust store on the overcloud image it will be necessary -to inject the certificate as part of the deploy process. This can be done -with the environment file ``~/ssl-heat-templates/environments/ssl/inject-trust-anchor.yaml``. -Insert the contents of the signer's root CA certificate in the appropriate -location, in a similar fashion to what was done for the certificate and key -above. - -.. admonition:: Self-Signed SSL - :class: selfsigned - - Injecting the root CA certificate is required for self-signed SSL. The - correct value to use is the contents of the ``overcloud-cacert.pem`` file. - -DNS Endpoint Configuration -~~~~~~~~~~~~~~~~~~~~~~~~~~ - -When deploying with DNS endpoint addresses, two additional parameters must be -passed in a Heat environment file. These are ``CloudName`` and ``DnsServers``. -To do so, create a new file named something like ``cloudname.yaml``:: - - parameter_defaults: - CloudName: my-overcloud.my-domain.com - DnsServers: 10.0.0.100 - -Replace the values with ones appropriate for the target environment. Note that -the configured DNS server(s) must have an entry for the configured ``CloudName`` -that matches the public VIP. - -In addition, when a DNS endpoint is being used, make sure to pass the -``tls-endpoints-public-dns.yaml`` environment to your deploy command. See the examples -below. - -Deploying an SSL Environment -~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The ``enable-tls.yaml`` file must always be passed to use SSL on the public -endpoints. Depending on the specific configuration, additional files will -also be needed. Examples of the necessary parameters for different scenarios -follow. 
- -IP-based certificate:: - - -e ~/ssl-heat-templates/environments/ssl/enable-tls.yaml -e ~/ssl-heat-templates/environments/ssl/tls-endpoints-public-ip.yaml - -Self-signed IP-based certificate:: - - -e ~/ssl-heat-templates/environments/ssl/enable-tls.yaml -e ~/ssl-heat-templates/environments/ssl/tls-endpoints-public-ip.yaml -e ~/ssl-heat-templates/environments/ssl/inject-trust-anchor.yaml - -DNS-based certificate:: - - -e ~/ssl-heat-templates/environments/ssl/enable-tls.yaml -e ~/ssl-heat-templates/environments/ssl/tls-endpoints-public-dns.yaml -e ~/cloudname.yaml - -Self-signed DNS-based certificate:: - - -e ~/ssl-heat-templates/environments/ssl/enable-tls.yaml -e ~/ssl-heat-templates/environments/ssl/tls-endpoints-public-dns.yaml -e ~/cloudname.yaml -e ~/ssl-heat-templates/environments/ssl/inject-trust-anchor.yaml - -It is also possible to get all your certificates from a CA. For this you need -to include the **environments/services/haproxy-public-tls-certmonger.yaml** -environment file. - -.. _ca-trust: - -Getting the overcloud to trust CAs -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -As mentioned above, it is possible to get the overcloud to trust a CA by using -the ``~/ssl-heat-templates/environments/ssl/inject-trust-anchor.yaml`` environment -and adding the necessary details there. However, that environment has the -restriction that it will only allow you to inject one CA. However, the -file ``~/ssl-heat-templates/environments/ssl/inject-trust-anchor-hiera.yaml`` is an -alternative that actually supports as many CA certificates as you need. - -.. note:: This is only available since Newton. Older versions of TripleO don't - support this. - -This file is a template of how you should fill the ``CAMap`` parameter which is -passed via parameter defaults. It looks like this:: - - CAMap: - first-ca-name: - content: | - The content of the CA cert goes here - second-ca-name: - content: | - The content of the CA cert goes here - -where ``first-ca-name`` and ``second-ca-name`` will generate the files -``first-ca-name.pem`` and ``second-ca-name.pem`` respectively. These files will -be stored in the ``/etc/pki/ca-trust/source/anchors/`` directory in each node -of the overcloud and will be added to the trusted certificate chain of each of -the nodes. You must be careful that the content is a block string in yaml and -is in PEM format. - -.. admonition:: Stable Branch - :class: stable - - As of Rocky, the undercloud now defaults to using TLS through the - autogenerated certificate. If you're upgrading your undercloud and - had the ``generate_service_certificate``, it also automatically passes - the CA certificate via the ``CAMap`` parameter. - -.. note:: In some cases, such as when using Ceph, the overcloud needs to trust - the undercloud's CA certificate. If you're using the default CA in - the undercloud, and autogenerated your certificates, you'll need to - copy the contents of - ``/etc/pki/ca-trust/source/anchors/cm-local-ca.pem`` into the - aforementioned ``CAMap`` parameter. diff --git a/deploy-guide/source/features/swift_external.rst b/deploy-guide/source/features/swift_external.rst deleted file mode 100644 index 07650a64..00000000 --- a/deploy-guide/source/features/swift_external.rst +++ /dev/null @@ -1,85 +0,0 @@ -Use an external Swift Proxy with the Overcloud -=============================================== - -|project| supports use of an external Swift (or Ceph RadosGW) proxy, already -available to the operator. 
- -Use of an external Swift proxy can be configured using a particular environment file -when deploying the overcloud, specifically `environments/swift-external.yaml`. - -In the environment file above user must adjust the parameters to fit -its setup by creating a custom environment file (i.e. -*~/my-swift-settings.yaml*):: - - parameter_defaults: - ExternalSwiftPublicUrl: 'http://:9024/v1/AUTH_%(tenant_id)s' - ExternalSwiftInternalUrl: 'http://:9024/v1/AUTH_%(tenant_id)s' - ExternalSwiftAdminUrl: 'http://:9024' - ExternalSwiftUserTenant: 'service' - SwiftPassword: 'choose_a_random_password' - -.. note:: - - When the external Swift is implemented by Ceph RadosGW, the endpoint will be - different; the /v1/ part needs to be replaced with /swift/v1, for example: - `http://:9024/v1/AUTH_%(tenant_id)s` - becomes - `http://:9024/swift/v1/AUTH_%(tenant_id)s` - -The user can create an environment file with the required settings -and add the files above to the deploy commandline:: - - openstack overcloud deploy --templates -e /usr/share/openstack-tripleo-heat-templates/environments/swift-external.yaml -e ~/my-swift-settings.yaml - -Once the deploy has succeeded, user has to complete the -configuration on the external swift proxy, configuring it to use the -keystone authentication provider. This environment file creates also -a service user called *swift* that can be used for this purpose. The -password for this user is defined by using the *SwiftPassword* -parameter, as shown above. - -The external Swift proxy must use Keystone from the overcloud, otherwise -authentication will fail. The public Keystone endpoint must be -accessible from the proxy therefore. - -The following snippet from `/etc/swift/proxy-server.conf` is an example -how to configure the Swift proxy to use Keystone from the overcloud:: - - [pipeline:main] - pipeline = [... other middlewares ...] authtoken keystone [... other middlewares ...] - - [filter:keystone] - use = egg:swift#keystoneauth - operator_roles = admin, SwiftOperator - cache = swift.cache - - [filter:authtoken] - paste.filter_factory = keystonemiddleware.auth_token:filter_factory - signing_dir = /tmp/keystone-signing-swift - www_authenticate_uri = http://:5000/ - auth_url = http://:5000/ - password = - auth_plugin = password - project_domain_id = default - user_domain_id = default - project_name = service - username = swift - cache = swift.cache - include_service_catalog = False - delay_auth_decision = True - -For Ceph RadosGW instead, the following settings can be used:: - - rgw_keystone_api_version: 3 - rgw_keystone_url: http://:5000/ - rgw_keystone_accepted_roles: 'member, Member, admin' - rgw_keystone_accepted_admin_roles: ResellerAdmin, swiftoperator - rgw_keystone_admin_domain: default - rgw_keystone_admin_project: service - rgw_keystone_admin_user: swift - rgw_keystone_admin_password: - rgw_keystone_implicit_tenants: 'true' - rgw_keystone_revocation_interval: '0' - rgw_s3_auth_use_keystone: 'true' - rgw_swift_versioning_enabled: 'true' - rgw_swift_account_in_url: 'true' diff --git a/deploy-guide/source/features/tls-everywhere.rst b/deploy-guide/source/features/tls-everywhere.rst deleted file mode 100644 index 94c558a6..00000000 --- a/deploy-guide/source/features/tls-everywhere.rst +++ /dev/null @@ -1,405 +0,0 @@ -Deploying TLS-everywhere -======================== - -Setting up *TLS-everywhere* primarily consists of a few additional steps you -need to take on the undercloud and FreeIPA server. 
These steps consist of -installing additional packages and enrolling the undercloud host as a FreeIPA -client. - -The OpenStack release you are deploying affects which tools you can use to -deploy *TLS-everywhere*. For deployments using Queens through Stein you must -use Novajoin. For deployments using Train or Ussuri, you can use either -Novajoin or tripleo-ipa. For deployments using Victoria or newer releases you -must use tripleo-ipa. Deployments :ref:`deployed_server` must also use -tripleo-ipa. We recommend using tripleo-ipa whenever possible. Let's walk -through each step using both tripleo-ipa and Novajoin. - -You can find a primer on the various TLS deployment strategies and components -in the :doc:`tls-introduction` documentation. - -TLS-everywhere with tripleo-ipa -------------------------------- - -.. note:: - - This deployment strategy is only supported on Train and newer releases. If - you're deploying a version older than Train, you'll need to use Novajoin to - accomplish *TLS-everywhere*, which is documented below. - -Do the following steps before deploying your undercloud. - -Configure DNS -~~~~~~~~~~~~~ - -*TLS-everywhere* deployments use FreeIPA as the DNS server. You need to set the -proper search domain and nameserver on the undercloud. To do this, you need to -know the deployment domain, the domain of the FreeIPA server, and the FreeIPA -server's IP address. For example, if the deployment domain is `example.com` and -the FreeIPA server domain is `bigcorp.com`, you should set the following in -`/etc/resolv.conf`:: - - search example.com bigcorp.com - nameserver $FREEIPA_IP_ADDRESS - -This step ensures the undercloud can resolve newly added hosts and services -after TripleO enrolls them as FreeIPA clients. You only need to add both search -domains if they're different. If the FreeIPA server is using the same domain as -the deployment you only need to specify the deployment domain. - -Configure FreeIPA -~~~~~~~~~~~~~~~~~ - -.. note:: - This section assumes you have permissions to make writeable changes to your - FreeIPA server. If you don't have those permissions or direct access to the - FreeIPA server, you'll need to contact your FreeIPA administrator and have - them perform the following steps either using ansible scripts or manually. - -Before you configure the undercloud, you need to ensure FreeIPA is configured -with the correct principal and privileges. This allows the undercloud to add -new hosts, services, and DNS records in FreeIPA during the overcloud -installation. - -The undercloud will enroll itself as a FreeIPA client and download a keytab to -use for authentication during the installation process. To do this, it needs a -one-time password (OTP) from FreeIPA that you configure in ``undercloud.conf``. - -You can generate the OTP manually if you have the correct permissions to add -hosts, modify permissions, update roles, and create principals in FreeIPA. You -need to perform these actions from an existing FreeIPA client. Note, the -FreeIPA server itself is enrolled as a client. - -You can find a set of `playbooks -`_ -in tripleo-ipa that automate creating permissions, hosts, and principals for -the undercloud. These playbooks expect the ``IPA_PRINCIPAL``, which is a user -in FreeIPA, to have the necessary permissions to perform the tasks in each -playbook (e.g., ``ipa privilege-add-permission``, ``ipa host-add``, etc). They -also expect you to generate a kerberos token before executing each playbook. 
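
For example, before running each of the playbooks described in the next sections, you
would first obtain a ticket for that principal. A minimal sketch, assuming ``admin`` is
a FreeIPA principal that holds the required permissions::

    $ kinit admin    # authenticate as the FreeIPA principal with the needed privileges
    $ klist          # optional: confirm a valid ticket-granting ticket exists
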
- -Create a FreeIPA role -^^^^^^^^^^^^^^^^^^^^^ - -First, you need to create a new FreeIPA role with the appropriate permissions -for managing hosts, principals, services, and DNS entries:: - - $ kinit - $ export IPA_PASSWORD=$IPA_PASSWORD - $ export IPA_PRINCIPAL=$IPA_USER - $ export UNDERCLOUD_FQDN=undercloud.example.com - $ ansible-playbook /usr/share/ansible/tripleo-playbooks/ipa-server-create-role.yaml - -Register the undercloud -^^^^^^^^^^^^^^^^^^^^^^^ - -Next, you need to register the undercloud as a FreeIPA client and generate a -OTP that the undercloud will use for enrollment, which is necessary before it -can manage entities in FreeIPA:: - - $ export IPA_PASSWORD=$IPA_PASSWORD - $ export IPA_PRINCIPAL=$IPA_USER - $ export UNDERCLOUD_FQDN=undercloud.example.com - $ ansible-playbook /usr/share/ansible/tripleo-playbooks/ipa-server-register-undercloud.yaml - -If successful, the ansible output will contain an OTP. Save this OTP because -you will need it when you configure the undercloud. - -Create a principal -^^^^^^^^^^^^^^^^^^ - -Finally, create a FreeIPA principal and grant it the necessary permissions to -manage hosts, services, and DNS entries in FreeIPA:: - - $ export IPA_PASSWORD=$IPA_PASSWORD - $ export IPA_PRINCIPAL=$IPA_USER - $ export UNDERCLOUD_FQDN=undercloud.example.com - $ ansible-playbook /usr/share/ansible/tripleo-playbooks/ipa-server-create-principal.yaml - -Configure the Undercloud -~~~~~~~~~~~~~~~~~~~~~~~~ - -.. warning:: - This section only provides guidance for configuring *TLS-everywhere*. You - need to make sure your undercloud configuration is complete before starting - the undercloud installation process. - -Set the following variables in `undercloud.conf`:: - - ipa_otp = $OTP - overcloud_domain_name = example.com - undercloud_nameservers = $FREEIPA_IP_ADDRESS - -Your undercloud configuration is ready to be deployed and has the necessary -changes to allow you to deploy *TLS-everywhere* for the overcloud. - -Undercloud Install -~~~~~~~~~~~~~~~~~~ - -After you've had an opportunity to verify all undercloud configuration options, -including the options listed above, start the undercloud installation process:: - - $ openstack undercloud install - -Undercloud Verification -~~~~~~~~~~~~~~~~~~~~~~~ - -You should verify that the undercloud was enrolled properly by listing the -hosts in FreeIPA:: - - $ sudo kinit - $ sudo ipa host-find - -You should also confirm that ``/etc/novajoin/krb5.keytab`` exists on the -undercloud. The ``novajoin`` directory name is purely for legacy naming -reasons. The keytab is placed in this directory regardless of using novajoin -to enroll the undercloud as a FreeIPA client. - -You can proceed with the :ref:`Overcloud TLS-everywhere` if the undercloud -installation was successful. - -TLS-everywhere with Novajoin ----------------------------- - -.. warning:: This deployment strategy is only supported up to the Train release. We - recommend using tripleo-ipa to accomplish *TLS-everywhere* in newer - releases. Steps for using tripleo-ipa are documented above. This deployment - strategy has been removed in Victoria. - -Do the following steps before deploying your undercloud. - -Configure DNS -~~~~~~~~~~~~~ - -*TLS-everywhere* deployments use FreeIPA as the DNS server. You need to set the -proper search domain and nameserver on the undercloud. To do this, you need to -know the deployment domain, the domain of the FreeIPA server, and the FreeIPA -server's IP address. 
For example, if the deployment domain is `example.com` and -the FreeIPA server domain is `bigcorp.com`, you should set the following in -`/etc/resolv.conf`:: - - search example.com bigcorp.com - nameserver $FREEIPA_IP_ADDRESS - -This step ensures the undercloud can resolve newly added hosts and services -after TripleO enrolls them as FreeIPA clients. You only need to add both search -domains if they're different. If the FreeIPA server is using the same domain as -the deployment you only need to specify the deployment domain. - -Add Undercloud as a FreeIPA host -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Next, you need to add the undercloud as a host in FreeIPA. This will generate a -one-time password that TripleO uses to enroll the undercloud as a FreeIPA -client, giving the undercloud the permissions it needs to add new hosts, -services, and DNS records. You can use the following command-line utility to -add the undercloud as a FreeIPA host:: - - novajoin-ipa-setup \ - --principal $IPA_USER \ - --password $IPA_PASSWORD \ - --server ipa.bigcorp.com \ - --realm BIGCORP.COM \ - --domain example.com \ - --hostname undercloud.example.com \ - --precreate - -If successful, the command will return a one-time password. Save this password -because you will need it later to configure the undercloud. - -Configure the Undercloud -~~~~~~~~~~~~~~~~~~~~~~~~ - -.. warning:: - This section only provides guidance for configuring *TLS-everywhere*. You - need to make sure your undercloud configuration is complete before starting - the undercloud installation process. - -Set the following variables in `undercloud.conf`:: - - enable_novajoin = True - ipa_otp = $IPA_OTP - overcloud_domain_name = example.com - -Your undercloud configuration is ready to be deployed and has the necessary -changes to allow you to deploy *TLS-everywhere* for the overcloud. - -Undercloud Install -~~~~~~~~~~~~~~~~~~ - -After you've had an opportunity to verify all undercloud configuration options, -including the options listed above, start the undercloud installation process:: - - $ openstack undercloud install - -Undercloud Verification -~~~~~~~~~~~~~~~~~~~~~~~ - -You should verify that the undercloud was enrolled properly by listing the -hosts in FreeIPA:: - - $ sudo kinit - $ sudo ipa host-find - -You should also confirm that ``/etc/novajoin/krb5.keytab`` exists on the -undercloud and that the ``novajoin`` and ``novajoin-notifier`` services are -running. - -You can proceed with the :ref:`Overcloud TLS-everywhere` if the undercloud -installation was successful. - -.. _Overcloud TLS-everywhere: - -Configuring the Overcloud -------------------------- - -*TLS-everywhere* requires you to set extra parameters and templates before you -deploy, or update, your overcloud. These changes consist of settings domain -information and including additional heat templates in your deploy command. -Let's walk through each step individually. - -Set Parameters -~~~~~~~~~~~~~~ - -Next, you need to set parameters so that TripleO knows where to find your -FreeIPA server and configures DNS. You need to set these variables so that -TripleO adds DNS records that map to the correct hosts. 
Let's continue assuming -we have a file called ``tls-parameters.yaml`` and it contains the following -parameter_defaults section:: - - parameter_defaults: - DnsSearchDomains: ["example.com"] - DnsServers: ["192.168.1.13"] - CloudDomain: example.com - CloudName: overcloud.example.com - CloudNameInternal: overcloud.internalapi.example.com - CloudNameStorage: overcloud.storage.example.com - CloudNameStorageManagement: overcloud.storagemgmt.example.com - CloudNameCtlplane: overcloud.ctlplane.example.com - -.. note:: - If you are using deployed servers, you must also specify the following - parameters:: - - IdMInstallClientPackages: True - - This option is required to install packages needed to enroll overcloud - hosts as FreeIPA clients. Deployments using Novajoin do not require this - option since the necessary packages are built into the overcloud images. If - you do not specify this argument, you need to ensure dependencies for - ansible-freeipa are present on the overcloud servers before deploying the - overcloud. - -The ``DnsServers`` value above assumes we have FreeIPA available at -192.168.1.13. - -It's important to note that you will need to update the `DnsSearchDomains` to -include the domain of the IPA server if it's different than the `CloudDomain`. -For example, if your `CloudDomain` is `example.com` and your IPA server is -located at `ipa.bigcorp.com`, then you need to include `bigcorp.com` as an -additional search domain:: - - DnsSearchDomains: ["example.com", "bigcorp.com"] - -Composable Services -~~~~~~~~~~~~~~~~~~~ - -In addition to the parameters above, you might need to update the -``resource_registry`` in ``tls-parameters.yaml`` to include a composable -service. There are two composable services, one for Novajoin and the other is -for tripleo-ipa. TripleO uses the Novajoin composable service for deploying -*TLS-everywhere* by default. If you need or want to use tripleo-ipa, you'll -need to update the registry to use a different composable service. Both options -are described below. - -Novajoin Composable Service -~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -This was the default option until Ussuri. As of Victoria, this option has -been removed, and deployers upgrading to Victoria will be migrated to tripleo-ipa. - -For reference, the Novajoin based composable service is located at -/usr/share/openstack-tripleo-heat-templates/deployment/ipa/ipaclient-baremetal-ansible.yaml - -tripleo-ipa Composable Service -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -If you're deploying *TLS-everywhere* with tripleo-ipa prior to Victoria, you need to -override the default Novajoin composable service. Add the following composable service to -the ``resource_registry`` in ``tls-parameters.yaml``:: - - resource_registry: - OS::TripleO::Services::IpaClient: /usr/share/openstack-tripleo-heat-templates/deployment/ipa/ipaservices-baremetal-ansible.yaml - -As of Victoria, this is the only method for deploying *TLS-everywhere*. - -Specify Templates -~~~~~~~~~~~~~~~~~ - -At this point, you should have all the settings configured for a successful -*TLS-everywhere* deployment. 
The only remaining step is to include the -following templates in your overcloud deploy command:: - - $ openstack overcloud deploy \ - -e /usr/share/openstack-tripleo-heat-templates/environments/ssl/tls-everywhere-endpoints-dns.yaml \ - -e /usr/share/openstack-tripleo-heat-templates/environments/services/haproxy-public-tls-certmonger.yaml \ - -e /usr/share/openstack-tripleo-heat-templates/environments/ssl/enable-internal-tls.yaml \ - -e tls-parameters.yaml - -Remember, ``tls-parameters.yaml`` is the file containing the parameters above. - -Overcloud Verification ----------------------- - -After the overcloud is deployed, you can confirm each endpoint is using HTTPS -by querying keystone's endpoints:: - - $ openstack --os-cloud overcloud endpoint list - -Deleting Overclouds -------------------- - -.. note:: - This functionality is only invoked when you use the ``openstack overcloud - delete`` command using Train or newer releases. The overcloud is - technically a heat stack, but using ``openstack stack delete`` will not - clean up FreeIPA. - -.. note:: - This section is only applicable to deployments using tripleo-ipa. Novajoin - cleans up FreeIPA after consuming notifications about instance deletion. - -The python-tripleoclient CLI cleans up hosts, services, and DNS records in -FreeIPA when you delete an overcloud:: - - $ openstack overcloud delete overcloud - -You can verify the hosts, services, DNS records were removed by querying -FreeIPA:: - - $ kinit - $ ipa host-find - $ ipa service-find - $ ipa dnsrecord-find example.com. - -The undercloud host, service, and DNS records are untouched when deleting -overclouds. Overcloud hosts, services, and DNS records are re-added to FreeIPA -during subsequent deployments. - -If you don't want to clean up FreeIPA when you delete your overcloud, you can -use the ``openstack overcloud delete --skip-ipa-cleanup`` parameter. This -option leaves all overcloud hosts, services, and DNS records in FreeIPA. You -might find this useful if your FreeIPA server is unreachable or if you plan to -clean up FreeIPA later. - -To clean up FreeIPA manually, you need the Ansible inventory file that -describes your deployment. If you don't have it handy, you can generate one -from the undercloud using:: - - $ source stackrc - $ tripleo-ansible-inventory --static-yaml-inventory generated-inventory.yaml - -The utility will generate an inventory file and store it as -``generated-inventory.yaml``. You can invoke the playbook that cleans up -FreeIPA using:: - - $ ansible-playbook -i generated-inventory.yaml /usr/share/ansible/tripleo-playbooks/cli-cleanup-ipa.yml diff --git a/deploy-guide/source/features/tls-introduction.rst b/deploy-guide/source/features/tls-introduction.rst deleted file mode 100644 index b92fa735..00000000 --- a/deploy-guide/source/features/tls-introduction.rst +++ /dev/null @@ -1,159 +0,0 @@ -.. _tls-introduction: - -TLS Introduction -================ - -Depending on your deployment's security requirements, you might be required to -encrypt network traffic. TripleO helps you accomplish this by supporting -various TLS deployment options. Let's start by understanding the different ways -we can deploy TLS. - -The first option is to only encrypt traffic between clients and public -endpoints. This approach results in fewer certificates to manage, and we refer -to it as *public TLS*. Public endpoints, in this sense, are endpoints only -exposed to end-users. Traffic between internal endpoints is not encrypted. 
- -The second option leverages TLS for all endpoints in the entire deployment, -including the overcloud, undercloud, and any systems that natively support TLS. -We typically refer to this approach as *TLS-everywhere* because we use TLS -everywhere we can, encrypting as much network traffic as possible. Certificate -management automation is critical with this approach because the number of -certificates scales linearly with the number of services in your deployment. -TripleO uses several components to help ease the burden of managing -certificates. This option is desirable for deployments susceptible to industry -regulation or those who have a higher security risk. Healthcare, -telecommunications, and the public sector are but a few industries that make -extensive use of *TLS-everywhere*. You can think of *public TLS* as a subset of -what *TLS-everywhere* offers. - -TripleO uses the following components to implement *public TLS* and -*TLS-everywhere*. - -Certmonger ----------- - -`Certmonger`_ is a daemon that helps simplify certificate management between -endpoints and certificate authorities (CAs). You can use it to generate key -pairs and certificate signing requests (CSRs). It can self-sign CSRs or send -them to external CAs for signing. Certmonger also tracks the expiration of each -certificate it manages. When a certificate is about to expire, Certmonger -requests a new certificate, updates it accordingly, and may restart a service. -This automation keeps the node enrolled as a client of the certificate -authority so that you don’t have to update hundreds, or thousands, of -certificates manually. Certmonger runs on each node that provides an endpoint -in your deployment. - -.. _Certmonger: https://pagure.io/certmonger - -FreeIPA -------- - -`FreeIPA`_ is a multi-purpose system that includes a certificate authority -(DogTag Certificate System), LDAP (389 Directory Server), MIT Kerberos, NTP -server, and DNS. TripleO uses all of these subsystems to implement TLS across -OpenStack. For example, if you use FreeIPA in your deployment, you can sign -CSRs with DogTag, as opposed to self-signing CSRs with certmonger locally. - -FreeIPA runs on a supplemental node in your deployment, and it is kept separate -from other infrastructure. - -.. _FreeIPA: https://www.freeipa.org/page/Main_Page - -Installing FreeIPA -~~~~~~~~~~~~~~~~~~ - -Similar to setting up the undercloud node, you need to set the hostname -properly for the FreeIPA server. For this example, let's assume we're using -``example.com`` as the domain name for the deployment.:: - - sudo hostnamectl set-hostname ipa.example.come - sudo hostnamectl set-hostname --transient ipa.example.com - -Collect and install the FreeIPA packages:: - - sudo yum install -y ipa-server ipa-server-dns - -Configure FreeIPA:: - - sudo ipa-server-install --realm EXAMPLE.COM / - --ds-password $DIRECTORY_MANAGER_PASSWORD / - --admin-password $ADMIN_PASSWORD / - --hostname ipa.example.com / - --setup-dns / - --auto-forwarders / - --auto-reverse / - --unattended - -By default, FreeIPA does not public it's Certificate Revocation List (CRL) -on startup. 
As the CRL is retrieved when the overcloud nodes retrieve -certificates from FreeIPA, we should configure it to do so and restart -FreeIPA.:: - - sed -i -e \ - 's/ca.crl.MasterCRL.publishOnStart=.*/ca.crl.MasterCRL.publishOnStart=true/' \ - /etc/pki/pki-tomcat/ca/CS.cfg - systemctl restart ipa - -If your IPA server is not at 4.8.5 or higher, you will need to add an -ACL to allow for the proper generation of certificates with a IP SAN.:: - - cat << EOF | ldapmodify -x -D "cn=Directory Manager" -w $DIRECTORY_MANAGER_PASSWORD - dn: cn=dns,dc=example,dc=com - changetype: modify - add: aci - aci: (targetattr = "aaaarecord || arecord || cnamerecord || idnsname || objectclass || ptrrecord")(targetfilter = "(&(objectclass=idnsrecord)(|(aaaarecord=*)(arecord=*)(cnamerecord=*)(ptrrecord=*)(idnsZoneActive=TRUE)))")(version 3.0; acl "Allow hosts to read DNS A/AAA/CNAME/PTR records"; allow (read,search,compare) userdn = "ldap:///fqdn=*,cn=computers,cn=accounts,dc=example,dc=com";) - EOF - -If you are upgrading to Victoria and you have been using novajoin, an additional permission -must be added to the Nova Host Manager role to allow the creation of DNS zone entries. -As an admin user:: - - ipa privilege-add-permission 'Nova Host Management' --permission \ - 'System: Modify Realm Domains' - -Please refer to ``ipa-server-install --help`` for specifics on each argument or -reference the `FreeIPA documentation`_. The directions above are only a guide. -You may need to adjust certain values and configuration options to use FreeIPA, -depending on your requirements. - -.. _FreeIPA documentation: https://www.freeipa.org/page/Documentation - -Novajoin --------- - -`Novajoin`_ is a vendor data service that extends nova's config drive -functionality and you use it when you want to deploy *TLS-everywhere*. When the -undercloud creates new nodes for the overcloud, novajoin creates a host entry -in FreeIPA to enable the overcloud node to enroll as a FreeIPA client. - -If you want to use novajoin, you must have nova deployed in your undercloud. -Novajoin isn't supported for deployments :doc:`deployed_server`. - -Novajoin was introduced in the Queens release and is supported through Train. -The `tripleo-ipa`_ project, described below, effectively replaced novajoin in -the Train release. - -As of Victoria, novajoin is not longer supported. If you are updating -from Ussuri, tripleo will automatically migrate your deployment from novajoin -to tripleo-ipa. Tripleo will stop and remove the novajoin containers from -the undercloud. If in-flight validations are enabled, tripleo will run a -pre-upgrade validation to verify that the needed ACI and permissions have been -added to the FreeIPA server. See the previous section on "Installing FreeIPA" -for more details. - -.. _Novajoin: https://opendev.org/x/novajoin - -tripleo-ipa ------------ - -`tripleo-ipa`_ is a collection of Ansible roles used to integrate FreeIPA into -TripleO deployments and you use it when you want to deploy *TLS-everywhere*. -These playbooks support deployments using nova and ironic in the undercloud as -well as :doc:`deployed_server`. This project was introduced in Train and -effectively replaces the novajoin metadata service. - -We recommend using tripleo-ipa for all *TLS-everywhere* deployments as of the -Train release. As of Victoria, tripleo-ipa is the only supported method to -configure and deploy *TLS-everywhere*. - -.. 
_tripleo-ipa: https://opendev.org/x/tripleo-ipa diff --git a/deploy-guide/source/features/tolerated_failure.rst b/deploy-guide/source/features/tolerated_failure.rst deleted file mode 100644 index 7c7f3d29..00000000 --- a/deploy-guide/source/features/tolerated_failure.rst +++ /dev/null @@ -1,46 +0,0 @@ -Tolerate deployment failures -============================ - -When proceeding to large scale deployments, it happens very often to have -infrastructure problems such as network outages, wrong configurations applied -on hardware, hard drive issues, etc. - -It is unpleasant to deploy hundred of nodes and only have a few of them which -failed. On most of large-scale use-cases, deployers would not care about -these nodes, as long as the cloud can already be used with the successfully -deployed servers. - -For that purpose, it is possible in |project| to specify a percentage value, -per role, that will tell how much failures we tolerate. - -Example: We deploy 50 compute nodes with the role "Compute". If I set the -following environment, my deployment will go until the end even if up to 5 -nodes fail to deploy:: - - parameter_defaults: - ComputeMaxFailPercentage: 10 - -At the end of the deployment, a report will be printed and if nodes failed to -deploy, it'll be shown like this:: - - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - ~~~~~~~~~~~~~~~~~~~~~~~~~~ State Information ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - ~~~~~~~~~ Number of nodes which did not deploy successfully: 3 ~~~~~~~~~~~~~~ - This or these node(s) failed to deploy: compute3, compute24, compute29 - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -If one or multiple node(s) failed to deploy, the tripleoclient return code -won't be 0 and an error will be printed with a Python trace. Very often the -problem can be read from the Ansible logs by searching for the nodes which -didn't deploy successfully. - -If you want to target all the compute nodes in our deployment and you have more -than one role to deploy computes, then you'll probably want to allocate one -value per role and distribute it based on your expectations and needs. - -.. Warning:: - - For now, this only works for the execution of the deployment steps - from config-download playbooks. Minor updates, major upgrades, fast forward - upgrades and baremetal provisioning operations aren't supported yet, but - will certainly be in the future. diff --git a/deploy-guide/source/features/tuned.rst b/deploy-guide/source/features/tuned.rst deleted file mode 100644 index 89ddd3d1..00000000 --- a/deploy-guide/source/features/tuned.rst +++ /dev/null @@ -1,61 +0,0 @@ -Deploying custom tuned profiles -=============================== - -TripleO can be used to deploy Overcloud nodes with different tuned -profiles in addition to custom tuned profiles. - -Deploying with existing tuned profiles --------------------------------------- - -Create an environment file, e.g. `~/tuned.yaml`, with the following -content: - -.. code-block:: yaml - - parameter_defaults: - TunedProfileName: throughput-performance - -Deploy the Overcloud as usual using the :doc:`CLI -<../deployment/install_overcloud>` and pass the environment -file using the `-e` option: - -.. code-block:: bash - - openstack overcloud deploy --templates -e ~/tuned.yaml - -In the above example, the `throughput-performance` tuned profile will -be applied to the overcloud nodes. The TunedProfileName parameter may -be set to any tuned profile already on the node. 
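
After the deployment finishes, you can confirm which profile is active on a node. A
quick sketch, assuming SSH access as the usual ``heat-admin`` user and a placeholder
node address::

    $ ssh heat-admin@<overcloud-node-ip> sudo tuned-adm active    # should report the profile set via TunedProfileName
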
- -Deploying with custom tuned profiles ------------------------------------- - -If the tuned profile you wish to apply is not already on the overcloud -node being deployed, then TripleO can create the tuned profile for -you and will set the name of the new profile to whatever -TunedProfileName parameter you supply. - -The following example creates a custom tuned profile called -`my_profile` which inherits from the existing throughput-performance -tuned profile and then adds a few extra tunings: - -.. code-block:: yaml - - parameter_defaults: - TunedCustomProfile: | - [main] - summary=my profile - include=throughput-performance - [sysctl] - vm.dirty_ratio = 10 - vm.dirty_background_ratio = 3 - [sysfs] - /sys/kernel/mm/ksm/run=0 - TunedProfileName: my_profile - -The above will create the file `/etc/tuned/my_profile/tuned.conf` -on the overcloud nodes and tuned.conf will contain the tuned -directives defined by the TunedCustomProfile parameter. The -TunedCustomProfile parameter should be set to a multiline string using -YAML's literal block scalar (i.e. the pipe '|') and that string should -contain valid tuned directives in INI format. diff --git a/deploy-guide/source/features/undercloud_minion.rst b/deploy-guide/source/features/undercloud_minion.rst deleted file mode 100644 index 2d65f1ec..00000000 --- a/deploy-guide/source/features/undercloud_minion.rst +++ /dev/null @@ -1,152 +0,0 @@ -(DEPRECATED) Installing a Undercloud Minion -=========================================== - -.. note:: - The minion functionality is deprecated starting in Wallaby. - -.. note:: - This is optional functionality that is helpful for large scale related - deployments. - -.. note:: - The minion functionality is only available starting from the Train cycle. - -The undercloud can be scaled horizontally by installing and configuring undercloud -minions. The minions can expand the number of heat-engine and ironic-conductors -available the overall undercloud installation. The undercloud minions can be -added and removed as necessary to scale processing during a deployment. - -Installation Steps ------------------- - -.. note:: - The minion requires an undercloud has been installed. The undercloud - installation process has two output files that we will need to install the - minion. - -#. Log in to your machine (baremetal or VM) where you want to install the - minion as a non-root user (such as the stack user):: - - ssh @ - - .. note:: - If you don't have a non-root user created yet, log in as root and create - one with following commands:: - - sudo useradd stack - sudo passwd stack # specify a password - - echo "stack ALL=(root) NOPASSWD:ALL" | sudo tee -a /etc/sudoers.d/stack - sudo chmod 0440 /etc/sudoers.d/stack - - su - stack - - .. note:: - The minion is intended to work correctly with SELinux enforcing. - Installations with the permissive/disabled SELinux are not recommended. - The ``minion_enable_selinux`` config option controls that setting. - - .. note:: - vlan tagged interfaces must follow the if_name.vlan_id convention, like for - example: eth0.vlan100 or bond0.vlan120. - -#. Enable needed repositories: - - .. admonition:: RHEL - :class: rhel - - Enable optional repo:: - - sudo yum install -y yum-utils - sudo yum-config-manager --enable rhelosp-rhel-7-server-opt - - .. include:: ../repositories.rst - -.. We need to manually continue our list numbering here since the above - "include" directive breaks the numbering. - -3. 
Install the TripleO CLI, which will pull in all other necessary packages as dependencies:: - - sudo yum install -y python-tripleoclient - -#. Copy the `tripleo-undercloud-outputs.yaml` and `tripleo-undercloud-passwords.yaml` - from the undercloud to the node being provisioned as a minion:: - - scp tripleo-undercloud-outputs.yaml tripleo-undercloud-passwords.yaml @: - -#. (Optional) Copy Undercloud CA certificate if SSL is enabled. - - On the undercloud:: - - scp /etc/pki/ca-trust/source/anchors/cm-local-ca.pem @: - - On the minion:: - - sudo update-ca-trust enable - sudo cp cm-local-ca.pem /etc/pki/ca-trust/source/anchors/undercloud-ca.pem - sudo update-ca-trust extract - -#. Prepare the configuration file:: - - cp /usr/share/python-tripleoclient/minion.conf.sample ~/minion.conf - - Update the settings in this file to match the desired configuration. The - options in the minion.conf are similarly configured as the undercloud.conf - on the undercloud node. It is important to configure the `minion_local_ip` - and the `minion_local_interface` to match the available interfaces on the - minion system. - - .. note:: - The minion configured interface and ip must be on the control plane network. - -#. Run the command to install the minion: - - To deploy a minion:: - - openstack undercloud minion install - -#. Verify services - - - Heat Engine - - By default only the heat-engine service is configured. To verify it has - been configured correctly, run the following on the undercloud:: - - source ~/stackrc - openstack orchestration service list - - Example output:: - - (undercloud) [stack@undercloud ~]$ openstack orchestration service list - +------------------------+-------------+--------------------------------------+------------------------+--------+----------------------------+--------+ - | Hostname | Binary | Engine ID | Host | Topic | Updated At | Status | - +------------------------+-------------+--------------------------------------+------------------------+--------+----------------------------+--------+ - | undercloud.localdomain | heat-engine | b1af4e18-6859-4b73-b1cf-87674bd0ce1f | undercloud.localdomain | engine | 2019-07-25T23:19:34.000000 | up | - | minion.localdomain | heat-engine | 3a0d7080-06a9-4049-bb00-dbdcafbce0fc | minion.localdomain | engine | 2019-07-25T23:19:24.000000 | up | - | undercloud.localdomain | heat-engine | f6ccea46-2b30-4869-b06f-935c342a9ed6 | undercloud.localdomain | engine | 2019-07-25T23:19:34.000000 | up | - | minion.localdomain | heat-engine | eef759de-f7d3-472a-afbc-878eb6a3b9c0 | minion.localdomain | engine | 2019-07-25T23:19:24.000000 | up | - | minion.localdomain | heat-engine | 7f076afe-5116-45ad-9f08-aab7fbfda40b | minion.localdomain | engine | 2019-07-25T23:19:24.000000 | up | - | undercloud.localdomain | heat-engine | 038ead61-91f1-4739-8537-df63a9e2c917 | undercloud.localdomain | engine | 2019-07-25T23:19:34.000000 | up | - | undercloud.localdomain | heat-engine | f16a4f55-b053-4650-9202-781aef55698e | undercloud.localdomain | engine | 2019-07-25T23:19:36.000000 | up | - | minion.localdomain | heat-engine | e853d9c9-9f75-4958-ad9b-49e4b63b79b2 | minion.localdomain | engine | 2019-07-25T23:19:24.000000 | up | - +------------------------+-------------+--------------------------------------+------------------------+--------+----------------------------+--------+ - - - - Ironic Conductor - - If the ironic conductor service has been enabled, run the following on the - undercloud:: - - source ~/stackrc - baremetal conductor list - - Example output:: - - 
(undercloud) [stack@undercloud ~]$ baremetal conductor list - +------------------------+-----------------+-------+ - | Hostname | Conductor Group | Alive | - +------------------------+-----------------+-------+ - | undercloud.localdomain | | True | - | minion.localdomain | | True | - +------------------------+-----------------+-------+ - diff --git a/deploy-guide/source/features/vdpa_deployment.rst b/deploy-guide/source/features/vdpa_deployment.rst deleted file mode 100644 index 8d9868fa..00000000 --- a/deploy-guide/source/features/vdpa_deployment.rst +++ /dev/null @@ -1,440 +0,0 @@ -Deploying with vDPA Support -=============================== - -TripleO can deploy Overcloud nodes with vDPA support. A new role ``ComputeVdpa`` -has been added to create a custom ``roles_data.yaml`` with composable vDPA role. - -vDPA is very similar to SR-IOV and leverages the same Openstack components. It's -important to note that vDPA can't function without OVS Hardware Offload. - -Mellanox is the only NIC vendor currently supported with vDPA. - -CentOS9/RHEL9 with a kernel of 5.14 or higher is required. - -Execute below command to create the ``roles_data.yaml``:: - - openstack overcloud roles generate -o roles_data.yaml Controller ComputeVdpa - -Once a roles file is created, the following changes are required: - -- Deploy Command -- Parameters -- Network Config -- Network and Port creation - -Deploy Command ----------------- -Deploy command should include the generated roles data file from the above -command. - -Deploy command should also include the SR-IOV environment file to include the -``neutron-sriov-agent`` service. All the required parameters are also specified -in this environment file. The parameters has to be configured according to the -baremetal on which vDPA needs to be enabled. - -Also, vDPA requires mandatory kernel parameters to be set, like -``intel_iommu=on iommu=pt`` on Intel machines. In order to enable the -configuration of kernel parameters to the host, The ``KernelArgs`` role -parameter has to be defined accordingly. - -Adding the following arguments to the ``openstack overcloud deploy`` command -will do the trick:: - - openstack overcloud deploy --templates \ - -r roles_data.yaml \ - -e /usr/share/openstack-tripleo-heat-templates/environments/services/neutron-sriov.yaml \ - ... - -Parameters ----------- - -Unlike SR-IOV, vDPA devices shouldn't be added to ``NeutronPhysicalDevMappings`` but to the -``NovaPCIPassthrough``. The vDPA bridge should also be added to the ``NeutronBridgeMappings`` -and the ``physical_network`` to the ``NeutronNetworkVLANRanges``. - -The parameter ``KernelArgs`` should be provided in the deployment environment -file, with the set of kernel boot parameters to be applied on the -``ComputeVdpa`` role where vDPA is enabled. - -The ``PciPassthroughFilter`` is required for vDPA. The ``NUMATopologyFilter`` will become -optional when ``libvirt`` will support the locking of the guest memory. At this time, it -is mandatory to have it:: - - parameter_defaults: - NeutronTunnelTypes: '' - NeutronNetworkType: 'vlan' - NeutronNetworkVLANRanges: - - tenant:1300:1399 - NovaSchedulerDefaultFilters: - - PciPassthroughFilter - - NUMATopologyFilter - - ... - ComputeVdpaParameters: - NovaPCIPassthrough: - - vendor_id: "15b3" - product_id: "101e" - address: "06:00.0" - physical_network: "tenant" - - vendor_id: "15b3" - product_id: "101e" - address: "06:00.1" - physical_network: "tenant" - KernelArgs: "[...] 
iommu=pt intel_iommu=on" - NeutronBridgeMappings: - - tenant:br-tenant - -.. note:: - It's important to use the ``product_id`` of a VF device and not a PF - - 06:00.1 Ethernet controller [0200]: Mellanox Technologies MT2892 Family [ConnectX-6 Dx] [15b3:101d] - 06:00.2 Ethernet controller [0200]: Mellanox Technologies ConnectX Family mlx5Gen Virtual Function [15b3:101e] - - - - -Network Config --------------- -vDPA supported network interfaces should be specified in the network config -templates as sriov_pf type. It should also be under an OVS bridge with a ``link_mode`` -set to ``switchdev`` - -Example:: - - - type: ovs_bridge - name: br-tenant - members: - - type: sriov_pf - name: enp6s0f0 - numvfs: 8 - use_dhcp: false - vdpa: true - link_mode: switchdev - - type: sriov_pf - name: enp6s0f1 - numvfs: 8 - use_dhcp: false - vdpa: true - link_mode: switchdev - - -Network and Port Creation -------------------------- - -When creating the network, it has to be mapped to the physical network:: - - $ openstack network create \ - --provider-physical-network tenant \ - --provider-network-type vlan \ - --provider-segment 1337 \ - vdpa_net1 - - $ openstack subnet create \ - --network vdpa_net1 \ - --subnet-range 192.0.2.0/24 \ - --dhcp \ - vdpa_subnet1 - -To allocate a port from a vdpa-enabled NIC, create a neutron port and set the -``--vnic-type`` to ``vdpa``:: - - $ openstack port create --network vdpa_net1 \ - --vnic-type=vdpa \ - vdpa_direct_port1 - -Scheduling instances --------------------- - -Normally, the ``PciPassthroughFilter`` is sufficient to ensure that a vDPA instance will -land on a vDPA host. If we want to prevent other instances from using a vDPA host, we need -to setup the `isolate-aggregate feature -`_. - -Example:: - - $ openstack --os-placement-api-version 1.6 trait create CUSTOM_VDPA - $ openstack aggregate create \ - --zone vdpa-az1 \ - vdpa_ag1 - $ openstack hypervisor list -c ID -c "Hypervisor Hostname" -f value | grep vdpa | \ - while read l - do UUID=$(echo $l | cut -f 1 -d " ") - H_NAME=$(echo $l | cut -f 2 -d " ") - echo $H_NAME $UUID - openstack aggregate add host vdpa_ag1 $H_NAME - traits=$(openstack --os-placement-api-version 1.6 resource provider trait list \ - -f value $UUID | sed 's/^/--trait /') - openstack --os-placement-api-version 1.6 resource provider trait set \ - $traits --trait CUSTOM_VDPA $UUID - done - $ openstack --os-compute-api-version 2.53 aggregate set \ - --property trait:CUSTOM_VDPA=required \ - vdpa_ag1 - -The flavor will map to that new aggregate with the ``trait:CUSTOM_VDPA`` property:: - - $ openstack --os-compute-api-version 2.86 flavor create \ - --ram 4096 \ - --disk 10 \ - --vcpus 2 \ - --property hw:cpu_policy=dedicated \ - --property hw:cpu_realtime=True \ - --property hw:cpu_realtime_mask=^0 \ - --property trait:CUSTOM_VDPA=required \ - vdpa_pinned - -.. note:: - It's also important to have the ``hw:cpu_realtime*`` properties here since - ``libvirt`` doesn't currently support the locking of guest memory. 
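
Before booting an instance, it can be useful to confirm that the trait and realtime
properties actually landed on the flavor created above; the exact output formatting
varies with the client version::

    $ openstack flavor show vdpa_pinned -c properties
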
- - -This should launch an instance on one of the vDPA hosts:: - - $ openstack server create \ - --image cirros \ - --flavor vdpa_pinned \ - --nic port-id=vdpa_direct_port1 \ - vdpa_test_1 - -Validations ------------ - -Confirm that a PCI device is in switchdev mode:: - - [root@computevdpa-0 ~]# devlink dev eswitch show pci/0000:06:00.0 - pci/0000:06:00.0: mode switchdev inline-mode none encap-mode basic - [root@computevdpa-0 ~]# devlink dev eswitch show pci/0000:06:00.1 - pci/0000:06:00.1: mode switchdev inline-mode none encap-mode basic - -Verify if offload is enabled in OVS:: - - [root@computevdpa-0 ~]# ovs-vsctl get Open_vSwitch . other_config:hw-offload - "true" - -Validate the interfaces are added to the tenant bridge:: - - [root@computevdpa-0 ~]# ovs-vsctl show - be82eb5b-94c3-449d-98c8-0961b6b6b4c4 - Manager "ptcp:6640:127.0.0.1" - is_connected: true - [...] - Bridge br-tenant - Controller "tcp:127.0.0.1:6633" - is_connected: true - fail_mode: secure - datapath_type: system - Port br-tenant - Interface br-tenant - type: internal - Port enp6s0f0 - Interface enp6s0f0 - Port phy-br-tenant - Interface phy-br-tenant - type: patch - options: {peer=int-br-tenant} - Port enp6s0f1 - Interface enp6s0f1 - [...] - - -Verify if the NICs have ``hw-tc-offload`` enabled:: - - [root@computevdpa-0 ~]# for i in {0..1};do ethtool -k enp6s0f$i | grep tc-offload;done - hw-tc-offload: on - hw-tc-offload: on - -Verify that the udev rules have been created:: - - [root@computevdpa-0 ~]# cat /etc/udev/rules.d/80-persistent-os-net-config.rules - # This file is autogenerated by os-net-config - SUBSYSTEM=="net", ACTION=="add", ATTR{phys_switch_id}!="", ATTR{phys_port_name}=="pf*vf*", ENV{NM_UNMANAGED}="1" - SUBSYSTEM=="net", ACTION=="add", DRIVERS=="?*", KERNELS=="0000:06:00.0", NAME="enp6s0f0" - SUBSYSTEM=="net", ACTION=="add", ATTR{phys_switch_id}=="80ecee0003723f04", ATTR{phys_port_name}=="pf0vf*", IMPORT{program}="/etc/udev/rep-link-name.sh $attr{phys_port_name}", NAME="enp6s0f0_$env{NUMBER}" - SUBSYSTEM=="net", ACTION=="add", DRIVERS=="?*", KERNELS=="0000:06:00.1", NAME="enp6s0f1" - SUBSYSTEM=="net", ACTION=="add", ATTR{phys_switch_id}=="80ecee0003723f04", ATTR{phys_port_name}=="pf1vf*", IMPORT{program}="/etc/udev/rep-link-name.sh $attr{phys_port_name}", NAME="enp6s0f1_$env{NUMBER}" - - -Validate that the ``numvfs`` are correctly defined:: - - [root@computevdpa-0 ~]# cat /sys/class/net/enp6s0f0/device/sriov_numvfs - 8 - [root@computevdpa-0 ~]# cat /sys/class/net/enp6s0f1/device/sriov_numvfs - 8 - -Validate that the ``pci/passthrough_whitelist`` contains all the PFs:: - - [root@computevdpa-0 ~]# grep ^passthrough_whitelist /var/lib/config-data/puppet-generated/nova_libvirt/etc/nova/nova.conf - passthrough_whitelist={"address":"06:00.0","physical_network":"tenant","product_id":"101d","vendor_id":"15b3"} - passthrough_whitelist={"address":"06:00.1","physical_network":"tenant","product_id":"101d","vendor_id":"15b3"} - -Verify the ``nodedev-list`` from ``libvirt``:: - - [root@computevdpa-0 ~]# podman exec -u0 nova_virtqemud virsh -c qemu:///system nodedev-list | grep -P "pci_0000_06|enp6|vdpa" - net_enp6s0f0np0_04_3f_72_ee_ec_84 - net_enp6s0f0np0_0_1a_c1_a5_25_94_ef - net_enp6s0f0np0_1_3a_dc_1d_36_85_af - net_enp6s0f0np0_2_6a_95_0c_e9_8f_1a - net_enp6s0f0np0_3_ba_c8_5b_f5_70_cc - net_enp6s0f0np0_4_9e_03_86_23_cd_65 - net_enp6s0f0np0_5_0a_5c_8b_c4_00_7a - net_enp6s0f0np0_6_2e_f6_bc_e6_6f_cd - net_enp6s0f0np0_7_ce_1e_b2_20_5e_15 - net_enp6s0f1np1_04_3f_72_ee_ec_85 - net_enp6s0f1np1_0_a6_04_9e_5a_cd_3b - 
net_enp6s0f1np1_1_56_5d_59_b0_df_17 - net_enp6s0f1np1_2_de_ac_7c_3f_19_b1 - net_enp6s0f1np1_3_16_0c_8c_47_40_5c - net_enp6s0f1np1_4_0e_a6_15_f5_68_77 - net_enp6s0f1np1_5_e2_73_dc_f9_c2_46 - net_enp6s0f1np1_6_e6_13_57_c9_cf_0f - net_enp6s0f1np1_7_62_10_4f_2b_1b_ae - net_vdpa06p00vf2_42_11_c8_97_aa_43 - net_vdpa06p00vf3_2a_59_5e_32_3e_b7 - net_vdpa06p00vf4_9a_5c_3f_c9_cc_42 - net_vdpa06p00vf5_26_73_2a_e3_db_f9 - net_vdpa06p00vf6_9a_bf_a9_e9_6b_06 - net_vdpa06p00vf7_d2_1f_cc_00_a9_95 - net_vdpa06p01vf0_ba_81_cb_7e_01_1d - net_vdpa06p01vf1_56_95_fa_5e_4a_51 - net_vdpa06p01vf2_72_53_64_8d_12_98 - net_vdpa06p01vf3_9e_ff_1d_6d_c1_4e - net_vdpa06p01vf4_96_20_f3_b1_69_ef - net_vdpa06p01vf5_ea_0c_8b_0b_3f_ff - net_vdpa06p01vf6_0a_53_4e_94_e0_8b - net_vdpa06p01vf7_16_84_48_e6_74_59 - net_vdpa06p02vf0_b2_cc_fa_16_f0_52 - net_vdpa06p02vf1_0a_12_1b_a2_1a_d3 - pci_0000_06_00_0 - pci_0000_06_00_1 - pci_0000_06_00_2 - pci_0000_06_00_3 - pci_0000_06_00_4 - pci_0000_06_00_5 - pci_0000_06_00_6 - pci_0000_06_00_7 - pci_0000_06_01_0 - pci_0000_06_01_1 - pci_0000_06_01_2 - pci_0000_06_01_3 - pci_0000_06_01_4 - pci_0000_06_01_5 - pci_0000_06_01_6 - pci_0000_06_01_7 - pci_0000_06_02_0 - pci_0000_06_02_1 - vdpa_0000_06_00_2 - vdpa_0000_06_00_3 - vdpa_0000_06_00_4 - vdpa_0000_06_00_5 - vdpa_0000_06_00_6 - vdpa_0000_06_00_7 - vdpa_0000_06_01_0 - vdpa_0000_06_01_1 - vdpa_0000_06_01_2 - vdpa_0000_06_01_3 - vdpa_0000_06_01_4 - vdpa_0000_06_01_5 - vdpa_0000_06_01_6 - vdpa_0000_06_01_7 - vdpa_0000_06_02_0 - vdpa_0000_06_02_1 - - -Validate that the vDPA devices have been created, this should match the vdpa -devices from ``virsh nodedev-list``:: - - [root@computevdpa-0 ~]# ls -tlra /dev/vhost-vdpa-* - crw-------. 1 root root 241, 0 Jun 30 12:52 /dev/vhost-vdpa-0 - crw-------. 1 root root 241, 1 Jun 30 12:52 /dev/vhost-vdpa-1 - crw-------. 1 root root 241, 2 Jun 30 12:52 /dev/vhost-vdpa-2 - crw-------. 1 root root 241, 3 Jun 30 12:52 /dev/vhost-vdpa-3 - crw-------. 1 root root 241, 4 Jun 30 12:52 /dev/vhost-vdpa-4 - crw-------. 1 root root 241, 5 Jun 30 12:53 /dev/vhost-vdpa-5 - crw-------. 1 root root 241, 6 Jun 30 12:53 /dev/vhost-vdpa-6 - crw-------. 1 root root 241, 7 Jun 30 12:53 /dev/vhost-vdpa-7 - crw-------. 1 root root 241, 8 Jun 30 12:53 /dev/vhost-vdpa-8 - crw-------. 1 root root 241, 9 Jun 30 12:53 /dev/vhost-vdpa-9 - crw-------. 1 root root 241, 10 Jun 30 12:53 /dev/vhost-vdpa-10 - crw-------. 1 root root 241, 11 Jun 30 12:53 /dev/vhost-vdpa-11 - crw-------. 1 root root 241, 12 Jun 30 12:53 /dev/vhost-vdpa-12 - crw-------. 1 root root 241, 13 Jun 30 12:53 /dev/vhost-vdpa-13 - crw-------. 1 root root 241, 14 Jun 30 12:53 /dev/vhost-vdpa-14 - crw-------. 
1 root root 241, 15 Jun 30 12:53 /dev/vhost-vdpa-15 - -Validate the ``pci_devices`` table in the database from one of the controllers:: - - [root@controller-2 neutron]# podman exec -u0 $(podman ps -q -f name=galera) mysql -t -D nova -e "select address,product_id,vendor_id,dev_type,dev_id from pci_devices where address like '0000:06:%' and deleted=0;" - +--------------+------------+-----------+----------+------------------+ - | address | product_id | vendor_id | dev_type | dev_id | - +--------------+------------+-----------+----------+------------------+ - | 0000:06:01.1 | 101e | 15b3 | vdpa | pci_0000_06_01_1 | - | 0000:06:00.2 | 101e | 15b3 | vdpa | pci_0000_06_00_2 | - | 0000:06:00.3 | 101e | 15b3 | vdpa | pci_0000_06_00_3 | - | 0000:06:00.4 | 101e | 15b3 | vdpa | pci_0000_06_00_4 | - | 0000:06:00.5 | 101e | 15b3 | vdpa | pci_0000_06_00_5 | - | 0000:06:00.6 | 101e | 15b3 | vdpa | pci_0000_06_00_6 | - | 0000:06:00.7 | 101e | 15b3 | vdpa | pci_0000_06_00_7 | - | 0000:06:01.0 | 101e | 15b3 | vdpa | pci_0000_06_01_0 | - | 0000:06:01.2 | 101e | 15b3 | vdpa | pci_0000_06_01_2 | - | 0000:06:01.3 | 101e | 15b3 | vdpa | pci_0000_06_01_3 | - | 0000:06:01.4 | 101e | 15b3 | vdpa | pci_0000_06_01_4 | - | 0000:06:01.5 | 101e | 15b3 | vdpa | pci_0000_06_01_5 | - | 0000:06:01.6 | 101e | 15b3 | vdpa | pci_0000_06_01_6 | - | 0000:06:01.7 | 101e | 15b3 | vdpa | pci_0000_06_01_7 | - | 0000:06:02.0 | 101e | 15b3 | vdpa | pci_0000_06_02_0 | - | 0000:06:02.1 | 101e | 15b3 | vdpa | pci_0000_06_02_1 | - | 0000:06:00.2 | 101e | 15b3 | vdpa | pci_0000_06_00_2 | - | 0000:06:00.3 | 101e | 15b3 | vdpa | pci_0000_06_00_3 | - | 0000:06:00.4 | 101e | 15b3 | vdpa | pci_0000_06_00_4 | - | 0000:06:00.5 | 101e | 15b3 | vdpa | pci_0000_06_00_5 | - | 0000:06:00.6 | 101e | 15b3 | vdpa | pci_0000_06_00_6 | - | 0000:06:00.7 | 101e | 15b3 | vdpa | pci_0000_06_00_7 | - | 0000:06:01.0 | 101e | 15b3 | vdpa | pci_0000_06_01_0 | - | 0000:06:01.1 | 101e | 15b3 | vdpa | pci_0000_06_01_1 | - | 0000:06:01.2 | 101e | 15b3 | vdpa | pci_0000_06_01_2 | - | 0000:06:01.3 | 101e | 15b3 | vdpa | pci_0000_06_01_3 | - | 0000:06:01.4 | 101e | 15b3 | vdpa | pci_0000_06_01_4 | - | 0000:06:01.5 | 101e | 15b3 | vdpa | pci_0000_06_01_5 | - | 0000:06:01.6 | 101e | 15b3 | vdpa | pci_0000_06_01_6 | - | 0000:06:01.7 | 101e | 15b3 | vdpa | pci_0000_06_01_7 | - | 0000:06:02.0 | 101e | 15b3 | vdpa | pci_0000_06_02_0 | - | 0000:06:02.1 | 101e | 15b3 | vdpa | pci_0000_06_02_1 | - +--------------+------------+-----------+----------+------------------+ - -The ``vdpa`` command:: - - [root@computevdpa-0 ~]# vdpa dev - 0000:06:01.0: type network mgmtdev pci/0000:06:01.0 vendor_id 5555 max_vqs 16 max_vq_size 256 - 0000:06:00.6: type network mgmtdev pci/0000:06:00.6 vendor_id 5555 max_vqs 16 max_vq_size 256 - 0000:06:00.4: type network mgmtdev pci/0000:06:00.4 vendor_id 5555 max_vqs 16 max_vq_size 256 - 0000:06:00.2: type network mgmtdev pci/0000:06:00.2 vendor_id 5555 max_vqs 16 max_vq_size 256 - 0000:06:01.1: type network mgmtdev pci/0000:06:01.1 vendor_id 5555 max_vqs 16 max_vq_size 256 - 0000:06:00.7: type network mgmtdev pci/0000:06:00.7 vendor_id 5555 max_vqs 16 max_vq_size 256 - 0000:06:00.5: type network mgmtdev pci/0000:06:00.5 vendor_id 5555 max_vqs 16 max_vq_size 256 - 0000:06:00.3: type network mgmtdev pci/0000:06:00.3 vendor_id 5555 max_vqs 16 max_vq_size 256 - 0000:06:02.0: type network mgmtdev pci/0000:06:02.0 vendor_id 5555 max_vqs 16 max_vq_size 256 - 0000:06:01.6: type network mgmtdev pci/0000:06:01.6 vendor_id 5555 max_vqs 16 max_vq_size 256 - 
0000:06:01.4: type network mgmtdev pci/0000:06:01.4 vendor_id 5555 max_vqs 16 max_vq_size 256 - 0000:06:01.2: type network mgmtdev pci/0000:06:01.2 vendor_id 5555 max_vqs 16 max_vq_size 256 - 0000:06:02.1: type network mgmtdev pci/0000:06:02.1 vendor_id 5555 max_vqs 16 max_vq_size 256 - 0000:06:01.7: type network mgmtdev pci/0000:06:01.7 vendor_id 5555 max_vqs 16 max_vq_size 256 - 0000:06:01.5: type network mgmtdev pci/0000:06:01.5 vendor_id 5555 max_vqs 16 max_vq_size 256 - 0000:06:01.3: type network mgmtdev pci/0000:06:01.3 vendor_id 5555 max_vqs 16 max_vq_size 256 - -Validating the OVN agents:: - - (overcloud) [stack@undercloud-0 ~]$ openstack network agent list --host computevdpa-0.home.arpa - +--------------------------------------+----------------------+-------------------------+-------------------+-------+-------+----------------------------+ - | ID | Agent Type | Host | Availability Zone | Alive | State | Binary | - +--------------------------------------+----------------------+-------------------------+-------------------+-------+-------+----------------------------+ - | ef2e6ced-e723-449c-bbf8-7513709f33ea | OVN Controller agent | computevdpa-0.home.arpa | | :-) | UP | ovn-controller | - | 7be39049-db5b-54fc-add1-4a0687160542 | OVN Metadata agent | computevdpa-0.home.arpa | | :-) | UP | neutron-ovn-metadata-agent | - +--------------------------------------+----------------------+-------------------------+-------------------+-------+-------+----------------------------+ - - -Other useful commands for troubleshooting:: - - [root@computevdpa-0 ~]# ovs-appctl dpctl/dump-flows -m type=offloaded - [root@computevdpa-0 ~]# ovs-appctl dpctl/dump-flows -m - [root@computevdpa-0 ~]# tc filter show dev enp6s0f1_1 ingress - [root@computevdpa-0 ~]# tc -s filter show dev enp6s0f1_1 ingress - [root@computevdpa-0 ~]# tc monitor diff --git a/deploy-guide/source/index.rst b/deploy-guide/source/index.rst deleted file mode 100644 index 29b73c9f..00000000 --- a/deploy-guide/source/index.rst +++ /dev/null @@ -1,19 +0,0 @@ -======================== -TripleO Deployment Guide -======================== - -TripleO is a project aimed at installing, upgrading and operating OpenStack -clouds using OpenStack's own cloud facilities as the foundation - building on -Nova, Ironic, Neutron and Heat to automate cloud management at datacenter -scale. - -.. toctree:: - :maxdepth: 2 - :includehidden: - - environments/index - provisioning/index - features/index - deployment/index - post_deployment/index - troubleshooting/index diff --git a/deploy-guide/source/post_deployment/backup_and_restore/00_index.rst b/deploy-guide/source/post_deployment/backup_and_restore/00_index.rst deleted file mode 100644 index cde9186e..00000000 --- a/deploy-guide/source/post_deployment/backup_and_restore/00_index.rst +++ /dev/null @@ -1,26 +0,0 @@ -TripleO backup and restore (Undercloud and Overcloud control plane) -=================================================================== - -This documentation section will describe a method to backup and restore both Undercloud and Overcloud -control plane. - -The use case involved in the creation and restore of these procedures are related to the -possible failures of a minor update or major upgrade for both Undercloud and Overcloud. - -The general approach to recover from failures during the minor update or major upgrade workflow -is to fix the environment and restart services before re-running the last executed step. 
- -There are specific cases in which rolling back to previous steps in the upgrades -workflow can lead to general failures in the system, i.e. -when executing `yum history` to rollback the upgrade of certain packages, -the dependencies resolution might select to remove critical packages like `systemd`. - -.. toctree:: - :maxdepth: 2 - :includehidden: - - 01_undercloud_backup - 02_overcloud_backup - 03_undercloud_restore - 04_overcloud_restore - 05_rear diff --git a/deploy-guide/source/post_deployment/backup_and_restore/01_undercloud_backup.rst b/deploy-guide/source/post_deployment/backup_and_restore/01_undercloud_backup.rst deleted file mode 100644 index 432edd6e..00000000 --- a/deploy-guide/source/post_deployment/backup_and_restore/01_undercloud_backup.rst +++ /dev/null @@ -1,125 +0,0 @@ -Backing up the Undercloud -========================= - -In order to backup your Undercloud you need to -make sure a set of files and databases are stored -correctly to be used in case of an issue running -the updates or upgrades workflows. - -The following sections will describe how to -execute an Undercloud backup. - -NTP service ------------ - -OpenStack services are time sensitive, users need to -be sure their environment have the time synchronized -correctly before proceeding with any backup task. - -By default, both Undercloud and Overcloud should have -configured correctly the NTP service as there are -parameters specifically defined to manage this service. - -The user is responsible to ensure that the Undercloud -restore is consistent in time. For example, a user -installs the Undercloud at the time 'm', then they deploy -the Undercloud and the Overcloud at the time 'n', and -they create an Undercloud backup at the time 'o'. When the user -restore the Undercloud it needs to be sure is restored -at a time later than 'o'. So, before and after restoring the Undercloud -node is important to have all the deployment with the time -updated and synchronized correctly. - -In case this is done manually, execute: - -:: - - sudo yum install -y ntp - sudo chkconfig ntpd on - sudo service ntpd stop - sudo ntpdate pool.ntp.org - sudo service ntpd restart - -After ensuring the environment have the time synchronized correctly -you can continue with the backup tasks. - -CLI driven backups ------------------- - -There is an automated way of creating an Undercloud backup, -this CLI option allows the operator to run a database and filesystem backup. -By default, all databases are included in the backup, also, the folder `/home/stack`. - -The command usage is:: - - openstack undercloud backup [--add-path ADD_FILES_TO_BACKUP] [--exclude-path EXCLUDE_FILES_TO_BACKUP] - -For example, we can run a full MySQL backup with additional paths as:: - - openstack undercloud backup --add-path /etc/ \ - --add-path /var/log/ \ - --add-path /root/ \ - --add-path /var/lib/glance/ \ - --add-path /var/lib/docker/ \ - --add-path /var/lib/certmonger/ \ - --add-path /var/lib/registry/ \ - --add-path /srv/node/ \ - --exclude-path /home/stack/ - -Note that we are excluding the folder `/home/stack/` -from the backup, but this folder is not included using the ``--add-path``, -CLI option, this is due to the fact that the `/home/stack/` folder is -added by default in any backup as it contains necessary files -to restore correctly the Undercloud. -You can exclude that folder and add specific files if you are required to -do so. 
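
For instance, a minimal sketch that skips the bulk of the home directory while still
capturing a couple of specific files from it. The file list is only illustrative, and
the precedence between ``--add-path`` and ``--exclude-path`` should be verified on your
release before relying on it::

    openstack undercloud backup --exclude-path /home/stack/ \
        --add-path /home/stack/undercloud.conf \
        --add-path /home/stack/stackrc
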
- -When executing the Undercloud backup via the OpenStack -CLI, the backup is stored in a temporary folder called -`/var/tmp/`. -After this operation, the result of the backup procedure -is stored in the swift container called `undercloud-backups` -and it will expire after 24 hours of its creation. - -Manual backups --------------- - -If the user needs to run the backup manually, -the following steps must be executed. - -Database backups -~~~~~~~~~~~~~~~~ - -The operator needs to backup all databases in the Undercloud node - -.. admonition:: Train - :class: stable - - :: - - /bin/hiera -c /etc/puppet/hiera.yaml mysql::server::root_password - podman exec mysql bash -c "mysqldump -uroot -pPASSWORD --opt --all-databases" > /root/undercloud-all-databases.sql - -Filesystem backups -~~~~~~~~~~~~~~~~~~ - -* MariaDB configuration file on undercloud (so we can restore databases accurately). -* All glance image data in /var/lib/glance/images. -* All swift data in /srv/node. -* All data in stack users home directory. -* Also the DB backup created in the previous step. - -The following command can be used to perform a backup of all data from the undercloud node:: - - sudo tar --xattrs --ignore-failed-read -cf \ - UC-backup-`date +%F`.tar \ - /root/undercloud-all-databases.sql \ - /etc \ - /var/log \ - /root \ - /var/lib/glance \ - /var/lib/docker \ - /var/lib/certmonger \ - /var/lib/registry \ - /srv/node \ - /home/stack diff --git a/deploy-guide/source/post_deployment/backup_and_restore/02_overcloud_backup.rst b/deploy-guide/source/post_deployment/backup_and_restore/02_overcloud_backup.rst deleted file mode 100644 index 8b21b736..00000000 --- a/deploy-guide/source/post_deployment/backup_and_restore/02_overcloud_backup.rst +++ /dev/null @@ -1,156 +0,0 @@ -Backing up the Overcloud control plane services -=============================================== - -This backup guide is meant to backup services based on a HA + containers deployment. - -Prerequisites -------------- - -There is a need to backup the control plane services in the Overcloud, to do so, we need -to apply the same approach from the Undercloud, which is, running a backup of the databases -and create a filesystem backup. - -Databases backup ----------------- - -MySQL backup -~~~~~~~~~~~~ - -If using HA the operator can run the database backup in any controller node -using the ``--single-transaction`` option when executing the mysqldump. - -If the deployment is using containers the hieradata file containing the mysql -root password is located in the folder `/var/lib/config-data/mysql/etc/puppet/hieradata/`. - -The file containing the mysql root password is `service_configs.json` and the key is -`mysql::server::root_password`. 
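Assuming the `jq` utility is available on the controller (it is not guaranteed to be installed), the password can also be read directly from that JSON file; the grep based extraction shown below needs no extra packages::

    # Optional alternative, requires jq on the controller node
    sudo jq -r '."mysql::server::root_password"' \
        /var/lib/config-data/mysql/etc/puppet/hieradata/service_configs.json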
- -Create a temporary folder to store the backups:: - - sudo -i - mkdir -p /var/tmp/mysql_backup/ - -Store the MySQL root password to be added to further queries:: - - MYSQLDBPASS=$(cat /var/lib/config-data/mysql/etc/puppet/hieradata/service_configs.json | grep mysql | grep root_password | awk -F": " '{print $2}' | awk -F"\"" '{print $2}') - -Execute from any controller:: - - mysql -uroot -p$MYSQLDBPASS -e "select distinct table_schema from information_schema.tables where engine='innodb' and table_schema != 'mysql';" \ - -s -N | xargs mysqldump -uroot -p$MYSQLDBPASS --single-transaction --databases > /var/tmp/mysql_backup/openstack_databases-`date +%F`-`date +%T`.sql - -This will dump a database backup called /var/tmp/mysql_backup/openstack_databases-.sql - -Then backup all the users and permissions information:: - - mysql -uroot -p$MYSQLDBPASS -e "SELECT CONCAT('\"SHOW GRANTS FOR ''',user,'''@''',host,''';\"') FROM mysql.user where (length(user) > 0 and user NOT LIKE 'root')" \ - -s -N | xargs -n1 mysql -uroot -p$MYSQLDBPASS -s -N -e | sed 's/$/;/' > /var/tmp/mysql_backup/openstack_databases_grants-`date +%F`-`date +%T`.sql - -This will dump a database backup called `/var/tmp/mysql_backup/openstack_databases_grants-.sql` - -MongoDB backup (only needed until Ocata) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Since OpenStack Pike, there is no support for MongoDB, so be sure you backup the data from -your telemetry backend. - -If telemetry services are used, then its needed to backup the data stored in the MongoDB instance. -Connect to any controller and get the IP of the MongoDB primary instance:: - - MONGOIP=$(cat /etc/mongod.conf | grep bind_ip | awk '{print $3}') - -Now, create the backup:: - - mkdir -p /var/tmp/mongo_backup/ - mongodump --oplog --host $MONGOIP --out /var/tmp/mongo_backup/ - -Be sure the files were created successfully. - -Redis backup -~~~~~~~~~~~~~~ - -If telemetry services are used, then it's needed to backup the data stored in the Redis instance. - -Let's get the Redis endpoint to get the backup, open `/var/lib/config-data/haproxy/etc/haproxy/haproxy.cfg` -and get the bind IP in the `listen redis` section, should have a string of this form `bind transparent`:: - - grep -A1 'listen redis' /var/lib/config-data/haproxy/etc/haproxy/haproxy.cfg - REDISIP=$(grep -A1 'listen redis' /var/lib/config-data/haproxy/etc/haproxy/haproxy.cfg | grep bind | awk '{print $2}' | awk -F":" '{print $1}') - -Let's store the master auth password to connect to the Redis cluster, the config file should be -`/var/lib/config-data/redis/etc/redis.conf` and the password under the `masterauth` parameter. -Let's store it in a variable:: - - REDISPASS=$(cat /var/lib/config-data/redis/etc/redis.conf | grep masterauth | grep -v \# | awk '{print $2}') - -Let's check connectivity to the Redis cluster:: - - redis-cli -a $REDISPASS -h $REDISIP ping - -Now, create a database dump by executing:: - - redis-cli -a $REDISPASS -h $REDISIP bgsave - -Now the database backup should be stored in the -default directory `/var/lib/redis/` directory. - -Filesystem backup ------------------ - -We need to backup all files that can be used to recover -from a possible failure in the Overcloud controllers when -executing a minor update or a major upgrade. - -The option ``--ignore-failed-read`` is added to the `tar` -command because the list of files to backup might be -different on each environment and we make the list of -paths to backup is as much general as possible. 
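Once the archive produced by the command below exists, it can be sanity-checked by listing its contents, for example::

    tar -tzf /var/tmp/filesystem_backup/fs_backup-*.tar.gz | head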
- -The following folders should be backed up:: - - mkdir -p /var/tmp/filesystem_backup/ - tar --xattrs --ignore-failed-read \ - -zcvf /var/tmp/filesystem_backup/fs_backup-`date '+%Y-%m-%d-%H-%M-%S'`.tar.gz \ - /etc/nova \ - /var/log/nova \ - /var/lib/nova \ - --exclude /var/lib/nova/instances \ - /etc/glance \ - /var/log/glance \ - /var/lib/glance \ - /etc/keystone \ - /var/log/keystone \ - /var/lib/keystone \ - /etc/httpd \ - /etc/cinder \ - /var/log/cinder \ - /var/lib/cinder \ - /etc/heat \ - /var/log/heat \ - /var/lib/heat \ - /var/lib/heat-config \ - /var/lib/heat-cfntools \ - /etc/rabbitmq \ - /var/log/rabbitmq \ - /var/lib/rabbitmq \ - /etc/neutron \ - /var/log/neutron \ - /var/lib/neutron \ - /etc/corosync \ - /etc/haproxy \ - /etc/logrotate.d/haproxy \ - /var/lib/haproxy \ - /etc/openvswitch \ - /var/log/openvswitch \ - /var/lib/openvswitch \ - /etc/ceilometer \ - /var/lib/redis \ - /etc/sysconfig/memcached \ - /etc/gnocchi \ - /var/log/gnocchi \ - /etc/aodh \ - /var/log/aodh \ - /etc/panko \ - /var/log/panko \ - /etc/ceilometer \ - /var/log/ceilometer diff --git a/deploy-guide/source/post_deployment/backup_and_restore/03_undercloud_restore.rst b/deploy-guide/source/post_deployment/backup_and_restore/03_undercloud_restore.rst deleted file mode 100644 index 42b1c795..00000000 --- a/deploy-guide/source/post_deployment/backup_and_restore/03_undercloud_restore.rst +++ /dev/null @@ -1,204 +0,0 @@ -Restoring the Undercloud -======================== - -The following restore process assumes you are recovering from a failed Undercloud node where you have to reinstall it from scratch. -It assumes that the hardware layout is the same, and the hostname and Undercloud settings of the machine will be the same as well. -Once the machine is installed and is in a clean state, re-enable all the subscriptions/repositories needed to install and run TripleO. - -Note that unless specified, all commands should run as the stack user. - -NTP service ------------ - -OpenStack services are time sensitive, users need to -be sure their environment have the time synchronized -correctly before proceeding with any backup task. - -By default, both Undercloud and Overcloud should have -configured correctly the NTP service as there are -parameters specifically defined to manage this service. - -The user is responsible to ensure that the Undercloud -restore is consistent in time. For example, a user -installs the Undercloud at the time 'm', then they deploy -the Undercloud and the Overcloud at the time 'n', and -they create an Undercloud backup at the time 'o'. When the user -restore the Undercloud it needs to be sure is restored -at a time later than 'o'. So, before and after restoring the Undercloud -node is important to have all the deployment with the time -updated and synchronized correctly. - -In case this is done manually, execute: - -:: - - sudo yum install -y ntp - sudo chkconfig ntpd on - sudo service ntpd stop - sudo ntpdate pool.ntp.org - sudo service ntpd restart - -After ensuring the environment have the time synchronized correctly -you can continue with the restore tasks. - -Downloading automated Undercloud backups ----------------------------------------- - -If the user has executed the Undercloud backup from the -TripleO CLI, it will need to download it to a local folder -and from there execute the restore steps. 
- -:: - - # From the Undercloud - source stackrc - mkdir /var/tmp/test_bk_down - cd /var/tmp/test_bk_down - openstack container save undercloud-backups - -Now, in the `restore_uc_backup` folder there must be a file with the -following naming convention `UC-backup-.tar`. - -After getting the backup file and unzipping it in any -selected folder, the user can proceed with the Undercloud restore. - -The following is an example of how to extract the Undercloud -backup content: - -:: - - sudo tar -xvf /var/tmp/test_bk_down/UC-backup-*.tar -C /var/tmp/test_bk_down || true - -There, the user will have a tar file with the content of the file system backup -and another gz file with the content of the database backup. - -The user can proceed to unzip the database -and filesystem backup by executing: - -:: - - sudo gunzip /var/tmp/test_bk_down/*.gz -c > /var/tmp/test_bk_down/all-databases.sql - sudo tar -xvf /var/tmp/test_bk_down/filesystem-*.tar -C /var/tmp/test_bk_down --xattrs - -Restoring a backup of your Undercloud on a Fresh Machine --------------------------------------------------------- - -Assuming that the user has a fresh installed Undercloud -node, the user is able to log in as the stack user, and -have the Backup restored in the folder -`/var/tmp/test_bk_down`, follow the next steps. - -Synchronize the stack home directory, haproxy configuration, -certificates and hieradata with the backup content: - -:: - - sudo rsync -aX /var/tmp/test_bk_down/home/stack/ /home/stack - sudo rsync -aX /var/tmp/test_bk_down/etc/haproxy/ /etc/haproxy/ - sudo rsync -aX /var/tmp/test_bk_down/etc/pki/instack-certs/ /etc/pki/instack-certs/ - sudo mkdir -p /etc/puppet/hieradata/ - sudo rsync -aX /var/tmp/test_bk_down/etc/puppet/hieradata/ /etc/puppet/hieradata/ - sudo rsync -aX /var/tmp/test_bk_down/srv/node/ /srv/node/ - sudo rsync -aX /var/tmp/test_bk_down/var/lib/glance/ /var/lib/glance/ - -The Keystone configuration files need to be synchronized -before reinstalling the Undercloud node. This is needed -because we need to have the same keys in the folders -`credential-keys` and `fernet-keys` so they don't get regenerated -when running the puppet Undercloud configuration. -Take into account that the package `openstack-keystone` needs to be installed -before synchronizing its configuration data: - -:: - - sudo rsync -a /var/tmp/test_bk_down/etc/keystone/ /etc/keystone/ - -If the user is using SSL, you need to refresh the CA certificate: - -:: - - sudo mkdir -p /etc/pki/instack-certs || true - sudo cp /home/stack/undercloud.pem /etc/pki/instack-certs - sudo cp /home/stack/cacert.pem /etc/pki/ca-trust/source/anchors/ - sudo cp /home/stack/overcloud-cacert.pem /etc/pki/ca-trust/source/anchors/ - sudo semanage fcontext -a -t etc_t "/etc/pki/instack-certs(/.*)?" 
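    # The semanage rule above only registers the SELinux file context;
    # restorecon below applies it to the restored certificates and
    # update-ca-trust rebuilds the system CA bundle from the copied anchors.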
- sudo restorecon -R /etc/pki/instack-certs - sudo update-ca-trust extract - -Install the required packages with: - -:: - - sudo yum install -y mariadb mariadb-server python-tripleoclient - -If you are using Pike and Ceph will be used in the Overcloud, install -ceph-ansible on the Undercloud: - -:: - - sudo yum install -y ceph-ansible - -Restart MySQL: - -:: - - sudo systemctl restart mariadb - -Allow restore big dump DB files: - -:: - - mysql -uroot -e"set global max_allowed_packet = 1073741824;" - - -Restore the DB backup: - -:: - - mysql -u root < /var/tmp/test_bk_down/all-databases.sql - -Restart Mariadb to refresh the permissions from the backup file: - -:: - - sudo systemctl restart mariadb - -Register the root password from the configuration file and clean -the DB password to be able to reinstall the Undercloud: - -:: - - oldpassword=$(sudo cat /var/tmp/test_bk_down/root/.my.cnf | grep -m1 password | cut -d'=' -f2 | tr -d "'") - mysqladmin -u root -p$oldpassword password '' - -Remove old user permissions if it exists, replace with the host related to each user. - -:: - - mysql -e 'select host, user, password from mysql.user;' - for i in ceilometer glance heat ironic keystone neutron nova mistral zaqar;do mysql -e "drop user $i@" || true ;done - mysql -e 'flush privileges' - -We have to now install the swift and glance base packages, and then restore their data: - -:: - - sudo yum install -y openstack-glance openstack-swift - # Restore data from the Backup to: srv/node and var/lib/glance/images - # Confirm data is owned by correct user - sudo chown -R swift: /srv/node - sudo chown -R glance: /var/lib/glance/images - -Finally, we rerun the Undercloud installation from the stack user, making sure to run it in the stack user home dir: - -:: - - # Double check hostname is correctly set in /etc/hosts - openstack undercloud install - -Reconnect the restored Undercloud to the Overcloud --------------------------------------------------- -Having completed the steps above, the Undercloud can be expected to automatically -restore its connection to the Overcloud. The nodes will continue to poll -Orchestration (heat) for pending tasks, using a simple HTTP request issued every -few seconds. diff --git a/deploy-guide/source/post_deployment/backup_and_restore/04_overcloud_restore.rst b/deploy-guide/source/post_deployment/backup_and_restore/04_overcloud_restore.rst deleted file mode 100644 index a1cc820a..00000000 --- a/deploy-guide/source/post_deployment/backup_and_restore/04_overcloud_restore.rst +++ /dev/null @@ -1,185 +0,0 @@ -Restoring the Overcloud control plane services -============================================== - -Restoring the Overcloud control plane from a failed state -depends on the specific issue the operator is facing. - -This section provides a restore method for -the backups created in previous steps. - -The general strategy of restoring an Overcloud control plane -will be to have the services working back again to -re-run the update/upgrade tasks. - -YUM update rollback -------------------- - -Depending on the updated packages, running a yum rollback -based on the `yum history` command might not be a good idea. -In the specific case of an OpenStack minor update or a major upgrade -will be harder as there will be several dependencies and packages -to downgrade based on the number of transactions yum had to run to upgrade -all the node packages. -Also, using `yum history` to rollback transactions -can lead to target to remove packages needed for the -system to work correctly. 
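If a rollback is considered despite this, the yum transactions can at least be reviewed beforehand to see which packages would be affected; the transaction id is whatever `yum history list` reports for the update::

    sudo yum history list
    sudo yum history info <transaction-id>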
- - -Database restore ----------------- - -In the case we have updated the packages correctly, and the user has an -issue with updating the database schemas, we might need to restore the -database cluster. - -With all the services stopped in the Overcloud controllers (except MySQL), go through -the following procedure: - -On all the controller nodes, drop connections to the database port via the VIP by running:: - - MYSQLIP=$(grep -A1 'listen mysql' /var/lib/config-data/haproxy/etc/haproxy/haproxy.cfg | grep bind | awk '{print $2}' | awk -F":" '{print $1}') - sudo nft add rule inet filter TRIPLEO_INPUT tcp dport 3306 ip daddr $MYSQLIP drop - -This will isolate all the MySQL traffic to the nodes. - -On only one controller node, unmanage galera so that it is out of pacemaker's control:: - - pcs resource unmanage galera - -Remove the wsrep_cluster_address option from `/var/lib/config-data/mysql/etc/my.cnf.d/galera.cnf`. -This needs to be executed on all nodes:: - - grep wsrep_cluster_address /var/lib/config-data/mysql/etc/my.cnf.d/galera.cnf - vi /var/lib/config-data/mysql/etc/my.cnf.d/galera.cnf - -On all the controller nodes, stop the MariaDB database:: - - mysqladmin -u root shutdown - -On all the controller nodes, move existing MariaDB data directories and prepare new data directories:: - - sudo -i - mv /var/lib/mysql/ /var/lib/mysql.old - mkdir /var/lib/mysql - chown mysql:mysql /var/lib/mysql - chmod 0755 /var/lib/mysql - mysql_install_db --datadir=/var/lib/mysql --user=mysql - chown -R mysql:mysql /var/lib/mysql/ - restorecon -R /var/lib/mysql - -On all the controller nodes, move the root configuration to a backup file:: - - sudo mv /root/.my.cnf /root/.my.cnf.old - sudo mv /etc/sysconfig/clustercheck /etc/sysconfig/clustercheck.old - -On the controller node we previously set to `unmanaged`, bring the galera cluster up with pacemaker:: - - pcs resource manage galera - pcs resource cleanup galera - -Wait for the galera cluster to come up properly and run the following -command to wait and see all nodes set as masters as follows:: - - pcs status | grep -C3 galera - # Master/Slave Set: galera-master [galera] - # Masters: [ overcloud-controller-0 overcloud-controller-1 overcloud-controller-2 ] - -NOTE: If the cleanup does not show all controller nodes as masters, re-run the following command:: - - pcs resource cleanup galera - -On the controller node we previously set to `unmanaged` which is managed back -by pacemaker, restore the OpenStack database that was backed up in a previous section. -This will be replicated to the other controllers by Galera:: - - mysql -u root < openstack_database.sql - -On the same controller node, restore the users and permissions:: - - mysql -u root < grants.sql - -Pcs status will show the galera resource in error because it's now using the wrong user/password to connect to poll the database status. 
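These transient monitor failures are cleared by the `pcs resource cleanup galera` step further below; to review them first, something like the following can be used::

    pcs status | grep -i -A3 failed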
-On all the controller nodes, restore the root/clustercheck configuration to a backup file:: - - sudo mv /root/.my.cnf.old /root/.my.cnf - sudo mv /etc/sysconfig/clustercheck.old /etc/sysconfig/clustercheck - -Test the clustercheck locally for each controller node:: - - /bin/clustercheck - -Perform a cleanup in pacemaker to reprobe the state of the galera nodes:: - - pcs resource cleanup galera - -Test clustercheck on each controller node via xinetd.d:: - - curl overcloud-controller-0:9200 - # curl overcloud-controller-1:9200 - # curl overcloud-controller-2:9200 - -Remove the firewall rule from each node for the services to restore access to the database:: - - sudo nft -a list chain inet filter TRIPLEO_INPUT | grep mysql - [...] - tcp dport 3306 ip daddr $MYSQLIP drop # handle 499 - sudo nft delete rule inet filter TRIPLEO_INPUT handle 499 - -Filesystem restore ------------------- - -On all overcloud nodes, copy the backup tar file to a temporary -directory and uncompress all the data:: - - mkdir /var/tmp/filesystem_backup/data/ - cd /var/tmp/filesystem_backup/data/ - mv . - tar --xattrs -xvzf .tar.gz - -NOTE: Untarring directly on the / directory will -override your current files. Its recommended to -untar the file in a different directory. - -Cleanup the redis resource --------------------------- - -Run:: - - pcs resource cleanup redis - -Start up the services on all the controller nodes -------------------------------------------------- - -The operator must check that all services are starting correctly, -the services installed in the controllers depend on the operator -needs so the following commands might not apply completely. -The goal of this section is to show that all services must be -started correctly before proceeding to retry an update, upgrade or -use the Overcloud on a regular basis. - -Non containerized environment -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Command to start services:: - - sudo -i ;systemctl start openstack-ceilometer-central; systemctl start memcached; pcs resource enable rabbitmq; systemctl start openstack-nova-scheduler; systemctl start openstack-heat-api; systemctl start mongod; systemctl start redis; systemctl start httpd; systemctl start neutron-ovs-cleanup - -Once all the controller nodes are up, start the compute node services on all the compute nodes:: - - sudo -i; systemctl start openstack-ceilometer-compute.service; systemctl start openstack-nova-compute.service - -Containerized environment -~~~~~~~~~~~~~~~~~~~~~~~~~ - -The operator must check all containerized services are running correctly, please identify those stopped services by running:: - - sudo docker ps - -Once the operator finds a stopped service, proceed to start it by running:: - - sudo docker start - - - - - diff --git a/deploy-guide/source/post_deployment/backup_and_restore/05_rear.rst b/deploy-guide/source/post_deployment/backup_and_restore/05_rear.rst deleted file mode 100644 index 68db73b4..00000000 --- a/deploy-guide/source/post_deployment/backup_and_restore/05_rear.rst +++ /dev/null @@ -1,212 +0,0 @@ -Creating backups and restores using ReaR ----------------------------------------- - - -ReaR is a disaster recovery solution for Linux. -Relax-and-Recover, creates both a bootable rescue -image and a backup of the associated files you choose. - -When doing disaster recovery of a system, this Rescue -Image plays the files back from the backup and so in -very quickly to the latest state. - -Various configuration options are available for the rescue -image. 
For example, slim ISO files, USB sticks or even images -for PXE servers are generated. As many backup options are -possible. Starting with a simple archive file (eg * .tar.gz), -various backup technologies such as IBM Tivoli Storage -Manager (TSM), EMC NetWorker (Legato), Bacula or even Bareos -can be addressed. - -ReaR is written in Bash and it enables the skillful distribution -of Rescue Images and if necessary archive files via NFS, CIFS -(SMB) or another transport method in the network. -The actual recovery process then takes place via this transport -route. - -In this specific case, due to the nature of the OpenStack deployment -we will choose those protocols that are allowed by default in the -Iptables rules (SSH, SFTP in particular). - -We will apply this specific use of ReaR to recover -a failed control plane after a critical maintenance -task (like an upgrade). - -1. Prepare the Undercloud backup bucket. - -We need to prepare the place to store the backups from -the Overcloud. From the Undercloud, check you have enough -space to make the backups and prepare the environment. -We will also create a user in the Undercloud with no shell -access to be able to push the backups from the controllers -or the compute nodes:: - - groupadd backup - mkdir /data - useradd -m -g backup -d /data/backup backup - echo "backup:backup" | chpasswd - chown -R backup:backup /data - chmod -R 755 /data - -2. Run the backup from the Overcloud nodes. - -Let's install some required packages and run some previous -configuration steps:: - - # Install packages - sudo yum install rear genisoimage syslinux lftp wget -y - - # Make sure you are able to use sshfs to store the ReaR backup - sudo yum install fuse -y - sudo yum groupinstall "Development tools" -y - wget http://download-ib01.fedoraproject.org/pub/epel/7/x86_64/Packages/f/fuse-sshfs-2.10-1.el7.x86_64.rpm - sudo rpm -i fuse-sshfs-2.10-1.el7.x86_64.rpm - - sudo mkdir -p /data/backup - sudo sshfs -o allow_other backup@undercloud-0:/data/backup /data/backup - # Use backup password, which is... backup - -Now, let's configure ReaR config file.:: - - #Configure ReaR - sudo tee -a "/etc/rear/local.conf" > /dev/null <<'EOF' - OUTPUT=ISO - OUTPUT_URL=sftp://backup:backup@undercloud-0/data/backup/ - BACKUP=NETFS - BACKUP_URL=sshfs://backup@undercloud-0/data/backup/ - BACKUP_PROG_COMPRESS_OPTIONS=( --gzip ) - BACKUP_PROG_COMPRESS_SUFFIX=".gz" - BACKUP_PROG_EXCLUDE=( '/tmp/*' '/data/*' ) - EOF - -Now run the backup, this should create an ISO image in -the Undercloud node (/data/backup/). - -**You will be asked for the backup user password**:: - - sudo rear -d -v mkbackup - -Now, we can proceed to simulate a failure in any node we want -to restore for testing the procedure.:: - - sudo rm -rf /lib - -After the ISO image is created, we can proceed to -verify we can restore it from the Hypervisor. - -3. Prepare the hypervisor. - -We will run in the Hypervisor some pre backup steps in -order to have the correct configuration to mount the -backup bucket from the Undercloud node:: - - # Enable the use of fusefs for the VMs on the hypervisor - setsebool -P virt_use_fusefs 1 - - # Install some required packages - sudo yum install -y fuse-sshfs - - # Mount the Undercloud backup folder to access the images - mkdir -p /data/backup - sudo sshfs -o allow_other root@undercloud-0:/data/backup /data/backup - ls /data/backup/* - -4. Stop the damaged controller node. 
- -In this step we will proceed to edit the VM definition -to be able to boot the rescue image.:: - - virsh shutdown controller-0 - # virsh destroy controller-0 - - # Wait until is down - watch virsh list --all - - # Backup the guest definition - virsh dumpxml controller-0 > controller-0.xml - cp controller-0.xml controller-0.xml.bak - -Now, we need to change the guest definition to boot from the ISO file. - -Edit controller-0.xml and update it to boot from the ISO file. - -Find the OS section,add the cdrom device and enable the boot menu.:: - - - - - - - -Edit the devices section and add the CDROM.:: - - - - - - -
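As a rough sketch only, the two edits described above typically look similar to the following; the ISO path, target device and bus are environment specific assumptions::

    <!-- inside the <os> section: boot from the ISO first and enable the boot menu -->
    <boot dev='cdrom'/>
    <boot dev='hd'/>
    <bootmenu enable='yes'/>

    <!-- inside the <devices> section: attach the rescue ISO as a cdrom -->
    <disk type='file' device='cdrom'>
      <driver name='qemu' type='raw'/>
      <source file='/data/backup/rear-controller-0.iso'/>
      <target dev='sdc' bus='sata'/>
      <readonly/>
    </disk>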
- - -Update the guest definition.:: - - virsh define controller-0.xml - -Restart and connect to the guest:: - - virsh start controller-0 - virsh console controller-0 - -You should be able to see the boot menu to start the recover -process, select Recover controller-0 and follow the instructions. - -Now, before proceeding to run the controller restore, it's -possible that the host undercloud-0 can't be resolved, -just execute.:: - - echo "192.168.24.1 undercloud-0" >> /etc/hosts - -Having resolved the Undercloud host, we just need to follow the -wizard and wait to have the environment restored. - -You should see a message like: :: - - Welcome to Relax-and-Recover. Run "rear recover" to restore your system ! - RESCUE controller-0:~ # rear recover - -The image restore should progress quickly. - -Now, each time you reboot the node will have the ISO file -as the first boot option so it's something we need to fix. -In the mean time let's check if the restore went fine. - -Reboot the guest booting from the hard disk. - -Now we can see that the guest VM started successfully. - -Now we need to restore the guest to it's original definition, -so from the Hypervisor we need to restore the `controller-0.xml.bak` -file we created.:: - - # From the Hypervisor - virsh shutdown controller-0 - watch virsh list --all - virsh define controller-0.xml.bak - virsh start controller-0 - -Considerations: -~~~~~~~~~~~~~~~ - -- Space. -- Multiple protocols supported but we might then to update - firewall rules, that's why we choose SFTP. -- Network load when moving data. -- Shutdown/Starting sequence for HA control plane. -- Do we need to backup the data plane? -- User workloads should be handled by a third party backup software. - -References -~~~~~~~~~~ - -#. https://www.anstack.com/blog/2019/05/20/relax-and-recover-backups.html -#. http://relax-and-recover.org/ diff --git a/deploy-guide/source/post_deployment/delete_nodes.rst b/deploy-guide/source/post_deployment/delete_nodes.rst deleted file mode 100644 index 2575bb14..00000000 --- a/deploy-guide/source/post_deployment/delete_nodes.rst +++ /dev/null @@ -1,71 +0,0 @@ -.. _delete_nodes: - -Deleting Overcloud Nodes -======================== - -There may be situations where it's necessary to remove specific Compute nodes -from the overcloud. In those situations, it is possible to remove specific nodes -by following the process outlined in on this page. - -.. note:: - If you're just scaling down nodes and plan to re-use them. Use :ref:`scale_roles` - instead. For temporary issues with nodes, they can be blacklisted temporarily - using ``DeploymentServerBlacklist``. - This guide is specifically for removing nodes from the environment. - -.. note:: - If your Compute node is still hosting VM's, ensure they are migrated to - another Compute node before continuing. - -.. note:: - If you are using :ref:`baremetal_provision` then follow those - scale-down instructions to call ``openstack overcloud node delete`` with a - ``--baremetal-deployment`` argument instead of passing a list of nodes to - delete as arguments. - -To delete a specific node from the Overcloud. We use the following command:: - - openstack overcloud node delete --stack $STACK_NAME - -.. note:: - This command uses the hostnames as it's referring to nodes from the Ansible - inventory. While there is currently a process to translate Nova UUID's to - the hostname. This may be removed in future releases. As such, it is - recommended to use the hostname instead of Nova UUIDs. 
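For illustration, assuming the default stack name and two hypothetical Compute hostnames taken from the Ansible inventory, the invocation could look like::

    openstack overcloud node delete --stack overcloud \
        overcloud-novacompute-0 overcloud-novacompute-1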
- -This command updates the heat stack with updated numbers and list of resource -IDs (which represent nodes) to be deleted. - -.. admonition:: Train - :class: train - - In Train, we added a user confirmation to the scale down command to - prevent accidental node removal. - To skip it, please use "--yes". - -.. admonition:: Train - :class: train - - Starting in Train and onward, `openstack overcloud node delete` can take - a list of server hostnames instead of instance ids. However they can't be - mixed while running the command. Example: if you use hostnames, it would - have to be for all the nodes to delete. - -.. note:: - Before deleting a compute node or a cephstorage node, please make sure that - the node is quiesced, see :ref:`quiesce_compute` or - :ref:`quiesce_cephstorage`. - -.. note:: - You can generate the list of hostname in the Ansible inventory using:: - - . stackrc - tripleo-ansible-inventory --stack --static-yaml-inventory overcloud-inv.yaml - - This file will contain the Ansible inventory in use and help to identify the - hostname that needs to be passed to the `node delete` command. - -.. note:: - Once the node deletion has completed. Be sure to decrement the node count in your templates. - For example, if removing a compute node, then the ``ComputeCount:`` value needs to be updated - to reflect the new correct number of nodes in the environment. diff --git a/deploy-guide/source/post_deployment/fernet_key_rotation.rst b/deploy-guide/source/post_deployment/fernet_key_rotation.rst deleted file mode 100644 index 90983466..00000000 --- a/deploy-guide/source/post_deployment/fernet_key_rotation.rst +++ /dev/null @@ -1,24 +0,0 @@ -.. _fernet_key_rotation: - -Rotation Keystone Fernet Keys from the Overcloud -================================================ - -Like most passwords in your overcloud deployment, keystone fernet keys are also -stored as part of the deployment plan in mistral. The overcloud deployment's -fernet keys can be rotated with the following command:: - - openstack workflow execution create \ - tripleo.fernet_keys.v1.rotate_fernet_keys \ - '{"container": "overcloud"}' - -Where the value for "container" is the name of the plan (which defaults to -"overcloud"). - -After waiting some time you can verify the output by taking the execution ID -from that was the output of the previous command, and issuing the following -command:: - - openstack workflow execution output show EXECUTION_UUID - -Please note that there must be an overcloud deployment ready and accessible in -order to execute this action. diff --git a/deploy-guide/source/post_deployment/index.rst b/deploy-guide/source/post_deployment/index.rst deleted file mode 100644 index 06cf676e..00000000 --- a/deploy-guide/source/post_deployment/index.rst +++ /dev/null @@ -1,36 +0,0 @@ -Post Cloud Deployment -===================== - -This section describes additional items that can be performed or configured -post cloud deployment. - -.. toctree:: - :maxdepth: 1 - - backup_and_restore/00_index.rst - delete_nodes - fernet_key_rotation - scale_roles - tempest/index - update_undercloud_ssh_keys - updating-stacks-notes - upgrade/index - validations/index - - - -Post Cloud Deployment Advanced Topics -===================================== - -This section describes advanced post deployment tasks that can be performed -or configured post cloud deployment. - -.. 
toctree:: - :maxdepth: 1 - - migration - quiesce_cephstorage - quiesce_compute - updating_network_configuration_post_deployment - vm_snapshot - pre_cache_images diff --git a/deploy-guide/source/post_deployment/migration.rst b/deploy-guide/source/post_deployment/migration.rst deleted file mode 100644 index 1bd49cad..00000000 --- a/deploy-guide/source/post_deployment/migration.rst +++ /dev/null @@ -1,53 +0,0 @@ -Migrating Workloads from an existing OpenStack cloud -==================================================== - -|project| provides the ability to manage changes over time to a cloud that it -has deployed. However, it cannot automatically take over the management of -existing OpenStack clouds deployed with another installer. Since there can be -no one-size-fits-all procedure for upgrading an existing cloud to use -|project|, it is recommended that a new cloud be deployed with |project| and -any workloads running on an existing cloud be migrated off. - -Migrating User Workloads ------------------------- - -Since the best way of avoiding or handling any downtime associated with moving -an application from one cloud to another is application-dependent, it is -preferable to have end users migrate their own applications at a time and in -the manner of their choosing. This can also help to spread out the network -bandwidth requirements, rather than copying a large number of snapshots in -bulk. - -Ideally applications can be re-created from first principles (an Orchestration -tool such as Heat can help make this repeatable) and any data populated after -the fact. This allows the new VMs to be backed by a copy-on-write disk image -overlaid on the original base image. The alternative is to :doc:`export and -then import <./vm_snapshot>` snapshots of the VM images. This may require -considerably more disk space as each VM's base image becomes its snapshot, -where previously multiple VMs may have shared the same base image. - -Reclaiming Excess Capacity --------------------------- - -As workloads are migrated off the previous cloud, compute node hardware can be -freed up to reallocate to the new cloud. Since there is likely no guarantee as -to the order in which users will migrate, it will be necessary to consolidate -the remaining VMs onto a smaller number of machines as utilization drops. This -can be done by performing live migration within the old cloud. - -Select a compute node to remove from service and follow the procedure for -:doc:`quiesce_compute`. Once this is done, the node can be removed from the old -cloud and the hardware reused, possibly by adding it to the new cloud. - -Adding New Capacity -------------------- - -As utilization of the new cloud increases and hardware becomes available from -the old cloud, additional compute nodes can be added to the new cloud with -|project|. - -First, register and introspect the additional hardware with Ironic just as you -would have done when :doc:`initially deploying -<../deployment/install_overcloud>` the cloud with |project|. Then -:doc:`scale out ` the 'Compute' role in the new overcloud to start -making use of the additional capacity. diff --git a/deploy-guide/source/post_deployment/pre_cache_images.rst b/deploy-guide/source/post_deployment/pre_cache_images.rst deleted file mode 100644 index 607f61ea..00000000 --- a/deploy-guide/source/post_deployment/pre_cache_images.rst +++ /dev/null @@ -1,215 +0,0 @@ -.. 
_precache_image: - -Pre-caching images on Compute Nodes -=================================== - -Fetching an image from Glance is often the most time consuming step when -booting an instance. This can be particularly significant in cases where the -data must traverse a high latency or limited bandwidth network, for example -with :doc:`../features/distributed_compute_node`. - -An ansible playbook is available to ensure images are already cached in cases -where instance creation times must be minimized. - -.. admonition:: Ussuri - :class: ussuri - - Since Ussuri Nova also provides an API to pre-cache images on Compute nodes. - See the `Nova Image pre-caching documentation `_. - -.. note:: The Nova Image Cache is not used when using Ceph RBD for Glance images and Nova ephemeral disk. See `Nova Image Caching documentation `_. - -Image Cache Cleanup -------------------- - -The nova-compute service remains responsible for cleaning up old unused images -on a compute node. -A periodic job examines each of the images that are not currently used by an -instance on the host. -If an image is older than the configured maximum age it will be removed. - -When an image is pre-cached the modification time is set to the current -time. This effectively sets the image age back to 0. -Therefore the pre-caching task should be repeated on an interval that is less -than the maximum image age to ensure images remain cached. - -Configuring the maximum image age ---------------------------------- - -The default maximum image age is 86400 seconds (24 hours). -This can be increased for all computes by setting `NovaImageCacheTTL` in the -deployment parameters:: - - [stack@undercloud-0 ~]$ cat nova-cache-environment.yaml - parameter_defaults: - # Set the max image age to 30 days - NovaImageCacheTTL: 2592000 - -Alternatively `NovaImageCacheTTL` can be set for individual compute roles:: - - [stack@undercloud-0 ~]$ cat nova-cache-environment.yaml - parameter_defaults: - # Set the max image age to 30 days for the ComputeSite1 role - ComputeSite1Parameters: - NovaImageCacheTTL: 2592000 - # Set the max image age to 7 days for the ComputeSite2 role - ComputeSite2Parameters: - NovaImageCacheTTL: 604800 - # Any other Compute roles default to 86400 - -.. _cache_all_computes: - -Pre-caching a list of images on all Compute nodes -------------------------------------------------- - -Get an ansible inventory for the stack name (default `overcloud`): - -.. admonition:: Wallaby - :class: wallaby - - ``tripleo-ansible-inventory`` is deprecated as of Wallaby. - - .. code-block:: bash - - [stack@undercloud-0 ~]$ mkdir -p inventories - [stack@undercloud-0 ~]$ . stackrc - (undercloud) [stack@undercloud-0 ~]$ tripleo-ansible-inventory \ - --plan overcloud --static-yaml-inventory inventories/inventory.yaml - -.. code-block:: bash - - [stack@undercloud-0 ~]$ find ~/overcloud-deploy/*/config-download \ - -name tripleo-ansible-inventory.yaml |\ - while read f; do cp $f inventories/$(basename $(dirname $f)).yaml; done - -Determine the list of image IDs to pre-cache:: - - [stack@undercloud-0 ~]$ . 
overcloudrc - (overcloud) [stack@undercloud-0 ~]$ openstack image list - +--------------------------------------+---------+--------+ - | ID | Name | Status | - +--------------------------------------+---------+--------+ - | 07bc2424-753b-4f65-9da5-5a99d8383fe6 | image_0 | active | - | d5187afa-c821-4f22-aa4b-4e76382bef86 | image_1 | active | - +--------------------------------------+---------+--------+ - -Add the image ids to an argument file for the ansible playbook:: - - (overcloud) [stack@undercloud-0 ~]$ cat < nova_cache_args.yml - tripleo_nova_image_cache_images: - - id: 07bc2424-753b-4f65-9da5-5a99d8383fe6 - - id: d5187afa-c821-4f22-aa4b-4e76382bef86 - EOF - -Source the overcloud rc file to provide the necessary credentials for image download:: - - [stack@undercloud-0 ~]$ . overcloudrc - -Run the `tripleo_nova_image_cache` playbook:: - - (overcloud) [stack@undercloud-0 ~]$ ansible-playbook -i inventories --extra-vars "@nova_cache_args.yml" /usr/share/ansible/tripleo-playbooks/tripleo_nova_image_cache.yml - - PLAY [TripleO Nova image cache management] *************************************************************************************************************************************************************************************************** - - TASK [tripleo-nova-image-cache : Cache image 07bc2424-753b-4f65-9da5-5a99d8383fe6] *********************************************************************************************************************************************************** - changed: [compute-0] - changed: [compute-1] - - TASK [tripleo-nova-image-cache : Cache image d5187afa-c821-4f22-aa4b-4e76382bef86] *********************************************************************************************************************************************************** - changed: [compute-0] - changed: [compute-1] - -.. note:: If the image already exists in cache then no change is reported however the image modification time is updated - -.. warning:: The ansible `forks` config option (default=5) will affect the number of concurrent image downloads. Consider the load on the image service if adjusting this. - -Multi-stacks inventory ----------------------- - -When a multi-stack deployment is used, such as in -:doc:`../features/distributed_compute_node` and -:doc:`../features/deploy_cellv2`, a merged inventory allows images to be cached -on all compute nodes with a single playbook run. - -For each deployed stack, its ansible inventory is generated in -``overcloud-deploy//config-download/tripleo-ansible-inventory.yaml``. -Collect all inventories under the ``inventories`` directory: - -.. admonition:: Wallaby - :class: wallaby - - ``tripleo-ansible-inventory`` is deprecated as of Wallaby. A multi-stack - inventory can be created by specifying a comma separated list of stacks: - - .. code-block:: bash - - [stack@undercloud-0 ~]$ mkdir -p inventories - [stack@undercloud-0 ~]$ . stackrc - (undercloud) [stack@undercloud-0 ~]$ tripleo-ansible-inventory \ - --plan overcloud,site1,site2 \ - --static-yaml-inventory inventories/multiinventory.yaml - -.. code-block:: bash - - [stack@undercloud-0 ~]$ mkdir -p inventories - [stack@undercloud-0 ~]$ find ~/overcloud-deploy/*/config-download \ - -name tripleo-ansible-inventory.yaml |\ - while read f; do cp $f inventories/$(basename $(dirname $f)).yaml; done - -When all inventory files are stored in a single directory, ansible merges it. -The playbook can then be run once as in :ref:`cache_all_computes` to pre-cache on all compute nodes. - -.. 
_scp_distribution: - -Pre-caching on one node and distributing to remaining nodes ------------------------------------------------------------ - -In the case of a :doc:`../features/distributed_compute_node` it may be desirable to transfer an image to a single compute node at a remote site and then redistribute it from that node to the remaining compute nodes. -The SSH/SCP configuration that exists between the compute nodes to support cold migration/resize is reused for this purpose. - -.. warning:: SSH/SCP is inefficient over high latency networks. The method should only be used when the compute nodes targeted by the playbook are all within the same site. To ensure this is the case set tripleo_nova_image_cache_plan to the stack name of the site. Multiple runs of ansible-playbook are then required, targeting a different site each time. - -To enable this simply set `tripleo_nova_image_cache_use_proxy: true` in the arguments file. -The image is distributed from the first compute node by default. To use a specific compute node also set `tripleo_nova_image_cache_proxy_hostname`. - -For example:: - - (central) [stack@undercloud-0 ~]$ cat < dcn1_nova_cache_args.yml - tripleo_nova_image_cache_use_proxy: true - tripleo_nova_image_cache_proxy_hostname: dcn1-compute-1 - tripleo_nova_image_cache_images: - - id: 07bc2424-753b-4f65-9da5-5a99d8383fe6 - tripleo_nova_image_cache_plan: dcn1 - EOF - - (central) [stack@undercloud-0 ~]$ ansible-playbook -i inventories --extra-vars "@dcn1_nova_cache_args.yml" /usr/share/ansible/tripleo-playbooks/tripleo_nova_image_cache.yml - - PLAY [TripleO Nova image cache management] *************************************************************************************************************************************************************************************************** - - TASK [tripleo-nova-image-cache : Show proxy host] ******************************************************************************************************************************************************************************************** - ok: [dcn-compute-0] => { - "msg": "Proxy host is dcn-compute-1" - } - - TASK [tripleo-nova-image-cache : Cache image 07bc2424-753b-4f65-9da5-5a99d8383fe6] *********************************************************************************************************************************************************** - skipping: [dcn1-compute-0] - changed: [dcn1-compute-1] - - TASK [tripleo-nova-image-cache : Cache image (via proxy) 07bc2424-753b-4f65-9da5-5a99d8383fe6] *********************************************************************************************************************************************** - skipping: [dcn1-compute-1] - changed: [dcn1-compute-0] - - (central) [stack@undercloud-0 ~]$ cat < dcn2_nova_cache_args.yml - tripleo_nova_image_cache_use_proxy: true - tripleo_nova_image_cache_images: - - id: 07bc2424-753b-4f65-9da5-5a99d8383fe6 - tripleo_nova_image_cache_plan: dcn2 - EOF - - (central) [stack@undercloud-0 ~]$ ansible-playbook -i inventories --extra-vars "@dcn2_nova_cache_args.yml" /usr/share/ansible/tripleo-playbooks/tripleo_nova_image_cache.yml - - PLAY [TripleO Nova image cache management] *************************************************************************************************************************************************************************************************** - ... - ... - -.. warning:: The ansible `forks` config option (default=5) will affect the number of concurrent SCP transfers. 
Consider the load on the proxy compute node if adjusting this. diff --git a/deploy-guide/source/post_deployment/quiesce_cephstorage.rst b/deploy-guide/source/post_deployment/quiesce_cephstorage.rst deleted file mode 100644 index 7a7cb4e6..00000000 --- a/deploy-guide/source/post_deployment/quiesce_cephstorage.rst +++ /dev/null @@ -1,47 +0,0 @@ -.. _quiesce_cephstorage: - -Quiescing a CephStorage Node -============================ - -The process of quiescing a cephstorage node means to inform the Ceph -cluster that one or multiple OSDs will be permanently removed so that -the node can be shut down without affecting the data availability. - -Take the OSDs out of the cluster --------------------------------- - -Before you remove an OSD, you need to take it out of the cluster so that Ceph -can begin rebalancing and copying its data to other OSDs. Running the following -commands on a given cephstorage node will take all data out of the OSDs hosted -on it:: - - OSD_IDS=$(ls /var/lib/ceph/osd | awk 'BEGIN { FS = "-" } ; { print $2 }') - for OSD_ID in $OSD_IDS; do ceph crush reweight osd.$OSD_ID 0.0; done - -Ceph will begin rebalancing the cluster by migrating placement groups out of -the OSDs. You can observe this process with the ceph tool:: - - ceph -w - -You should see the placement group states change from active+clean to active, -some degraded objects, and finally active+clean when migration completes. - -Removing the OSDs ------------------ - -After the rebalancing, the OSDs will still be running. Running the following on -that same cephstorage node will stop all OSDs hosted on it, remove them from the -CRUSH map, from the OSDs map and delete the authentication keys:: - - OSD_IDS=$(ls /var/lib/ceph/osd | awk 'BEGIN { FS = "-" } ; { print $2 }') - for OSD_ID in $OSD_IDS; do - ceph osd out $OSD_ID - systemctl stop ceph-osd@$OSD_ID - ceph osd crush remove osd.$OSD_ID - ceph auth del osd.$OSD_ID - ceph osd rm $OSD_ID - done - -You are now free to reboot or shut down the node (using the Ironic API), or -even remove it from the overcloud altogether by scaling down the overcloud -deployment, see :ref:`delete_nodes`. diff --git a/deploy-guide/source/post_deployment/quiesce_compute.rst b/deploy-guide/source/post_deployment/quiesce_compute.rst deleted file mode 100644 index a9b915a1..00000000 --- a/deploy-guide/source/post_deployment/quiesce_compute.rst +++ /dev/null @@ -1,83 +0,0 @@ -.. _quiesce_compute: - -Quiescing a Compute Node -======================== - -The process of quiescing a compute node means to migrate workload off the node -so that it can be shut down without affecting the availability of end-users' -VMs. You might want to perform this procedure when rebooting a compute node to -ensure that package updates are applied (e.g. after a kernel update); to -consolidate workload onto a smaller number of machines when scaling down an -overcloud; or when replacing the compute node hardware. - -Setting up Keys ---------------- - -Assuming that the backing files for Nova VMs are not hosted on a shared storage -volume (with all compute nodes having access), the compute nodes will need to -be configured with ssh keys so that the `nova` user on each compute node has -ssh access to the corresponding account on the other compute nodes. 
- -First, generate an ssh key:: - - ssh-keygen -t rsa -f nova_id_rsa - -Then, on each compute node, run the following script to set up the keys:: - - NOVA_SSH=/var/lib/nova/.ssh - mkdir ${NOVA_SSH} - - cp nova_id_rsa ${NOVA_SSH}/id_rsa - chmod 600 ${NOVA_SSH}/id_rsa - cp nova_id_rsa.pub ${NOVA_SSH}/id_rsa.pub - cp nova_id_rsa.pub ${NOVA_SSH}/authorized_keys - - chown -R nova.nova ${NOVA_SSH} - - # enable login for nova user on compute hosts: - usermod -s /bin/bash nova - - # add ssh keys of overcloud nodes into known hosts: - ssh-keyscan -t rsa `os-apply-config --key hosts --type raw --key-default '' | awk '{print $1}'` >>/etc/ssh/ssh_known_hosts - - -Initiating Migration --------------------- - -First, obtain a list of the current Nova services:: - - source ~/overcloudrc # admin credentials for the overcloud - nova service-list - -Disable the `nova-compute` service on the node you wish to quiesce, to prevent -new VMs being scheduled on it:: - - nova service-disable nova-compute - - -Begin the process of migrating VMs off the node:: - - nova host-servers-migrate - -Completing Migration --------------------- - -The current status of the migration process can be retrieved with the command:: - - nova migration-list - -When migration of each VM completes, its state in Nova will change to -`VERIFY_RESIZE`. This gives you an opportunity to confirm that the migration -completed successfully, or to roll it back. To confirm the migration, use the -command:: - - nova resize-confirm - -Finally, once all migrations are complete and confirmed, remove the service -running (but disabled) on the compute node from Nova altogether:: - - nova service-delete - -You are now free to reboot or shut down the node (using the Ironic API), or -even remove it from the overcloud altogether by scaling down the overcloud -deployment, see :ref:`delete_nodes`. diff --git a/deploy-guide/source/post_deployment/scale_roles.rst b/deploy-guide/source/post_deployment/scale_roles.rst deleted file mode 100644 index c770f4e8..00000000 --- a/deploy-guide/source/post_deployment/scale_roles.rst +++ /dev/null @@ -1,9 +0,0 @@ -.. _scale_roles: - -Scaling overcloud roles -======================= -If you want to increase or decrease resource capacity of a running overcloud, -you can start more servers of a selected role or delete some servers if -capacity should be decreased. This is now covered by the new Baremetal -provisioning process. Please refer to the following document for this node -scaling procedure :doc:`../provisioning/baremetal_provision` diff --git a/deploy-guide/source/post_deployment/tempest/index.rst b/deploy-guide/source/post_deployment/tempest/index.rst deleted file mode 100644 index f6b99805..00000000 --- a/deploy-guide/source/post_deployment/tempest/index.rst +++ /dev/null @@ -1,11 +0,0 @@ -Tempest -======= - -This section describes tempest related items. - -.. toctree:: - :maxdepth: 1 - - os_tempest - tempest - tempest_plugins diff --git a/deploy-guide/source/post_deployment/tempest/os_tempest.rst b/deploy-guide/source/post_deployment/tempest/os_tempest.rst deleted file mode 100644 index d0ee3e9c..00000000 --- a/deploy-guide/source/post_deployment/tempest/os_tempest.rst +++ /dev/null @@ -1,168 +0,0 @@ -os_tempest -========== - -os_tempest is the unified ansible role for installing, configuring and running tempest -tests as well as processing tempest results using stackviz. - -TripleO CI group collaborates on `os_tempest` development. 
To see what -`os_tempest` is and the reasons why it was started have a look -`at the os-tempest documentation `_. - -Installation on a manually deployed TripleO Standalone Deployment ------------------------------------------------------------------ -Follow the `os_tempest Installation guide -`_. - -If the installation was successful you can expect ansible-galaxy to list the following roles: config_template, python_venv_build, os_tempest. - -.. code-block:: shell - - $ ansible-galaxy list - - config_template, master - - python_venv_build, master - - os_tempest, (unknown version) - -Running os_tempest role using the playbook ------------------------------------------- -In order to run os_tempest role on target host, you must first ensure you have a working clouds.yaml. - -Then, we can create a `tempest.yaml` playbook with the following vars: - -.. code-block:: yaml - - --- - - hosts: localhost - name: Run Tempest on Standalone - vars: - ansible_become: true - tempest_run: 'yes' - tempest_install_method: 'distro' - tempest_cloud_name: 'standalone' - tempest_workspace: "/home/centos/tempest" - tempest_services: - - neutron - tempest_public_net_physical_type: 'datacentre' - tempest_private_net_provider_type: 'geneve' - tempest_service_setup_host: '{{ inventory_hostname }}' - tempest_public_subnet_cidr: '192.168.0.0/24' - tempest_public_subnet_gateway_ip: '{{ tempest_public_subnet_cidr|nthhost(1) }}' - tempest_public_subnet_allocation_pools: '{{ tempest_public_subnet_cidr|nthhost(100) ~ "-" ~ tempest_public_subnet_cidr|nthhost(120) }}' - tempest_use_tempestconf: true - tempest_run_stackviz: false - tempest_tempest_conf_overrides: - auth.tempest_roles: "Member" - tempest_test_includelist: - - 'tempest.api.identity.v3' - gather_facts: true - roles: - - os_tempest - -What are these above vars: -++++++++++++++++++++++++++ - -* `ansible_become: true`: os_tempest requires root permission for installation and creation of tempest related directories -* `tempest_run: 'yes'`: For running os_tempest role, by default, It is set to `no`. -* `tempest_install_method: 'distro'`: Set to `distro` for installing tempest and it's plugins from distro packages -* `tempest_workspace`: It is the full directory path where we want to create tempest workspace. -* `tempest_cloud_name: 'standalone'`: Name of the cloud name from clouds.yaml file for using to create tempest related resources on target host. -* `tempest_services`: For installing tempest plugins as well as creating pre tempest resources like networks for tempest tests. -* `tempest_public_net_physical_type`: - The name of public physical network. For standalone tripleo deployment, it can found under `/var/lib/config-data/ - puppet-generated/neutron/etc/neutron/plugins/ml2/ml2_conf.ini` and then look for the value of `flat_networks`. -* `tempest_private_net_provider_type`: - The Name of the private network provider type, in case of ovn deployment, it should be `geneve`. - It can be found under `/var/lib/config-data/puppet-generated/neutron/etc/neutron/plugins/ml2/ml2_conf.ini` and then look for `type_drivers`. -* `tempest_service_setup_host`: It should be set to ansible inventory hostname. For some operation, the ansible role delegates to inventory hostname. -* `tempest_public_subnet_cidr`: Based on the standalone deployment IP, we need to pass a required cidr. 
-* `tempest_public_subnet_gateway_ip and tempest_public_subnet_allocation_pools`: - Subnet Gateway IP and allocation pool can be calculated based on the value of `tempest_public_subnet_cidr` nthhost value. -* `tempest_use_tempestconf`: For generating tempest.conf, we use python-tempestconf tool. By default It is set to false. Set it to `true` for using it -* `tempest_run_stackviz`: Stackviz is very useful in CI for analyzing tempest results, for local use, we set it to false. By default it is set to true. -* `tempest_tempest_conf_overrides`: In order to pass additional tempest configuration to python-tempestconf tool, we can pass a dictionary of values. -* `tempest_test_includelist`: We need to pass a list of tests which we wish to run on the target host as a list. -* `tempest_test_excludelist`: In order to skip tempest tests, we can pass the list here. -* `gather_facts`: We need to set gather_facts to true as os_tempest rely on targeted environment facts for installing stuff. - - -Here are the `defaults vars of os_tempest role `_. - -How to run it? -++++++++++++++ -We can use `ansible-playbook` command to run the `tempest.yaml` playbook. - -.. code-block:: shell - - $ ansible-playbook tempest.yaml - -Once the playbook run finishes, we can find the tempest related directories in the tempest workspace. -within `tempest_workspace/etc/` dir, we can find following files: - -* tempest.conf -* tempest_includelist.txt -* tempest_excludelist.txt - -within `/var/log/tempest` dir, we can find the tempest tests results in html format. - -* stestr_results.html -* test_list.txt - -Create your own os_tempest job -------------------------------- - -We are going to use `tripleo-ci-centos-7-standalone-os-tempest` job, which -uses the role for validating the cloud. - -Create a job definition in your `.zuul.yaml` file putting -`tripleo-ci-centos-7-standalone-os-tempest` as a parent of the job: - -.. code-block:: yaml - - - job: - name: our-tripleo-os-tempest-job - parent: tripleo-ci-centos-7-standalone-os-tempest - -.. note:: - - More about Zuul job definitions can be found in - `the official Zuul documentation `_. - -.. note:: - - This page assumes that the reader is familiar with - `TripleO CI jobs `_ - and with the procedures of - `adding new TripleO jobs `_. - -By default, `tripleo-ci-centos-7-standalone-os-tempest` sets the following -variables for controlling behaviour of `os_tempest`: - -.. code-block:: yaml - - vars: - tempest_install_method: distro - tempest_cloud_name: 'standalone' - -It runs `tempest.yaml` playbook which sets the rest of the `os_tempest` -variables needed for execution on top of an environment deployed by one of the -TripleO CI jobs. The -`content of the playbook can be seen here `_. - -If you want to set some of the variables mentioned above differently you need -to override them by adding those variables to your job definition. - -Let's say you would like to change `tempest_cloud_name` and -`tempest_public_net_physical_type`. After setting the variables your job -definition should look like: - -.. code-block:: yaml - - - job: - name: our-tripleo-os-tempest-job - parent: tripleo-ci-centos-7-standalone-os-tempest - vars: - tempest_cloud_name: - tempest_public_net_physical_type: - -To see configuration options, please, follow -`this page `_ -of official documentation of `os_tempest` role. 
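When experimenting locally before writing the job definition, the same include and
exclude variables described earlier can also be overridden directly on the
``ansible-playbook`` command line. A minimal sketch, assuming the ``tempest.yaml``
playbook from the previous section; the test patterns below are illustrative only::

    $ ansible-playbook tempest.yaml \
        -e '{"tempest_test_includelist": ["tempest.api.identity.v3"]}' \
        -e '{"tempest_test_excludelist": ["tempest.api.identity.v3.admin"]}'
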
diff --git a/deploy-guide/source/post_deployment/tempest/tempest.rst b/deploy-guide/source/post_deployment/tempest/tempest.rst deleted file mode 100644 index 32b76863..00000000 --- a/deploy-guide/source/post_deployment/tempest/tempest.rst +++ /dev/null @@ -1,703 +0,0 @@ -Tempest -======= - -This is a set of integration tests to be run against a live OpenStack cluster. -Tempest has batteries of tests for OpenStack API validation, scenarios, and -other specific tests useful in validating an OpenStack deployment. - -Current State of Tempest ------------------------- - -Source code : https://opendev.org/openstack/tempest/ - -Tempest Version release wise: -+++++++++++++++++++++++++++++ -* Ocata : 16.1.0 -* Pike : 17.2.0 -* Queens : 18.0.0 -* Master : master - -What Tempest provides? ----------------------- - -* Tempest provides a set of stable apis/interfaces which are used in tempest - tests and tempest plugins to keep backward compatibility. - - * Below is the list of stable interfaces: - - * tempest.lib.* - * tempest.config - * tempest.test_discover.plugins - * tempest.common.credentials_factory - * tempest.clients - * tempest.test - -* Tempest contains API tests for Nova, Glance, Cinder, Swift, Keystone as well - as scenario tests for covering these components and these tests are used for - InterOp certifications as validating the OpenStack deployment for the above - services. - -* The test which do not fit within the Tempest testsuite will go under - respective service specific tempest plugins. - -Tempest Plugins ---------------- - -Tempest plugins contain the API and scenario tests for specific OpenStack -services. -Here is the detailed list of `tempest plugins consumed`_ in a TripleO deployment. - -.. _tempest plugins consumed: ./tempest_plugins.html - -Packages provided by RDO ------------------------- - -* Tempest related RPMs - - * python-tempest: this package contains the tempest python library and is - consumed as a dependency for out of tree tempest plugins i.e. for Horizon - and Designate tempest plugins. - * python-tempestconf: It provides the :command:`discover-tempest-config` - utility through which we can generate tempest config. - * openstack-tempest: this package contains a set of integration tests to be - run against a live OpenStack cluster and required executables for running - tempest. Packages `python-tempest` and `python-tempestconf` mentioned above - are dependencies of `openstack-tempest` package. - * openstack-tempest-all: It will install openstack-tempest as well as all - the tempest plugins on the system. - -* Test Runners: - - * python-stestr: It is a parallel python test runner built around subunit. - It is used by Tempest to run tempest tests under the hood. - * python-os-testr: It is another test runner wrapped around stestr. It is - also used to run tempest tests. - -* Kolla based tempest container - - * RDO also provides Kolla based container images for Tempest. It has - openstack-tempest and all the required tempest plugins installed in it. - * Run the following command to pull the tempest container Image:: - - $ sudo docker pull docker.io/tripleomaster/centos-binary-tempest - - -Some housekeeping rules ------------------------ - -* **Always** install tempest and its dependencies from **RPM**. -* Make sure the right package with **correct version** is installed - (openstack-tempest RPM and its plugins are well tested in CI). -* **Never ever** mix pip and RPM in an openstack deployment. -* Please **read** the documentation fully before running tempest. 
-* openstack-tempest rpm **does not** install tempest plugins, they need to be - installed separately. -* Additional configuration for tempest plugins **may need** to be set. -* **python-tempestconf** is installed by **openstack-tempest** rpm itself. It's - not needed to install it separately. -* openstack-tempest is installed **on the undercloud**. -* Source **openstackrc file** for undercloud or overcloud when running Tempest - from undercloud. -* openstack-tempest is currently used **to validate** undercloud as well as - overcloud. -* Use Tempest **container image** to avoid installing tempest plugins on the - deployed cloud. - - -Using TripleO-QuickStart to run Tempest ---------------------------------------- - -TripleO project provides validate-tempest ansible role through which Tempest is -used to validate undercloud and overcloud. -Set your workspace and path to a config file that contains the node -configuration, the following is the default:: - - CONFIG=config/general_config/minimal.yml - WORKSPACE=/home/centos/.quickstart - -* Running tempest against overcloud:: - - $ cd - - $ bash quickstart.sh \ - --bootstrap \ - --tags all \ - --config $CONFIG \ - --working-dir $WORKSPACE/ \ - --no-clone \ - --release master-tripleo-ci \ - --extra-vars test_ping=False \ - --extra-vars run_tempest=True \ - $VIRTHOST - - The above command will run smoke tests on overcloud and use tempest rpm. - -* Running tempest against undercloud:: - - $ bash quickstart.sh \ - --bootstrap \ - --tags all \ - --config $CONFIG \ - --working-dir $WORKSPACE/ \ - --no-clone \ - --release master-tripleo-ci \ - --extra-vars test_ping=False \ - --extra-vars run_tempest=True \ - --extra-vars tempest_overcloud=False \ - --extra-vars tempest_undercloud=True \ - --extra-vars tempest_white_regex='tempest.api.(identity|compute|network|image)' \ - $VIRTHOST - - The above command will run Identity, Compute, Network and Image api tests on - undercloud. - -* Running Tempest against undercloud using containerized tempest:: - - $ bash quickstart.sh \ - --bootstrap \ - --tags all \ - --config $CONFIG \ - --working-dir $WORKSPACE/ \ - --no-clone \ - --release master-tripleo-ci \ - --extra-vars test_ping=False \ - --extra-vars run_tempest=True \ - --extra-vars tempest_overcloud=False \ - --extra-vars tempest_undercloud=True \ - --extra-vars tempest_format=container \ - --extra-vars tempest_white_regex='tempest.api.(identity|compute|network|image)' \ - $VIRTHOST - - The above command will run Identity, Compute, Network and Image api tests on - undercloud using containerized tempest. - -.. note:: - Here is the list of - `validate-tempest role variables `_ - which can be modified using extra-vars. - - -Running Tempest manually ------------------------- - -Required resources before running Tempest -+++++++++++++++++++++++++++++++++++++++++ - -The following resources are needed to be created, only if Tempest is run -manually. 
- -* If Tempest is run against undercloud, then source the stackrc file:: - - $ source stackrc - - $ export OS_AUTH_URL="$OS_AUTH_URL/v$OS_IDENTITY_API_VERSION" - -* If Tempest is run against overcloud, then source the overcloudrc file:: - - $ source overcloudrc - -* Create *Member* role for undercloud/overcloud, it will be used by tempest - tests:: - - $ openstack role create --or-show Member - -* Create a public network having external connectivity, will be used by tempest - tests when running tempest tests against overcloud - - * Create a public network:: - - $ openstack network create public \ - --external \ - --provider-network-type flat \ - --provider-physical-network datacentre - - * Create/Attach subnet to it:: - - $ openstack subnet create ext-subnet \ - --subnet-range 192.168.24.0/24 \ - --allocation-pool start=192.168.24.150,end=192.168.24.250 \ - --gateway 192.168.24.1 \ - --no-dhcp \ - --network public - - -Installing Tempest RPM and its plugins -++++++++++++++++++++++++++++++++++++++ - -Install openstack-tempest:: - - $ sudo yum -y install openstack-tempest - -Install tempest plugins - -* Find out what are the openstack services configured on overcloud/undercloud. -* Then install the respective plugins on undercloud using yum command. - -Getting the list of tempest rpms and tempest plugins installed on undercloud:: - - $ rpm -qa | grep tempest - - -Tempest workspace -+++++++++++++++++ - -Create a tempest workspace:: - - $ tempest init tempest_workspace - -tempest_workspace directory will be created automatically in the location where -the above command is executed. -It will create three folders within tempest_workspace directory. - -* etc - tempest configuration file tempest.conf will resides here. -* logs - tempest.log file will be here -* tempest_lock - It holds the lock for tempest workspace. -* .stestr.conf - It is used to load all the tempest tests. - -List tempest workspaces:: - - $ tempest workspace list - -The tempest workspace information is found in ~/.tempest folder. - - -Generating tempest.conf using discover-tempest-config -+++++++++++++++++++++++++++++++++++++++++++++++++++++ - -For running Tempest a tempest configuration file called ``tempest.conf`` needs -to be created. Thanks to that file Tempest knows the configuration of the -environment it will be run against and can execute the proper set of tests. - -The tempest configuration file can be generated automatically by -:command:`discover-tempest-config` binary, which is provided by -``python-tempestconf`` package installed by ``openstack-tempest`` rpm. -:command:`discover-tempest-config` queries the cloud and discovers cloud -configuration. - -.. note:: - To know more about ``python-tempestconf`` visit - `python-tempestconf's documentation. `_ - -.. note:: - Not all of the configuration may be discovered by - :command:`discover-tempest-config`, therefore the tempest.conf needs to be - rechecked for correctness or tuned so that it better suits the user's needs. - -All the below operations will be performed from undercloud. 
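Because not every option can be discovered automatically, it is worth rechecking the
generated file before running any tests. A quick way to review the sections that most
often need manual tuning (the workspace path below is only an example)::

    $ grep -A 5 -E '^\[(auth|compute|network|validation)\]' ~/tempest_workspace/etc/tempest.conf
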
- -For undercloud -************** - -Source the stackrc file:: - - $ source stackrc - -Use :command:`discover-tempest-config` to generate ``tempest.conf`` -automatically:: - - $ cd - - $ discover-tempest-config --out etc/tempest.conf \ - --image \ - --debug \ - --create \ - auth.use_dynamic_credentials true \ - auth.tempest_roles Member \ - network-feature-enabled.port_security true \ - compute-feature-enabled.attach_encrypted_volume False \ - validation.image_ssh_user cirros \ - validation.ssh_user cirros \ - compute-feature-enabled.console_output true - - -For overcloud -************* - -Source the overcloudrc file:: - - $ source overcloudrc - -Use :command:`discover-tempest-config` to generate tempest.conf automatically:: - - $ cd - - $ discover-tempest-config --out etc/tempest.conf \ - --deployer-input ~/tempest-deployer-input.conf \ - --network-id $(openstack network show public -f value -c id) \ - --image \ - --debug \ - --remove network-feature-enabled.api_extensions=dvr \ - --create \ - auth.use_dynamic_credentials true \ - auth.tempest_roles Member \ - network-feature-enabled.port_security true \ - compute-feature-enabled.attach_encrypted_volume False \ - network.tenant_network_cidr 192.168.0.0/24 \ - compute.build_timeout 500 \ - volume-feature-enabled.api_v1 False \ - validation.image_ssh_user cirros \ - validation.ssh_user cirros \ - network.build_timeout 500 \ - volume.build_timeout 500 \ - object-storage-feature-enabled.discoverability False \ - service_available.swift False \ - compute-feature-enabled.console_output true \ - orchestration.stack_owner_role Member - -On the successful execution of above command, the tempest.conf will be get -generated in /etc/tempest.conf. - -Things to keep in mind while using discover-tempest-config -********************************************************** -* tempest.conf values may be overridden by passing [section].[key] [value] - arguments. - For example: when **compute.allow_tenant_isolation true** is passed to - :command:`discover-tempest-config` that value will be set in tempest.conf and will - override the value set by discovery. - `More about override options. `_ - -* If OpenStack was deployed using TripleO/Director, pass the deployment input - file tempest-deployer-input.conf to the :command:`discover-tempest-config` command with - ``--deployer-input`` option. The file contains some version specific values set - by the installer. More about the argument can be found in - `python-tempestconf's CLI documentation. `_ - -* ``--remove`` option can be used to remove values from tempest.conf, - for example: ``--remove network-feature-enabled.api_extensions=dvr``. - The feature is useful when some values in tempest.conf are automatically - set by the discovery, but they are not wanted to be printed to tempest.conf. - More about the feature can be found - `here `_. - - -Always save the state of resources before running tempest tests -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -In order to be able to use tempest utility to clean up resources after running -tests, it's needed to initialize the state of resources before running the -tests:: - - $ tempest cleanup --init-saved-state - -It will create **saved_state.json** file in tempest workspace containing all -the tenants and resources information present on the system under test. More -about the feature can be found in -`Tempest documentation `_. 
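The saved state file is plain JSON, so it can be inspected directly to confirm that the
pre-existing projects and resources were recorded as expected (the workspace path is an
example)::

    $ python -m json.tool ~/tempest_workspace/saved_state.json | head -n 20
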
- -List tempest plugins installed on undercloud -++++++++++++++++++++++++++++++++++++++++++++ - -Since we install the required tempest plugins on undercloud, use tempest -command to find out:: - - $ tempest list-plugins - -List tempest tests -++++++++++++++++++ - -Go to tempest workspace and run the following command to get the list:: - - $ cd - $ tempest run -l - -To grep a list of specific tests like all compute tests:: - - $ tempest run -l | grep compute - -Running Tempest tests -+++++++++++++++++++++ - -**tempest run** utility is used to run tempest tests. It will use the configs -defined in tempest.conf to run tests against the targeted host. - -* For running all api/scenario tempest tests:: - - $ tempest run -r '(api|scenario)' - -* For running smoke tests for basic sanity of the deployed cloud:: - - $ tempest run --smoke - -* For running specific tempest plugin tests like: keystone_tempest_plugin tests:: - - $ tempest run --regex '(keystone_tempest_plugin)' - -* Running multiple tests:: - - $ tempest run --regex '(test_regex1 | test_regex2 | test_regex 3)' - -* Use ``--black-regex`` argument to skip specific tests:: - - $ tempest run -r '(api|scenario)' --black-regex='(keystone_tempest_plugin)' - - The above will skip all keystone_tempest_plugin tests. - -Using whitelist file for running selective tests -++++++++++++++++++++++++++++++++++++++++++++++++ - -Writing long test regex seems to be boring, let's create a simple whitelist file -and use the same with tempest run to run those specific whitelist tests. - -* Create a whitelist.txt file in tempest workspace:: - - $ touch whitelist.txt - -* Append all the all tests in a newline which we want to run in whitelist.txt - file:: - - $ cat whitelist.txt - keystone_tempest_plugin.* - # networking bgpvpn tempest tests - networking_bgpvpn_tempest.tests* - - .. note:: - Use **#** to add comments in the whitelist/blacklist file. - -* Running tempest tests present in whitelist file:: - - $ tempest run -w - - -Using blacklist file to skipping multiple tests -+++++++++++++++++++++++++++++++++++++++++++++++ - -If we want to skip multiple tests, we can blacklist file for the same. - -* Create a skip_test.txt file in tempest workspace:: - - $ touch skip_test.txt - - -* Append all the all tests in a newline which we want to skip in skip_test.txt - file:: - - $ cat whitelist.txt - keystone_tempest_plugin.* - # networking bgpvpn tempest tests - networking_bgpvpn_tempest.tests* - -* Use *-b* optuon with tempest run to skip/blacklist tests:: - - $ tempest run -w -b - -Running Tempest tests serially as well as in parallel -+++++++++++++++++++++++++++++++++++++++++++++++++++++ - -* All test methods within a TestCase are assumed to be executed serially. -* To run tempest tests serially:: - - $ tempest run --serial - -* Run the tests in parallel (this is the default):: - - $ tempest run --parallel - -* Specify the number of workers to use when running tests in parallel:: - - $ tempest run -r '(test_regex)' --concurrency - -* The default number of workers is equal to the number of CPUs on the system - under test. - -Generating HTML report of tempest tests -+++++++++++++++++++++++++++++++++++++++ - -* In order to generate tempest subunit files in v2 format, use ``--subunit`` - flag with tempest run:: - - $ tempest run -r '(test_regex)' --subunit - -* Generating html output from it:: - - $ subunit2html .stestr/ tempest.html - -* subunit2html command is provided by python-subunit rpm package. - - -Where are my tempest tests results? 
-+++++++++++++++++++++++++++++++++++ - -Once tempest run finishes, All the tests results are stored in subunit file -format under **.stestr** folder under tempest workspace. - -* 0,1, files contains the tempest run output. -* **failing** contains the list of failed tests with detailed api responses. -* All the tests executions api responses is logged in **tempest.log** file in - tempest workspace. - - -Status of Tempest tests after tempest run -+++++++++++++++++++++++++++++++++++++++++ - -After the execution of tempest tests, It will generate 3 status - -* **PASSED**: The test successfully run. -* **FAILED**: The test got failed due to specific reasons. -* **SKIPPED**: If a tempest tests is skipped, it will give a reason why it is - skipped. - - -Cleaning up environment after tempest run -+++++++++++++++++++++++++++++++++++++++++ -More about this feature can be found in -`Tempest documentation ` - -* Get a report of resources and tenants which got created/modified after tempest tests run:: - - $ tempest cleanup --dry-run - - It will create a dry_run.json file in tempest workspace. -* Cleaning up the environment:: - - $ tempest cleanup - -* We can force delete the tempest resources and as well as associated admin - tenants:: - - $ tempest cleanup --delete-tempest-conf-object - - -Running containerized Tempest manually --------------------------------------- -This section shows how to run Tempest from a container against overcloud or -undercloud on undercloud. The required resources for running containerized -Tempest are the same as for running the non-containerized one. -To find out which resources are needed, see -`Required resources before running Tempest`_. - -All the steps below use **stack user** as an example. You may be ssh-ed as a -different user but in that case you **have to** change all of the paths below -accordingly (instead of stack user, use your $USER) - -Prepare the tempest container -+++++++++++++++++++++++++++++ -* Change to `/home/stack` directory:: - - $ cd /home/stack - -* Download a container:: - - $ docker pull docker.io/tripleomaster/centos-binary-tempest:current-tripleo-rdo - -* Create directories which will be used for exchanging data between the host - machine and the container:: - - $ mkdir container_tempest tempest_workspace - -* We'll use container_tempest as a source of files for the container, so let's - copy there all needed files:: - - $ cp stackrc overcloudrc tempest-deployer-input.conf container_tempest - -* List available images:: - - $ docker image list - - or:: - - $ docker images - - you should see something like:: - - REPOSITORY TAG IMAGE ID CREATED SIZE - docker.io/tripleomaster/centos-binary-tempest current-tripleo-rdo 881f7ac24d8f 10 days ago 1.09 GB - - -How to execute commands within the container? -+++++++++++++++++++++++++++++++++++++++++++++ -In order to make it easier, create an alias as follows:: - - $ alias docker-tempest="docker run -i \ - -v "$(pwd)"/container_tempest:/home/stack/container_tempest \ - -v "$(pwd)"/tempest_workspace:/home/stack/tempest_workspace \ - docker.io/tripleomaster/centos-binary-tempest:current-tripleo-rdo \ - /bin/bash" - -When mounting the directories, make sure that **absolute** paths are used. 
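For example, a quick way to confirm that both directories are mounted correctly before
doing anything else is to list them from inside the container using the alias::

    $ docker-tempest -c "ls -ld /home/stack/container_tempest /home/stack/tempest_workspace"
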
- -* If you want to check available tempest plugins in the container, run:: - - $ docker-tempest -c "tempest list-plugins" - -* For getting a list of tempest related rpms installed within the tempest - container run:: - - $ docker-tempest -c "rpm -qa | grep tempest" - - -Generate tempest.conf and run tempest tests within the container -++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -* Let's create a tempest script which will be later executed within the - container in order to generate tempest.conf and run tempest tests:: - - $ cat <<'EOF'>> /home/stack/container_tempest/tempest_script.sh - # Set the exit status for the command - set -e - - # if you want to run tempest against overcloud, overcloudrc file needs - # to be sourced and in case of undercloud it's stackrc - # NOTE: the files need to be copied to /home/stack/container_tempest - # directory in order to have it accessible from the container - source /home/stack/container_tempest/overcloudrc - - # Create a tempest workspace, use the shared directory so that the files - # in it are accessible from the host as well. - tempest init /home/stack/tempest_workspace - - # change directory to tempest_workspace - pushd /home/stack/tempest_workspace - - # export TEMPESTCONF environment variable for easier later usage - export TEMPESTCONF="/usr/bin/discover-tempest-config" - # Execute the discover-tempest-config in order to generate tempest.conf - # Set --out to /home/stack/tempest_workspace/tempest.conf so that the - # tempest.conf file is later accessible from host machine as well. - # Set --deployer-input to point to the tempest-deployer-input.conf - # located in the shared directory. - $TEMPESTCONF \ - --out /home/stack/tempest_workspace/etc/tempest.conf \ - --deployer-input /home/stack/container_tempest/tempest-deployer-input.conf \ - --debug \ - --create \ - object-storage.reseller_admin ResellerAdmin - - # Run for example smoke tests - tempest run --smoke - - EOF - - .. note:: - - * Apart from arguments passed to python-tempestconf showed above, any other - wanted arguments can be specified there. See - `Generating tempest.conf using discover-tempest-config`_. - * Instead of running smoke tests, other types of tests can be ran, - see `Running Tempest tests`_ section. - * `Always save the state of resources before running tempest tests`_. - * If you **already have** a `tempest.conf` file and you want to just run - tempest tests, **omit** TEMPESTCONF from the script above and replace it - with a command which copies your `tempest.conf` from `container_tempest` - directory to `tempest_workspace/etc` directory:: - - $ cp /home/stack/container_tempest/tempest.conf /home/stack/tempest_workspace/etc/tempest.conf - -* Set executable privileges to the `tempest_script.sh` script:: - - $ chmod +x container_tempest/tempest_script.sh - -* Run the tempest script from the container as follows:: - - $ docker run -i \ - -v "$(pwd)"/container_tempest:/home/stack/container_tempest \ - -v "$(pwd)"/tempest_workspace:/home/stack/tempest_workspace \ - docker.io/tripleomaster/centos-binary-tempest:current-tripleo-rdo \ - /bin/bash \ - -c 'set -e; /home/stack/container_tempest/tempest_script.sh' - -* In case you want to rerun the tempest tests, clean tempest workspace first:: - - $ sudo rm -rf /home/stack/tempest_workspace - $ mkdir /home/stack/tempest_workspace - - .. note:: - It's done with sudo because tempest in containers creates the files - as root. 
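Because the tempest workspace is bind-mounted, the results written by the container are
also available on the host. A short sketch of post-processing them from the host,
assuming ``python-subunit`` is installed there and remembering that the files are owned
by root::

    $ sudo ls /home/stack/tempest_workspace/.stestr
    $ cd /home/stack/tempest_workspace
    $ sudo subunit2html .stestr/0 tempest.html
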
diff --git a/deploy-guide/source/post_deployment/tempest/tempest_plugins.rst b/deploy-guide/source/post_deployment/tempest/tempest_plugins.rst deleted file mode 100644 index f8bb96a9..00000000 --- a/deploy-guide/source/post_deployment/tempest/tempest_plugins.rst +++ /dev/null @@ -1,64 +0,0 @@ -Tempest Plugins -=============== - -Below is the detailed list of openstack services wise tempest plugins. - -* Keystone - * RPM package name: python3-keystone-tests-tempest - * Source Code: https://opendev.org/openstack/keystone-tempest-plugin - -* Neutron - * RPM package name: python3-neutron-tests-tempest - * Source Code: https://opendev.org/openstack/neutron-tempest-plugin - -* Designate - * RPM package name: python3-designate-tests-tempest - * Source Code: https://opendev.org/openstack/designate-tempest-plugin - -* Octavia - * RPM package name: python3-octavia-tests-tempest - * Source Code: https://opendev.org/openstack/octavia-tempest-plugin - -* Kuryr - * RPM package: python3-kuryr-tests-tempest - * Source Code: https://opendev.org/openstack/kuryr-tempest-plugin - -* patrole - * RPM package: python3-patrole-tests-tempest - * Source Code: https://opendev.org/openstack/patrole - -* Barbican - * RPM Package: python3-barbican-tests-tempest - * Source Code: https://opendev.org/openstack/barbican-tempest-plugin - -* nova-join - * RPM Package: python3-novajoin-tests-tempest - * Source Code: https://opendev.org/openstack/novajoin-tempest-plugin - -* zaqar - * RPM Package: python3-zaqar-tests-tempest - * Source Code: https://opendev.org/openstack/zaqar-tempest-plugin - -* Heat - * RPM Package: python3-heat-tests-tempest - * Source Code: https://opendev.org/openstack/heat-tempest-plugin - -* Mistral - * RPM package: python3-mistral-tests-tempest - * Source Code: https://opendev.org/openstack/mistral-tempest-plugin - -* Manila - * RPM Package: python3-manila-tests-tempest - * Source Code: https://opendev.org/openstack/manila-tempest-plugin - -* Telemetry - * RPM Package: python3-telemetry-tests-tempest - * Source Code: https://opendev.org/openstack/telemetry-tempest-plugin - -* Cinder - * RPM package: python3-cinder-tests-tempest - * Source Code: https://opendev.org/openstack/cinder-tempest-plugin - -* Ironic - * RPM Package: python3-ironic-tests-tempest - * Source Code: https://opendev.org/openstack/ironic-tempest-plugin diff --git a/deploy-guide/source/post_deployment/update_undercloud_ssh_keys.rst b/deploy-guide/source/post_deployment/update_undercloud_ssh_keys.rst deleted file mode 100644 index 477c335f..00000000 --- a/deploy-guide/source/post_deployment/update_undercloud_ssh_keys.rst +++ /dev/null @@ -1,41 +0,0 @@ -Updating undercloud user's ssh key -================================== - -In order to update the ssh key for the user on the undercloud, a few steps must -be done to ensure you do not lock yourself out of the overcloud nodes. When -the undercloud is installed, an ssh key is created and added to Nova running -on the undercloud for provisioning the overcloud nodes. This key is uploaded -into Nova as the `default` keypair. To view the keypair run:: - - source stackrc - openstack keypair list - -Process to rotate ssh key -^^^^^^^^^^^^^^^^^^^^^^^^^ - -The process to rotate the user key is as follows: - -1. Generate new key and do not replace the existing key. For example:: - - ssh-keygen -t rsa -N '' -f ~/new_ssh_key - -2. 
Copy ssh key to all existing hosts for the heat-admin user:: - - for HOST in $(openstack server list -f value -c Networks | sed -e 's/ctlplane=//'); do - ssh-copy-id -i ~/new_ssh_key heat-admin@$HOST - done - -3. Update the Undercloud's Nova default keypair:: - - openstack keypair delete default - openstack keypair create --public-key ~/new_ssh_key.pub default - -4. Backup old key and replace it with the new keys:: - - mkdir ~/.ssh/old_keys - mv ~/.ssh/id_rsa ~/.ssh/old_keys/id_rsa.backup-$(date +'%Y-%m-%d') - mv ~/.ssh/id_rsa.pub ~/.ssh/old_keys/id_rsa.pub.backup-$(date +'%Y-%m-%d') - mv ~/new_ssh_key ~/.ssh/id_rsa - mv ~/new_ssh_key.pub ~/.ssh/id_rsa.pub - -5. Remove old key from the allowed hosts on the nodes. diff --git a/deploy-guide/source/post_deployment/updating-stacks-notes.rst b/deploy-guide/source/post_deployment/updating-stacks-notes.rst deleted file mode 100644 index cfa01aea..00000000 --- a/deploy-guide/source/post_deployment/updating-stacks-notes.rst +++ /dev/null @@ -1,41 +0,0 @@ -.. _notes-for-stack-updates: - -Understanding undercloud/standalone stack updates -================================================= - -To update a service password or a secret when upgrading from a -non-containerized undercloud, you should edit ``undercloud.conf``. -Then you should use the ``openstack undercloud upgrade`` command. - -.. note:: ``undercloud.conf`` takes priority over - ``tripleo-undercloud-passwords.yaml`` only when running the undercloud - upgrade command. For the undercloud install command, you should edit - ``tripleo-undercloud-passwords.yaml`` instead. - -In order to apply changes for an existing containerized undercloud or -standalone installation, there is an important thing to remember. - -Undercloud and standalone heat installers create one-time ephemeral stacks. -Unlike the normal overcloud stacks, they cannot be updated via the regular -stack update procedure. Instead, the created heat stacks may be updated -virtually. For the most of the cases, the installer will take care of it -automatically via the `StackAction` heat parameter overrides. - -You can enforce the virtual update/create of the heat stack via -the ``--force-stack-update`` and ``--force-stack-create`` options. - -And the recommended command to apply changes for an existing containerized -undercloud installation is: - -.. code-block:: bash - - openstack undercloud install --force-stack-update - -Otherwise, start a new installation with ``--force-stack-create``. New -passwords will be generated in ``tripleo-undercloud-passwords.yaml``. - -It is better to be always explicit. - -.. note:: The console log for these operations will always have heat reporting - the STACK_CREATED status. Check the deployment logs for the actual virtual - create or update actions taken. diff --git a/deploy-guide/source/post_deployment/updating_network_configuration_post_deployment.rst b/deploy-guide/source/post_deployment/updating_network_configuration_post_deployment.rst deleted file mode 100644 index a7315abd..00000000 --- a/deploy-guide/source/post_deployment/updating_network_configuration_post_deployment.rst +++ /dev/null @@ -1,32 +0,0 @@ -.. _update_network_configuration_post_deployment: - -Updating network configuration on the Overcloud after a deployment -================================================================== - -By default, subsequent change(s) made to network configuration templates -(bonding options, mtu, bond type, etc) are not applied on existing nodes when -the overcloud stack is updated. - -.. 
Warning:: Network configuration updates are disabled by default to avoid - issues that may arise from network reconfiguration. - - Network configuration updates should only be enabled when needed. - -To push an updated network configuration add ``UPDATE`` to list of actions set -in the ``NetworkDeploymentActions`` parameter. (The default is ``['CREATE']``, -to enable network configuration on stack update it must be changed to: -``['CREATE','UPDATE']``.) - -* Enable update of the network configuration for all roles by adding the - following to ``parameter_defaults`` in an environment file:: - - parameter_defaults: - NetworkDeploymentActions: ['CREATE','UPDATE'] - -* Limit the network configuration update to nodes of a specific role by using a - role-specific parameter, i.e: ``{role.name}NetworkDeploymentActions``. For - example to update the network configuration on the nodes in the Compute role, - add the following to ``parameter_defaults`` in an environment file:: - - parameter_defaults: - ComputeNetworkDeploymentActions: ['CREATE','UPDATE'] diff --git a/deploy-guide/source/post_deployment/upgrade/fast_forward_upgrade.rst b/deploy-guide/source/post_deployment/upgrade/fast_forward_upgrade.rst deleted file mode 100644 index 5cc0376f..00000000 --- a/deploy-guide/source/post_deployment/upgrade/fast_forward_upgrade.rst +++ /dev/null @@ -1,897 +0,0 @@ -.. _ffu-docs: - -Fast Forward Upgrade - Upgrading from Newton to Queens -====================================================== - -Upgrading a TripleO deployment from Newton to Queens is done by first -executing a minor update in both undercloud and overcloud, to ensure that the -system is using the latest Newton release. After that, the undercloud is -upgraded to the target version Queens. This will then be used to upgrade the -overcloud. - -.. note:: - - Before upgrading the undercloud to Queens, make sure you have created a valid - backup of the current undercloud and overcloud. The complete backup - procedure can be found on: - :doc:`undercloud backup<../backup_and_restore/00_index>` - -Undercloud FFU upgrade ----------------------- - -.. note:: - - Fast Forward Upgrade testing cannot cover all possible deployment - configurations. Before performing the Fast Forward Upgrade of the undercloud - in production, test it in a matching staging environment, and create a backup - of the undercloud in the production environment. Please refer to - :doc:`undercloud backup<../backup_and_restore/01_undercloud_backup>` - for proper documentation on undercloud backups. - -The undercloud FFU upgrade consists of 3 consecutive undercloud upgrades to -Ocata, Pike and Queens. - -Undercloud upgrade to Ocata -~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -.. code-block:: bash - - ## Install tripleo-repos - TRIPLEO_REPOS_RPM=$(curl -L --silent https://trunk.rdoproject.org/centos7/current/ | grep python2-tripleo-repos | awk -F "href" {'print $2'} | awk -F '"' {'print $2'}) - sudo yum localinstall -y https://trunk.rdoproject.org/centos7/current/${TRIPLEO_REPOS_RPM} - - ## Deploy repos via tripleo-repos - sudo tripleo-repos -b ocata current ceph - - ## Pre-upgrade stop services and update specific packages - sudo systemctl stop openstack-* neutron-* httpd - sudo yum update -y instack-undercloud openstack-puppet-modules openstack-tripleo-common python-tripleoclient - openstack undercloud upgrade - -Undercloud upgrade to Pike -~~~~~~~~~~~~~~~~~~~~~~~~~~ - -.. 
code-block:: bash - - ## Deploy repos via tripleo-repos - sudo tripleo-repos -b pike current ceph - - ## Update tripleoclient and install ceph-ansible - sudo yum -y install ceph-ansible - sudo yum -y update python-tripleoclient - openstack undercloud upgrade - -Undercloud upgrade to Queens -~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -.. code-block:: bash - - ## Deploy repos via tripleo-repos - sudo tripleo-repos -b queens current ceph - - ## Update tripleoclient - sudo yum -y update python-tripleoclient - openstack undercloud upgrade - -Maintaining the system while the undercloud is on Queens and overcloud on Newton --------------------------------------------------------------------------------- - -After upgrading undercloud to Queens, the system is expected to be stable and -allow normal management operations of the overcloud nodes that are still on -Newton. However, to ensure that compatibility, several steps need to be -performed. - -1. You need to use the newer introspection images, because of incompatible - changes in the newer versions of ironic client. - - .. code-block:: bash - - mkdir /home/stack/images - cd /home/stack/images - wget https://images.rdoproject.org/queens/delorean/current-tripleo/ironic-python-agent.tar - tar -xvf ironic-python-agent.tar - - source /home/stack/stackrc - openstack overcloud image upload --image-path /home/stack/images/ \ - --update-existing - -2. Remember to keep the old Newton templates. When the undercloud is upgraded, - the new Queens templates are installed. The Newton templates can be used to - perform any needed configuration or management of the overcloud nodes. Be - sure that you have copied your old templates. Or if you didn't have a local - copy, clone the Newton templates under a new directory: - - .. code-block:: bash - - git clone -b stable/newton \ - https://git.opendev.org/openstack/tripleo-heat-templates tripleo-heat-templates-newton - -3. Use a new `plan-environment.yaml` file. As undercloud CLI calls have been - upgraded, they will request that file. It needs to be on - /home/stack/tripleo-heat-templates-newton, and have the following content: - - - .. code-block:: yaml - - version: 1.0 - - name: overcloud - description: > - Default Deployment plan - template: overcloud.yaml - passwords: {} - environments: - - path: overcloud-resource-registry-puppet.yaml - - Create a new docker-ha.yaml env file, based on the puppet-pacemaker one: - - .. code-block:: bash - - cp /home/stack/tripleo-heat-templates-newton/environments/puppet-pacemaker.yaml \ - /home/stack/tripleo-heat-templates-newton/environments/docker-ha.yaml - - Create an empty docker.yaml env file, replacing the one that is currently on - newton: - - .. code-block:: bash - - : > /home/stack/tripleo-heat-templates-newton/environments/docker.yaml - - After all these steps have been performed, the Queens undercloud can be used - successfully to provide and manage a Newton overcloud. - -Upgrading the overcloud from Newton to Queens ---------------------------------------------- - -.. note:: - - Generic Fast Forward Upgrade testing in the overcloud cannot cover all - possible deployment configurations. Before performing Fast Forward Upgrade - testing in the overcloud, test it in a matching staging environment, and - create a backup of the production environment (your controller nodes and your - workloads). - -The Queens upgrade workflow essentially consists of the following steps: - -#. `Prepare your environment - get container images`_, backup. 
- Generate any environment files you need for the upgrade such as the - references to the latest container images or commands used to switch repos. - -#. `openstack overcloud ffwd-upgrade prepare`_ $OPTS. - Run a heat stack update to generate the upgrade playbooks. - -#. `openstack overcloud ffwd-upgrade run`_. Run the ffwd upgrade tasks on all - nodes. - -#. `openstack overcloud upgrade run`_ $OPTS. - Run the upgrade on specific nodes or groups of nodes. Repeat until all nodes - are successfully upgraded. - -#. `openstack overcloud ceph-upgrade run`_ $OPTS. (optional) - Not necessary unless a TripleO managed Ceph cluster is deployed in the - overcloud; this step performs the upgrade of the Ceph cluster. - -#. `openstack overcloud ffwd-upgrade converge`_ $OPTS. - Finally run a heat stack update, unsetting any upgrade specific variables - and leaving the heat stack in a healthy state for future updates. - -.. _queens-upgrade-dev-docs: https://docs.openstack.org/tripleo-docs/latest/install/developer/upgrades/major_upgrade.html # WIP @ https://review.opendev.org/#/c/569443/ - -Prepare your environment - Get container images -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -When moving from Newton to Queens, the setup will be changing from baremetal to -containers. So as a part of the upgrade the container images for the target -release should be downloaded to the Undercloud. -Please see the `openstack overcloud container image prepare` -:doc:`../../deployment/install_overcloud` for more information. - -The output of this step will be a Heat environment file that contains -references to the latest container images. You will need to pass this file -into the **upgrade prepare** command using the -e flag to include the -generated file. - -You may want to populate a local docker registry in your undercloud, to make the -deployment faster and more reliable. In that case you need to use the 8787 port, -and the ip needs to be the `local_ip` parameter from the `undercloud.conf` file. - -.. code-block:: bash - - openstack overcloud container image prepare \ - --namespace=192.0.2.1:8787/tripleoqueens --tag current-tripleo \ - --output-env-file /home/stack/container-default-parameters.yaml \ - --output-images-file overcloud_containers.yaml \ - --push-destination 192.0.2.1:8787 - -In place of the `` token should go all parameters that you used with -previous `openstack overcloud deploy` command. - -After that, upload your images. - -.. code-block:: bash - - openstack overcloud container image upload \ - --config-file /home/stack/overcloud_containers.yaml \ - -e /home/stack/container-default-parameters.yaml - -Prepare your environment - New templates -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -You will also need to create an environment file to override the -`FastForwardCustomRepoScriptContent` and `FastForwardRepoType` -tripleo-heat-templates parameters, that can be used to switch the yum repos in -use by the nodes during the upgrade. -This will likely be the same commands that were used to switch repositories -on the undercloud. - -.. code-block:: yaml - - cat < init-repo.yaml - parameter_defaults: - FastForwardRepoType: custom-script - FastForwardCustomRepoScriptContent: | - set -e - case $1 in - ocata) - - ;; - pike) - - ;; - queens) - - ;; - *) - echo "unknown release $1" >&2 - exit 1 - esac - yum clean all - EOF - -The resulting init-repo.yaml will then be passed into the upgrade prepare using -the -e option. - -.. 
_Upgradeinitcommand: https://github.com/openstack/tripleo-heat-templates/blob/1d9629ec0b3320bcbc5a4150c8be19c6eb4096eb/puppet/role.role.j2.yaml#L468-L493 - -You will also need to create a cli_opts_params.yaml file, that will contain the -number of nodes for each role, and the flavor to be used. See that sample: - -.. code-block:: bash - - cat < cli_opts_params.yaml - parameter_defaults: - ControllerCount: 3 - ComputeCount: 1 - CephStorageCount: 1 - NtpServer: clock.redhat.com - EOF - -Prepare your environment - Adapt your templates -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Before running Fast Forward Upgrade, it is important that you ensure that the -custom templates that you are using in your deploy (Newton version), are -adapted to the syntax needed for the new stable release (Queens version). -Please check the annex in this document, and the changelogs of all the -different versions to get a detailed list of the templates that need to be -changed. - - -openstack overcloud ffwd-upgrade prepare -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -.. note:: - - Before running the overcloud upgrade prepare ensure you have a valid backup - of the current state, including the **undercloud** since there will be a - Heat stack update performed here. The complete backup procedure can be - found on: - :doc:`undercloud backup<../backup_and_restore/00_index>` - - -.. note:: - - After running the ffwd-upgrade prepare and until successful completion - of the ffwd-upgrade converge operation, stack updates to the deployment - Heat stack are expected to fail. That is, operations such as scaling to - add a new node or to apply any new TripleO configuration via Heat stack - update **must not** be performed on a Heat stack that has been prepared - for upgrade with the 'prepare' command. Only consider doing so after - running the converge step. See the queens-upgrade-dev-docs_ for more. - -Run **overcloud ffwd-upgrade prepare**. This command expects the full set -of environment files that were passed into the deploy command, as well as the -roles_data.yaml file used to deploy the overcloud you are about to upgrade. The -environment file should point to the file that was output by the image -prepare command you ran to get the latest container image references. - -.. note:: - - It is especially important to remember that you **must** include all - environment files that were used to deploy the overcloud that you are about - to upgrade. - -.. code-block:: bash - - openstack overcloud ffwd-upgrade prepare --templates \ - -e /home/stack/containers-default-parameters.yaml \ - \ - -e init-repo.yaml - -e cli_opts_params.yaml - -r /path/to/roles_data.yaml - - -In place of the `` token should go all parameters that you used with -previous `openstack overcloud deploy` command. - -This will begin an update on the overcloud Heat stack but without -applying any of the TripleO configuration, as explained above. Once this -`ffwd-upgrade prepare` operation has successfully completed the heat stack will -be in the UPDATE_COMPLETE state. At that point you can use `config download` to -download and inspect the configuration ansible playbooks that will be used -to deliver the upgrade in the next step: - -.. code-block:: bash - - openstack overcloud config download --config-dir SOMEDIR - # playbooks will be downloaded to SOMEDIR directory - -openstack overcloud ffwd-upgrade run -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -This will execute the ffwd-upgrade initial steps in all nodes. - -.. 
code-block:: bash - - openstack overcloud ffwd-upgrade run --yes - -After this step, the upgrade commands can be executed in all nodes. - -openstack overcloud upgrade run -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -This will run the ansible playbooks to deliver the upgrade configuration. -By default, 3 playbooks are executed: the upgrade_steps_playbook, then the -deploy_steps_playbook and finally the post_upgrade_steps_playbook. These -playbooks are invoked on those overcloud nodes specified by the ``--limit`` -parameter. - -.. code-block:: bash - - openstack overcloud upgrade run --limit Controller - - -.. note:: - - *Optionally* you can specify ``--playbook`` to manually step through the upgrade - playbooks: You need to run all three in this order and as specified below - (no path) for a full upgrade to Queens. - - -.. code-block:: bash - - openstack overcloud upgrade run --limit Controller --playbook upgrade_steps_playbook.yaml - openstack overcloud upgrade run --limit Controller --playbook deploy_steps_playbook.yaml - openstack overcloud upgrade run --limit Controller --playbook post_upgrade_steps_playbook.yaml - -After all three playbooks have been executed without error on all nodes of -the controller role the controlplane will have been fully upgraded to Queens. -At a minimum an operator should check the health of the pacemaker cluster - -.. admonition:: Stable Branch - :class: stable - - The ``--limit`` was introduced in the Stein release. In previous versions, - use ``--nodes`` or ``--roles`` parameters. - -For control plane nodes, you are expected to upgrade all nodes within a role at -the same time: pass a role name to ``--limit``. For non-control-plane nodes, -you often want to specify a single node or a list of nodes to ``--limit``. - -The controller nodes need to be the first upgraded, following by the compute -and storage ones. - -.. code-block:: bash - - [root@overcloud-controller-0 ~]# pcs status | grep -C 10 -i "error\|fail\|unmanaged" - -The operator may also want to confirm that openstack and related service -containers are all in a good state and using the image references passed -during upgrade prepare with the ``--container-registry-file`` parameter. - -.. code-block:: bash - - [root@overcloud-controller-0 ~]# docker ps -a - -.. warning:: - - When the upgrade has been applied on the Controllers, but not on the other - nodes, it is important to don't execute any operation on the overcloud. The - nova, neutron.. commands will be up at this point but users are not advised - to use them, until all the steps of Fast Forward Upgrade have been - completed, or it may drive unexpected results. - -For non controlplane nodes, such as Compute or ObjectStorage, you can use -``--limit overcloud-compute-0`` to upgrade particular nodes, or even -"compute0,compute1,compute3" for multiple nodes. Note these are again -upgraded in parallel. Also note that you can pass roles names to upgrade all -nodes in a role at the same time is preferred. - -.. code-block:: bash - - openstack overcloud upgrade run --limit overcloud-compute-0 - -Use of ``--limit`` allows the operator to upgrade some subset, perhaps just -one, compute or other non controlplane node and verify that the upgrade is -successful. One may even migrate workloads onto the newly upgraded node and -confirm there are no problems, before deciding to proceed with upgrading the -remaining nodes that are still on Newton. - -Again you can optionally step through the upgrade playbooks if you prefer. 
Be -sure to run upgrade_steps_playbook.yaml then deploy_steps_playbook.yaml and -finally post_upgrade_steps_playbook.yaml in that order. - -For re-run, you can specify ``--skip-tags validation`` to skip those step 0 -ansible tasks that check if services are running, in case you can't or -don't want to start them all. - -.. code-block:: bash - - openstack overcloud upgrade run --limit Controller --skip-tags validation - -openstack overcloud ceph-upgrade run -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -This step is only necessary if Ceph was deployed in the Overcloud. It triggers -an upgrade of the Ceph cluster which will be performed without taking down -the cluster. - - .. note:: - - It is especially important to remember that you **must** include all - environment files that were used to deploy the overcloud that you are about - to upgrade. - - .. code-block:: bash - - openstack overcloud ceph-upgrade run --templates \ - --container-registry-file /home/stack/containers-default-parameters.yaml \ - -r /path/to/roles_data.yaml - -In place of the `` token should go all parameters that you used with -previous `openstack overcloud deploy` command. - -At the end of the process, Ceph will be upgraded from Jewel to Luminous so -there will be new containers for the `ceph-mgr` service running on the -controlplane node. - -openstack overcloud ffwd-upgrade converge -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Finally, run the converge heat stack update. This will re-apply all Queens -configuration across all nodes and unset all variables that were used during -the upgrade. Until you have successfully completed this step, heat stack -updates against the overcloud stack are expected to fail. You can read more -about why this is the case in the queens-upgrade-dev-docs_. - -.. note:: - - It is especially important to remember that you **must** include all - environment files that were used to deploy the overcloud that you are about - to upgrade converge, including the list of Queens container image references - and the roles_data.yaml roles and services definition. You should omit - any repo switch commands and ensure that none of the environment files - you are about to use is specifying a value for UpgradeInitCommand. - -.. note:: - - The Queens container image references that were passed into the - `openstack overcloud ffwd-upgrade prepare`_ with the - ``--container-registry-file`` parameter **must** be included as an - environment file, with the -e option to the openstack overcloud - ffwd-upgrade run command, together with all other environment files - for your deployment. - -.. code-block:: bash - - openstack overcloud ffwd-upgrade converge --templates - -e /home/stack/containers-default-parameters.yaml \ - -e cli_opts_params.yaml \ - -r /path/to/roles_data.yaml - - -In place of the `` token should go all parameters that you used with -previous `openstack overcloud deploy` command. - -The Heat stack will be in the **UPDATE_IN_PROGRESS** state for the duration of -the openstack overcloud upgrade converge. Once converge has completed -successfully the Heat stack should also be in the **UPDATE_COMPLETE** state. - -Annex: Template changes needed from Newton to Queens ----------------------------------------------------- -In order to reuse the Newton templates when the cloud has been upgraded to -Queens, several changes are needed. Those changes need to be done before -starting Fast Forward Upgrade on the overcloud. - -Following there is a list of all the changes needed: - - -1. 
Remove those deprecated services from your custom roles_data.yaml file: - -* OS::TripleO::Services::Core -* OS::TripleO::Services::GlanceRegistry -* OS::TripleO::Services::VipHosts - - -2. Add the following new services to your custom roles_data.yaml file: - -* OS::TripleO::Services::MySQLClient -* OS::TripleO::Services::NovaPlacement -* OS::TripleO::Services::PankoApi -* OS::TripleO::Services::Sshd -* OS::TripleO::Services::CertmongerUser -* OS::TripleO::Services::Docker -* OS::TripleO::Services::MySQLClient -* OS::TripleO::Services::ContainersLogrotateCrond -* OS::TripleO::Services::Securetty -* OS::TripleO::Services::Tuned -* OS::TripleO::Services::Clustercheck (just required on roles that also uses - OS::TripleO::Services::MySQL) -* OS::TripleO::Services::Iscsid (to configure iscsid on Controller, Compute - and BlockStorage roles) -* OS::TripleO::Services::NovaMigrationTarget (to configure migration on - Compute roles) - - -3. Update any additional parts of the overcloud that might require these new - services such as: - -* Custom ServiceNetMap parameter - ensure to include the latest - ServiceNetMap for the new services. You can locate in - network/service_net_map.j2.yaml file -* External Load Balancer - if using an external load balancer, include - these new services as a part of the external load balancer configuration - - -4. A new feature for composable networks was introduced on Pike. If using a - custom roles_data file, edit the file to add the composable networks to each - role. For example, for Controller nodes: - - :: - - - name: Controller - networks: - - External - - InternalApi - - Storage - - StorageMgmt - - Tenant - - Check the default networks on roles_data.yaml for further examples of syntax. - - -5. The following parameters are deprecated and have been replaced with - role-specific parameters: - -* from controllerExtraConfig to ControllerExtraConfig -* from OvercloudControlFlavor to OvercloudControllerFlavor -* from controllerImage to ControllerImage -* from NovaImage to ComputeImage -* from NovaComputeExtraConfig to ComputeExtraConfig -* from NovaComputeServerMetadata to ComputeServerMetadata -* from NovaComputeSchedulerHints to ComputeSchedulerHints -* from NovaComputeIPs to ComputeIPs -* from SwiftStorageServerMetadata to ObjectStorageServerMetadata -* from SwiftStorageIPs to ObjectStorageIPs -* from SwiftStorageImage to ObjectStorageImage -* from OvercloudSwiftStorageFlavor to OvercloudObjectStorageFlavor - - -6. Some composable services include new parameters that configure Puppet - hieradata. If you used hieradata to configure these parameters in the past, - the overcloud update might report a Duplicate declaration error. - If this situation, use the composable service parameter. - For example, instead of the following: - - :: - - parameter_defaults: - controllerExtraConfig: - heat::config::heat_config: - DEFAULT/num_engine_workers: - value: 1 - - Use the following: - - :: - - parameter_defaults: - HeatWorkers: 1 - - -7. In your resource_registry, check that you are using the containerized - services from the deployment subdirectory of your core Heat template - collection. For example: - - :: - - resource_registry: - OS::TripleO::Services::CephMon: ../deployment/ceph-ansible/ceph-mon.yaml - OS::TripleO::Services::CephOSD: ../deployment/ceph-ansible/ceph-osd.yaml - OS::TripleO::Services::CephClient: ../deployment/ceph-ansible/ceph-client.yaml - - -8. 
-8. When upgrading to Queens, if Ceph has been deployed in the Overcloud, then
-   use the `ceph-ansible.yaml` environment file **instead of**
-   `storage-environment.yaml`. Make sure to move any customization into
-   `ceph-ansible.yaml` (or a copy of ceph-ansible.yaml).
-
-   .. code-block:: bash
-
-     openstack overcloud deploy --templates \
-       -e \
-       -e /usr/share/openstack-tripleo-heat-templates/environments/docker.yaml \
-       -e /usr/share/openstack-tripleo-heat-templates/environments/ceph-ansible/ceph-ansible.yaml \
-       -e /usr/share/openstack-tripleo-heat-templates/environments/major-upgrade-composable-steps-docker.yaml \
-       -e overcloud-repos.yaml
-
-   Customizations for the Ceph deployment previously passed as hieradata via
-   \*ExtraConfig should be removed, as they are ignored; specifically, the
-   deployment will stop if ``ceph::profile::params::osds`` is found, to
-   ensure the devices list has been migrated to the format expected by
-   ceph-ansible. It is possible to use the ``CephAnsibleExtraConfig`` and
-   ``CephAnsibleDisksConfig`` parameters to pass arbitrary variables to
-   ceph-ansible, like ``devices`` and ``dedicated_devices``.
-
-   The other parameters (for example ``CinderRbdPoolName``,
-   ``CephClientUserName``, ...) will behave as they used to with puppet-ceph,
-   with the only exception of ``CephPools``. This can be used to create
-   additional pools in the Ceph cluster, but the two tools expect the list
-   to be in a different format. Specifically, while puppet-ceph expected it
-   in this format::
-
-     {
-       "mypool": {
-         "size": 1,
-         "pg_num": 32,
-         "pgp_num": 32
-       }
-     }
-
-   with ceph-ansible that would become::
-
-     [{"name": "mypool", "pg_num": 32, "rule_name": ""}]
-
-9. If using custom nic-configs, the format has changed and a script is now
-   used to generate the entries. 
So you will need to convert your old - syntax from: - - :: - - resources: - OsNetConfigImpl: - properties: - config: - os_net_config: - network_config: - - type: interface - name: nic1 - mtu: 1350 - use_dhcp: false - addresses: - - ip_netmask: - list_join: - - / - - - {get_param: ControlPlaneIp} - - {get_param: ControlPlaneSubnetCidr} - routes: - - ip_netmask: 169.254.169.254/32 - next_hop: {get_param: EC2MetadataIp} - - type: ovs_bridge - name: br-ex - dns_servers: {get_param: DnsServers} - use_dhcp: false - addresses: - - ip_netmask: {get_param: ExternalIpSubnet} - routes: - - ip_netmask: 0.0.0.0/0 - next_hop: {get_param: ExternalInterfaceDefaultRoute} - members: - - type: interface - name: nic2 - mtu: 1350 - primary: true - - type: interface - name: nic3 - mtu: 1350 - use_dhcp: false - addresses: - - ip_netmask: {get_param: InternalApiIpSubnet} - - type: interface - name: nic4 - mtu: 1350 - use_dhcp: false - addresses: - - ip_netmask: {get_param: StorageIpSubnet} - - type: interface - name: nic5 - mtu: 1350 - use_dhcp: false - addresses: - - ip_netmask: {get_param: StorageMgmtIpSubnet} - - type: ovs_bridge - name: br-tenant - dns_servers: {get_param: DnsServers} - use_dhcp: false - addresses: - - ip_netmask: {get_param: TenantIpSubnet} - members: - - type: interface - name: nic6 - mtu: 1350 - primary: true - group: os-apply-config - type: OS::Heat::StructuredConfig - - - To - - :: - - resources: - OsNetConfigImpl: - type: OS::Heat::SoftwareConfig - properties: - group: script - config: - str_replace: - template: - get_file: ../../../../../network/scripts/run-os-net-config.sh - params: - $network_config: - network_config: - - type: interface - name: nic1 - mtu: 1350 - use_dhcp: false - addresses: - - ip_netmask: - list_join: - - / - - - {get_param: ControlPlaneIp} - - {get_param: ControlPlaneSubnetCidr} - routes: - - ip_netmask: 169.254.169.254/32 - next_hop: {get_param: EC2MetadataIp} - - type: ovs_bridge - name: br-ex - dns_servers: {get_param: DnsServers} - use_dhcp: false - addresses: - - ip_netmask: {get_param: ExternalIpSubnet} - routes: - - ip_netmask: 0.0.0.0/0 - next_hop: {get_param: ExternalInterfaceDefaultRoute} - members: - - type: interface - name: nic2 - mtu: 1350 - primary: true - - type: interface - name: nic3 - mtu: 1350 - use_dhcp: false - addresses: - - ip_netmask: {get_param: InternalApiIpSubnet} - - type: interface - name: nic4 - mtu: 1350 - use_dhcp: false - addresses: - - ip_netmask: {get_param: StorageIpSubnet} - - type: interface - name: nic5 - mtu: 1350 - use_dhcp: false - addresses: - - ip_netmask: {get_param: StorageMgmtIpSubnet} - - type: ovs_bridge - name: br-tenant - dns_servers: {get_param: DnsServers} - use_dhcp: false - addresses: - - ip_netmask: {get_param: TenantIpSubnet} - members: - - type: interface - name: nic6 - mtu: 1350 - primary: true - - -10. If using a modified version of the core Heat template collection from - Newton, you need to re-apply your customizations to a copy of the Queens - version. To do this, use a git version control system or similar tooling - to compare. - - -Annex: NFV template changes needed from Newton to Queens --------------------------------------------------------- -Following there is a list of general changes needed into NFV context: - -1. Fixed VIP addresses for overcloud networks use new parameters as syntax: - - :: - - parameter_defaults: - ... 
- # Predictable VIPs - ControlFixedIPs: [{'ip_address':'192.168.201.101'}] - InternalApiVirtualFixedIPs: [{'ip_address':'172.16.0.9'}] - PublicVirtualFixedIPs: [{'ip_address':'10.1.1.9'}] - StorageVirtualFixedIPs: [{'ip_address':'172.18.0.9'}] - StorageMgmtVirtualFixedIPs: [{'ip_address':'172.19.0.9'}] - RedisVirtualFixedIPs: [{'ip_address':'172.16.0.8'}] - - -For DPDK environments: - -1. Modify HostCpuList and NeutronDpdkCoreList to match your configuration. - Ensure that you use only double quotation marks in the yaml file for these - parameters: - - :: - - HostCpusList: "0,16,8,24" - NeutronDpdkCoreList: "1,17,9,25" - -2. Modify NeutronDpdkSocketMemory to match your configuration. Ensure that you - use only double quotation marks in the yaml file for this parameter: - - :: - - NeutronDpdkSocketMemory: "2048,2048" - -3. Modify NeutronVhostuserSocketDir as follows: - - :: - - NeutronVhostuserSocketDir: "/var/lib/vhost_sockets" - -4. Modify VhostuserSocketGroup as follows, mapping to the right compute role: - - :: - - parameter_defaults: - Parameters: - VhostuserSocketGroup: "hugetlbfs" - -5. In the parameter_defaults section, add a network deployment parameter to run - os-net-config during the upgrade process to associate OVS PCI address with - DPDK ports: - - :: - - parameter_defaults: - ComputeNetworkDeploymentActions: ['CREATE', 'UPDATE'] - - The parameter name must match the name of the role you use to deploy DPDK. - In this example, the role name is Compute so the parameter name is - ComputeNetworkDeploymentActions. - -6. In the resource_registry section, override the - ComputeNeutronOvsDpdk service to the neutron-ovs-dpdk-agent docker service: - - :: - - resource_registry: - OS::TripleO::Services::ComputeNeutronOvsDpdk: ../deployment/neutron/neutron-ovs-dpdk-agent-container-puppet.yaml - -For SR-IOV environments: - -1. In the resource registry section, override the NeutronSriovAgent service - to the neutron-sriov-agent docker service: - - :: - - resource_registry: - OS::TripleO::Services::NeutronSriovAgent: ../deployment/neutron/neutron-sriov-agent-container-puppet.yaml diff --git a/deploy-guide/source/post_deployment/upgrade/index.rst b/deploy-guide/source/post_deployment/upgrade/index.rst deleted file mode 100644 index b83d0dd7..00000000 --- a/deploy-guide/source/post_deployment/upgrade/index.rst +++ /dev/null @@ -1,12 +0,0 @@ -Upgrades -======== - -This section describes upgrade related items. - -.. toctree:: - :maxdepth: 1 - - minor_update - undercloud - major_upgrade - fast_forward_upgrade diff --git a/deploy-guide/source/post_deployment/upgrade/major_upgrade.rst b/deploy-guide/source/post_deployment/upgrade/major_upgrade.rst deleted file mode 100644 index 56d515a0..00000000 --- a/deploy-guide/source/post_deployment/upgrade/major_upgrade.rst +++ /dev/null @@ -1,913 +0,0 @@ -Upgrading to a Next Major Release -================================= - -Upgrading a TripleO deployment to the next major release is done by first -upgrading the undercloud and using it to upgrade the overcloud. - -Note that there are version specific caveats and notes which are pointed out -as below: - -.. note:: - - You can use the "Limit Environment Specific Content" in the left hand nav - bar to restrict content to the upgrade you are performing. - -.. note:: - - Generic upgrade testing cannot cover all possible deployment - configurations. Before performing the upgrade in production, test - it in a matching staging environment, and create a backup of the - production environment. - -.. 
Undercloud upgrade section
-.. include:: undercloud.rst
-
-Upgrading the Overcloud to Queens and later
--------------------------------------------
-
-The overcloud upgrade workflow is mainly delivered through the
-`openstack overcloud upgrade` command, in particular one of its
-subcommands: **prepare**, **run** and **converge**. Each subcommand
-has its own set of options which you can explore with ``--help``:
-
-.. code-block:: bash
-
-   source /home/stack/stackrc
-   openstack overcloud upgrade run --help
-
-The upgrade workflow essentially consists of the following steps:
-
-#. `Prepare your environment files`_.
-   Generate any environment files you need for the upgrade such as the
-   references to the latest container images or commands used to
-   switch repos.
-
-#. `openstack overcloud upgrade prepare`_.
-   Run a heat stack update to generate the upgrade playbooks.
-
-#. `openstack overcloud external-upgrade run (for container images)`_.
-   Upload the container images for the target release to the local
-   registry (this is a separate step in Rocky and later).
-
-#. `openstack overcloud upgrade run`_.
-   Run the upgrade on specific nodes or groups of nodes. Repeat until all nodes
-   are successfully upgraded.
-
-#. `openstack overcloud external-upgrade run (for services)`_. (optional)
-   This step is only necessary if your deployment contains services
-   which are managed using external installers, e.g. Ceph.
-
-#. `openstack overcloud external-upgrade run (for online upgrades)`_.
-   Run the part of service upgrades which can run while the cloud is
-   fully operational, e.g. online data migrations.
-
-#. `openstack overcloud upgrade converge`_.
-   Finally run a heat stack update, unsetting any upgrade specific variables
-   and leaving the heat stack in a healthy state for future updates.
-
-Detailed information and pointers can be found in the
-queens-upgrade-dev-docs_.
-
-.. _queens-upgrade-dev-docs: https://docs.openstack.org/tripleo-docs/latest/install/developer/upgrades/major_upgrade.html # WIP @ https://review.opendev.org/#/c/569443/
-
-Prepare your environment files
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-First we prepare an environment file for new container images:
-
-.. admonition:: Pike to Queens
-   :class: ptoq
-
-   As part of the upgrade to Queens, the container images for the
-   target release should be downloaded to the Undercloud. Please see
-   the `openstack overcloud container image prepare` command and
-   :doc:`../../deployment/install_overcloud` for more information.
-
-   The output of this step will be a Heat environment file that contains
-   references to the latest container images. You will need to pass the path to
-   this file into the **upgrade prepare** command using the -e option as you would
-   any other environment file.
-
-.. admonition:: Queens to Rocky
-   :class: qtor
-
-   In Rocky we only generate a new environment file with the
-   ``ContainerImagePrepare`` parameter at this point in the workflow. See the
-   :doc:`container image preparation documentation<../../deployment/container_image_prepare>`
-   for details on how to generate this environment file.
-
-   The file is then passed to the `upgrade prepare` command, and
-   images will be uploaded to the local registry in a separate
-   `external-upgrade run` step afterwards.
-
-You will also need to create an environment file to override the
-UpgradeInitCommand_ tripleo-heat-templates parameter, which can be used to
-switch the yum repos in use by the nodes during the upgrade. 
This will likely -be the same commands that were used to switch repositories on the undercloud. - -.. code-block:: bash - - cat < init-repo.yaml - parameter_defaults: - UpgradeInitCommand: | - set -e - # -- REPLACE LINES WITH YOUR REPO SWITCH COMMANDS -- - curl -L -o /etc/yum.repos.d/delorean.repo https://trunk.rdoproject.org/centos7-queens/current/delorean.repo - curl -L -o /etc/yum.repos.d/delorean-deps.repo https://trunk.rdoproject.org/centos7-queens/delorean-deps.repo - yum clean all - EOF - -The resulting init-repo.yaml will then be passed into the upgrade prepare using -the -e option. - -.. _Upgradeinitcommand: https://github.com/openstack/tripleo-heat-templates/blob/1d9629ec0b3320bcbc5a4150c8be19c6eb4096eb/puppet/role.role.j2.yaml#L468-L493 - -openstack overcloud upgrade prepare -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -.. note:: - - Before running the overcloud upgrade prepare ensure you have a valid backup - of the current state, including the **undercloud** since there will be a - Heat stack update performed here. - -.. note:: - - If you have enabled neutron_DVR_ in your deployment you must ensure that - compute nodes are connected to the External network via the - roles_data.yaml that you will pass using the -r parameter to upgrade prepare. - This is necessary to allow floating IP connectivity via the external api network. - -.. note:: - - After running the upgrade prepare and until successful completion - of the upgrade converge operation, stack updates to the deployment Heat - stack are expected to fail. That is, operations such as scaling to add - a new node or to apply any new TripleO configuration via Heat stack - update **must not** be performed on a Heat stack that has been prepared - for upgrade with the 'prepare' command and only consider doing so after - running the converge step. See the queens-upgrade-dev-docs_ for more. - -Run **overcloud upgrade prepare**. This command expects the full set -of environment files that were passed into the deploy command, as well -as the roles_data.yaml and network_data.yaml, if you've customized -those. Be sure to include environment files with the new container -image parameter and Yum repository switch parameter. - -.. note:: - - It is especially important to remember that you **must** include all - environment files that were used to deploy the overcloud including the - container image references file for the target version container images - -.. code-block:: bash - - openstack overcloud upgrade prepare --templates \ - -r /path/to/roles_data.yaml \ - -n /path/to/network_data.yaml \ - -e \ - -e init-repo.yaml \ - -e containers-prepare-parameter.yaml - -This will begin an update on the overcloud Heat stack but without -applying any of the TripleO configuration. Once this `upgrade prepare` -operation has successfully completed the heat stack will be in the -UPDATE_COMPLETE state. At that point you can use `config download` to -download and inspect the configuration ansible playbooks that will be -used to deliver the upgrade in the next step: - -.. code-block:: bash - - openstack overcloud config download --config-dir SOMEDIR - # playbooks will be downloaded to SOMEDIR directory - -.. _neutron_DVR: https://specs.openstack.org/openstack/neutron-specs/specs/juno/neutron-ovs-dvr.html - - -openstack overcloud external-upgrade run (for container images) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -.. 
admonition:: Rocky
-   :class: qtor
-
-   In Rocky and beyond, container images will need to be uploaded to
-   the local registry after we've run `upgrade prepare`. Run:
-
-   .. code-block:: bash
-
-      openstack overcloud external-upgrade run --tags container_image_prepare
-
-.. _openstack-overcloud-upgrade-run:
-
-openstack overcloud upgrade run
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-The `upgrade run` command runs the Ansible playbooks to deliver the upgrade configuration.
-By default, 3 playbooks are executed: the upgrade_steps_playbook, then the
-deploy_steps_playbook and finally the post_upgrade_steps_playbook. These
-playbooks are invoked on those overcloud nodes specified by the ``--limit``
-parameter.
-
-.. code-block:: bash
-
-   openstack overcloud upgrade run --limit Controller
-
-**Optionally** specify ``--playbook`` to manually step through the upgrade
-playbooks. You need to run all three in this order, as specified below
-(with no path), for a full upgrade.
-
-.. code-block:: bash
-
-   openstack overcloud upgrade run --limit Controller --playbook upgrade_steps_playbook.yaml
-   openstack overcloud upgrade run --limit Controller --playbook deploy_steps_playbook.yaml
-   openstack overcloud upgrade run --limit Controller --playbook post_upgrade_steps_playbook.yaml
-
-After all three playbooks have been executed without error on all nodes of
-the controller role, the controlplane will have been fully upgraded to Queens.
-At a minimum, an operator should check the health of the pacemaker cluster.
-
-.. code-block:: bash
-
-   [root@overcloud-controller-0 ~]# pcs status | grep -C 10 -i "error\|fail"
-
-The operator may also want to confirm that openstack and related service
-containers are all in a good state and using the target version (new) images
-passed during upgrade prepare.
-
-.. code-block:: bash
-
-   [root@overcloud-controller-0 ~]# docker ps -a
-
-For non controlplane nodes, such as Compute or ObjectStorage, you can use
-``--limit overcloud-compute-0`` to upgrade particular nodes, or even
-"compute0,compute1,compute3" for multiple nodes. Note that these are again
-upgraded in parallel. Also note that passing a role name, to upgrade all
-nodes in that role at the same time, is the preferred approach.
-
-.. code-block:: bash
-
-   openstack overcloud upgrade run --limit overcloud-compute-0
-
-Use of ``--limit`` allows the operator to upgrade some subset, perhaps just one,
-compute or other non controlplane node and verify that the upgrade is
-successful. One may even migrate workloads onto the newly upgraded node and
-confirm there are no problems, before deciding to proceed with upgrading the
-remaining nodes.
-
-For re-run, you can specify ``--skip-tags validation`` to skip those step 0
-ansible tasks that check if services are running, in case you can't or
-don't want to start them all.
-
-.. code-block:: bash
-
-   openstack overcloud upgrade run --limit Controller --skip-tags validation
-
-.. admonition:: Stable Branch
-   :class: stable
-
-   The ``--limit`` option was introduced in the Stein release. In previous
-   versions, use the ``--nodes`` or ``--roles`` parameters.
-
-openstack overcloud external-upgrade run (for services)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-This step is only necessary if a service using an external installer was
-deployed in the Overcloud. Most typically this is the case for
-overclouds with Ceph.
-
-.. admonition:: Pike to Queens
-   :class: ptoq
-
-   Among the services with external installers, only the upgrade of Ceph
-   is supported in the Queens release cycle. 
It has a specific - `ceph-upgrade` command. Run it as follows: - - .. note:: - - It is especially important to remember that you **must** include all - environment files that were used to deploy the overcloud. - - .. code-block:: bash - - openstack overcloud ceph-upgrade run --templates \ - -r /path/to/roles_data.yaml \ - -n /path/to/network_data.yaml \ - -e \ - -e containers-prepare-parameter.yaml - -.. admonition:: Queens to Rocky - :class: qtor - - More services with external installers can be upgraded to - Rocky. The `external-upgrade run` command accepts a ``--tags`` - parameter which allows to limit the scope of the upgrade to - particular services. It is recommended to always use this - parameter for accurately scoping the upgrade. - - For example, to upgrade Ceph, run the following command: - - .. code-block:: bash - - openstack overcloud external-upgrade run --tags ceph - - .. note:: - - The `external-upgrade run` command does not update the Heat - stack, and as such it does not accept any environment files as - parameters. It uses playbooks generated during `upgrade - prepare`. - -openstack overcloud external-upgrade run (for online upgrades) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -.. admonition:: Queens to Rocky - :class: qtor - - The offline (downtime inducing) part of upgrade has finished at this - point, and the cloud should be fully operational. Some services have - an online component to their upgrade procedure -- operations which - don't induce downtime and can run while the cloud operates - normally. For OpenStack services these are e.g. online data - migrations. Run all these online upgrade operations by executing the - following command: - - .. code-block:: bash - - openstack overcloud external-upgrade run --tags online_upgrade - - .. note:: - - If desired, the online upgrades can be run per-service. E.g. to run - only Nova online data migrations, execute: - - .. code-block:: bash - - openstack overcloud external-upgrade run --tags online_upgrade_nova - - However, when executing online upgrades in selective parts like - this, extra care must be taken to not miss any necessary online - upgrade operations. - -openstack overcloud upgrade converge -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Finally, run the upgrade converge step. This will re-apply all -configuration across all nodes and unset all variables that were used -during the upgrade. Successful completion of this step is required to -assert that the overcloud state is in sync with the latest TripleO -Heat templates, which is a prerequisite for any further overcloud -management (e.g. scaling). - -.. note:: - - It is especially important to remember that you **must** include - all environment files that were used to deploy the overcloud, - including the new container image parameter file. You should - omit any repo switch commands and ensure that none of the - environment files you are about to use is specifying a value for - UpgradeInitCommand. - -.. code-block:: bash - - openstack overcloud upgrade converge --templates - -r /path/to/roles_data.yaml \ - -n /path/to/network_data.yaml \ - -e \ - -e containers-prepare-parameter.yaml - -Successful completion of the `upgrade converge` command concludes the -major version upgrade. 
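-
-As a final sanity check (this is optional rather than a required part of the
-workflow), you can confirm that the overcloud stack and the controlplane are
-healthy after converge. The commands below are a minimal sketch and assume the
-default ``overcloud`` stack name and a containerized deployment:
-
-.. code-block:: bash
-
-   # On the undercloud: the stack should be back in the UPDATE_COMPLETE state
-   source /home/stack/stackrc
-   openstack stack list
-
-   # On a controller: re-run the pacemaker and container checks shown above
-   sudo pcs status | grep -C 10 -i "error\|fail"
-   sudo docker ps -a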
- -Upgrading the Overcloud to Ocata or Pike ----------------------------------------- - -As of the Ocata release, the upgrades workflow in tripleo has changed -significantly to accommodate the operators' new ability to deploy custom roles -with the Newton release (see the Composable Service Upgrade spec_ for more -info). The new workflow uses ansible upgrades tasks to define the upgrades -workflow on a per-service level. The Pike release upgrade uses a similar -mechanism and the steps are invoked with the same cli. A big difference however -is that after upgrading to Pike most of the overcloud services will be running -in containers. - -.. note:: - - Upgrades to Pike and further will only be tested with containers. Baremetal - deployments, which don't use containers, will be deprecated in Queens and - have full support removed in Rocky. - -The operator starts the upgrade with a ``openstack overcloud deploy`` that -includes the major-upgrade-composable-steps.yaml_ environment file (or the -docker variant for the `containerized upgrade to Pike`__) -as well as all environment files used on the initial deployment. This will -collect the ansible upgrade tasks for all roles, except those that have the -``disable_upgrade_deployment`` flag set ``True`` in roles_data.yaml_. The -tasks will be executed in a series of steps, for example (and not limited to): -step 0 for validations or other pre-upgrade tasks, step 1 to stop the -pacemaker cluster, step 2 to stop services, step 3 for package updates, -step 4 for cluster startup, step 5 for any special case db syncs or post -package update migrations. The Pike upgrade tasks are in general much simpler -than those used in Ocata since for Pike these tasks are mainly for stopping -and disabling the systemd services, since they will be containerized as part -of the upgrade. - -After the ansible tasks have run the puppet (or docker, for Pike containers) -configuration is also applied in the 'normal' manner we do on an initial -deploy, to complete the upgrade and bring services back up, or start the -service containers, as the case may be for Ocata or Pike. - -For those roles with the ``disable_upgrade_deployment`` flag set True, the -operator will upgrade the corresponding nodes with the -upgrade-non-controller.sh_. The operator uses that script to invoke the -tripleo_upgrade_node.sh_ which is delivered during the -major-upgrade-composable-steps that come first, as described above. - -#. Run the major upgrade composable ansible steps - - This step will upgrade the nodes of all roles that do not explicitly set the - ``disable_upgrade_deployment`` flag to ``True`` in the roles_data.yaml_ - (this is an operator decision, and the current default is for the **Compute** - and **ObjectStorage** roles to have this set). - - The ansible upgrades tasks are collected from all service manifests_ and - executed in a series of steps as described in the introduction above. - Even before the invocation of these ansible tasks however, this upgrade - step also delivers the tripleo_upgrade_node.sh_ and role specific puppet - manifest to allow the operator to upgrade those nodes after this step has - completed. - - From Ocata to Pike, the Overcloud will be upgraded to a containerized - environment. All OpenStack related services will run in containers. - - If you deploy TripleO with custom roles, you want to synchronize them with - `roles_data.yaml` visible in default roles and make sure parameters and new - services are present in your roles. - - .. 
admonition:: Newton - :class: newton - - Newton roles_data.yaml is available here: - https://github.com/openstack/tripleo-heat-templates/blob/stable/newton/roles_data.yaml - - .. admonition:: Ocata - :class: ocata - - Ocata roles_data.yaml is available here: - https://github.com/openstack/tripleo-heat-templates/blob/stable/ocata/roles_data.yaml - - .. admonition:: Pike - :class: pike - - Pike roles_data.yaml is available here: - https://github.com/openstack/tripleo-heat-templates/blob/stable/pike/roles_data.yaml - - .. admonition:: Queens - :class: queens - - Queens roles_data.yaml is available here: - https://github.com/openstack/tripleo-heat-templates/blob/stable/queens/roles_data.yaml - - - Create an environment file with commands to switch OpenStack repositories to - a new release. This will likely be the same commands that were used to switch - repositories on the undercloud - - .. code-block:: bash - - cat > overcloud-repos.yaml < \ - -e /usr/share/openstack-tripleo-heat-templates/environments/major-upgrade-composable-steps.yaml \ - -e overcloud-repos.yaml - - .. note:: - - Before upgrading your deployment to containers, you must perform the - actions mentioned here to prepare your environment. In particular - *image prepare* to generate the docker registry which you must include - as one of the environment files specified below: - * :doc:`../../deployment/install_overcloud` - - .. __: - - Run `overcloud deploy`, passing in full set of environment - files plus `major-upgrade-composable-steps-docker.yaml` and - `overcloud-repos.yaml` (and docker registry if upgrading to containers) - - .. code-block:: bash - - openstack overcloud deploy --templates \ - -e \ - -e /usr/share/openstack-tripleo-heat-templates/environments/docker.yaml \ - -e /usr/share/openstack-tripleo-heat-templates/environments/major-upgrade-composable-steps-docker.yaml \ - -e overcloud-repos.yaml - - .. note:: - - It is especially important to remember that you **must** include all - environment files that were used to deploy the overcloud that you are about - to upgrade. - - .. note:: - - If the Overcloud has been deployed with Pacemaker, then add the - `docker-ha.yaml` environment file to the upgrade command - - .. code-block:: bash - - openstack overcloud deploy --templates \ - -e \ - -e /usr/share/openstack-tripleo-heat-templates/environments/docker.yaml \ - -e /usr/share/openstack-tripleo-heat-templates/environments/docker-ha.yaml \ - -e /usr/share/openstack-tripleo-heat-templates/environments/major-upgrade-composable-steps-docker.yaml \ - -e overcloud-repos.yaml - - .. admonition:: Ceph - :class: ceph - - When upgrading to Pike, if Ceph has been deployed in the Overcloud, then - use the `ceph-ansible.yaml` environment file **instead of** - `storage-environment.yaml`. Make sure to move any customization into - `ceph-ansible.yaml` (or a copy of ceph-ansible.yaml) - - .. 
code-block:: bash - - openstack overcloud deploy --templates \ - -e \ - -e /usr/share/openstack-tripleo-heat-templates/environments/docker.yaml \ - -e /usr/share/openstack-tripleo-heat-templates/environments/ceph-ansible/ceph-ansible.yaml \ - -e /usr/share/openstack-tripleo-heat-templates/environments/major-upgrade-composable-steps-docker.yaml \ - -e overcloud-repos.yaml - - Customizations for the Ceph deployment previously passed as hieradata - via \*ExtraConfig should be removed as they are ignored, specifically - the deployment will stop if ``ceph::profile::params::osds`` is found to - ensure the devices list has been migrated to the format expected by - ceph-ansible. It is possible to use the ``CephAnsibleExtraConfig`` and - ``CephAnsibleDisksConfig`` parameters to pass arbitrary variables to - ceph-ansible, like ``devices`` and ``dedicated_devices``. - - The other parameters (for example ``CinderRbdPoolName``, - ``CephClientUserName``, ...) will behave as they used to with puppet-ceph - with the only exception of ``CephPools``. This can be used to create - additional pools in the Ceph cluster but the two tools expect the list - to be in a different format. Specifically while puppet-ceph expected it - in this format:: - - { - "mypool": { - "size": 1, - "pg_num": 32, - "pgp_num": 32 - } - } - - with ceph-ansible that would become:: - - [{"name": "mypool", "pg_num": 32, "rule_name": ""}] - - .. note:: - - The first step of the ansible tasks is to validate that the deployment is - in a good state before performing any other upgrade operations. Each - service manifest in the tripleo-heat-templates includes a check that it is - running and if any of those checks fail the upgrade will exit early at - ansible step 0. - - If you are re-running the upgrade after an initial failed attempt, you may - need to disable these checks in order to allow the upgrade to proceed with - services down. This is done with the SkipUpgradeConfigTags parameter to - specify that tasks with the 'validation' tag should be skipped. You can - include this in any of the environment files you are using:: - - SkipUpgradeConfigTags: [validation] - -#. Upgrade remaining nodes for roles with ``disable_upgrade_deployment: True`` - - It is expected that the operator will want to upgrade the roles that have the - ``openstack-nova-compute`` and ``openstack-swift-object`` services deployed - to allow for pre-upgrade migration of workloads. For this reason the default - ``Compute`` and ``ObjectStorage`` roles in the roles_data.yaml_ have the - ``disable_upgrade_deployment`` set ``True``. - - Note that unlike in previous releases, this operator driven upgrade step - includes a full puppet configuration run as happens after the ansible - steps on the roles those are executed on. The significance is that nodes - are 'fully' upgraded after each step completes, rather than having to wait - for the final converge step as has previously been the case. In the case of - Ocata to Pike the full puppet/docker config is applied to bring up the - overcloud services in containers. - - The tripleo_upgrade_node.sh_ script and puppet configuration are delivered to - the nodes with ``disable_upgrade_deployment`` set ``True`` during the initial - major upgrade composable steps in step 1 above. - - For Ocata to Pike, the tripleo_upgrade_node.sh_ is still delivered to the - ``disable_upgrade_deployment`` nodes but is now empty. Instead, the - `upgrade_non_controller.sh` downloads ansible playbooks and those are - executed to deliver the upgrade. 
See the Queens-upgrade-spec_ for more - information on this mechanism. - - To upgrade remaining roles (at your convenience) - - .. code-block:: bash - - upgrade-non-controller.sh --upgrade overcloud-compute-0 - - for i in $(seq 0 2); do - upgrade-non-controller.sh --upgrade overcloud-objectstorage-$i & - done - -#. Converge to unpin Nova RPC - - The final step is required to unpin Nova RPC version. Unlike in previous - releases, for Ocata the puppet configuration has already been applied to - nodes as part of each upgrades step, i.e. after the ansible tasks or when - invoking the tripleo_upgrade_node.sh_ script to upgrade compute nodes. Thus - the significance of this step is somewhat diminished compared to previously. - However a re-application of puppet configuration across all nodes here will - also serve as a sanity check and hopefully show any issues that an operator - may have missed during any of the previous upgrade steps. - - To converge, run the deploy command with `major-upgrade-converge-docker.yaml` - - .. code-block:: bash - - openstack overcloud deploy --templates \ - -e \ - -e /usr/share/openstack-tripleo-heat-templates/environments/docker.yaml \ - -e /usr/share/openstack-tripleo-heat-templates/environments/major-upgrade-converge-docker.yaml - - .. admonition:: Newton to Ocata - :class: ntoo - - For Newton to Ocata, run the deploy command with - `major-upgrade-pacemaker-converge.yaml` - - .. code-block:: bash - - openstack overcloud deploy --templates \ - -e \ - -e /usr/share/openstack-tripleo-heat-templates/environments/major-upgrade-pacemaker-converge.yaml - - .. note:: - - If the Overcloud has been deployed with Pacemaker, then add the - `docker-ha.yaml` environment file to the upgrade command - - .. code-block:: bash - - openstack overcloud deploy --templates \ - -e \ - -e /usr/share/openstack-tripleo-heat-templates/environments/docker.yaml \ - -e /usr/share/openstack-tripleo-heat-templates/environments/docker-ha.yaml \ - -e /usr/share/openstack-tripleo-heat-templates/environments/major-upgrade-converge-docker.yaml - - openstack overcloud deploy --templates \ - -e \ - -e /usr/share/openstack-tripleo-heat-templates/environments/major-upgrade-converge.yaml - - .. note:: - - It is especially important to remember that you **must** include all - environment files that were used to deploy the overcloud. - -.. _spec: https://specs.openstack.org/openstack/tripleo-specs/specs/ocata/tripleo-composable-upgrades.html -.. _major-upgrade-composable-steps.yaml: https://github.com/openstack/tripleo-heat-templates/blob/master/environments/major-upgrade-composable-steps.yaml -.. _roles_data.yaml: https://github.com/openstack/tripleo-heat-templates/blob/master/roles_data.yaml -.. _tripleo_upgrade_node.sh: https://github.com/openstack/tripleo-heat-templates/blob/master/extraconfig/tasks/tripleo_upgrade_node.sh -.. _upgrade-non-controller.sh: https://github.com/openstack/tripleo-common/blob/master/scripts/upgrade-non-controller.sh -.. _manifests: https://github.com/openstack/tripleo-heat-templates/tree/master/puppet/services -.. _Queens-upgrade-spec: https://specs.openstack.org/openstack/tripleo-specs/specs/queens/tripleo_ansible_upgrades_workflow.html -.. _ceph-ansible scenarios: https://github.com/ceph/ceph-ansible/blob/stable-3.0/docs/source/testing/scenarios.rst - - -Upgrading the Overcloud to Newton and earlier ---------------------------------------------- - -.. note:: - - The `openstack overcloud deploy` calls in upgrade steps below are - non-blocking. 
Make sure that the overcloud is `UPDATE_COMPLETE` in - `openstack stack list` and `sudo pcs status` on a controller reports - everything running fine before proceeding to the next step. - -.. admonition:: Mitaka to Newton - :class: mton - - **Deliver the migration for ceilometer to run under httpd.** - - This is to deliver the migration for ceilometer to be run under httpd (apache) - rather than eventlet as was the case before. To execute this step run - `overcloud deploy`, passing in the full set of environment files plus - `major-upgrade-ceilometer-wsgi-mitaka-newton.yaml` - - .. code-block:: bash - - openstack overcloud deploy --templates \ - -e \ - -e /usr/share/openstack-tripleo-heat-templates/environments/major-upgrade-ceilometer-wsgi-mitaka-newton.yaml - -#. Upgrade initialization - - The initialization step switches to new repositories on overcloud nodes, and - it delivers upgrade scripts to nodes which are going to be upgraded - one-by-one (this means non-controller nodes, except any stand-alone block - storage nodes). - - Create an environment file with commands to switch OpenStack repositories to - a new release. This will likely be the same commands that were used to - switch repositories on the undercloud - - .. code-block:: bash - - cat > overcloud-repos.yaml < \ - -e /usr/share/openstack-tripleo-heat-templates/environments/major-upgrade-pacemaker-init.yaml \ - -e overcloud-repos.yaml - -#. Object storage nodes upgrade - - If the deployment has any standalone object storage nodes, upgrade them - one-by-one using the `upgrade-non-controller.sh` script on the undercloud - node - - .. code-block:: bash - - upgrade-non-controller.sh --upgrade - - This is ran before controller node upgrade because swift storage services - should be upgraded before swift proxy services. - -#. Upgrade controller and block storage nodes - - .. admonition:: Mitaka to Newton - :class: mton - - **Explicitly disable sahara services if so desired:** - As discussed at bug1630247_ sahara services are disabled by default in - the Newton overcloud deployment. This special case is handled for the - duration of the upgrade by defaulting to 'keep sahara-\*'. - - That is by default sahara services are restarted after the mitaka to - newton upgrade of controller nodes and sahara config is re-applied during - the final upgrade converge step. - - If an operator wishes to **disable** sahara services as part of the - mitaka to newton upgrade they need to include the - major-upgrade-remove-sahara.yaml_ environment file during the controller - upgrade step as well as during the converge step later - - .. code-block:: bash - - openstack overcloud deploy --templates \ - -e \ - -e /usr/share/openstack-tripleo-heat-templates/environments/major-upgrade-pacemaker.yaml - -e /usr/share/openstack-tripleo-heat-templates/environments/major-upgrade-remove-sahara.yaml - - All controllers will be upgraded in sync in order to make services only talk - to DB schema versions they expect. Services will be unavailable during this - operation. Standalone block storage nodes are automatically upgraded in this - step too, in sync with controllers, because block storage services don't - have a version pinning mechanism. - - Run the deploy command with `major-upgrade-pacemaker.yaml` - - .. 
code-block:: bash - - openstack overcloud deploy --templates \ - -e \ - -e /usr/share/openstack-tripleo-heat-templates/environments/major-upgrade-pacemaker.yaml - - Services of the compute component on the controller nodes are now pinned to - communicate like the older release, ensuring that they can talk to the - compute nodes which haven't been upgraded yet. - - .. note:: - - If this step fails, it may leave the pacemaker cluster stopped (together - with all OpenStack services on the controller nodes). The root cause and - restoration procedure may vary, but in simple cases the pacemaker cluster - can be started by logging into one of the controllers and running ``sudo - pcs cluster start --all``. - - .. note:: - - After this step, or if this step failed with the error: `ERROR: upgrade - cannot start with some cluster nodes being offlineAfter`, it's possible - that some pacemaker resources needs to be clean. Check the failed - actions and clean them by running on `only one` controller node as root - - .. code-block:: bash - - pcs status - pcs resource cleanup - - It can take few minutes for the cluster to go back to a “normal” state as - displayed by `crm_mon`. This is expected. - -#. Upgrade ceph storage nodes - - If the deployment has any ceph storage nodes, upgrade them one-by-one using - the `upgrade-non-controller.sh` script on the undercloud node - - .. code-block:: bash - - upgrade-non-controller.sh --upgrade - -#. Upgrade compute nodes - - Upgrade compute nodes one-by-one using the `upgrade-non-controller.sh` - script on the undercloud node - - .. code-block:: bash - - upgrade-non-controller.sh --upgrade - -#. Apply configuration from upgraded tripleo-heat-templates - - .. admonition:: Mitaka to Newton - :class: mton - - **Explicitly disable sahara services if so desired:** - As discussed at bug1630247_ sahara services are disabled by default in - the Newton overcloud deployment. This special case is handled for the - duration of the upgrade by defaulting to 'keep sahara-\*'. - - That is by default sahara services are restarted after the mitaka to - newton upgrade of controller nodes and sahara config is re-applied during - the final upgrade converge step. - - If an operator wishes to **disable** sahara services as part of the - mitaka to newton upgrade they need to include the - major-upgrade-remove-sahara.yaml_ environment file during the controller - upgrade earlier and converge step here - - .. code-block:: bash - - openstack overcloud deploy --templates \ - -e \ - -e /usr/share/openstack-tripleo-heat-templates/environments/major-upgrade-pacemaker-converge.yaml - -e /usr/share/openstack-tripleo-heat-templates/environments/major-upgrade-remove-sahara.yaml - - .. _bug1630247: https://bugs.launchpad.net/tripleo/+bug/1630247 - .. _major-upgrade-remove-sahara.yaml: https://github.com/openstack/tripleo-heat-templates/blob/2e6cc07c1a74c2dd7be70568f49834bace499937/environments/major-upgrade-remove-sahara.yaml - - - This step unpins compute services communication (upgrade level) on - controller and compute nodes, and it triggers configuration management - tooling to converge the overcloud configuration according to the new release - of `tripleo-heat-templates`. - - Make sure that all overcloud nodes have been upgraded to the new release, - and then run the deploy command with `major-upgrade-pacemaker-converge.yaml` - - .. 
code-block:: bash - - openstack overcloud deploy --templates \ - -e \ - -e /usr/share/openstack-tripleo-heat-templates/environments/major-upgrade-pacemaker-converge.yaml - - - .. note:: - - After the converge step, it's possible that some pacemaker resources - needs to be cleaned. Check the failed actions and clean them by running - on **only one** controller as root - - .. code-block:: bash - - pcs status - pcs resource cleanup - - It can take few minutes for the cluster to go back to a “normal” state as - displayed by ``crm_mon``. This is expected. - - diff --git a/deploy-guide/source/post_deployment/upgrade/minor_update.rst b/deploy-guide/source/post_deployment/upgrade/minor_update.rst deleted file mode 100644 index 416fd094..00000000 --- a/deploy-guide/source/post_deployment/upgrade/minor_update.rst +++ /dev/null @@ -1,237 +0,0 @@ -.. _package_update: - -Updating Content on Overcloud Nodes -=================================== - -The update of overcloud packages and containers to the latest version -of the current release is referred to as the 'minor update' in TripleO -(distinguishing it from the 'major upgrade' to the next release). In -the Queens cycle the minor update workflow was changed compared to -previous cycles. There are thus version specific sections below. - -Updating your Overcloud - Queens and beyond -------------------------------------------- - -The Queens release brought common CLI and workflow conventions to the -main deployment lifecycle operations (minor updates, major upgrades, -and fast forward upgrades). This means that the minor update workflow -has changed compared to previous releases, and it should now be easier -to learn and reason about the lifecycle operations in general. - -To update your overcloud to the latest packages / container images of -the OpenStack release that you currently operate, perform these steps: - -#. **Software sources setup** - - In case you use pinned repositories (e.g. to some DLRN hash), make - sure to update your repository files on overcloud to get the latest - RPMs. If you use stable RDO repositories, you don't need to change - anything. - -#. **Update preparation** - - To prepare the overcloud for the update, run: - - .. code-block:: bash - - openstack overcloud update prepare \ - \ - -e containers-prepare-parameter.yaml - - In place of the `` token should go all parameters that you - used with previous `openstack overcloud deploy` command. - - The last argument `containers-prepare-parameter.yaml` differs in - content depending on release. In Queens and before, it was a list - of individual container image parameters, pointing to images you've - already uploaded to local registry in previous step. In Rocky and - beyond, this file contains the ``ContainerImagePrepare`` parameter. - The upload of images to local registry is yet to happen, in a - separate step after `update prepare`. - - .. note:: - - The `update prepare` command performs a Heat stack update, and - as such it should be passed all parameters currently used by the - Heat stack (most notably environment files, role counts, roles - data, and network data). This is crucial in order to keep - correct state of the stack. - - .. note:: - - The `containers-prepare-parameter.yaml` file is intended to - replace any previous container parameters file. You should drop - the previous container parameter file and pass the new one for - any subsequent stack update operations. 
- - The `update prepare` command updates the Heat stack outputs with - Ansible snippets used in the next steps of the update. - -#. **Container image upload** - - Since Rocky, we will need to upload the container images - to the local registry at this point. Run: - - .. code-block:: bash - - openstack overcloud external-update run --tags container_image_prepare - -#. **Update run** - - Run the update procedure on a subset of nodes selected via the - ``--limit`` parameter: - - .. code-block:: bash - - openstack overcloud update run --limit overcloud-controller-0 - - You can specify a role name, e.g. 'Compute', to execute the minor - update on all nodes of that role in a rolling fashion (`serial: 1` - is used on the playbooks). - - There is no required node ordering for performing the minor update - on the overcloud, but it's a good practice to keep some consistency - in the process. E.g. all controllers first, then all computes, etc. - - Do this for all the overcloud nodes before proceeding to next step. - -#. **Ceph update (optional)** - - If your environment includes Ceph managed by TripleO (i.e. *not* - what TripleO calls "external Ceph"), you'll want to update Ceph at - this point too. - -#. **Update convergence** - - .. note:: - - Update Converge is only required for version less than Wallaby. - Converge has been removed for Wallaby and above. - - To finish the update procedure, run: - - .. code-block:: bash - - openstack overcloud update converge - - In place of the `` token should go all parameters that you - used with previous `openstack overcloud update prepare` command - (including the new `-e container-params.yaml`). - - .. note:: - - The `update converge` command performs a Heat stack update, and - as such it should be passed all parameters currently used by the - Heat stack (most notably environment files, role counts, roles - data, and network data). This is crucial in order to keep - correct state of the stack. - - The `update converge` command updates Heat stack outputs with - Ansible snippets the same way as `overcloud deploy` would, and it - runs the config management operations to assert that the overcloud - state matches the used overcloud templates. - -Updating your Overcloud - Pike ------------------------------- - -.. note:: - The minor update workflow described below is generally not well tested for - *non* containerized Pike environments. The main focus for the TripleO - upgrades engineering and QE teams has been on testing the minor update - within a containerized Pike environment. - - In particular there are currently no pacemaker update_tasks for the non - containerized cluster services (i.e., `puppet/services/pacemaker`_) and - those will need to be considered and added. You should reach out to the - TripleO community if this is an important feature for you and you'd like - to contribute to it. - -For the Pike cycle the minor update workflow is significantly different to -previous cycles. In particular, rather than using a static yum_update.sh_ -we now use service specific ansible update_tasks_ (similar to the upgrade_tasks -used for the major upgrade workflow since Ocata). Furthermore, these are not -executed directly via a Heat stack update, but rather, together with the -docker/puppet config, collected and written to ansible playbooks. The operator -then invokes these to deliver the minor update to particular nodes. 
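-
-If you want to inspect what will be executed before invoking the update, the
-generated ansible playbooks can be downloaded with `config download`, the same
-mechanism shown in the major upgrade workflow above (the output directory name
-below is just an example):
-
-.. code-block:: bash
-
-   source /home/stack/stackrc
-   openstack overcloud config download --config-dir /home/stack/update-playbooks
-   ls /home/stack/update-playbooks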
- -There are essentially two steps: first perform a (relatively short) Heat stack -update against the overcloud to generate the "config" ansible playbooks, and -then execute these. See bug 1715557_ for more information about this mechanism -and its implementation. - - -1. Confirm that your `$HOME/containers-prepare-parameter.yaml` -`ContainerImagePrepare` parameter includes a `tag_from_label` value, so that -the latest images are discovered on update, otherwise edit the `tag` value -to specify what image versions to update to. - - -2. Perform a heat stack update to generate the ansible playbooks, specifying -the registry file generated from the first step above:: - - openstack overcloud update --init-minor-update --container-registry-file latest-images.yaml - -3. Invoke the minor update on the nodes specified with the ``--limit`` -parameter:: - - openstack overcloud update --limit controller-0 - -.. admonition:: Stable Branch - :class: stable - - The `--limit` was introduced in the Stein release, previous versions used - `--nodes` or `--roles` parameters. - -You can specify a role name, e.g. 'Compute', to execute the minor update on -all nodes of that role in a rolling fashion (serial:1 is used on the playbooks). - -.. _yum_update.sh: https://github.com/openstack/tripleo-heat-templates/blob/53db241cfbfc1b6a237b7f33486a051aa6934579/extraconfig/tasks/yum_update.sh -.. _update_tasks: https://github.com/openstack/tripleo-heat-templates/blob/e1a9638732290c247e5dac10392bc8702b531981/puppet/services/tripleo-packages.yaml#L59 -.. _1715557: https://bugs.launchpad.net/tripleo/+bug/1715557 -.. _puppet/services/pacemaker: https://github.com/openstack/tripleo-heat-templates/tree/2e182bffeeb099cb5e0b1747086fb0e0f57b7b5d/puppet/services/pacemaker - -Updating your Overcloud - Ocata and earlier -------------------------------------------- - -Updating packages on all overcloud nodes involves two steps. The first one -makes sure that the overcloud plan is updated (a new tripleo-heat-templates rpm -might have brought fixes/changes to the templates):: - - openstack overcloud deploy --update-plan-only \ - --templates \ - -e - -By using the parameter ``--update-plan-only`` we make sure we update only the -stored overcloud plan and not the overcloud itself. Make sure you pass the -exact same environment parameters that were used at deployment time. - -The second step consists in updating the packages themselves on all overcloud -nodes with a command similar to the following:: - - openstack overcloud update stack -i overcloud - -This command updates the ``UpdateIdentifier`` parameter and triggers stack update -operation. If this parameter is set, ``yum update`` command is executed on each -node. Because running update on all nodes in parallel might be unsafe (an -update of a package might involve restarting a service), the command above -sets breakpoints on each overcloud node so nodes are updated one by one. When -the update is finished on a node the command will prompt for removing -breakpoint on next one. - -.. note:: - Make sure you use the ``-i`` parameter, otherwise update runs on background - and does not prompt for removing of breakpoints. - -.. note:: - Multiple breakpoints can be removed by specifying list of nodes with a - regular expression. - -.. note:: - If the update command is aborted for some reason you can always continue - in the process by re-running same command. - -.. note:: - The ``--templates`` and ``--environment-file`` (``-e``) are now deprecated. 
- They can still be passed to the command, but they will be silently ignored. - This is due to the plan now used for deployment should only be modified via - plan modification commands. diff --git a/deploy-guide/source/post_deployment/upgrade/undercloud.rst b/deploy-guide/source/post_deployment/upgrade/undercloud.rst deleted file mode 100644 index 34613183..00000000 --- a/deploy-guide/source/post_deployment/upgrade/undercloud.rst +++ /dev/null @@ -1,149 +0,0 @@ -Updating Undercloud Components ------------------------------- - -.. note:: - Instack-undercloud is deprecated as of the Rocky cycle. Instack undercloud - can only be upgraded to a containerized undercloud. See - :doc:`../../deployment/undercloud` - for backward compatibility related information. - -.. note:: - When updating the existing containerized undercloud installation, - keep in mind the special cases described in :ref:`notes-for-stack-updates`. - -#. Before upgrading the undercloud, it is highly suggested to perform - a :doc:`backup <../backup_and_restore/01_undercloud_backup>` - of the undercloud and validate that a restore works fine. - -#. Remove all Delorean repositories: - - .. note:: - - You may wish to backup your current repos before disabling them - - .. code-block:: bash - - mkdir -p /home/stack/REPOBACKUP - sudo mv /etc/yum.repos.d/delorean* /home/stack/REPOBACKUP - - .. code-block:: bash - - sudo rm /etc/yum.repos.d/delorean* - - -#. Enable new Delorean repositories: - - .. include:: ../../repositories.rst - -.. We need to manually continue our list numbering here since the above - "include" directive breaks the numbering. - -#. Clean the yum cache to ensure only the new repos are used - - .. code-block:: bash - - sudo dnf clean all - sudo rm -rf /var/cache/yum - -#. Update required package: - - .. admonition:: Validations - :class: validations - - It is strongly recommended that you validate the state of your undercloud - before starting any upgrade operations. The tripleo-validations_ repo has - some 'pre-upgrade' validations that you can execute by following the - instructions at validations_ to execute the "pre-upgrade" group - - .. code-block:: bash - - openstack tripleo validator run --group pre-upgrade - - .. admonition:: Newton to Ocata - :class: ntoo - - The following commands need to be run before the undercloud upgrade:: - - sudo systemctl stop openstack-* - sudo systemctl stop neutron-* - sudo systemctl stop openvswitch - sudo systemctl stop httpd - sudo yum update instack-undercloud openstack-puppet-modules openstack-tripleo-common - - .. admonition:: Ocata to Pike - :class: otop - - .. admonition:: Ceph - :class: ceph - - Prior to Pike, TripleO deployed Ceph with puppet-ceph. With the - Pike release it is possible to use TripleO to deploy Ceph with - either ceph-ansible or puppet-ceph, though puppet-ceph is - deprecated. To use ceph-ansible, the CentOS Storage SIG Ceph - repository must be enabled on the undercloud and the - ceph-ansible package must then be installed:: - - sudo yum install --enablerepo=extras centos-release-ceph-jewel - sudo yum install ceph-ansible - - Ceph clusters deployed with Ocata via puppet-ceph will be migrated - so that all of the existing Ceph services are run inside of containers. - This migration will be managed not by puppet-ceph, but by ceph-ansible, - which TripleO will use to control updates to the same ceph cluster after - the Ocata to Pike upgrade. - - - Update TripleO CLI and dependencies - - .. 
code-block:: bash - - sudo dnf update python3-tripleoclient* openstack-tripleo-common openstack-tripleo-heat-templates - -#. As part of the undercloud install, an image registry is configured on port - `8787`. This is used to increase reliability of overcloud image pulls, and - minimise overall network transfers. First it is highly suggested to perform - a backup of the initial `containers-prepare-parameter.yaml` file. Then - update the new `containers-prepare-parameter.yaml` file with the same - modifications made in the initial one:: - - openstack tripleo container image prepare default \ - --local-push-destination \ - --output-env-file ~/containers-prepare-parameter.yaml - - .. note:: - This command is available since Rocky. - -#. Run the undercloud upgrade command. This command will upgrade all packages - and use puppet to apply new configuration and restart all OpenStack - services - - .. code-block:: bash - - openstack undercloud upgrade - - .. note:: - The undercloud is containerized by default as of Rocky. Therefore, - an undercloud deployed on Queens (non-containerized) will be upgraded - to a containerized undercloud on Rocky, by default. - To upgrade with instack undercloud in Rocky, you'll need to upgrade with - ``--use-heat=False`` option. Note this isn't tested and not supported. - - .. note:: - In order to obtain the ansible command used for the installation of the - Undercloud in the artifacts directory, it is necessary to pass the option - ``--reproduce-command`` in the Undercloud upgrade command. - - .. note:: - It's possible to enable verbose logging with ``--verbose`` option. - To cleanup an undercloud after its upgrade, you'll need to set - upgrade_cleanup to True in undercloud.conf. It'll remove the rpms - that were deployed by instack-undercloud, after the upgrade to a - containerized undercloud. - - .. note:: - - If you added custom OVS ports to the undercloud (e.g. in a virtual - testing environment) you may need to re-add them at this point. - - .. _validations: ../validations/index.html - .. _tripleo-validations: https://github.com/openstack/tripleo-validations/tree/master/validations diff --git a/deploy-guide/source/post_deployment/validations/ansible.rst b/deploy-guide/source/post_deployment/validations/ansible.rst deleted file mode 100644 index 4f7084b8..00000000 --- a/deploy-guide/source/post_deployment/validations/ansible.rst +++ /dev/null @@ -1,46 +0,0 @@ - -Running validations using Ansible ---------------------------------- - -Validations -^^^^^^^^^^^ - -You can run the ``prep`` validations to verify the hardware. Later in -the process, the validations will be run by the undercloud processes. - -However, the undercloud is not set up yet. You can install Ansible on -your local machine (that has SSH connectivity to the undercloud) and -validate the undercloud from there. - -You need Ansible version 2 and the hostname/IP address of the -undercloud (referred to ``$UNDERCLOUD_HOST`` here): - -.. code-block:: bash - - $ sudo yum install ansible - $ git clone https://git.openstack.org/openstack/tripleo-validations - $ cd tripleo-validations - $ printf "[undercloud]\n$UNDERCLOUD_HOST" > hosts - $ export ANSIBLE_STDOUT_CALLBACK=validation_output - $ export ANSIBLE_CALLBACK_PLUGINS="${PWD}/callback_plugins" - $ export ANSIBLE_ROLES_PATH="${PWD}/roles" - $ export ANSIBLE_LOOKUP_PLUGINS="${PWD}/lookup_plugins" - $ export ANSIBLE_LIBRARY="${PWD}/library" - -Then get the ``prep`` validations: - -.. 
code-block:: bash - - $ grep -l '^\s\+-\s\+prep' -r playbooks - -And run them one by one: - -.. code-block:: bash - - $ ansible-playbook -i hosts playbooks/validation-name.yaml - -Or run them all in one shot: - -.. code-block:: bash - - $ for PREP_VAL in `grep -l '^\s\+-\s\+prep' -r playbooks`; do echo "=== $PREP_VAL ==="; ansible-playbook -i hosts $PREP_VAL; done diff --git a/deploy-guide/source/post_deployment/validations/cli.rst b/deploy-guide/source/post_deployment/validations/cli.rst deleted file mode 100644 index 6dcf4c4d..00000000 --- a/deploy-guide/source/post_deployment/validations/cli.rst +++ /dev/null @@ -1,239 +0,0 @@ -CLI support for validations -=========================== - -The following section describes the options when running or listing the existing -validations. - -Running validations -^^^^^^^^^^^^^^^^^^^ - -Validations can be executed by groups or individually. The current CLI -implementation allows to run them using the following CLI options: - -.. code-block:: bash - - $ openstack tripleo validator run [options] - -``--plan, --stack``: This option allows to execute the validations overriding the -default plan name. The default value is set to ``overcloud``. To override this -options use for example: - -.. code-block:: bash - - $ openstack tripleo validator run --plan mycloud - -``--validation``: This options allows to execute a set of specific -validations. Specify them as [,,...] which means a -comma separated list. The default value for this option is []. - -For example you can run this as: - -.. code-block:: bash - - $ openstack tripleo validator run --validation check-ftype,512e - -.. _running_validation_group: - -Running validation groups -------------------------- - -``--group``: This option allows to run specific group validations, if more than -one group is required, then separate the group names with commas. The default -value for this option is ['pre-deployment']. - -Run this option for example like: - -.. code-block:: bash - - $ openstack tripleo validator run --group pre-upgrade,prep - -``--extra-vars``: This option allows to add a dictionary of extra variables to a -run of a group or specific validations. - -.. code-block:: bash - - $ openstack tripleo validator run \ - --extra-vars '{"min_undercloud_ram_gb": 24, "min_undercloud_cpu_count": 8}' \ - --validation undercloud-ram,undercloud-cpu - -``--extra-vars-file``: This -option allows to add a valid ``JSON`` or ``YAML`` -file containing extra variables to a run of a group or specific validations. - -.. code-block:: bash - - $ openstack tripleo validator run \ - --extra-vars-file /home/stack/envvars.json \ - --validation undercloud-ram,undercloud-cpu - -``--workers, -w``: This option will configure the maximum of threads that can be -used to execute the given validation. - -.. code-block:: bash - - $ openstack tripleo validator run \ - --extra-vars-file /home/stack/envvars.json \ - --validation undercloud-ram,undercloud-cpu \ - --workers 3 - -Getting the list of the Groups of validations -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -To get the list of all groups used by tripleo-validations and get their -description, the user can type the following command: - -.. code-block:: bash - - $ openstack tripleo validator group info - -``--format, -f``: This option allows to change the default output for listing -the validations. The options are csv, value, json, yaml or table. - -.. 
code-block:: bash - - $ openstack tripleo validator group info --format json - -Getting a list of validations -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Validations can be listed by groups and depending which validations will be -listed, the output might be configured as a table, json or yaml. The user can -list the validations using the following command: - -.. code-block:: bash - - $ openstack tripleo validator list [options] - -``--group``: This option allows to list specific group validations, if more than -one group is required, then separate the group names with commas. - -.. code-block:: bash - - $ openstack tripleo validator list --group prep,pre-introspection - -``--format, -f``: This option allows to change the default output for listing -the validations. The options are csv, value, json, yaml or table. - -.. code-block:: bash - - $ openstack tripleo validator list --format json - -Getting detailed information about a validation -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -To get a full description of a validation, the user can run the following -command: - -.. code-block:: bash - - $ openstack tripleo validator show dns - -Getting the parameters list for a validation or a group of validations -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -To get all the available ``Ansible`` variables for one or more validations: - -``--validation``: This options allows to execute a set of specific -validations. Specify them as [,,...] which means a -comma separated list. The default value for this option is []. - -.. code-block:: bash - - openstack tripleo validator show parameter --validation undercloud-ram,undercloud-cpu - { - "undercloud-cpu": { - "parameters": { - "min_undercloud_cpu_count": 8 - } - }, - "undercloud-ram": { - "parameters": { - "min_undercloud_ram_gb": 24 - } - } - } - -``--group``: This option allows to list specific group validations, if more than -one group is required, then separate the group names with commas. - -.. code-block:: bash - - openstack tripleo validator show parameter --group prep - { - "512e": { - "parameters": {} - }, - "service-status": { - "parameters": {} - }, - "tls-everywhere-prep": { - "parameters": {} - }, - "undercloud-cpu": { - "parameters": { - "min_undercloud_cpu_count": 8 - } - }, - "undercloud-disk-space": { - "parameters": { - "volumes": [ - { - "min_size": 10, - "mount": "/var/lib/docker" - }, - { - "min_size": 3, - "mount": "/var/lib/config-data" - }, - { - "min_size": 3, - "mount": "/var/log" - }, - { - "min_size": 5, - "mount": "/usr" - }, - { - "min_size": 20, - "mount": "/var" - }, - { - "min_size": 25, - "mount": "/" - } - ] - } - }, - "undercloud-ram": { - "parameters": { - "min_undercloud_ram_gb": 24 - } - }, - "undercloud-selinux-mode": { - "parameters": {} - } - } - -``--download``: This option allows to generate a valid ``JSON`` or -``YAML`` file containing the available ``Ansible`` variables for the validations. - -To generate a ``JSON`` or ``YAML`` file containing for the variables of the -``undercloud-ram`` and ``undercloud-cpu`` validations: - -.. code-block:: bash - - openstack tripleo validator show parameter \ - --download [json|yaml] /home/stack/envvars \ - --validation undercloud-ram,undercloud-cpu - -To generate a ``JSON`` or ``YAML`` file containing for the variables of the -validations belonging to the ``prep`` and ``pre-introspection`` groups: - -.. 
code-block:: bash - - openstack tripleo validator show parameter \ - --download [json|yaml] /home/stack/envvars \ - --group prep,pre-introspection - -``--format, -f``: This option allows to change the default output for listing -the validations parameters. The options are json or yaml. diff --git a/deploy-guide/source/post_deployment/validations/in-flight.rst b/deploy-guide/source/post_deployment/validations/in-flight.rst deleted file mode 100644 index dd354292..00000000 --- a/deploy-guide/source/post_deployment/validations/in-flight.rst +++ /dev/null @@ -1,96 +0,0 @@ -In-flight validations -===================== - -The What --------- -In-flight validations are launched during the deploy, usually at the beginning -of a step, in order to ensure a service deployed at previous step does actually -work. - -The Why -------- -Being able to validate services early also ensures we get early failures. For -instance, if "service-one" is deployed at step 2 and never used until step 4, -we won't notice its failed state before step 4. - -Adding a validation at the beginning of step 3 would prevent this issue, by -failing early with a human readable message. - -The How -------- -The in-flight validations can be added directly in `the service template`_, -either at the end of the step we want to check, or at the beginning of the -next step. - -Since steps are launched with ansible, order does matter. - -Tagging -_______ -In order to ensure we can actually deactivate validations, we have to tag -validation related tasks with, at least, two tags:: - - - opendev-validation - - opendev-validation-SERVICE_NAME - -Plain ansible task -__________________ -The following example will ensure rabbitmq service is running after its -deployment: - -.. code-block:: YAML - - deploy_steps_tasks: - # rabbitmq container is supposed to be started during step 1 - # so we want to ensure it's running during step 2 - - name: validate rabbitmq state - when: step|int == 2 - tags: - - opendev-validation - - opendev-validation-rabbitmq - wait_for_connection: - host: {get_param: [ServiceNetMap, RabbitmqNetwork]} - port: 5672 - delay: 10 - -Validation Framework import -___________________________ -We can also include already existing validations from the -`Validation Framework`_ roles. This can be archived like this: - -.. code-block:: YAML - - deploy_steps_tasks: - - name: some validation - when: step|int == 2 - tags: - - opendev-validation - - opendev-validation-rabbitmq - include_role: - role: rabbitmq-limits - # We can pass vars to included role, in this example - # we override the default min_fd_limit value: - vars: - min_fd_limit: 32768 - -You can find the definition of the ``rabbitmq-limits`` role `here`_. - -Use existing health checks -__________________________ -We can also go for a simple thing, and use the existing service health check: - -.. code-block:: YAML - - deploy_steps_tasks: - # rabbitmq container is supposed to be started during step 1 - # so we want to ensure it's running during step 2 - - name: validate rabbitmq state - when: step|int == 2 - tags: - - opendev-validation - - opendev-validation-rabbitmq - command: > - podman exec rabbitmq /openstack/healthcheck - -.. _the service template: https://github.com/openstack/tripleo-heat-templates/tree/master/deployment -.. _Validation Framework: https://docs.openstack.org/tripleo-validations/latest/readme.html -.. 
_here: https://github.com/openstack/tripleo-validations/tree/master/roles/rabbitmq-limits diff --git a/deploy-guide/source/post_deployment/validations/index.rst b/deploy-guide/source/post_deployment/validations/index.rst deleted file mode 100644 index 92cf3427..00000000 --- a/deploy-guide/source/post_deployment/validations/index.rst +++ /dev/null @@ -1,77 +0,0 @@ -Validations guide -================= - -Since the Newton release, TripleO ships with extensible checks for -verifying the Undercloud configuration, hardware setup, and the -Overcloud deployment to find common issues early. - -Since Stein, it is possible to run the validations using the TripleO CLI. - -Validations are used to efficiently and reliably verify various facts about -the cloud on the level of individual nodes and hosts. - -Validations are non-intrusive by design, and recommended when performing large -scale changes to the cloud, for example upgrades, or to aid in the diagnosis -of various issues. Detailed docs for both the CLI and the API are provided -by the Validations Framework project. - -* tripleo-validations: https://docs.openstack.org/tripleo-validations/latest/ -* validations-common: https://docs.openstack.org/validations-common/latest/ -* validations-libs: https://docs.openstack.org/validations-libs/latest/ - -The validations are assigned into various groups that indicate when in -the deployment workflow they are expected to run: - -* **no-op** validations will run a no-op operation to verify that - the workflow is working as it supposed to, it will run in both - the Undercloud and Overcloud nodes. - -* **openshift-on-openstack** validations will check that the - environment meets the requirements to be able to deploy OpenShift - on OpenStack. - -* **prep** validations check the hardware configuration of the - Undercloud node and should be run before ``openstack undercloud - install``. Running prep validations should not rely on Mistral - because it might not be installed yet. - -* **pre-introspection** should be run before we introspect nodes using - Ironic Inspector. - -* **pre-deployment** validations should be executed before ``openstack - overcloud deploy`` - -* **post-deployment** should be run after the Overcloud deployment has - finished. - -* **pre-upgrade** try to validate your OpenStack deployment before you upgrade it. - -* **post-upgrade** try to validate your OpenStack deployment after you upgrade it. - -.. note:: - In case of the most validations, a failure does not mean that - you'll be unable to deploy or run OpenStack. But it can indicate - potential issues with long-term or production setups. If you're - running an environment for developing or testing TripleO, it's okay - that some validations fail. In a production setup, they should not. - -The list of all existing validations and the specific documentation -for the project can be found on the `tripleo-validations documentation page`_. - -With implementation specifics described in docs for the `validations-libs`_, -and `validations-common`_. - -The following sections describe the different ways of running and listing the -currently installed validations. - -.. toctree:: - :maxdepth: 2 - :includehidden: - - cli - ansible - in-flight - -.. _tripleo-validations documentation page: https://docs.openstack.org/tripleo-validations/latest/ -.. _validations-libs: https://docs.openstack.org/validations-libs/latest/ -.. 
_validations-common: https://docs.openstack.org/validations-common/latest/ \ No newline at end of file diff --git a/deploy-guide/source/post_deployment/vm_snapshot.rst b/deploy-guide/source/post_deployment/vm_snapshot.rst deleted file mode 100644 index c9a54c73..00000000 --- a/deploy-guide/source/post_deployment/vm_snapshot.rst +++ /dev/null @@ -1,26 +0,0 @@ -Import/Export of VM Snapshots -============================= - -Create a snapshot of a running server -------------------------------------- -Create a new image by taking a snapshot of a running server and download the -image. - -:: - - nova image-create instance_name image_name - glance image-download image_name --file exported_vm.qcow2 - -Import an image into Overcloud and launch an instance ------------------------------------------------------ -Upload the exported image into glance in Overcloud and launch a new instance. - -:: - - glance image-create --name imported_image --file exported_vm.qcow2 --disk-format qcow2 --container-format bare - nova boot --poll --key-name default --flavor m1.demo --image imported_image --nic net-id=net_id imported - -.. note:: - **Warning**: disadvantage of using glance image for export/import VMs is - that each VM disk has to be copied in and out into glance in source and - target clouds. Also by making snapshot qcow layering system is lost. diff --git a/deploy-guide/source/provisioning/ansible_deploy_interface.rst b/deploy-guide/source/provisioning/ansible_deploy_interface.rst deleted file mode 100644 index 786c8174..00000000 --- a/deploy-guide/source/provisioning/ansible_deploy_interface.rst +++ /dev/null @@ -1,157 +0,0 @@ -Extending overcloud nodes provisioning -====================================== - -Starting with the Queens release, the *ansible* deploy interface became -available in Ironic. Unlike the default `iSCSI deploy interface`_, it is -highly customizable through operator-provided Ansible playbooks. These -playbooks will run on the target image when Ironic boots the deploy ramdisk. - -.. TODO(dtantsur): link to ansible interface docs when they merge - -.. note:: - This feature is not related to the ongoing work of switching overcloud - configuration to Ansible. - -Enabling Ansible deploy ------------------------ - -The *ansible* deploy interface is enabled by default starting with Queens. -However, additional configuration is required when installing an undercloud. - -Custom ansible playbooks -~~~~~~~~~~~~~~~~~~~~~~~~ - -To avoid modifying playbooks, provided by the distribution, you must copy -them to a new location that is accessible by Ironic. In this guide it is -``/var/lib/ironic``. - -.. note:: - Use of the ``/var/lib`` directory is not fully compliant to FHS. We do it - because for containerized undercloud this directory is shared between - the host and the ironic-conductor container. - -#. Set up repositories and install the Ironic common package, if it is not - installed yet:: - - sudo yum install -y openstack-ironic-common - -#. Copy the files to the new location (``/var/lib/ironic/playbooks``):: - - sudo cp -R /usr/lib/python2.7/site-packages/ironic/drivers/modules/ansible/playbooks/ \ - /var/lib/ironic - -Installing undercloud -~~~~~~~~~~~~~~~~~~~~~ - -#. Generate an SSH key pair, for example:: - - ssh-keygen -t rsa -b 2048 -f ~/ipa-ssh -N '' - - .. warning:: The private part should not be password-protected or Ironic - will not be able to use it. - -#. Create a custom hieradata override. 
Pass the **public** SSH key for the - deploy ramdisk to the common PXE parameters, and set the new playbooks path. - - For example, create a file called ``ansible-deploy.yaml`` with the - following content: - - .. code-block:: yaml - - ironic::drivers::ansible::default_username: 'root' - ironic::drivers::ansible::default_key_file: '/var/lib/ironic/ipa-ssh' - ironic::drivers::ansible::playbooks_path: '/var/lib/ironic/playbooks' - ironic::drivers::pxe::pxe_append_params: 'nofb nomodeset vga=normal selinux=0 sshkey=""' - -#. Link to this file in your ``undercloud.conf``: - - .. code-block:: ini - - hieradata_override=/home/stack/ansible-deploy.yaml - -#. Deploy or update your undercloud as usual. - -#. Move the private key to ``/var/lib/ironic`` and ensure correct ACLs:: - - sudo mv ~/ipa-ssh /var/lib/ironic - sudo chown ironic:ironic /var/lib/ironic/ipa-ssh - sudo chmod 0600 /var/lib/ironic/ipa-ssh - -Enabling temporary URLs -~~~~~~~~~~~~~~~~~~~~~~~ - -#. First, enable the ``admin`` user access to other Swift accounts:: - - $ openstack role add --user admin --project service ResellerAdmin - -#. Check if the ``service`` account has a temporary URL key generated in the - Object Store service. Look for ``Temp-Url-Key`` properties in the output - of the following command:: - - $ openstack --os-project-name service object store account show - +------------+---------------------------------------+ - | Field | Value | - +------------+---------------------------------------+ - | Account | AUTH_97ae97383424400d8ee1a54c3a2c41a0 | - | Bytes | 2209530996 | - | Containers | 5 | - | Objects | 42 | - +------------+---------------------------------------+ - -#. If the property is not present, generate a value and add it:: - - $ openstack --os-project-name service object store account set \ - --property Temp-URL-Key=$(uuidgen | sha1sum | awk '{print $1}') - -Configuring nodes ------------------ - -Nodes have to be explicitly configured to use the Ansible deploy. For example, -to configure all nodes, use:: - - for node in $(baremetal node list -f value -c UUID); do - baremetal node set $node --deploy-interface ansible - done - -Editing playbooks ------------------ - -.. TODO(dtantsur): link to ansible interface docs when they merge - -Example: kernel arguments -~~~~~~~~~~~~~~~~~~~~~~~~~ - -Let's modify the playbooks to include additional kernel parameters for some -nodes. - -#. Update ``/var/lib/ironic/playbooks/roles/configure/tasks/grub.yaml`` from - - .. code-block:: yaml - - - name: create grub config - become: yes - command: chroot {{ tmp_rootfs_mount }} /bin/sh -c '{{ grub_config_cmd }} -o {{ grub_config_file }}' - - to - - .. code-block:: yaml - - - name: append kernel params - become: yes - lineinfile: - dest: "{{ tmp_rootfs_mount }}/etc/default/grub" - state: present - line: 'GRUB_CMDLINE_LINUX+=" {{ ironic_extra.kernel_params | default("") }}"' - - name: create grub config - become: yes - command: chroot {{ tmp_rootfs_mount }} /bin/sh -c '{{ grub_config_cmd }} -o {{ grub_config_file }}' - -#. Set the newly introduced ``kernel_params`` extra variable to the desired - kernel parameters. For example, to update only compute nodes use:: - - for node in $(baremetal node list -c Name -f value | grep compute); do - baremetal node set $node \ - --extra kernel_params='param1=value1 param2=value2' - done - -.. 
_iSCSI deploy interface: https://docs.openstack.org/ironic/latest/admin/interfaces/deploy.html#iscsi-deploy diff --git a/deploy-guide/source/provisioning/baremetal_provision.rst b/deploy-guide/source/provisioning/baremetal_provision.rst deleted file mode 100644 index b8393180..00000000 --- a/deploy-guide/source/provisioning/baremetal_provision.rst +++ /dev/null @@ -1,710 +0,0 @@ -.. _baremetal_provision: - -Provisioning Baremetal Before Overcloud Deploy -============================================== - -Baremetal provisioning is a feature which interacts directly with the -Bare Metal service to provision baremetal before the overcloud is deployed. -This adds a new provision step before the overcloud deploy, and the output of -the provision is a valid :doc:`../features/deployed_server` configuration. - -In the Wallaby release the baremetal provisioning was extended to also manage -the neutron API resources for :doc:`../features/network_isolation` and -:doc:`../features/custom_networks`, and apply network configuration on the -provisioned nodes using os-net-config. - -Undercloud Components For Baremetal Provisioning ------------------------------------------------- - -A new YAML file format is introduced to describe the baremetal required for -the deployment, and the new command ``openstack overcloud node provision`` -will consume this YAML and make the specified changes. The provision command -interacts with the following undercloud components: - -* A baremetal provisioning workflow which consumes the YAML and runs to - completion - -* The `metalsmith`_ tool which deploys nodes and associates ports. This tool is - responsible for presenting a unified view of provisioned baremetal while - interacting with: - - * The Ironic baremetal node API for deploying nodes - - * The Ironic baremetal allocation API which allocates nodes based on the YAML - provisioning criteria - - * The Neutron API for managing ports associated with the node's NICs - - -In a future release this will become the default way to deploy baremetal, as -the Nova compute service and the Glance image service will be switched off on -the undercloud. - -Baremetal Provision Configuration ---------------------------------- - -A declarative YAML format specifies what roles will be deployed and the -desired baremetal nodes to assign to those roles. Defaults can be relied on -so that the simplest configuration is to specify the roles, and the count of -baremetal nodes to provision for each role - -.. code-block:: yaml - - - name: Controller - count: 3 - - name: Compute - count: 100 - -Often it is desirable to assign specific nodes to specific roles, and this is -done with the ``instances`` property - -.. code-block:: yaml - - - name: Controller - count: 3 - instances: - - hostname: overcloud-controller-0 - name: node00 - - hostname: overcloud-controller-1 - name: node01 - - hostname: overcloud-controller-2 - name: node02 - - name: Compute - count: 100 - instances: - - hostname: overcloud-novacompute-0 - name: node04 - -Here the instance ``name`` refers to the logical name of the node, and the -``hostname`` refers to the generated hostname which is derived from the -overcloud stack name, the role, and an incrementing index. In the above -example, all of the Controller servers are on predictable nodes, as well as -one of the Compute servers. The other 99 Compute servers are on nodes -allocated from the pool of available nodes. 
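-
-If you want to confirm which baremetal node was allocated to each generated
-hostname once provisioning has run, the allocation records can be inspected.
-This is only a quick sketch of the commands that are covered in more detail
-under "Viewing Provisioned Node Details" later in this document:
-
-.. code-block:: bash
-
-   # Unified view of the provisioned nodes, as reported by metalsmith
-   metalsmith list
-
-   # Hostname to node associations kept by the baremetal allocation API
-   baremetal allocation list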
- -The properties in the ``instances`` entries can also be set in the -``defaults`` section so that they do not need to be repeated in every entry. -For example, the following are equivalent - -.. code-block:: yaml - - - name: Controller - count: 3 - instances: - - hostname: overcloud-controller-0 - name: node00 - image: - href: overcloud-full-custom - - hostname: overcloud-controller-1 - name: node01 - image: - href: overcloud-full-custom - - hostname: overcloud-controller-2 - name: node02 - image: - href: overcloud-full-custom - - - name: Controller - count: 3 - defaults: - image: - href: overcloud-full-custom - instances: - - hostname: overcloud-controller-0 - name: node00 - - hostname: overcloud-controller-1 - name: node01 - - hostname: overcloud-controller-2 - name: node02 - -When using :doc:`../features/network_isolation`, -:doc:`../features/custom_networks` or a combination of the two the **networks** -and **network_configuration** must either be set in the ``defaults`` for the -role or for each specific node (instance). The following example extends the -first simple configuration example adding typical TripleO network isolation by -setting defaults for each role - -.. code-block:: yaml - - - name: Controller - count: 3 - defaults: - networks: - - network: ctlplane - vif: true - - network: external - subnet: external_subnet - - network: internalapi - subnet: internal_api_subnet01 - - network: storage - subnet: storage_subnet01 - - network: storagemgmt - subnet: storage_mgmt_subnet01 - - network: tenant - subnet: tenant_subnet01 - network_config: - template: /home/stack/nic-config/controller.j2 - default_route_network: - - external - - name: Compute - count: 100 - defaults: - networks: - - network: ctlplane - vif: true - - network: internalapi - subnet: internal_api_subnet02 - - network: tenant - subnet: tenant_subnet02 - - network: storage - subnet: storage_subnet02 - network_config: - template: /home/stack/nic-config/compute.j2 - - -Role Properties -^^^^^^^^^^^^^^^ - -Each role entry supports the following properties: - -* ``name``: Mandatory role name - -* ``hostname_format``: Override the default hostname format for this role. The - default format uses the lower case role name, so for the ``Controller`` role the - default format is ``%stackname%-controller-%index%``. Only the ``Compute`` role - doesn't follow the role name rule, the ``Compute`` default format is - ``%stackname%-novacompute-%index%`` - -* ``count``: Number of nodes to provision for this role, defaults to 1 - -* ``defaults``: A dict of default values for ``instances`` entry properties. An - ``instances`` entry property will override a default specified here See - :ref:`instance-defaults-properties` for supported properties - -* ``instances``: A list of dict for specifying attributes for specific nodes. - See :ref:`instance-defaults-properties` for supported properties. The length - of this list must not be greater than ``count`` - -* ``ansible_playbooks``: A list of dict for Ansible playbooks and Ansible vars, - the playbooks are run against the role instances after node provisioning, - prior to the node network configuration. See - :ref:`ansible-playbook-properties` for more details and examples. - -.. 
_instance-defaults-properties: - -Instance and Defaults Properties -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -These properties serve three purposes: - -* Setting selection criteria when allocating nodes from the pool of available nodes - -* Setting attributes on the baremetal node being deployed - -* Setting network configuration properties for the deployed nodes - -Each ``instances`` entry and the ``defaults`` dict support the following properties: - -* ``capabilities``: Selection criteria to match the node's capabilities - -* ``config_drive``: Add data and first-boot commands to the config-drive passed - to the node. See :ref:`config-drive` - -* ``hostname``: If this complies with the ``hostname_format`` pattern then - other properties will apply to the node allocated to this hostname. - Otherwise, this allows a custom hostname to be specified for this node. - (Cannot be specified in ``defaults``) - -* ``image``: Image details to deploy with. See :ref:`image-properties` - -* ``managed``: Boolean to determine whether the instance is actually - provisioned with metalsmith, or should be treated as preprovisioned. - -* ``name``: The name of a node to deploy this instance on (Cannot be specified - in ``defaults``) - -* ``nics``: (**DEPRECATED:** Replaced by ``networks`` in Wallaby) List of - dicts representing requested NICs. See :ref:`nics-properties` - -* ``networks``: List of dicts representing instance networks. See - :ref:`networks-properties` - -* ``network_config``: Network configuration details. See - :ref:`network-config-properties` - -* ``profile``: Selection criteria to use :doc:`./profile_matching` - -* ``provisioned``: Boolean to determine whether this node is provisioned or - unprovisioned. Defaults to ``true``, ``false`` is used to unprovision a node. - See :ref:`scaling-down` - -* ``resource_class``: Selection criteria to match the node's resource class, - defaults to ``baremetal``. See :doc:`./profile_matching` - -* ``root_size_gb``: Size of the root partition in GiB, defaults to 49 - -* ``swap_size_mb``: Size of the swap partition in MiB, if needed - -* ``traits``: A list of traits as selection criteria to match the node's ``traits`` - -.. _image-properties: - -Image Properties -________________ - -* ``href``: Glance image reference or URL of the root partition or whole disk - image. URL schemes supported are ``file://``, ``http://``, and ``https://``. - If the value is not a valid URL, it is assumed to be a Glance image reference - -* ``checksum``: When the ``href`` is a URL, the ``MD5`` checksum of the root - partition or whole disk image - -* ``kernel``: Glance image reference or URL of the kernel image (partition images only) - -* ``ramdisk``: Glance image reference or URL of the ramdisk image (partition images only) - - -.. _networks-properties: - -Networks Properties -___________________ - -The ``instances`` ``networks`` property supports a list of dicts, one dict per -network. - -* ``network``: Neutron network to create the port for this network: - -* ``fixed_ip``: Specific IP address to use for this network - -* ``network``: Neutron network to create the port for this network - -* ``subnet``: Neutron subnet to create the port for this network - -* ``port``: Existing Neutron port to use instead of creating one - -* ``vif``: When ``true`` the network is attached as VIF (virtual-interface) by - metalsmith/ironic. When ``false`` the baremetal provisioning workflow creates - the Neutron API resource, but no VIF attachment happens in metalsmith/ironic. 
- (Typically only the provisioning network (``ctlplane``) has this set to - ``true``.) - -By default there is one network representing - -.. code-block:: yaml - - - network: ctlplane - vif: true - -Other valid network entries would be - -.. code-block:: yaml - - - network: ctlplane - fixed_ip: 192.168.24.8 - vif: true - - port: overcloud-controller-0-ctlplane - - network: internal_api - subnet: internal_api_subnet01 - -.. _network-config-properties: - -Network Config Properties -_________________________ - -The ``network_config`` property contains os-net-config related properties. - -* ``template``: The ansible j2 nic config template to use when - applying node network configuration. (default: - ``templates/net_config_bridge.j2``) - -* ``physical_bridge_name``: Name of the OVS bridge to create for accessing - external networks. (default: ``br-ex``) - -* ``public_interface_name``: Which interface to add to the public bridge - (default: ``nic1``) - -* ``network_config_update``: Whether to apply network configuration changes, - on update or not. Boolean value. (default: ``false``) - -* ``net_config_data_lookup``: Per node and/or per node group os-net-config nic - mapping config. - -* ``default_route_network``: The network to use for the default route (default: - ``ctlplane``) - -* ``networks_skip_config``: List of networks that should be skipped when - configuring node networking - -* ``dns_search_domains``: A list of DNS search domains to be added (in order) - to resolv.conf. - -* ``bond_interface_ovs_options``: The ovs_options or bonding_options string for - the bond interface. Set things like lacp=active and/or bond_mode=balance-slb - for OVS bonds or like mode=4 for Linux bonds using this option. - -.. _nics-properties: - -Nics Properties -_______________ - -The ``instances`` ``nics`` property supports a list of dicts, one dict per NIC. - -* ``fixed_ip``: Specific IP address to use for this NIC - -* ``network``: Neutron network to create the port for this NIC - -* ``subnet``: Neutron subnet to create the port for this NIC - -* ``port``: Existing Neutron port to use instead of creating one - -By default there is one NIC representing - -.. code-block:: yaml - - - network: ctlplane - -Other valid NIC entries would be - -.. code-block:: yaml - - - subnet: ctlplane-subnet - fixed_ip: 192.168.24.8 - - port: overcloud-controller-0-ctlplane - -.. _config-drive: - -Config Drive -____________ - -The ``instances`` ``config_drive`` property supports two sub-properties: - -* ``cloud_config``: Dict of cloud-init `cloud-config`_ data for tasks to run on - node boot. A task specified in an ``instances`` ``cloud_config`` will - overwrite a task with the same name in ``defaults`` ``cloud_config``. - -* ``meta_data``: Extra metadata to include with the config-drive cloud-init - metadata. This will be added to the generated metadata ``public_keys``, - ``uuid``, ``name``, ``hostname``, and ``instance-type`` which is set to - the role name. Cloud-init makes this metadata available as `instance-data`_. - A key specified in an ``instances`` ``meta_data`` entry will overwrite the - same key in ``defaults`` ``meta_data``. - -Below are some examples of what can be done with ``config_drive``. - -Run arbitrary scripts on first boot: - -.. code-block:: yaml - - config_drive: - cloud_config: - bootcmd: - # temporary workaround to allow steering in ConnectX-3 devices - - echo "options mlx4_core log_num_mgm_entry_size=-1" >> /etc/modprobe.d/mlx4.conf - - /sbin/dracut --force - -Enable and configure ntp: - -.. 
code-block:: yaml - - config_drive: - cloud_config: - enabled: true - ntp_client: chrony # Uses cloud-init default chrony configuration - -Allow root ssh login (for development environments only): - -.. code-block:: yaml - - config_drive: - cloud_config: - ssh_pwauth: true - disable_root: false - chpasswd: - list: |- - root:sekrit password - expire: False - -Use values from custom metadata: - -.. code-block:: yaml - - config_drive: - meta_data: - foo: bar - cloud_config: - runcmd: - - echo The value of foo is `jq .foo < /run/cloud-init/instance-data.json` - - -.. _ansible-playbook-properties: - -Ansible Playbooks ------------------ - -The role ``ansible_playbooks`` takes a list of playbook definitions, supporting -the ``playbook`` and ``extra_vars`` sub-properties. - -* ``playbook``: The path (relative to the roles definition YAML file) to the - ansible playbook. - -* ``extra_vars``: Extra Ansible variables to set when running the playbook. - -.. note:: Playbooks only run if '--network-config' is enabled. - -Run arbitrary playbooks: - -.. code-block:: yaml - - ansible_playbooks: - - playbook: a_playbook.yaml - - playbook: b_playbook.yaml - -Run arbitrary playbooks with extra variables defined for one of the playbooks: - -.. code-block:: yaml - - ansible_playbooks: - - playbook: a_playbook.yaml - extra_vars: - param1: value1 - param2: value2 - - playbook: b_playbook.yaml - -Grow volumes playbook -^^^^^^^^^^^^^^^^^^^^^ - -After custom playbooks are run, an in-built playbook is run to grow the LVM -volumes of any node deployed with the whole-disk overcloud image -`overcloud-hardened-uefi-full.qcow2`. The implicit `ansible_playbooks` would be: - -.. code-block:: yaml - - ansible_playbooks: - - playbook: /usr/share/ansible/tripleo-playbooks/cli-overcloud-node-growvols.yaml - extra_vars: - growvols_args: > - /=8GB - /tmp=1GB - /var/log=10GB - /var/log/audit=2GB - /home=1GB - /var=100% - -Each LVM volume is grown by the amount specified until the disk is 100% -allocated, and any remaining space is given to the `/` volume. In some cases it -may be necessary to specify different `growvols_args`. For example the -`ObjectStorage` role deploys swift storage which stores state in `/srv`, so this -volume needs the remaining space instead of `/var`. The playbook can be -explicitly written to override the default `growvols_args` value, for example: - -.. code-block:: yaml - - ansible_playbooks: - - playbook: /usr/share/ansible/tripleo-playbooks/cli-overcloud-node-growvols.yaml - extra_vars: - growvols_args: > - /=8GB - /tmp=1GB - /var/log=10GB - /var/log/audit=2GB - /home=1GB - /var=1GB - /srv=100% - -Set kernel arguments playbook -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Features such as DPDK require that kernel arguments are set and the node is -rebooted before the network configuration is run. A playbook is provided to -allow this. Here it is run with the default variables set: - -.. code-block:: yaml - - ansible_playbooks: - - playbook: /usr/share/ansible/tripleo-playbooks/cli-overcloud-node-kernelargs.yaml - extra_vars: - kernel_args: '' - reboot_wait_timeout: 900 - defer_reboot: false - tuned_profile: 'throughput-performance' - tuned_isolated_cores: '' - -Here is an example for a specific DPDK deployment: - -.. 
code-block:: yaml - - ansible_playbooks: - - playbook: /usr/share/ansible/tripleo-playbooks/cli-overcloud-node-kernelargs.yaml - extra_vars: - kernel_args: 'default_hugepagesz=1GB hugepagesz=1G hugepages=64 intel_iommu=on iommu=pt' - tuned_isolated_cores: '1-11,13-23' - tuned_profile: 'cpu-partitioning' - -.. _deploying-the-overcloud: - -Deploying the Overcloud ------------------------ - -This example assumes that the baremetal provision configuration file has the -filename ``~/overcloud_baremetal_deploy.yaml`` and the resulting deployed -server environment file is ``~/overcloud-baremetal-deployed.yaml``. It also -assumes overcloud networks are pre-deployed using the ``openstack overcloud -network provision`` command and the deployed networks environment file is -``~/overcloud-networks-deployed.yaml``. - -The baremetal nodes are provisioned with the following command:: - - openstack overcloud node provision \ - --stack overcloud \ - --network-config \ - --output ~/overcloud-baremetal-deployed.yaml \ - ~/overcloud_baremetal_deploy.yaml - -.. note:: Removing the ``--network-config`` argument will disable the management - of non-VIF networks and post node provisioning network configuration - with os-net-config via ansible. - -The overcloud can then be deployed using the output from the provision command:: - - openstack overcloud deploy \ - -e /usr/share/openstack-tripleo-heat-templates/environments/deployed-server-environment.yaml \ - -e ~/overcloud-networks-deployed.yaml \ - -e ~/templates/vips-deployed-environment.yaml \ - -e ~/overcloud-baremetal-deployed.yaml \ - --deployed-server \ - --disable-validations \ # optional, see note below - # other CLI arguments - -.. note:: - The validation which is part of `openstack overcloud node - provision` may fail with the default overcloud image unless the - Ironic node has more than 4 GB of RAM. For example, a VBMC node - provisioned with 4096 MB of memory failed because the image size - plus the reserved RAM size were not large enough (Image size: 4340 - MiB, Memory size: 3907 MiB). - -Viewing Provisioned Node Details --------------------------------- - -The commands ``baremetal node list`` and ``baremetal node show`` continue to -show the details of all nodes, however there are some new commands which show a -further view of the provisioned nodes. - -The `metalsmith`_ tool provides a unified view of provisioned nodes, along with -allocations and neutron ports. This is similar to what Nova provides when it -is managing baremetal nodes using the Ironic driver. To list all nodes -managed by metalsmith, run:: - - metalsmith list - -The baremetal allocation API keeps an association of nodes to hostnames, -which can be seen by running:: - - baremetal allocation list - -The allocation record UUID will be the same as the Instance UUID for the node -which is allocated. The hostname can be seen in the allocation record, but it -can also be seen in the ``baremetal node show`` property -``instance_info``, ``display_name``. - - -Scaling the Overcloud ---------------------- - -Scaling Up -^^^^^^^^^^ - -To scale up an existing overcloud, edit ``~/overcloud_baremetal_deploy.yaml`` -to increment the ``count`` in the roles to be scaled up (and add any desired -``instances`` entries) then repeat the :ref:`deploying-the-overcloud` steps. - -.. _scaling-down: - -Scaling Down -^^^^^^^^^^^^ - -Scaling down is done with the ``openstack overcloud node delete`` command but -the nodes to delete are not passed as command arguments. 
- -To scale down an existing overcloud edit -``~/overcloud_baremetal_deploy.yaml`` to decrement the ``count`` in the roles -to be scaled down, and also ensure there is an ``instances`` entry for each -node being unprovisioned which contains the following: - -* The ``name`` of the baremetal node to remove from the overcloud - -* The ``hostname`` which is assigned to that node - -* A ``provisioned: false`` property - -* A YAML comment explaining the reason for making the node unprovisioned (optional) - -For example the following would remove ``overcloud-compute-1`` - -.. code-block:: yaml - - - name: Compute - count: 1 - instances: - - hostname: overcloud-compute-0 - name: node10 - # Removed from deployment due to disk failure - provisioned: false - - hostname: overcloud-compute-1 - name: node11 - -Then the delete command will be called with ``--baremetal-deployment`` -instead of passing node arguments:: - - openstack overcloud node delete \ - --stack overcloud \ - --baremetal-deployment ~/overcloud_baremetal_deploy.yaml - -Before any node is deleted, a list of nodes to delete is displayed -with a confirmation prompt. - -What to do when scaling back up depends on the situation. If the scale-down -was to temporarily remove baremetal which is later restored, then the -scale-up can increment the ``count`` and set ``provisioned: true`` on nodes -which were previously ``provisioned: false``. If that baremetal node is not -going to be re-used in that role then the ``provisioned: false`` can remain -indefinitely and the scale-up can specify a new ``instances`` entry, for -example - -.. code-block:: yaml - - - name: Compute - count: 2 - instances: - - hostname: overcloud-compute-0 - name: node10 - # Removed from deployment due to disk failure - provisioned: false - - hostname: overcloud-compute-1 - name: node11 - - hostname: overcloud-compute-2 - name: node12 - - -Unprovisioning All Nodes -^^^^^^^^^^^^^^^^^^^^^^^^ - -After ``openstack overcloud delete`` is called, all of the baremetal nodes -can be unprovisioned without needing to edit -``~/overcloud_baremetal_deploy.yaml`` by running the unprovision command with -the ``--all`` argument:: - - openstack overcloud node unprovision --all \ - --stack overcloud \ - --network-ports \ - ~/overcloud_baremetal_deploy.yaml - -.. note:: Removing the ``--network-ports`` argument will disable the management - of non-VIF networks, non-VIF ports will _not_ be deleted in that - case. - -.. _metalsmith: https://docs.openstack.org/metalsmith/ - -.. _cloud-config: https://cloudinit.readthedocs.io/en/latest/topics/examples.html - -.. _instance-data: https://cloudinit.readthedocs.io/en/latest/topics/instancedata.html diff --git a/deploy-guide/source/provisioning/bios_settings.rst b/deploy-guide/source/provisioning/bios_settings.rst deleted file mode 100644 index 7501c236..00000000 --- a/deploy-guide/source/provisioning/bios_settings.rst +++ /dev/null @@ -1,51 +0,0 @@ -BIOS Settings -============= - -Tripleo can support BIOS configuration for bare metal nodes via node manual -:doc:`cleaning`. Several commands are added to allow administrator to apply -and reset BIOS settings. - -Apply BIOS settings -------------------- - -#. To apply given BIOS configuration to all manageable nodes:: - - openstack overcloud node bios configure --configuration <..> --all-manageable - -#. To apply given BIOS configuration to specified nodes:: - - openstack overcloud node bios configure --configuration <..> node_uuid1 node_uuid2 .. 
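-
-For illustration only, the configuration can also be kept in a file and passed
-by name; ``bios_settings.json`` below is a hypothetical file name, and its
-expected contents follow the format described just after this example:
-
-.. code-block:: bash
-
-   # bios_settings.json is a placeholder, not a file shipped with TripleO
-   openstack overcloud node bios configure \
-       --configuration bios_settings.json \
-       --all-manageable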
- -The configuration parameter passed to above commands must be YAML/JSON string -or a file name which contains YAML/JSON string of BIOS settings, for example:: - - { - "settings": [ - { - "name": "setting name", - "value": "setting value" - }, - { - "name": "setting name", - "value": "setting value" - }, - .. - ] - } - -With the parameter ``--all-manageable``, the command applies given BIOS -settings to all manageable nodes. - -With the parameter ``node_uuid1 node_uuid2``, the command applies given BIOS -settings to nodes which uuid equal to ``node_uuid1`` and ``node_uuid2``. - -Reset BIOS settings -------------------- - -#. To reset the BIOS configuration to factory default on specified nodes:: - - openstack overcloud node bios reset --all-manageable - -#. To reset the BIOS configuration on specified nodes:: - - openstack overcloud node bios reset node_uuid1 node_uuid2 .. diff --git a/deploy-guide/source/provisioning/cleaning.rst b/deploy-guide/source/provisioning/cleaning.rst deleted file mode 100644 index a56874e9..00000000 --- a/deploy-guide/source/provisioning/cleaning.rst +++ /dev/null @@ -1,58 +0,0 @@ -Node cleaning -============= - -In Ironic *cleaning* is a process of preparing a bare metal node for -provisioning. There are two types of cleaning: *automated* and *manual*. -See `cleaning documentation -`_ for more -details. - -.. warning:: - It is highly recommended to at least wipe metadata (partitions and - partition table(s)) from all disks before deployment. - -Automated cleaning ------------------- - -*Automated cleaning* runs before a node gets to the ``available`` state (see -:doc:`node_states` for more information on provisioning states). It happens -after the first enrollment and after every unprovisioning. - -In the TripleO undercloud automated cleaning is **disabled** by default. -Starting with the Ocata release, it can be enabled by setting the following -option in your ``undercloud.conf``: - -.. code-block:: ini - - [DEFAULT] - clean_nodes = True - -Alternatively, you can use `Manual cleaning`_ as described below. - -Manual cleaning ---------------- - -*Manual cleaning* is run on request for nodes in the ``manageable`` state. - -If you have *automated cleaning* disabled, you can use the following procedure -to wipe the node's metadata starting with the Rocky release: - -#. If the node is not in the ``manageable`` state, move it there:: - - baremetal node manage - -#. Run manual cleaning on a specific node:: - - openstack overcloud node clean - - or all manageable nodes:: - - openstack overcloud node clean --all-manageable - -#. Make the node available again:: - - openstack overcloud node provide - - or provide all manageable nodes:: - - openstack overcloud node provide --all-manageable diff --git a/deploy-guide/source/provisioning/index.rst b/deploy-guide/source/provisioning/index.rst deleted file mode 100644 index 0423ad05..00000000 --- a/deploy-guide/source/provisioning/index.rst +++ /dev/null @@ -1,22 +0,0 @@ -Baremetal Node Configuration -============================ - -Documentation on how to do advanced configuration of baremetal nodes in -|project|. - -.. 
toctree:: - - node_states - cleaning - bios_settings - node_discovery - root_device - introspect_single_node - profile_matching - node_placement - ready_state - introspection_data - whole_disk_images - uefi_boot - ansible_deploy_interface - baremetal_provision \ No newline at end of file diff --git a/deploy-guide/source/provisioning/introspect_single_node.rst b/deploy-guide/source/provisioning/introspect_single_node.rst deleted file mode 100644 index 65397ffd..00000000 --- a/deploy-guide/source/provisioning/introspect_single_node.rst +++ /dev/null @@ -1,28 +0,0 @@ -Introspecting a Single Node -=========================== - -In addition to bulk introspection, you can also introspect nodes one by one. -When doing so, you must take care to set the correct node states manually. -Use ``baremetal node show UUID`` command to figure out whether nodes -are in ``manageable`` or ``available`` state. For all nodes in ``available`` -state, start with putting a node to ``manageable`` state (see -:doc:`node_states` for details):: - - baremetal node manage - -Then you can run introspection:: - - baremetal introspection start UUID - -This command won't poll for the introspection result, use the following command -to check the current introspection state:: - - baremetal introspection status UUID - -Repeat it for every node until you see ``True`` in the ``finished`` field. -The ``error`` field will contain an error message if introspection failed, -or ``None`` if introspection succeeded for this node. - -Do not forget to make nodes available for deployment afterwards:: - - baremetal node provide diff --git a/deploy-guide/source/provisioning/introspection_data.rst b/deploy-guide/source/provisioning/introspection_data.rst deleted file mode 100644 index 344d7e19..00000000 --- a/deploy-guide/source/provisioning/introspection_data.rst +++ /dev/null @@ -1,172 +0,0 @@ -.. _introspection_data: - -Accessing Introspection Data ----------------------------- - -Every introspection run (as described in -:doc:`../deployment/install_overcloud`) collects a lot of facts about -the hardware and puts them as JSON in Swift. Starting with -``python-ironic-inspector-client`` version 1.4.0 there is a command to retrieve -this data:: - - baremetal introspection data save - -You can provide a ``--file`` argument to save the data in a file instead of -displaying it. - -If you don't have a new enough version of ``python-ironic-inspector-client``, -you can use cURL to access the API:: - - token=$(openstack token issue -f value -c id) - curl -H "X-Auth-Token: $token" http://127.0.0.1:5050/v1/introspection//data - -Accessing raw additional data -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Extra hardware data can be collected using the python-hardware_ library. If -you have enabled this, by setting ``inspection_extras`` to ``True`` in your -``undercloud.conf`` (enabled by default starting with the Mitaka release), -then even more data is available. - -The command above will display it in a structured format under the ``extra`` -key in the resulting JSON object. This format is suitable for using in -the **ironic-inspector** introspection rules (see e.g. -:ref:`auto-profile-tagging`). However, if you want to access it in its -original format (list of lists instead of nested objects), you can query -Swift for it directly. - -The Swift container name is ``ironic-inspector``, which can be modified in -**/etc/ironic-inspector/inspector.conf**. The Swift object is called -``extra_hardware-`` where ```` is a node UUID. 
In the default -configuration you have to use the ``service`` tenant to access this object. - -As an example, to download the Swift data for all nodes to a local directory -and use that to collect a list of node mac addresses:: - - # You will need the ironic-inspector user password - # from the [swift] section of /etc/ironic-inspector/inspector.conf: - export IRONIC_INSPECTOR_PASSWORD=xxxxxx - - # Download the extra introspection data from swift: - for node in $(baremetal node list -f value -c UUID); - do swift -U service:ironic -K $IRONIC_INSPECTOR_PASSWORD download ironic-inspector extra_hardware-$node; - done - - # Use jq to access the local data - for example gather macs: - for f in extra_hardware-*; - do cat $f | jq -r 'map(select(.[0]=="network" and .[2]=="serial"))'; - done - -Running benchmarks -~~~~~~~~~~~~~~~~~~ - -Benchmarks for CPU, memory and hard drive can be run during the introspection -process. However, they are time consuming, and thus are disabled by default. -To enable benchmarks set ``inspection_runbench`` to ``true`` in the -``undercloud.conf`` (also requires ``inspection_extras`` set to ``true``), -then (re)run ``openstack undercloud install``. - -Extra data examples -~~~~~~~~~~~~~~~~~~~ - -Here is an example of CPU extra data, including benchmark results:: - - $ baremetal introspection data save | jq '.extra.cpu' - { - "physical": { - "number": 1 - }, - "logical": { - "number": 1, - "loops_per_sec": 636 - }, - "logical_0": { - "bandwidth_4K": 3657, - "bandwidth_1G": 6775, - "bandwidth_128M": 8353, - "bandwidth_2G": 7221, - "loops_per_sec": 612, - "bogomips": "6983.57", - "bandwidth_1M": 10781, - "bandwidth_16M": 9808, - "bandwidth_1K": 1204, - "cache_size": "4096KB" - }, - "physical_0": - { - "physid": 400, - "product": "QEMU Virtual CPU version 2.3.0", - "enabled_cores": 1, - "vendor": "Intel Corp.", - "threads": 1, - "flags": "fpu fpu_exception wp de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pse36 clflush mmx fxsr sse sse2 syscall nx x86-64 rep_good nopl pni cx16 x2apic hypervisor lahf_lm abm", - "version": "RHEL 7.2.0 PC (i440FX + PIIX, 1996)", - "frequency": 2e+09, - "cores": 1 - } - } - -Here is an example of disk extra data, including benchmark results:: - - $ baremetal introspection data save | jq '.extra.disk' - { - "logical": { - "count": 1 - }, - "sda": { - "SMART/Raw_Read_Error_Rate(1)/value": 100, - "SMART/Spin_Up_Time(3)/thresh": 0, - "model": "QEMU HARDDISK", - "SMART/Power_Cycle_Count(12)/when_failed": "NEVER", - "SMART/Reallocated_Sector_Ct(5)/worst": 100, - "SMART/Power_Cycle_Count(12)/raw": 0, - "standalone_read_1M_KBps": 1222758, - "SMART/Power_On_Hours(9)/worst": 100, - "Read Cache Disable": 0, - "SMART/Power_On_Hours(9)/raw": 1, - "rotational": 1, - "SMART/Start_Stop_Count(4)/thresh": 20, - "SMART/Start_Stop_Count(4)/raw": 100, - "SMART/Power_Cycle_Count(12)/thresh": 0, - "standalone_randread_4k_KBps": 52491, - "physical_block_size": 512, - "SMART/Reallocated_Sector_Ct(5)/value": 100, - "SMART/Reallocated_Sector_Ct(5)/when_failed": "NEVER", - "SMART/Power_Cycle_Count(12)/value": 100, - "SMART/Spin_Up_Time(3)/when_failed": "NEVER", - "size": 44, - "SMART/Power_On_Hours(9)/thresh": 0, - "id": "ata-QEMU_HARDDISK_QM00005", - "SMART/Reallocated_Sector_Ct(5)/raw": 0, - "SMART/Raw_Read_Error_Rate(1)/when_failed": "NEVER", - "SMART/Airflow_Temperature_Cel(190)/worst": 69, - "SMART/Airflow_Temperature_Cel(190)/when_failed": "NEVER", - "SMART/Spin_Up_Time(3)/value": 100, - "standalone_read_1M_IOps": 1191, - 
"SMART/Airflow_Temperature_Cel(190)/thresh": 50, - "SMART/Power_On_Hours(9)/when_failed": "NEVER", - "SMART/firmware_version": "2.3.0", - "optimal_io_size": 0, - "SMART/Raw_Read_Error_Rate(1)/thresh": 6, - "SMART/Raw_Read_Error_Rate(1)/raw": 0, - "SMART/Raw_Read_Error_Rate(1)/worst": 100, - "SMART/Power_Cycle_Count(12)/worst": 100, - "standalone_randread_4k_IOps": 13119, - "rev": 0, - "SMART/Start_Stop_Count(4)/worst": 100, - "SMART/Start_Stop_Count(4)/when_failed": "NEVER", - "SMART/Spin_Up_Time(3)/worst": 100, - "SMART/Reallocated_Sector_Ct(5)/thresh": 36, - "SMART/device_model": "QEMU HARDDISK", - "SMART/Airflow_Temperature_Cel(190)/raw": " 31 (Min/Max 31/31)", - "SMART/Start_Stop_Count(4)/value": 100, - "SMART/Spin_Up_Time(3)/raw": 16, - "Write Cache Enable": 1, - "vendor": "ATA", - "SMART/serial_number": "QM00005", - "SMART/Power_On_Hours(9)/value": 100, - "SMART/Airflow_Temperature_Cel(190)/value": 69 - } - } - -.. _python-hardware: https://github.com/redhat-cip/hardware diff --git a/deploy-guide/source/provisioning/node_discovery.rst b/deploy-guide/source/provisioning/node_discovery.rst deleted file mode 100644 index 98b7da7e..00000000 --- a/deploy-guide/source/provisioning/node_discovery.rst +++ /dev/null @@ -1,147 +0,0 @@ -Node Discovery -============== - -As an alternative to creating an inventory file (``instackenv.json``) and -enrolling nodes from it, you can discover and enroll the nodes automatically. - -TripleO supports two approaches to the discovery process: - -* `Automatic enrollment of new nodes`_ -* `Scanning BMC range`_ - -Automatic enrollment of new nodes ---------------------------------- - -You can enable **ironic-inspector** to automatically enroll all unknown nodes -that boot the introspection ramdisk. See `ironic-inspector discovery -documentation`_ for more details on the process. - -Configuration -~~~~~~~~~~~~~ - -Set the following in your ``undercloud.conf`` before installing the undercloud: - -.. code-block:: ini - - enable_node_discovery = True - -Make sure to get (or build) and upload the introspection image, as described -in :doc:`../deployment/install_overcloud`. - -Basic usage -~~~~~~~~~~~ - -After the discovery is enabled, any node that boots the introspection ramdisk -and posts back to **ironic-inspector** will be enrolled in **ironic**. Make -sure the nodes are connected to the provisioning network, and default to -booting from PXE. Power them on using any available means (e.g. by pushing the -power button on them). - -New nodes appear in the ``enroll`` state by default and use the -``pxe_ipmitool`` driver (configurable via the ``discovery_default_driver`` -option in ``undercloud.conf``). You have to set the power credentials -for these nodes and make them available. See :doc:`node_states` for details. - -Using introspection rules -~~~~~~~~~~~~~~~~~~~~~~~~~ - -Alternatively, you can use **ironic-inspector** introspection rules to -automatically set the power credentials based on certain properties. - -For example, to set the same credentials for all new nodes, you can use -the following rules: - -.. code-block:: json - - [ - { - "description": "Set default IPMI credentials", - "conditions": [ - {"op": "eq", "field": "data://auto_discovered", "value": true} - ], - "actions": [ - {"action": "set-attribute", "path": "driver_info/ipmi_username", - "value": "admin"}, - {"action": "set-attribute", "path": "driver_info/ipmi_password", - "value": "paSSw0rd"} - ] - } - ] - -To set specific credentials for a certain vendor, use something like: - -.. 
code-block:: json - - [ - { - "description": "Set default IPMI credentials", - "conditions": [ - {"op": "eq", "field": "data://auto_discovered", "value": true}, - {"op": "ne", "field": "data://inventory.system_vendor.manufacturer", - "value": "Dell Inc."} - ], - "actions": [ - {"action": "set-attribute", "path": "driver_info/ipmi_username", - "value": "admin"}, - {"action": "set-attribute", "path": "driver_info/ipmi_password", - "value": "paSSw0rd"} - ] - }, - { - "description": "Set the vendor driver for Dell hardware", - "conditions": [ - {"op": "eq", "field": "data://auto_discovered", "value": true}, - {"op": "eq", "field": "data://inventory.system_vendor.manufacturer", - "value": "Dell Inc."} - ], - "actions": [ - {"action": "set-attribute", "path": "driver", "value": "pxe_drac"}, - {"action": "set-attribute", "path": "driver_info/drac_username", - "value": "admin"}, - {"action": "set-attribute", "path": "driver_info/drac_password", - "value": "paSSw0rd"}, - {"action": "set-attribute", "path": "driver_info/drac_address", - "value": "{data[inventory][bmc_address]}"} - ] - } - ] - -The rules should be put to a file and uploaded to **ironic-inspector** before -the discovery process: - -.. code-block:: console - - baremetal introspection rule import /path/to/rules.json - -See :doc:`profile_matching` for more examples on introspection rules. - -.. _ironic-inspector discovery documentation: https://docs.openstack.org/ironic-inspector/usage.html#discovery - -Scanning BMC range ------------------- - -You can discover new nodes by scanning an IP range for accessible BMCs. -You need to provide a set of credentials to try, and optionally a list of -ports. Use the following command to run the scan: - -.. code-block:: console - - openstack overcloud node discover --range \ - --credentials --credentials - -Here, ```` is an IP range, e.g. ``10.0.0.0/24``. Credentials are -provided separated by a colon, e.g. ``root:calvin``. - -With this approach, new nodes end up in ``manageable`` state, and will already -have the deploy properties, such as deploy kernel/ramdisk, assigned. - -You can use the same command to introspect the nodes and make them available -for deployment: - -.. code-block:: console - - openstack overcloud node discover --range \ - --credentials --credentials \ - --introspect --provide - -The resulting node UUIDs will be printed on the screen. diff --git a/deploy-guide/source/provisioning/node_placement.rst b/deploy-guide/source/provisioning/node_placement.rst deleted file mode 100644 index 836fe717..00000000 --- a/deploy-guide/source/provisioning/node_placement.rst +++ /dev/null @@ -1,172 +0,0 @@ -Controlling Node Placement and IP Assignment -============================================ - -By default, nodes are assigned randomly via the Nova scheduler, either from -a generic pool of nodes, or from a subset of nodes identified via specific -profiles which are mapped to Nova flavors (See -:doc:`../environments/baremetal` and :doc:`./profile_matching` -for further information). - -However in some circumstances, you may wish to control node placement more -directly, which is possible by combining the same capabilities mechanism used -for per-profile placement with per-node scheduler hints. - - -Assign per-node capabilities ----------------------------- - -The first step is to assign a unique per-node capability which may be matched -by the Nova scheduler on deployment. 
- -This can either be done via the nodes json file when registering the nodes, or -alternatively via manual adjustment of the node capabilities, e.g:: - - baremetal node set --property capabilities='node:controller-0' - -This has assigned the capability ``node:controller-0`` to the node, and this -must be repeated (using a unique continuous index, starting from 0) for all -nodes. - -If this approach is used, all nodes for a given role (e.g Controller, Compute -or each of the Storage roles) must be tagged in the same way, or the Nova -scheduler will be unable to match the capabilities correctly. - -.. note:: Profile matching is redundant when precise node placement is used. - To avoid scheduling failures you should use the default "baremetal" - flavor for deployment in this case, not the flavors designed for - profile matching ("compute", "control", etc). - -Create an environment file with Scheduler Hints ------------------------------------------------ - -The next step is simply to create a heat environment file, which matches the -per-node capabilities created for each node above:: - - parameter_defaults: - ControllerSchedulerHints: - 'capabilities:node': 'controller-%index%' - -This is then passed via ``-e scheduler_hints_env.yaml`` to the overcloud -deploy command. - -The same approach is possible for each role via these parameters: - -* ControllerSchedulerHints -* ComputeSchedulerHints -* BlockStorageSchedulerHints -* ObjectStorageSchedulerHints -* CephStorageSchedulerHints - -For custom roles (defined via roles_data.yaml) the parameter will be named -RoleNameSchedulerHints, where RoleName is the name specified in roles_data.yaml. - -.. note:: - - Previously the parameter for Compute nodes was named - `NovaComputeSchedulerHints`. If - you are updating a deployment which used the old parameter, all - values previously passed to `NovaComputeSchedulerHints` should be - passed to `ComputeSchedulerHints` instead, and - `NovaComputeSchedulerHints: {}` should be explicitly set in - `parameter_defaults`, to ensure that values from the old parameter - will not be used anymore. - -Custom Hostnames ----------------- - -In combination with the custom placement configuration above, it is also -possible to assign a specific baremetal node a custom hostname. This may -be used to denote where a system is located (e.g. rack2-row12), to make -the hostname match an inventory identifier, or any other situation where -a custom hostname is desired. - -To customize node hostnames, the ``HostnameMap`` parameter can be used. For -example:: - - parameter_defaults: - HostnameMap: - overcloud-controller-0: overcloud-controller-prod-123-0 - overcloud-controller-1: overcloud-controller-prod-456-0 - overcloud-controller-2: overcloud-controller-prod-789-0 - overcloud-novacompute-0: overcloud-novacompute-prod-abc-0 - -The environment file containing this configuration would then be passed to -the overcloud deploy command using ``-e`` as with all environment files. - -Note that the ``HostnameMap`` is global to all roles, and is not a top-level -Heat template parameter so it must be passed in the ``parameter_defaults`` -section. The first value in the map (e.g. ``overcloud-controller-0``) is the -hostname that Heat would assign based on the HostnameFormat parameters. The -second value (e.g. ``overcloud-controller-prod-123-0``) is the desired custom -hostname for that node. - -.. 
_predictable_ips: - -Predictable IPs ---------------- - -For further control over the resulting environment, overcloud nodes can be -assigned a specific IP on each network as well. This is done by -editing ``environments/ips-from-pool-all.yaml`` in tripleo-heat-templates. -Be sure to make a local copy of ``/usr/share/openstack-tripleo-heat-templates`` -before making changes so the packaged files are not altered, as they will -be overwritten if the package is updated. - -The parameter_defaults section in ``ips-from-pool-all.yaml``, is where the IP -addresses are assigned. Each node type has an associated parameter - -ControllerIPs for Controller nodes, ComputeIPs for Compute nodes, etc. Each -parameter is a map of network names to a list of addresses. Each network type -must have at least as many addresses as there will be nodes on that network. -The addresses will be assigned in order, so the first node of each type will -get the first address in each of the lists, the second node will get the second -address in each of the lists, and so on. - -For example, if three Ceph storage nodes were being deployed, the CephStorageIPs -parameter might look like:: - - CephStorageIPs: - storage: - - 172.16.1.100 - - 172.16.1.101 - - 172.16.1.102 - storage_mgmt: - - 172.16.3.100 - - 172.16.3.101 - - 172.16.3.102 - -The first Ceph node would have two addresses: 172.16.1.100 and 172.16.3.100. The -second would have 172.16.1.101 and 172.16.3.101, and the third would have -172.16.1.102 and 172.16.3.102. The same pattern applies to the other node types. - -.. important:: - Even if an overcloud node is deleted, its entry in the IP lists should - *not* be removed. The IP list is based on the underlying Heat indices, - which do not change even if nodes are deleted. To indicate that a given - entry in the list is no longer used, the IP value can be replaced with a - value such as "DELETED" or "UNUSED". - - In short, entries should never be removed from the IP lists, only changed - or added. - -To apply this configuration during a deployment, pass the environment file to the -deploy command. For example, if you copied tripleo-heat-templates to ~/my-templates, -the extra parameter would look like:: - - -e ~/my-templates/environments/ips-from-pool-all.yaml - -Predictable Virtual IPs ------------------------ - -You can also assign predictable Virtual IPs (VIPs) for services. To accomplish this, -edit the network environment file and add the VIP parameters in the -parameter_defaults section, for example:: - - ControlFixedIPs: [{'ip_address':'192.168.201.101'}] - InternalApiVirtualFixedIPs: [{'ip_address':'172.16.0.9'}] - PublicVirtualFixedIPs: [{'ip_address':'10.1.1.9'}] - StorageVirtualFixedIPs: [{'ip_address':'172.16.1.9'}] - StorageMgmtVirtualFixedIPs: [{'ip_address':'172.16.3.9'}] - RedisVirtualFixedIPs: [{'ip_address':'172.16.0.8'}] - -These IPs MUST come from outside their allocation range to prevent conflicts. -Do not use these parameters if deploying with an external load balancer. diff --git a/deploy-guide/source/provisioning/node_states.rst b/deploy-guide/source/provisioning/node_states.rst deleted file mode 100644 index e3f89970..00000000 --- a/deploy-guide/source/provisioning/node_states.rst +++ /dev/null @@ -1,54 +0,0 @@ -Bare Metal Node States -====================== - -This document provides a brief explanation of the bare metal node states that -TripleO uses or might use. Please refer to `the Ironic documentation -`_ for more details. 
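For a quick look at where each registered node currently sits in this state
machine, the provision state is shown in the regular node listing (a minimal
sketch; ``<node>`` is a placeholder for a node UUID or name)::

    $ baremetal node list
    $ baremetal node show <node> -f value -c provision_state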
- -enroll ------- - -In a typical Ironic workflow nodes begin their life in a state called ``enroll``. -Nodes in this state are not available for deployment, nor for most of other -actions. Ironic does not touch such nodes in any way. - -In the TripleO workflow the nodes start their life in the ``manageable`` state -and only see the ``enroll`` state if their power management fails to validate:: - - openstack overcloud import instackenv.json - -Nodes can optionally be introspected in this step by passing the --provide flag -which will progress them through the manageable_ state and eventually to -the available_ state ready for deployment. - -manageable ----------- - -To make nodes alive an operator uses ``manage`` provisioning action to move -nodes to ``manageable`` state. During this transition the power and management -credentials (IPMI, SSH, etc) are validated to ensure that nodes in -``manageable`` state are actually manageable by Ironic. This state is still not -available for deployment. With nodes in this state an operator can execute -various pre-deployment actions, such as introspection, RAID configuration, etc. -So to sum it up, nodes in ``manageable`` state are being configured before -exposing them into the cloud. - -The ``manage`` action -can be used to bring nodes from enroll_ to ``manageable`` or nodes already -moved to available_ state back to ``manageable`` for configuration:: - - baremetal node manage - -available ---------- - -The last step before the deployment is to make nodes ``available`` using the -``provide`` provisioning action. Such nodes are exposed to nova, and can be -deployed to at any moment. No long-running configuration actions should be run -in this state. - -.. note:: - Nodes which failed introspection stay in ``manageable`` state and must be - reintrospected or made ``available`` manually:: - - baremetal node provide diff --git a/deploy-guide/source/provisioning/profile_matching.rst b/deploy-guide/source/provisioning/profile_matching.rst deleted file mode 100644 index 721b97de..00000000 --- a/deploy-guide/source/provisioning/profile_matching.rst +++ /dev/null @@ -1,188 +0,0 @@ -Node matching with resource classes and profiles -================================================ - -The `Baremetal Provision Configuration`_ describes all of the instance and -defaults properties which can be used as selection criteria for which node will -be assigned to a provisioned instance. Filtering on the ``resource_class`` property -is recommended for nodes which have special hardware for specific roles. The -``profile`` property is recommended for other matching requirements such as -placing specific roles to groups of nodes, or assigning instances to nodes based -on introspection data. - -Resource class matching ------------------------ - -As an example of matching on special hardware, this shows how to have a custom -``Compute`` role for PMEM equipped hardware, see :doc:`../features/compute_nvdimm`. - -By default all nodes are assigned the ``resource_class`` of ``baremetal``. Each -node which is PMEM enabled needs to have its ``resource_class`` changed to -``baremetal.PMEM``:: - - baremetal node set --resource-class baremetal.PMEM - -Assuming there is a custom role called ``ComputePMEM``, the -``~/overcloud_baremetal_deploy.yaml`` file will match on ``baremetal.PMEM`` -nodes with: - -.. 
code-block:: yaml - - - name: ComputePMEM - count: 3 - defaults: - resource_class: baremetal.PMEM - -Advanced profile matching -------------------------- -Profile matching allows a user to specify precisely which nodes provision with each -role (or instance). Here are additional setup steps to take advantage of the -profile matching. In this document ``profile`` is a capability that is assigned to -the ironic node, then matched in the ``openstack overcloud node provision`` yaml. - -After profile is specified in ``~/overcloud_baremetal_deploy.yaml``, metalsmith -will only deploy it on ironic nodes with the same profile. Deployment will fail -if not enough ironic nodes are tagged with a profile. - -There are two ways to assign a profile to a node. You can assign it directly -or specify one or many suitable profiles for the deployment command to choose -from. It can be done either manually or using the introspection rules. - -Manual profile tagging -~~~~~~~~~~~~~~~~~~~~~~ - -To assign a profile to a node directly, issue the following command:: - - baremetal node set --property capabilities=profile: - -To clean all profile information from a node use:: - - baremetal node unset --property capabilities - -.. note:: - We can not update only a single key from the capabilities dictionary, so if - it contained more then just the profile information then this will need to - be set for the node. - -Also see :ref:`instackenv` for details on how to set profile in the -``instackenv.json`` file. - -.. _auto-profile-tagging: - -Automated profile tagging -~~~~~~~~~~~~~~~~~~~~~~~~~ - -`Introspection rules`_ can be used to conduct automatic profile assignment -based on data received from the introspection ramdisk. A set of introspection -rules should be created before introspection that either set ``profile`` or -``_profile`` capabilities on a node. - -The exact structure of data received from the ramdisk depends on both ramdisk -implementation and enabled plugins, and on enabled *ironic-inspector* -processing hooks. The most basic properties are ``cpus``, ``cpu_arch``, -``local_gb`` and ``memory_mb``, which represent CPU number, architecture, -local hard drive size in GiB and RAM size in MiB. See -:ref:`introspection_data` for more details on what our current ramdisk -provides. - -Create a JSON file, for example ``rules.json``, with the introspection rules -to apply (see `Example of introspection rules`_). Before the introspection -load this file into *ironic-inspector*:: - - baremetal introspection rule import /path/to/rules.json - -Then (re)start the introspection. Check assigned profiles using command:: - - baremetal node list -c uuid -c name -c properties - -If you've made a mistake in introspection rules, you can delete them all:: - - baremetal introspection rule purge - -Then reupload the updated rules file and restart introspection. - -.. note:: - When you use introspection rules to assign the ``profile`` capability, it - will always override the existing value. On the contrary, - ``_profile`` capabilities are ignored for nodes with the existing - ``profile`` capability. - -Example of introspection rules -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Imagine we have the following hardware: with disk sizes > 1 TiB -for object storage and with smaller disks for compute and controller nodes. -We also need to make sure that no hardware with seriously insufficient -properties gets to the fleet at all. 
- -:: - - [ - { - "description": "Fail introspection for unexpected nodes", - "conditions": [ - {"op": "lt", "field": "memory_mb", "value": 4096} - ], - "actions": [ - {"action": "fail", "message": "Memory too low, expected at least 4 GiB"} - ] - }, - { - "description": "Assign profile for object storage", - "conditions": [ - {"op": "ge", "field": "local_gb", "value": 1024} - ], - "actions": [ - {"action": "set-capability", "name": "profile", "value": "swift-storage"} - ] - }, - { - "description": "Assign possible profiles for compute and controller", - "conditions": [ - {"op": "lt", "field": "local_gb", "value": 1024}, - {"op": "ge", "field": "local_gb", "value": 40} - ], - "actions": [ - {"action": "set-capability", "name": "compute_profile", "value": "1"}, - {"action": "set-capability", "name": "control_profile", "value": "1"}, - {"action": "set-capability", "name": "profile", "value": null} - ] - } - ] - -This example consists of 3 rules: - -#. Fail introspection if memory is lower is 4096 MiB. Such rules can be - applied to exclude nodes that should not become part of your cloud. - -#. Nodes with hard drive size 1 TiB and bigger are assigned the - ``swift-storage`` profile unconditionally. - -#. Nodes with hard drive less than 1 TiB but more than 40 GiB can be either - compute or control nodes. So we assign two capabilities ``compute_profile`` - and ``control_profile``, so that the ``openstack overcloud node provision`` - command can later make the final choice. For that to work, we remove the - existing ``profile`` capability, otherwise it will have priority. - -#. Other nodes are not changed. - -Provision with profile matching -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Assuming nodes have been assigned the profiles ``control_profile`` and -``compute_profile``, the ``~/overcloud_baremetal_deploy.yaml`` can be modified -with the following to match profiles during ``openstack overcloud node -provision``: - -.. code-block:: yaml - - - name: Controller - count: 3 - defaults: - profile: control_profile - - name: Compute - count: 100 - defaults: - profile: compute_profile - -.. _Introspection rules: https://docs.openstack.org/ironic-inspector/usage.html#introspection-rules -.. _Baremetal Provision Configuration: https://docs.openstack.org/project-deploy-guide/tripleo-docs/latest/provisioning/baremetal_provision.html#baremetal-provision-configuration diff --git a/deploy-guide/source/provisioning/ready_state.rst b/deploy-guide/source/provisioning/ready_state.rst deleted file mode 100644 index 460a0617..00000000 --- a/deploy-guide/source/provisioning/ready_state.rst +++ /dev/null @@ -1,31 +0,0 @@ -Ready-state configuration -========================= - -.. note:: Ready-state configuration currently works only with Dell DRAC - machines. - -Ready-state configuration can be used to prepare bare-metal resources for -deployment. It includes BIOS configuration based on a predefined profile. - - -Define the target BIOS configuration ------------------------------------- - -To define a BIOS setting, list the name of the setting and its target -value for each profile:: - - { - "compute" :{ - "bios_settings": {"ProcVirtualization": "Enabled"} - } - } - - -Trigger the ready-state configuration -------------------------------------- - -Make sure the nodes have profiles assigned as described in -:doc:`profile_matching`. Create a JSON file with the target ready-state -configuration for each profile. 
Then trigger the configuration:: - - baremetal configure ready state ready-state.json diff --git a/deploy-guide/source/provisioning/root_device.rst b/deploy-guide/source/provisioning/root_device.rst deleted file mode 100644 index edb9621d..00000000 --- a/deploy-guide/source/provisioning/root_device.rst +++ /dev/null @@ -1,106 +0,0 @@ -.. _root_device: - -Setting the Root Device for Deployment --------------------------------------- - -If your hardware has several hard drives, it's highly recommended that you -specify the exact device to be used during introspection and deployment -as a root device. This is done by setting a ``root_device`` property on the -node in Ironic. Please refer to the `Ironic root device hints documentation`_ -for more details. - -For example:: - - baremetal node set --property root_device='{"wwn": "0x4000cca77fc4dba1"}' - -To remove a hint and fallback to the default behavior:: - - baremetal node unset --property root_device - -Note that the root device hints should be assigned *before* both introspection -and deployment. After changing the root device hints you should either re-run -introspection or manually fix the ``local_gb`` property for a node:: - - baremetal node set --property local_gb= - -Where the new value is calculated as a real disk size in GiB minus 1 GiB to -account for partitioning (the introspection process does this calculation -automatically). - -Setting root device hints automatically -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Starting with the Newton release it is possible to autogenerate root -device hints for all nodes instead of setting them one by one. Pass the -``--root-device`` argument to the ``openstack overcloud node -configure`` **after a successful introspection**. This argument can -accept a device list in the order of preference, for example:: - - openstack overcloud node configure --all-manageable --root-device=sdb,sdc,vda - -It can also accept one of two strategies: ``smallest`` will pick the smallest -device, ``largest`` will pick the largest one. By default only disk devices -larger than 4 GiB are considered at all, set the ``--root-device-minimum-size`` -argument to change. - -.. note:: - Subsequent runs of this command on the same set of nodes does nothing, - as root device hints are already recorded on nodes and are not overwritten. - If you want to change existing root device hints, first remove them manually - as described above. - -.. note:: - This command relies on introspection data, so if you change disk devices on - the machines, introspection must be rerun before rerunning this command. - -Using introspection data to find the root device -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -If you don't know the information required to make a choice, you can use -introspection to figure it out. First start with :ref:`introspection` as usual -without setting any root device hints. Then use the stored introspection data -to list all disk devices:: - - baremetal introspection data save fdf975ae-6bd7-493f-a0b9-a0a4667b8ef3 | jq '.inventory.disks' - -For **python-ironic-inspector-client** versions older than 1.4.0 you can use -the ``curl`` command instead, see :ref:`introspection_data` for details. 
- -This command will yield output similar to the following (some fields are empty -for a virtual node):: - - [ - { - "size": 11811160064, - "rotational": true, - "vendor": "0x1af4", - "name": "/dev/vda", - "wwn_vendor_extension": null, - "wwn_with_extension": null, - "model": "", - "wwn": null, - "serial": null - }, - { - "size": 11811160064, - "rotational": true, - "vendor": "0x1af4", - "name": "/dev/vdb", - "wwn_vendor_extension": null, - "wwn_with_extension": null, - "model": "", - "wwn": null, - "serial": null - } - ] - -You can use all these fields, except for ``rotational``, for the root device -hints. Note that ``size`` should be converted to GiB and that ``name``, -``wwn_with_extension`` and ``wwn_vendor_extension`` can only be used starting -with the Mitaka release. Also note that the ``name`` field, while convenient, -`may be unreliable and change between boots -`_. - -Do not forget to re-run the introspection after setting the root device hints. - -.. _Ironic root device hints documentation: https://docs.openstack.org/ironic/latest/install/advanced.html#specifying-the-disk-for-deployment-root-device-hints diff --git a/deploy-guide/source/provisioning/uefi_boot.rst b/deploy-guide/source/provisioning/uefi_boot.rst deleted file mode 100644 index 0efee34c..00000000 --- a/deploy-guide/source/provisioning/uefi_boot.rst +++ /dev/null @@ -1,73 +0,0 @@ -Booting in UEFI mode -==================== - -TripleO supports booting overcloud nodes in UEFI_ mode instead of the default -BIOS mode. This is required to use advanced features like *secure boot* (not -covered by this guide), and some hardware may only feature UEFI support. - -Configuring nodes ------------------ - -Depending on the driver, nodes have to be put in the UEFI mode manually or the -driver can put them in it. For example, manual configuration is required for -``ipmi`` (including ``pxe_ipmitool``) and ``idrac`` (including ``pxe_drac``) -drivers, while ``ilo`` (including ``pxe_ilo``) and ``irmc`` (starting with -the Queens release) drivers can set boot mode automatically. - -Independent of the driver, you have to configure the UEFI mode manually, if -you want introspection to run in it. - -Manual configuration is usually done by entering node's *system setup* and -changing boot setting there. - -Introspection -------------- - -The introspection process is flexible enough to automatically detect the boot -mode of the node. The only requirement is iPXE: TripleO currently does not -support using PXE with UEFI. Make sure the following options are enabled -in your ``undercloud.conf`` (they are on by default): - -.. code-block:: ini - - ipxe_enabled = True - -Then you can run introspection as usual. - -Deployment ----------- - -Starting with the Pike release, the introspection process configures bare -metal nodes to run in the same boot mode as it was run in. For example, if -introspection was run on nodes in UEFI mode, **ironic-inspector** will -configure introspected nodes to deploy in UEFI mode as well. - -Here is how the ``properties`` field looks for nodes configured in BIOS mode:: - - $ baremetal node show -f value -c properties - {u'capabilities': u'profile:compute,boot_mode:bios', u'memory_mb': u'6144', u'cpu_arch': u'x86_64', u'local_gb': u'49', u'cpus': u'1'} - -Note that ``boot_mode:bios`` capability is set. 
For a node in UEFI mode, it -will look like this:: - - $ baremetal node show -f value -c properties - {u'capabilities': u'profile:compute,boot_mode:uefi', u'memory_mb': u'6144', u'cpu_arch': u'x86_64', u'local_gb': u'49', u'cpus': u'1'} - -You can change the boot mode with the following command (required for UEFI -before the Pike release):: - - $ baremetal node set --property capabilities=profile:compute,boot_mode:uefi - -.. warning:: - Do not forget to copy all other capabilities, e.g. ``profile`` and - ``boot_option`` if present. - - -Finally, you may configure your flavors to explicitly request nodes that boot -in UEFI mode, for example:: - - $ openstack flavor set --property capabilities:boot_mode='uefi' compute - -Then proceed with the deployment as usual. - -.. _UEFI: https://en.wikipedia.org/wiki/Unified_Extensible_Firmware_Interface diff --git a/deploy-guide/source/provisioning/whole_disk_images.rst b/deploy-guide/source/provisioning/whole_disk_images.rst deleted file mode 100644 index 2f72821b..00000000 --- a/deploy-guide/source/provisioning/whole_disk_images.rst +++ /dev/null @@ -1,17 +0,0 @@ -Use whole disk images for overcloud ------------------------------------ - -By default, TripleO **overcloud-full** image is a *partition* image. Such images carry only the -root partition contents and no partition table. Alternatively, *whole disk* images can be used, -which carry all partitions, a partition table and a boot loader. - -Whole disk images can be built with **diskimage-builder** - see -`Ironic images documentation `_ -for details. Note that this does not affect **ironic-python-agent** images. - -Use the following command to treat **overcloud-full** as a whole disk image when uploading images:: - - openstack overcloud image upload --whole-disk - -In this case only ``overcloud-full.qcow2`` file is required, ``overcloud-full.initrd`` and -``overcloud-full.vmlinuz`` are not used. diff --git a/deploy-guide/source/repositories.rst b/deploy-guide/source/repositories.rst deleted file mode 100644 index f7d656ef..00000000 --- a/deploy-guide/source/repositories.rst +++ /dev/null @@ -1,34 +0,0 @@ -.. This should be changed to something more user-friendly like http://tripleo.org/tripleo-repos.rpm - -.. note:: - Python3 is required for current releases of OpenStack which is - supported on CentOS Stream 9. - -#. Download and install the python-tripleo-repos RPM from - the appropriate RDO repository - - .. admonition:: CentOS Stream 9 - :class: centos9 - - Current `Centos 9 RDO repository `_. - - .. code-block:: bash - - sudo dnf install -y https://trunk.rdoproject.org/centos9/component/tripleo/current/python3-tripleo-repos-.el9.noarch.rpm - - .. note:: - - tripleo-repos removes any repositories that it manages before each run. - This means all repositories must be specified in a single tripleo-repos - call. As an example, the correct way to install the current and ceph repos - is to run ``tripleo-repos current ceph``, not two separate calls. - -2. Run tripleo-repos to install the appropriate repositories. The option below - will enable the latest master TripleO packages, the latest promoted - packages for all other OpenStack services and dependencies and the latest - stable Ceph packages. There are other repository configurations available in - tripleo-repos, see its ``--help`` output for details. - - .. 
code-block:: bash - - sudo -E tripleo-repos current-tripleo-dev ceph diff --git a/deploy-guide/source/troubleshooting/index.rst b/deploy-guide/source/troubleshooting/index.rst deleted file mode 100644 index 8213494b..00000000 --- a/deploy-guide/source/troubleshooting/index.rst +++ /dev/null @@ -1,14 +0,0 @@ -Troubleshooting -=============== - -This section contains troubleshooting related topics for |project|. - -.. toctree:: - :maxdepth: 1 - - troubleshooting - troubleshooting-image-build - troubleshooting-log-and-status-capture - troubleshooting-nodes - troubleshooting-overcloud - troubleshooting-tripleo-heat-templates diff --git a/deploy-guide/source/troubleshooting/troubleshooting-image-build.rst b/deploy-guide/source/troubleshooting/troubleshooting-image-build.rst deleted file mode 100644 index 5d26438b..00000000 --- a/deploy-guide/source/troubleshooting/troubleshooting-image-build.rst +++ /dev/null @@ -1,16 +0,0 @@ -Troubleshooting Image Build ---------------------------- - -Images fail to build -^^^^^^^^^^^^^^^^^^^^ - -More space needed -^^^^^^^^^^^^^^^^^ - -Images are built in tmpfs by default, to speed up the builds. In case -your machine doesn't have enough free RAM, the image building step -can fail with a message like "At least 174MB more space needed on -the / filesystem". If freeing up more RAM isn't a possibility, -images can be built on disk by exporting an environment variable:: - - $ export DIB_NO_TMPFS=1 diff --git a/deploy-guide/source/troubleshooting/troubleshooting-log-and-status-capture.rst b/deploy-guide/source/troubleshooting/troubleshooting-log-and-status-capture.rst deleted file mode 100644 index 3559e947..00000000 --- a/deploy-guide/source/troubleshooting/troubleshooting-log-and-status-capture.rst +++ /dev/null @@ -1,67 +0,0 @@ -Performing Log and Status Capture ---------------------------------- - -The tripleoclient provides commands to allow operators to run sosreport on the -overcloud nodes and download the log and status log bundles with tripleoclient. -This can aide with troubleshooting problems as the results can be sent to an -external support for analysis. The `openstack overcloud support report -collect` command can be used to execute sosreport on select (or all) overcloud -nodes, upload the logs to swift running on the undercloud, and download the -logs to the host that the command is executed from. - - -Example: Download logs from all controllers -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -The required `server_name` option for the command can be a partial name -match for the overcloud nodes. This means `openstack overcloud support report -collect controller` will match all the overcloud nodes that contain the word -`controller`. To download the run the command and download them to a local -directory, run the following command:: - - $ openstack overcloud support report collect controller - -.. 
note:: By default if -o is not specified, the logs will be downloaded to a folder - in the current working directory called `support_logs` - - -Example: Download logs from a single host -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -To download logs from a specific host, you must specify the complete name as -reported by `openstack service list` from the undercloud:: - - $ openstack overcloud support report collect -o /home/stack/logs overcloud-novacompute-0 - - -Example: Leave logs in a swift container -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -If you want to perform a sosreport but do not currently wish to download the -logs, you can leave them in a swift container for later retrieval. The -``--collect-only`` and ``-c`` options can be leveraged to store the -logs in a swift container. For example:: - - $ openstack overcloud support report collect -c logs_20170601 --collect-only controller - -This will run sosreport on the nodes and upload the logs to a container named -`logs_20170601` on the undercloud. From which standard swift tooling can be -used to download the logs. Alternatively, you can then fetch the logs using -the `openstack overcloud support report collect` command by running:: - - $ openstack overcloud support report collect -c logs_20170601 --download-only -o /tmp/mylogs controller - -.. note:: There is a ``--skip-container-delete`` option that can be used if you - want to leave the logs in swift but still download them. This option - is ignored if ``--collect-only`` or ``--download-only`` options are - provided. - - -Additional Options -^^^^^^^^^^^^^^^^^^ - -The ``openstack overcloud support report collect`` command has additional -that can be passed to work with the log bundles. Run the command with -``--help`` to see additional options:: - - $ openstack overcloud support report collect --help diff --git a/deploy-guide/source/troubleshooting/troubleshooting-nodes.rst b/deploy-guide/source/troubleshooting/troubleshooting-nodes.rst deleted file mode 100644 index 75112471..00000000 --- a/deploy-guide/source/troubleshooting/troubleshooting-nodes.rst +++ /dev/null @@ -1,192 +0,0 @@ -Troubleshooting Node Management Failures -======================================== - -Where Are the Logs? -------------------- - -Some logs are stored in *journald*, but most are stored as text files in -``/var/log/containers``. They are only accessible by the root user. - -ironic-inspector -~~~~~~~~~~~~~~~~ - -The introspection logs (from ironic-inspector) are located in -``/var/log/containers/ironic-inspector``. If something fails during the introspection -ramdisk run, ironic-inspector stores the ramdisk logs in -``/var/log/ironic-inspector/ramdisk/`` as gz-compressed tar files. -File names contain date, time and IPMI address of the node if it was detected -(only for bare metal). - -To collect introspection logs on success as well, set -``always_store_ramdisk_logs = true`` in -``/etc/ironic-inspector/inspector.conf``, restart the -``openstack-ironic-inspector`` service and retry the introspection. - -.. _ironic_logs: - -ironic -~~~~~~ - -The deployment logs (from ironic) are located in ``/var/log/containers/ironic``. If -something goes wrong during deployment or cleaning, the ramdisk logs are -stored in ``/var/log/containers/ironic/deploy``. See `ironic logs retrieving documentation -`_ -for more details. - -.. 
_node_registration_problems: - -Node Registration and Management Problems ------------------------------------------ - -Nodes in enroll state after registration -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -If you see your nodes staying in the ``enroll`` provision state after the -registration process (which may hang due to this), it means that Ironic is -unable to verify power management credentials, and you need to fix them. -Check the ``pm_addr``, ``pm_user`` and ``pm_password`` fields in your -``instackenv.json``. In some cases (e.g. when using -:doc:`../environments/virtual`) you also need a correct ``pm_port``. -Update the node as explained in `Fixing invalid node information`_. - -Fixing invalid node information -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Any problems with node data registered into Ironic can be fixed using the -Ironic CLI. - -For example, a wrong MAC can be fixed in two steps: - -* Find out the assigned port UUID by running - :: - - $ baremetal port list --node - -* Update the MAC address by running - :: - - $ baremetal port set --address - -A Wrong IPMI address can be fixed with the following command:: - - $ baremetal node set --driver-info ipmi_address= - -Node power state is not enforced by Ironic -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -By default Ironic will not forcibly sync the power state of the nodes, -because in our HA (high availability) model Pacemaker is the -one responsible for controlling the power state of the nodes -when fencing. If you are using a non-HA setup and want Ironic -to take care of the power state of the nodes please change the -value of the ``force_power_state_during_sync`` configuration option -in the ``/etc/ironic/ironic.conf`` file to ``True`` and restart the -openstack-ironic-conductor service. - -Also, note that if ``openstack undercloud install`` is re-run the value of -the ``force_power_state_during_sync`` configuration option will be set back to -the default, which is ``False``. - -How do I repair broken nodes -~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Usually, the nodes should only be deleted when the hardware is decommissioned. -Before that, you're expected to remove instances from them using scale-down. -However, in some cases, it may be impossible to repair a node with e.g. broken -power management, and it gets stuck in an abnormal state. - -.. warning:: - Before proceeding with this section, always try to decommission a node - normally, by scaling down your cloud. Forcing node deletion may cause - unpredictable results. - -Ironic requires that nodes that cannot be operated normally are put in the -maintenance mode. It is done by the following command:: - - $ baremetal node maintenance set --reason "" - -Ironic will stop checking power and health state for such nodes, and Nova will -not pick them for deployment. Power command will still work on them, though. - -After a node is in the maintenance mode, you can attempt repairing it, e.g. by -`Fixing invalid node information`_. If you manage to make the node operational -again, move it out of the maintenance mode:: - - $ baremetal node maintenance unset - -If repairing is not possible, you can force deletion of such node:: - - $ baremetal node delete - -Forcing node removal will leave it powered on, accessing the network with -the old IP address(es) and with all services running. Before proceeding, make -sure to power it off and clean up via any means. - -After that, the associated Nova instance is orphaned, and must be deleted. -You can do it normally via the scale down procedure. - -.. 
_introspection_problems: - -Hardware Introspection Problems -------------------------------- - -Introspection hangs and times out -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -ironic-inspector times out introspection process after some time (defaulting to -1 hour) if it never gets response from the introspection ramdisk. This can be -a sign of a bug in the introspection ramdisk, but usually it happens due to -environment misconfiguration, particularly BIOS boot settings. Please refer to -`ironic-inspector troubleshooting documentation -`_ -for information on how to detect and fix such problems. - -Accessing the ramdisk -~~~~~~~~~~~~~~~~~~~~~ - -Note that the introspection ramdisk is by default built with the -`dynamic-login element -`_, -so you can set up an SSH key and log into it for debugging. - -First, think of a temporary root password. Generate a hash by feeding it -into ``openssl passwd -1`` command. Edit ``/httpboot/inspector.ipxe`` -manually. Find the line starting with "kernel" and append rootpwd="HASH" to it. -Do not append the real password. Alternatively, you can append -sshkey="PUBLIC_SSH_KEY" with your public SSH key. - -.. warning:: - In both cases quotation marks are required! - -When ramdisk is running, figure out its IP address by checking ``arp`` utility -or DHCP logs from - -:: - - $ sudo journalctl -u openstack-ironic-inspector-dnsmasq - -SSH as a root user with the temporary password or the SSH key. - -.. note:: - Some operating systems, such as RHEL and CentOS, require SELinux to be in permissive or disabled - mode so that you can log in to the image. This is achieved by building the - image with the selinux-permissive element for diskimage-builder or by - passing selinux=0 in the kernel command line. - -Refusing to introspect node with provision state "available" -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -If you're running introspection directly using ironic-inspector CLI (or in case -of bugs in our scripts), a node can be in the "AVAILABLE" state, which is meant -for deployment, not for introspection. You should advance node to the -"MANAGEABLE" state before introspection and move it back before deployment. -Please refer to `upstream node states documentation -`_ -for information on how to fix it. - -How can introspection be stopped? -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Introspection for a node can be stopped with the following command:: - - $ baremetal introspection abort diff --git a/deploy-guide/source/troubleshooting/troubleshooting-overcloud.rst b/deploy-guide/source/troubleshooting/troubleshooting-overcloud.rst deleted file mode 100644 index 0c909f93..00000000 --- a/deploy-guide/source/troubleshooting/troubleshooting-overcloud.rst +++ /dev/null @@ -1,300 +0,0 @@ -Troubleshooting a Failed Overcloud Deployment ---------------------------------------------- - -If an Overcloud deployment has failed, the OpenStack clients and service log -files can be used to troubleshoot the failed deployment. The following commands -are all run on the Undercloud and assume a stackrc file has been sourced. 
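For example, with a default undercloud installation the credentials file is
written to the stack user's home directory, so a troubleshooting session
typically starts with (the path is an assumption; adjust it if your
credentials file lives elsewhere)::

    $ source ~/stackrc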
- -Identifying Failed Component -^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -In most cases, Heat will show the failed overcloud stack when a deployment -has failed:: - - $ openstack stack list - - +--------------------------------------+------------+--------------------+----------------------+ - | id | stack_name | stack_status | creation_time | - +--------------------------------------+------------+--------------------+----------------------+ - | 7e88af95-535c-4a55-b78d-2c3d9850d854 | overcloud | CREATE_FAILED | 2015-04-06T17:57:16Z | - +--------------------------------------+------------+--------------------+----------------------+ - -Occasionally, Heat is not even able to create the stack, so the ``openstack -stack list`` output will be empty. If this is the case, observe the message -that was printed to the terminal when ``openstack overcloud deploy`` or ``openstack -stack create`` was run. - -Next, there are a few layers on which the deployment can fail: - -* Orchestration (Heat and Nova services) -* Bare metal provisioning (Ironic service) -* Post-deploy configuration (Puppet) - -As Ironic service is in the middle layer, you can use its shell to guess the -failed layer. Issue ``baremetal node list`` command to see all -registered nodes and their current status, you will see something like:: - - +--------------------------------------+------+---------------+-------------+-----------------+-------------+ - | UUID | Name | Instance UUID | Power State | Provision State | Maintenance | - +--------------------------------------+------+---------------+-------------+-----------------+-------------+ - | f1e26112-5fbd-4fc4-9612-ecce7a1d86aa | None | None | power off | available | False | - | f0b8c105-f1d7-4059-a9a3-b050c3340340 | None | None | power off | available | False | - +--------------------------------------+------+---------------+-------------+-----------------+-------------+ - -Pay close attention to **Provision State** and **Maintenance** columns -in the resulting table. - -* If the command shows empty table or less nodes that you expect, or - **Maintenance** is ``True``, or **Provision State** is ``manageable`` - or ``enroll``, there was a problem during node enrolling and introspection. - - You can check the actual cause using the following command:: - - $ baremetal node show -f value -c maintenance_reason - - For example, **Maintenance** goes to ``True`` automatically, if wrong power - credentials are provided. - - Fix the cause of the failure, then move the node out of the maintenance - mode:: - - $ baremetal node maintenance unset - -* If **Provision State** is ``available`` then the problem occurred before - bare metal deployment has even started. Proceed with `Debugging Using Heat`_. - -* If **Provision State** is ``active`` and **Power State** is ``power on``, - then bare metal deployment has finished successfully, and problem happened - during the post-deployment configuration step. Again, refer to `Debugging - Using Heat`_. - -* If **Provision State** is ``wait call-back``, then bare metal deployment is - not finished for this node yet. You may want to wait until the status - changes. - -* If **Provision State** is ``error`` or ``deploy failed``, then bare metal - deployment has failed for this node. Look at the **last_error** field:: - - $ baremetal node show -f value -c last_error - - If the error message is vague, you can use logs to clarify it, see - :ref:`ironic_logs` for details. 
- - If you see wait timeout error, and node **Power State** is ``power on``, - then try to connect to the virtual console of the failed machine. Use - ``virt-manager`` tool for virtual machines and vendor-specific virtual - console (e.g. iDRAC for DELL) for bare metal machines. - -Showing deployment failures -^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Deployment failures can be shown with the following command:: - - $ openstack overcloud failures --plan my-deployment - -The command will show any errors encountered when running ``ansible-playbook`` -to configure the overcloud during the ``config-download`` process. See -:ref:`config_download` for more information. - -Debugging Using Heat -^^^^^^^^^^^^^^^^^^^^ - -* Identifying the failed Heat resource - - List all the stack resources to see which one failed. - - :: - - $ openstack stack resource list overcloud - - +-----------------------------------+-----------------------------------------------+---------------------------------------------------+-----------------+----------------------+ - | resource_name | physical_resource_id | resource_type | resource_status | updated_time | - +-----------------------------------+-----------------------------------------------+---------------------------------------------------+-----------------+----------------------+ - | BlockStorage | 9e40a1ee-96d3-4920-868d-683d3788e129 | OS::Heat::ResourceGroup | CREATE_COMPLETE | 2015-04-06T21:15:20Z | - | BlockStorageAllNodesDeployment | 2c453f6b-7378-44c8-a0ad-57de57d9c57f | OS::Heat::StructuredDeployments | CREATE_COMPLETE | 2015-04-06T21:15:20Z | - | BlockStorageNodesPostDeployment | | OS::TripleO::BlockStoragePostDeployment | INIT_COMPLETE | 2015-04-06T21:15:20Z | - | CephClusterConfig | 1684e7a3-0e42-44fe-9db4-7543b742fbfc | OS::TripleO::CephClusterConfig::SoftwareConfig | CREATE_COMPLETE | 2015-04-06T21:15:20Z | - | CephStorage | 48b3460c-bf9a-4663-99fc-2b4fa01b8dc1 | OS::Heat::ResourceGroup | CREATE_COMPLETE | 2015-04-06T21:15:20Z | - | CephStorageAllNodesDeployment | 76beb3a9-8327-4d2e-a206-efe12f1613fb | OS::Heat::StructuredDeployments | CREATE_COMPLETE | 2015-04-06T21:15:20Z | - | CephStorageCephDeployment | af8fb02a-5bc6-468c-8fac-fbe7e5b2c689 | OS::Heat::StructuredDeployments | CREATE_COMPLETE | 2015-04-06T21:15:20Z | - | CephStorageNodesPostDeployment | | OS::TripleO::CephStoragePostDeployment | INIT_COMPLETE | 2015-04-06T21:15:20Z | - | Compute | e5e6ec84-197f-4bf6-b8ac-eb11fe494cdf | OS::Heat::ResourceGroup | CREATE_COMPLETE | 2015-04-06T21:15:20Z | - | ComputeAllNodesDeployment | e6d44fbf-9683-4765-acbb-4a3d31c8fd48 | OS::Heat::StructuredDeployments | CREATE_COMPLETE | 2015-04-06T21:15:20Z | - | ControllerNodesPostDeployment | e551e472-f2db-4468-b586-0374678d71a3 | OS::TripleO::ControllerPostDeployment | CREATE_FAILED | 2015-04-06T21:15:20Z | - | ComputeCephDeployment | 673608d5-70d7-453a-ac78-7987bc2c0158 | OS::Heat::StructuredDeployments | CREATE_COMPLETE | 2015-04-06T21:15:20Z | - | ComputeNodesPostDeployment | 1078e3e3-9f6f-48b9-8961-a30f44098856 | OS::TripleO::ComputePostDeployment | CREATE_COMPLETE | 2015-04-06T21:15:20Z | - | ControlVirtualIP | 6402b396-84aa-4cf6-9849-305205755604 | OS::Neutron::Port | CREATE_COMPLETE | 2015-04-06T21:15:20Z | - | Controller | ffc45352-9708-486d-81ac-3b60efa8e8b8 | OS::Heat::ResourceGroup | CREATE_COMPLETE | 2015-04-06T21:15:20Z | - | ControllerAllNodesDeployment | f73c6e33-3dd2-46f1-9eca-0d2981a4a986 | OS::Heat::StructuredDeployments | CREATE_COMPLETE | 2015-04-06T21:15:20Z | - | ControllerBootstrapNodeConfig | 
01ce5b6a-794a-4828-bad9-49d5fbfd55bf | OS::TripleO::BootstrapNode::SoftwareConfig | CREATE_COMPLETE | 2015-04-06T21:15:20Z | - | ControllerBootstrapNodeDeployment | c963d53d-879b-4a41-a10a-9000ac9f02a1 | OS::Heat::StructuredDeployments | CREATE_COMPLETE | 2015-04-06T21:15:20Z | - | ControllerCephDeployment | 2d4281df-31ea-4433-820d-984a6dca6eb1 | OS::Heat::StructuredDeployments | CREATE_COMPLETE | 2015-04-06T21:15:20Z | - | ControllerClusterConfig | 719c0d30-a4b8-4f77-9ab6-b3c9759abeb3 | OS::Heat::StructuredConfig | CREATE_COMPLETE | 2015-04-06T21:15:20Z | - | ControllerClusterDeployment | d929aa40-1b73-429e-81d5-aaf966fa6756 | OS::Heat::StructuredDeployments | CREATE_COMPLETE | 2015-04-06T21:15:20Z | - | ControllerSwiftDeployment | cf28f9fe-025d-4eed-b3e5-3a5284a2aa60 | OS::Heat::StructuredDeployments | CREATE_COMPLETE | 2015-04-06T21:15:20Z | - | HeatAuthEncryptionKey | overcloud-HeatAuthEncryptionKey-5uw6wo7kavnq | OS::Heat::RandomString | CREATE_COMPLETE | 2015-04-06T21:15:20Z | - | MysqlClusterUniquePart | overcloud-MysqlClusterUniquePart-vazyj2s4n2o5 | OS::Heat::RandomString | CREATE_COMPLETE | 2015-04-06T21:15:20Z | - | MysqlRootPassword | overcloud-MysqlRootPassword-nek2iky7zfdm | OS::Heat::RandomString | CREATE_COMPLETE | 2015-04-06T21:15:20Z | - | ObjectStorage | 47327c98-533e-4cc2-b1f3-d8d0eedba822 | OS::Heat::ResourceGroup | CREATE_COMPLETE | 2015-04-06T21:15:20Z | - | ObjectStorageAllNodesDeployment | 7bb691aa-fa93-4f10-833e-6edeccc61408 | OS::Heat::StructuredDeployments | CREATE_COMPLETE | 2015-04-06T21:15:20Z | - | ObjectStorageNodesPostDeployment | d4d16f39-384a-4d6a-9719-1dd9b2d4ff09 | OS::TripleO::ObjectStoragePostDeployment | CREATE_COMPLETE | 2015-04-06T21:15:20Z | - | ObjectStorageSwiftDeployment | afc87385-8b40-4097-b529-2a5bc81c94c8 | OS::Heat::StructuredDeployments | CREATE_COMPLETE | 2015-04-06T21:15:20Z | - | PublicVirtualIP | 4dd92878-8f29-49d8-9d3d-bc0cd44d26a9 | OS::Neutron::Port | CREATE_COMPLETE | 2015-04-06T21:15:20Z | - | RabbitCookie | overcloud-RabbitCookie-uthzbos3l66v | OS::Heat::RandomString | CREATE_COMPLETE | 2015-04-06T21:15:20Z | - | SwiftDevicesAndProxyConfig | e2141170-bb77-4509-b8bd-58447b2cd15f | OS::TripleO::SwiftDevicesAndProxy::SoftwareConfig | CREATE_COMPLETE | 2015-04-06T21:15:20Z | - | allNodesConfig | cbd42692-fffa-4527-a519-bd4014ebf0fb | OS::TripleO::AllNodes::SoftwareConfig | CREATE_COMPLETE | 2015-04-06T21:15:20Z | - +-----------------------------------+-----------------------------------------------+---------------------------------------------------+-----------------+----------------------+ - - In this example, notice how the **ControllerNodesPostDeployment** resource - has failed. The **\*PostDeployment** resources are the configuration that is - applied to the deployed Overcloud nodes. When these resources have failed it - indicates that something went wrong during the Overcloud node configuration, - perhaps when Puppet was run. 
- -* Show the failed resource - - :: - - $ openstack stack resource show overcloud ControllerNodesPostDeployment - - +------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | Property | Value | - +------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | attributes | {} | - | description | | - | links | http://192.168.24.1:8004/v1/cea2a0c78d2447bc9a0f7caa35c9224c/stacks/overcloud/ec3e3251-f949-4df9-92be-dbd37c6992a1/resources/ControllerNodesPostDeployment (self) | - | | http://192.168.24.1:8004/v1/cea2a0c78d2447bc9a0f7caa35c9224c/stacks/overcloud/ec3e3251-f949-4df9-92be-dbd37c6992a1 (stack) | - | | http://192.168.24.1:8004/v1/cea2a0c78d2447bc9a0f7caa35c9224c/stacks/overcloud-ControllerNodesPostDeployment-6kcqm5zuymqu/e551e472-f2db-4468-b586-0374678d71a3 (nested) | - | logical_resource_id | ControllerNodesPostDeployment | - | physical_resource_id | e551e472-f2db-4468-b586-0374678d71a3 | - | required_by | BlockStorageNodesPostDeployment | - | | CephStorageNodesPostDeployment | - | resource_name | ControllerNodesPostDeployment | - | resource_status | CREATE_FAILED | - | resource_status_reason | ResourceUnknownStatus: Resource failed - Unknown status FAILED due to "None" | - | resource_type | OS::TripleO::ControllerPostDeployment | - | updated_time | 2015-04-06T21:15:20Z | - +------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - - The ``resource show`` doesn't always show a clear reason why the resource - failed. In these cases, logging into the Overcloud node is required to - further troubleshoot the issue. - -* Logging into Overcloud nodes - - Use the nova client to see the IP addresses of the Overcloud nodes. - - :: - - $ openstack server list - - +--------------------------------------+-------------------------------------------------------+--------+------------+-------------+---------------------+ - | ID | Name | Status | Task State | Power State | Networks | - +--------------------------------------+-------------------------------------------------------+--------+------------+-------------+---------------------+ - | 18014b02-b143-4ca2-aeb9-5553bec93cff | ov-4tvbtgpv7w-0-soqocxy2w4fr-NovaCompute-nlrxd3lgmmlt | ACTIVE | - | Running | ctlplane=192.168.24.13 | - | 96a57a46-1e48-4c66-adaa-342ee4e98972 | ov-rf4hby6sblk-0-iso3zlqmyzfe-Controller-xm2imjkzalhi | ACTIVE | - | Running | ctlplane=192.168.24.14 | - +--------------------------------------+-------------------------------------------------------+--------+------------+-------------+---------------------+ - - Login as the ``heat-admin`` user to one of the deployed nodes. In this - example, since the **ControllerNodesPostDeployment** resource failed, login - to the controller node. The ``heat-admin`` user has sudo access. - - :: - - $ ssh heat-admin@192.168.24.14 - - While logged in to the controller node, examine the log for the - ``os-collect-config`` log for a possible reason for the failure. - - :: - - $ sudo journalctl -u os-collect-config - -* Failed Nova Server ResourceGroup Deployments - - In some cases, Nova fails deploying the node in entirety. 
This situation - would be indicated by a failed ``OS::Heat::ResourceGroup`` for one of the - Overcloud role types such as Control or Compute. - - Use nova to see the failure in this case. - - :: - - $ openstack server list - $ openstack server show - - The most common error shown will reference the error message ``No valid host - was found``. Refer to `No Valid Host Found Error`_ below. - - In other cases, look at the following log files for further troubleshooting:: - - /var/log/containers/nova/* - /var/log/containers/heat/* - /var/log/containers/ironic/* - -* Using SOS - - SOS is a set of tools that gathers information about system hardware and - configuration. The information can then be used for diagnostic purposes and - debugging. SOS is commonly used to help support technicians and developers. - - SOS is useful on both the undercloud and overcloud. Install the ``sos`` - package and then generate a report:: - - $ sudo sosreport --all-logs - -.. _no-valid-host: - -No Valid Host Found Error -^^^^^^^^^^^^^^^^^^^^^^^^^ - -Sometimes ``/var/log/containers/nova/nova-conductor.log`` contains the following error:: - - NoValidHost: No valid host was found. There are not enough hosts available. - -"No valid host was found" means that the Nova Scheduler could not find a bare -metal node suitable for booting the new instance. - -This in turn usually means some mismatch between resources that Nova expects -to find and resources that Ironic advertised to Nova. - -Start with checking `Ironic troubleshooting guide on this topic -`_. - -If you're using advanced profile matching with multiple flavors, make sure -you have enough nodes corresponding to each flavor/profile. Watch -``capabilities`` key in the output of - -:: - - $ baremetal node show --fields properties - -It should contain e.g. ``profile:compute`` for compute nodes. - - -Debugging OpenStack services -^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Since Pike release, TripleO now offers an easy way to enable per-service debug -without relying on knowledge of the puppet interfaces. -Each OpenStack service has now its own Debug parameter. - -* Operators who want to enable Debug everywhere will set ``Debug`` to ``true``. -* Operators who want to disable Debug everywhere will set ``Debug`` to ``false``. -* Operators who want to disable Debug everywhere except for Glance will set ``Debug`` to - ``false`` and ``GlanceDebug`` to ``true``. -* Operators who want to enable Debug everywhere except for Glance will set ``Debug`` to - ``true`` and ``GlanceDebug`` to ``false``. - -Glance was an example, but all OpenStack services are supported. You can find their Debug -in the TripleO Heat Templates composable services. - -It is also possible to :ref:`toggle debug` for services after deployment. - -Manually Run the Deployment -^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -The deployment can be replicated by `manually running the config-download Ansible playbooks -`_. -This also allows the Ansible inventory, variables, playbooks, tasks, and more to be modified to -help narrow down issues. Consider using ``ansible-playbook --start-at-task`` to more quickly -troubleshooting an error. 
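A minimal sketch of such a manual run, assuming the rendered config-download
directory was saved to ``~/config-download/overcloud`` (the location varies by
release and workflow, and the task name is a placeholder to be taken from the
failure output; the playbook and inventory file names are those generated by
recent config-download versions)::

    $ cd ~/config-download/overcloud
    $ ansible-playbook -i inventory.yaml deploy_steps_playbook.yaml \
        --limit Controller \
        --start-at-task "<task name from the failure output>"

Keeping the generated inventory ensures the run targets the same overcloud
nodes as the failed deployment; dropping ``--limit`` replays the configuration
on all roles.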
diff --git a/deploy-guide/source/troubleshooting/troubleshooting-tripleo-heat-templates.rst b/deploy-guide/source/troubleshooting/troubleshooting-tripleo-heat-templates.rst deleted file mode 100644 index 4ac6c7bb..00000000 --- a/deploy-guide/source/troubleshooting/troubleshooting-tripleo-heat-templates.rst +++ /dev/null @@ -1,8 +0,0 @@ -Debugging TripleO Heat Templates ------------------------------------ - -Useful Links -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Please follow this flow: -http://hardysteven.blogspot.co.uk/2015/04/debugging-tripleo-heat-templates.html diff --git a/deploy-guide/source/troubleshooting/troubleshooting.rst b/deploy-guide/source/troubleshooting/troubleshooting.rst deleted file mode 100644 index 575653a2..00000000 --- a/deploy-guide/source/troubleshooting/troubleshooting.rst +++ /dev/null @@ -1,14 +0,0 @@ -Troubleshooting -=============== - -At this chapter you will find answers for frequently asked questions and -help with troubleshooting when using |project|. - -.. toctree:: - :maxdepth: 2 - - troubleshooting-image-build - troubleshooting-nodes - troubleshooting-overcloud - troubleshooting-tripleo-heat-templates - troubleshooting-log-and-status-capture diff --git a/doc/requirements.txt b/doc/requirements.txt deleted file mode 100644 index 3f3c68f2..00000000 --- a/doc/requirements.txt +++ /dev/null @@ -1,12 +0,0 @@ -# The order of packages is significant, because pip processes them in the order -# of appearance. Changing the order has an impact on the overall integration -# process, which may cause wedges in the gate later. - -sphinx>=2.0.0,!=2.1.0 # BSD -sphinxcontrib-svg2pdfconverter>=0.1.0 # BSD -sphinxcontrib-mermaid>=0.3.1,!=0.6.1 # BSD -requests -pyquery - -# needed because sphinxcontrib-mermaid calls it but doesn't require it -six diff --git a/doc/source/ci/_images/baremetal-jobs.svg b/doc/source/ci/_images/baremetal-jobs.svg deleted file mode 100644 index 64624875..00000000 --- a/doc/source/ci/_images/baremetal-jobs.svg +++ /dev/null @@ -1,2 +0,0 @@ - -
[Figure: baremetal-jobs.svg (deleted). Its labels described the baremetal job workflow inside the Internal Software Factory: a Zuul periodic job triggers an Executor (Nodepool slave VM), which installs the undercloud ("uc") on a large VM and then runs "overcloud deploy" against the Overcloud ("oc") baremetal nodes taken from the envE hardware pool.]
\ No newline at end of file diff --git a/doc/source/ci/_images/grafana1.png b/doc/source/ci/_images/grafana1.png deleted file mode 100644 index 83a6e64d..00000000 Binary files a/doc/source/ci/_images/grafana1.png and /dev/null differ diff --git a/doc/source/ci/_images/grafana2.png b/doc/source/ci/_images/grafana2.png deleted file mode 100644 index 275450ca..00000000 Binary files a/doc/source/ci/_images/grafana2.png and /dev/null differ diff --git a/doc/source/ci/_images/grafana3.png b/doc/source/ci/_images/grafana3.png deleted file mode 100644 index 8949dff1..00000000 Binary files a/doc/source/ci/_images/grafana3.png and /dev/null differ diff --git a/doc/source/ci/_images/sova.png b/doc/source/ci/_images/sova.png deleted file mode 100644 index 82d92601..00000000 Binary files a/doc/source/ci/_images/sova.png and /dev/null differ diff --git a/doc/source/ci/baremetal_jobs.rst b/doc/source/ci/baremetal_jobs.rst deleted file mode 100644 index 396af4d2..00000000 --- a/doc/source/ci/baremetal_jobs.rst +++ /dev/null @@ -1,279 +0,0 @@ -Baremetal jobs -============== - -This section gives an overview and some details on the baremetal CI jobs. The -baremetal deployment is intended as a multinode real world "production-like" -environment for TripleO. - see `Baremetal deploy guide `_ -for more information on setting up a baremetal environment. - -The baremetal jobs, previously running in the RDO Phase 2 of the promotion -pipeline from Jenkins servers, now are triggered from an internal Software -Factory instance of Zuul. These promotion jobs testing containers built on -tripleo-ci-testing hashes run on real baremetal hardware, report to dlrn and -can be included in the TripleO promotion criteria. - -The goal is to give developers feedback on real deployments and allow us to -have better coverage on issues seen in production environments. It also -allows an approximation of OVB jobs running in RDO cloud in order to get an -"apples-to-apples" comparison to eliminate infra issues. - -.. _baremetal_deploy_guide: https://docs.openstack.org/project-deploy-guide/tripleo-docs/latest/provisioning/index.html - -Where ------ - -The hardware is maintained internally and cannot be accessed by upstream -Zuul or RDO Cloud. The internal Software Factory instance provides a version -of infra upstream tools as Zuul, Gerrit and Nodepool for running the defined -baremetal jobs. Refer to `Software Factory Documentation `_ for more details. - -The jobs will use hardware_environments//instackenv.json file and the -hardware_environments//network_configs/single_nic_vlans settings file. -These configurations are explored in more detail below. - -.. _Software_Factory_documentation: https://softwarefactory-project.io/docs/index.html - - -How ---- - -The baremetal job workflow is described as follows: - - 1. The baremetal jobs are triggered in the periodic pipeline and initially - run on a Nodepool node that can be called as "executor", where the job - starts executing its playbooks and roles. - - 2. The job ssh's to the baremetal machine which will host the undercloud vm - and creates a new vm on which the undercloud will be installed and set - up. - - 3. Finally the undercloud VM deploys the overcloud on real baremetal nodes - defined in the instackenv.json configuration over pxe boot. - -This workflow for baremetal jobs is illustrated in the following figure: - -.. 
image:: ./_images/baremetal-jobs.svg - - -Parenting from upstream and RDO repos -------------------------------------- - -Jobs that run from internal Zuul can parent off, and use resources (jobs, -roles etc.) from, upstream (review.opendev.org) and RDO -(review.rdoproject.org) repos. As such, duplication can be kept to a minimum -and jobs that run internally on baremetal hardware can maintain parity with -OVB jobs run in RDO Cloud. - -For example, a base TripleO CI job in Zuul :: - - - job: - name: tripleo-ci-base-baremetal - abstract: true - description: | - Base abstract job for Baremetal TripleO - parent: tripleo-ci-base - nodeset: tripleo-baremetal-centos-7-primary - attempts: 1 - required-projects: - - rdo-jobs - roles: - - zuul: rdo-jobs - pre-run: - - playbooks/configure-mirrors.yaml - - playbooks/copy-env-vars-baremetal.yaml - vars: - # must be overridden - undercloud: - environment_infra: baremetal - environment_type: baremetal - playbooks: - - baremetal-prep-virthost.yml - - baremetal-full-undercloud.yml - - baremetal-full-overcloud-prep.yml - - baremetal-full-overcloud.yml - - baremetal-full-overcloud-validate.yml - tags: - - all - -Now adding the dlrn reporting :: - - - job: - name: tripleo-ci-base-baremetal-dlrn - parent: tripleo-ci-base-baremetal - abstract: true - description: | - Base abstract job to do DLRN reporting - required-projects: - - config - roles: - - zuul: config - pre-run: - - playbooks/dlrn/pre-dlrn.yaml - post-run: - - playbooks/dlrn/post-dlrn.yaml - secrets: - - dlrnapi - -Example of a specific hardware job in Zuul: - -Note that multiple jobs cannot be run on the hardware concurrently. -The base job is modified to include semaphore -https://zuul-ci.org/docs/zuul/user/config.html#semaphore to run -each only one at a time :: - - - job: - name: tripleo-ci-base-baremetal-dlrn-my_env - abstract: true - parent: tripleo-ci-base-baremetal-dlrn - vars: - baremetal_env_vars: >- - {{ local_working_dir }}/hardware_environments/my_env//env_settings.yml - undercloud: - semaphore: - name: my_env - - - job: - name: periodic-tripleo-ci-centos-7-baremetal-3ctlr_1comp-featureset001-master - parent: tripleo-ci-base-baremetal-dlrn-my_env - vars: - nodes: 3ctlr_1comp - featureset: '001' - release: master - - -Hardware Settings ------------------ - -An example of hardware settings for baremetal environment 'my_env' is shown -below: - -hardware_environments / my_env / network_configs / single_nic_vlans / - env_settings.yml :: - - environment_type: my_env - - # undercloud.conf settings - undercloud_network_cidr: 10.10.10.0/26 - undercloud_local_ip: 10.10.10.1/26 - undercloud_network_gateway: 10.10.10.100 - undercloud_undercloud_public_vip: 10.10.10.2 - undercloud_undercloud_admin_vip: 10.10.10.3 - undercloud_local_interface: eth1 - undercloud_masquerade_network: 10.10.10.0/26 - undercloud_dhcp_start: 10.10.10.5 - undercloud_dhcp_end: 10.10.10.24 - undercloud_inspection_iprange: 10.10.10.25,10.10.10.39 - undercloud_undercloud_nameservers: 10.10.10.200 - network_isolation_ipv4_cidr: 10.10.10.64/26 - undercloud_external_network_cidr: 10.10.10.64/26 - - # undercloud vm settings - virthost_provisioning_interface: eno2 - virthost_provisioning_ip: 10.10.10.4 - virthost_provisioning_netmask: 255.255.255.192 - virthost_provisioning_hwaddr: FF:FF:FF:FF:FF:FF - virthost_ext_provision_interface: eno1 - - undercloud_memory: 28672 - undercloud_disk: 80 - undercloud_vcpu: 8 - - undercloud_instackenv_template: >- - {{ local_working_dir }}/hardware_environments/my_env/instackenv.json - - 
undercloud_type: virtual - step_introspect: true - introspect: true - - # network-environment.yaml settings - network_environment_args: - InternalApiNetCidr: 172.21.33.0/24 - StorageNetCidr: 172.21.36.0/24 - StorageMgmtNetCidr: 172.21.35.0/24 - TenantNetCidr: 172.16.0.0/24 - ExternalNetCidr: 10.10.10.64/26 - BondInterfaceOvsOptions: "mode=4 lacp_rate=fast" - InternalApiAllocationPools: [{'start': '172.21.33.10', 'end': '172.21.33.200'}] - StorageAllocationPools: [{'start': '172.21.36.10', 'end': '172.21.36.200'}] - StorageMgmtAllocationPools: [{'start': '172.21.35.10', 'end': '172.21.35.200'}] - TenantAllocationPools: [{'start': '172.16.0.10', 'end': '172.16.0.200'}] - # Leave room for floating IPs starting at .128 - ExternalAllocationPools: [{'start': '10.10.10.101', 'end': '10.10.10.120'}] - ExternalInterfaceDefaultRoute: 10.10.10.130 - InternalApiNetworkVlanID: 1272 - StorageNetworkVlanID: 1273 - StorageMgmtNetworkVlanID: 1274 - ExternalNetworkVlanID: 113 - TenantNetworkVlanID: 1275 - NeutronExternalNetworkBridge: "''" - PublicVirtualFixedIPs: [{"ip_address": "10.10.10.90"}] - ControlPlaneSubnetCidr: "26" - ControlPlaneDefaultRoute: 10.10.10.1 - EC2MetadataIp: 10.10.10.1 - DnsServers: ["8.8.8.8", "8.8.4.4"] - NtpServer: ["216.239.35.12","time.google.com","0.north-america.pool.ntp.org"] - - step_root_device_size: false - step_install_upstream_ipxe: false - hw_env: my_env - enable_vbmc: false - -hardware_environments / my_env / instackenv.json :: - - { - "nodes": [ - { - "pm_password": "", - "pm_type": "ipmi", - "mac": [ - "FF:FF:FF:FF:FF:FF" - ], - "cpu": "12", - "memory": "32768", - "disk": "558", - "arch": "x86_64", - "pm_user": "Administrator", - "pm_addr": "10.1.1.11" - }, - { - "pm_password": "", - "pm_type": "ipmi", - "mac": [ - "FF:FF:FF:FF:FF:FF" - ], - "cpu": "12", - "memory": "32768", - "disk": "558", - "arch": "x86_64", - "pm_user": "Administrator", - "pm_addr": "10.1.1.12" - }, - { - "pm_password": "", - "pm_type": "ipmi", - "mac": [ - "FF:FF:FF:FF:FF:FF" - ], - "cpu": "12", - "memory": "32768", - "disk": "558", - "arch": "x86_64", - "pm_user": "Administrator", - "pm_addr": "10.1.1.13" - }, - { - "pm_password": "", - "pm_type": "ipmi", - "mac": [ - "FF:FF:FF:FF:FF:FF" - ], - "cpu": "12", - "memory": "32768", - "disk": "558", - "arch": "x86_64", - "pm_user": "Administrator", - "pm_addr": "10.1.1.14" - } - ] - } - diff --git a/doc/source/ci/chasing_promotions.rst b/doc/source/ci/chasing_promotions.rst deleted file mode 100644 index 1b685337..00000000 --- a/doc/source/ci/chasing_promotions.rst +++ /dev/null @@ -1,259 +0,0 @@ -Chasing CI promotions -===================== - -The purpose of this document is to go into more detail about the TripleO -promotion from the point of view of the ci-squad `ruck|rover`_. - -There is other documentation in this repo which covers the stages of the -Tripleo-CI promotion pipeline in promotion-stages-overview_ and also about -relevant tooling such as the dlrn-api-promoter_. - -Ensuring promotions are happening regularly (including for all current -stable/ branches) is one of the biggest responsibilities of the ruck|rover. As -explained in promotion-stages-overview_ the CI promotion represents the point -at which we test all the tripleo-* things against the rest of openstack. The -requirement is that there is a successful promotion (more on that below) at -least once a week. 
Otherwise the branch will be considered 'in the red' as in -"master promotion is red" or "we are red for stein promotion" meaning was no -promotion in (at least) 7 days for that branch. - -Successful promotion --------------------- - -So what does it actually mean to have a "successful promotion". In short: - - * The TripleO periodic jobs have to run to completion and - * The periodic jobs in the promotion criteria must pass and - * The promoter server must be running in order to actually notice - the job results and promote! - -Each of these is explained in more detail below. - -TripleO periodic jobs ---------------------- - -The TripleO periodic jobs are `ci jobs`_ that are executed in one of the TripleO -periodic pipelines. At time of writing we have four periodic pipelines defined -in the `config repo zuul pipelines`_:: - - * openstack-periodic-master - * openstack-periodic-latest-released - * openstack-periodic-24hr - * openstack-periodic-wednesday-weekend - -These pipelines are *periodic* because unlike the check and gate pipelines -(see `ci jobs`_ for more on those) jobs that run on each submitted code review, -periodic jobs are executed *periodically*, at an interval given in cron syntax -as you can see in the definitions at `config repo zuul pipelines`_):: - - - pipeline: - name: openstack-periodic-master - post-review: true - description: Jobs in this queue are triggered to run every few hours. - manager: independent - precedence: high - trigger: - timer: - - time: '10 0,12,18 * * *' - -As can be seen at time of writing the openstack-periodic-master jobs -will run three times every day, at 10 minutes after midnight, noon and 6pm. - -The four pipelines correspond to the four latest releases of OpenStack. -The openstack-periodic-master_ runs jobs for master promotion, -openstack-periodic-latest-released_ runs jobs for the latest stable branch -promotion, openstack-periodic-24hr_ runs jobs for the stable branch before that -and finally openstack-periodic-wednesday-weekend_ runs jobs for the stable -branch before that. - -You can see the full list of jobs that are executed in the pipelines -in the `rdo-infra periodic zuul layout`_. - -It is important to finally highlight a common pattern in the pipeline layout. -In each case the first job that must complete is the -'promote-consistent-to-tripleo-ci-testing' which is where we take the latest -consistent hash and mark it as tripleo-ci-testing to become our new candidate -(see promotion-stages-overview_) to be used by the rest of the jobs in our -pipeline. You will note that this is the only job that doesn't have any dependency:: - - ... - - periodic-tripleo-ci-rhel-8-ovb-3ctlr_1comp-featureset001-master: - dependencies: - - periodic-tripleo-rhel-8-buildimage-ironic-python-agent-master - - periodic-tripleo-rhel-8-master-containers-build-push - - periodic-tripleo-centos-7-master-promote-consistent-to-tripleo-ci-testing - ... - -Then the containers and overcloud image build jobs must complete and only then -we finally run the rest of the jobs. These ordering requirements are expressed -using dependencies in the layout:: - - ... 
- - periodic-tripleo-rhel-8-buildimage-overcloud-full-master: - dependencies: - - periodic-tripleo-centos-7-master-promote-consistent-to-tripleo-ci-testing - - periodic-tripleo-rhel-8-buildimage-ironic-python-agent-master: - dependencies: - - periodic-tripleo-centos-7-master-promote-consistent-to-tripleo-ci-testing - - periodic-tripleo-ci-centos-7-ovb-1ctlr_1comp-featureset002-master-upload: - dependencies: - - periodic-tripleo-centos-7-master-containers-build-push - .. - -As can be seen above the build image jobs depend on the promote-consistent job -and then everything else in the layout depends on the container build job. - -Promotion Server and Criteria ------------------------------ - -The promotion server is maintained by the Tripleo-CI squad at a secret location -(!) and it runs the code from the `DLRN API Promoter`_ as a service. In short, -the job of this service is to fetch the latest hashes from the `RDO delorean -service`_ and then query the state of the periodic jobs using that particular -hash. - -The main input to the promotion server is the configuration which defines -the `promotion criteria`_. This is the list of jobs that must pass so that we -can declare a successful promotion:: - - [current-tripleo] - periodic-tripleo-centos-7-master-containers-build-push - periodic-tripleo-ci-centos-7-ovb-3ctlr_1comp-featureset001-master - periodic-tripleo-ci-centos-7-ovb-1ctlr_1comp-featureset002-master-upload - periodic-tripleo-ci-centos-7-multinode-1ctlr-featureset010-master - periodic-tripleo-ci-centos-7-scenario001-standalone-master - periodic-tripleo-ci-centos-7-scenario002-standalone-master - periodic-tripleo-ci-centos-7-scenario003-standalone-master - ... - -The promoter service queries the delorean service for the results of those -jobs (for a given hash) and if they are all found to be in SUCCESS then the -hash can be promoted to become the new current-tripleo_. - -It is a common practice for TripleO CI ruck or rover to check the -`indexed promoter service logs`_ to see why a given promotion is not successful -for example or when debugging issues with the promotion code itself. - -Hack the promotion with testproject ------------------------------------ - -Finally testproject_ and the ability to run individual periodic jobs on -demand is an important part of the ruck|rover toolbox. In some cases you may -want to run a job for verification of a given launchpad bug that affects -periodic jobs. - -However another important use is when the ruck|rover notice that one of the -jobs in criteria failed on something they (now) know how to fix, or on some -unrelated/transient issue. Instead of waiting another 6 or however many hours -for the next periodic to run, you can try to run the job yourself using -testproject. If the job is successful in testproject and -it is the only job missing from criteria then posting the testproject review -can also mean directly causing the promotion to happen. - -You first need to checkout testproject:: - - git clone https://review.rdoproject.org/r/testproject - cd testproject - vim .zuul.layout - -To post a testproject review you simply need to add a .zuul.layout_ file:: - - - project: - check: - jobs: - - periodic-tripleo-centos-7-train-containers-build-push: - vars: - force_periodic: true - -So the above would run the periodic-tripleo-centos-7-train-containers-build-push. 
-Note the required *force_periodic* variable which causes the job to run as -though it is in the periodic pipeline, rather than in the check pipeline which -you will use in testproject. - -An `example is there`_ and if you need to include a known fix you can simply -have a Depends-On in the commit message. - -Specifying a particular hash -~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Jobs in the periodic promotion pipelines are using the tripleo-ci-testing -repo as described in the promotion-stages-overview_, since that is the candidate -we are trying to promote to current-tripleo. The tripleo-ci-testing and all -other named tags in tripleo, are associated with a particular *hash* that -identifies the delorean repo. For example looking at `centos7 master tripleo-ci-testing`_ -at time of writing we see:: - - [delorean] - name=delorean-tripleo-ansible-544864ccc03b053317f5408b0c0349a42723ce73 - baseurl=https://trunk.rdoproject.org/centos7/54/48/544864ccc03b053317f5408b0c0349a42723ce73_ebb98bd9 - enabled=1 - gpgcheck=0 - priority=1 - -So the centos7 master tripleo-ci-testing *hash* is -*544864ccc03b053317f5408b0c0349a42723ce73_ebb98bd9a*. The corresponding repo -is given by the baseurl above and if you navigate to that URL with your -browser you can see the list of packages used in the jobs. Thus, the job -specified in the example above for testproject -*periodic-tripleo-centos-7-train-containers-build-push* would use whatever -the current tripleo-ci-testing points to. - -However it is possible to override the particular hash (and thus repo) used by -a job you run with testproject, using the dlrn_hash_tag featureset_override:: - - - project: - check: - jobs: - - periodic-tripleo-ci-centos-7-ovb-1ctlr_1comp-featureset002-train-upload: - vars: - force_periodic: true - featureset_override: - dlrn_hash_tag: 4b32d316befe0919fd98a147d84086bc0907677a_046903a2 - -Thus, in the example above the periodic-tripleo-ci-centos-7-ovb-1ctlr_1comp-featureset002-train-upload -job would run with the hash: *4b32d316befe0919fd98a147d84086bc0907677a_046903a2* -regardless of the current value of tripleo-ci-testing. - -The most common reason for overriding the hash in this way is when we notice -that a particular job failed during one of the recent periodic pipeline runs. -Looking at one of the `indexed promoter service logs`_ you may notice something -like the following text:: - - 2020-02-21 03:57:07,458 31360 INFO promoter Skipping promotion of centos7-master - {'timestamp': 1582243926, 'distro_hash': 'ebb98bd9545e026f033683143ae39e9e236b3671', - 'promote_name': 'tripleo-ci-testing', 'user': 'review_rdoproject_org', - 'repo_url': 'https://trunk.rdoproject.org/centos7/54/48/544864ccc03b053317f5408b0c0349a42723ce73_ebb98bd9', - 'full_hash': '544864ccc03b053317f5408b0c0349a42723ce73_ebb98bd9', - 'repo_hash': '544864ccc03b053317f5408b0c0349a42723ce73_ebb98bd9', - 'commit_hash': '544864ccc03b053317f5408b0c0349a42723ce73'} - from tripleo-ci-testing to current-tripleo, - missing successful jobs: [u'periodic-tripleo-ci-centos-7-ovb-3ctlr_1comp-featureset035-master', - u'periodic-tripleo-ci-centos-7-ovb-3ctlr_1comp-featureset001-master'] - -In particular note the last line 'missing successful jobs'. This means that -for the hash *544864ccc03b053317f5408b0c0349a42723ce73_ebb98bd9* a promotion -could not happen, because in this particular run, those two identified jobs -were failed. 
If the jobs were fixed in the meantime or you now know -how to fix them and get a good result, you could re-run those with testproject -specifying the particular hash. If they execute successfully then on the next -run the promoter will promote that hash to become the new current-tripleo. - - -.. _promotion-stages-overview: stages-overview.html -.. _dlrn-api-promoter: dlrn-promoter-overview.html -.. _`ruck|rover`: ruck_rover_primer.html -.. _`ci jobs`: https://docs.openstack.org/tripleo-docs/latest/ci/ci_primer.html#where-do-tripleo-promotion-jobs-live -.. _`config repo zuul pipelines`: https://github.com/rdo-infra/review.rdoproject.org-config/blob/0fd16d0badb13e02460d3b2e3213db4af7f027e0/zuul.d/upstream.yaml#L84-L157 -.. _openstack-periodic-master: https://review.rdoproject.org/zuul/builds?pipeline=openstack-periodic-master -.. _openstack-periodic-latest-released: https://review.rdoproject.org/zuul/builds?pipeline=openstack-periodic-latest-released -.. _openstack-periodic-24hr: https://review.rdoproject.org/zuul/builds?pipeline=openstack-periodic-24hr -.. _openstack-periodic-wednesday-weekend: https://review.rdoproject.org/zuul/builds?pipeline=openstack-periodic-wednesday-weekend -.. _`rdo-infra periodic zuul layout`: https://github.com/rdo-infra/review.rdoproject.org-config/blob/0fd16d0badb13e02460d3b2e3213db4af7f027e0/zuul.d/tripleo.yaml#L74-L424 -.. _`DLRN API Promoter`: https://github.com/rdo-infra/ci-config/blob/master/ci-scripts/dlrnapi_promoter/README.md -.. _`RDO delorean service`: https://trunk.rdoproject.org/centos7-master-head/report.html -.. _`promotion criteria`: https://github.com/rdo-infra/ci-config/blob/4bc3261c4ce644829a317c1bd85c1d645cb96cbd/ci-scripts/dlrnapi_promoter/config/CentOS-7/master.ini#L16 -.. _current-tripleo: https://trunk.rdoproject.org/centos7-master/current-tripleo/delorean.repo -.. _testproject: https://review.rdoproject.org/r/#/q/project:testproject -.. _`example is there`: https://review.rdoproject.org/r/#/c/23502/ -.. _`indexed promoter service logs`: http://promoter.rdoproject.org/ -.. _`centos7 master tripleo-ci-testing`: https://trunk.rdoproject.org/centos7-master/tripleo-ci-testing/delorean.repo diff --git a/doc/source/ci/check_gates.rst b/doc/source/ci/check_gates.rst deleted file mode 100644 index 24098e6c..00000000 --- a/doc/source/ci/check_gates.rst +++ /dev/null @@ -1,245 +0,0 @@ -How to add a TripleO job to your projects check pipeline -======================================================== - -To ensure a non-TripleO project's changes work with TripleO an additional -check job can be added to the project's job definitions in OpenStack's -`project config `_ - -Project Config Example ----------------------- - -In this case we'll use openstack/neutron as an example to understand how -this works. Note that this is only an example and this job may not be appropriate -for your project, we will cover how to pick a job later on in this documentation. -Browse through the `layout.yaml -`_ -file in the project-config repository until you find:: - - - name: openstack/neutron - template: - - name: merge-check - - ... - - ... - check: - - ... - - ... - - gate-tripleo-ci-centos-7-nonha-multinode-oooq-nv - -The above configuration will run the TripleO job -``gate-tripleo-ci-centos-7-nonha-multinode-oooq-nv`` without voting (nv). -This type of job is used to inform the reviewers of the patch whether or not -the change under review works with TripleO. 
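The snippet above uses the legacy project-config ``layout.yaml`` syntax. For orientation only, a rough sketch of the same idea in the newer in-repo Zuul v3 syntax would look like the following (project and job names are illustrative)::

    - project:
        check:
          jobs:
            # report on every patch set without blocking the gate
            - tripleo-ci-centos-7-containers-multinode:
                voting: false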
- - -How to pick which job to execute for any given OpenStack project ----------------------------------------------------------------- - -TripleO can deploy a number of different OpenStack services. To best utilize -the available upstream CI resources TripleO uses the same concept as the -`puppet-openstack-integration project -`_ to define how -services are deployed. The TripleO documentation regarding services can be found -`here. `_ -Review the TripleO documentation and find a scenario that includes the services -that your project requires to be tested. Once you have determined which -scenario to use you are ready to pick a TripleO check job. - -The following is a list of available check jobs:: - - gate-tripleo-ci-centos-7-scenario001-multinode-oooq - gate-tripleo-ci-centos-7-scenario001-multinode-oooq-puppet - gate-tripleo-ci-centos-7-scenario001-multinode-oooq-container - gate-tripleo-ci-centos-7-scenario002-multinode-oooq - gate-tripleo-ci-centos-7-scenario002-multinode-oooq-puppet - gate-tripleo-ci-centos-7-scenario002-multinode-oooq-container - gate-tripleo-ci-centos-7-scenario003-multinode-oooq - gate-tripleo-ci-centos-7-scenario003-multinode-oooq-puppet - gate-tripleo-ci-centos-7-scenario003-multinode-oooq-container - gate-tripleo-ci-centos-7-scenario004-multinode-oooq - gate-tripleo-ci-centos-7-scenario004-multinode-oooq-puppet - gate-tripleo-ci-centos-7-scenario004-multinode-oooq-container - gate-tripleo-ci-centos-7-nonha-multinode-oooq - gate-tripleo-ci-centos-7-containers-multinode - -**Note** over time additional scenarios will be added and will follow the same -pattern as the job names listed above. - -Adding a new non-voting check job ---------------------------------- - -Find your project in `layout.yaml -`_. -An example of a project will look like the following example:: - - - name: openstack/$project - template: - - ... - - ... - -**Note** ``$project`` is the name of your project. - -Under the section named ``check``, add the job that best suits your project. -Be sure to add ``-nv`` to the job name to ensure the job does not vote:: - - check: - - ... - - ... - - $job-nv - -Enabling voting jobs --------------------- - -If your project is interested in gating your project with a voting version -of a TripleO job, you can follow the openstack/mistral project's example in -`layout.yaml -`_ - -For example:: - - - name: openstack/mistral - template: - -name: merge-check - - ... - - ... - check: - - ... - - ... - - gate-tripleo-ci-centos-7-scenario003-multinode-oooq-puppet - gate: - - gate-tripleo-ci-centos-7-scenario003-multinode-oooq-puppet - -**Note** the example does **not** append ``-nv`` as a suffix to the job name - -Troubleshooting a failed job ----------------------------- - -When your newly added job fails, you may want to download its logs for a local -inspection and root cause analysis. Use the -`tripleo-ci getthelogs script -`_ -for that. - -Enabling tempest tests notification ------------------------------------ - -There is a way to get notifications by email when a job finishes to running -tempest. -People interested to receive these notifications can submit a patch to add -their email address in `this config file -`_. -Instructions can be found `here -`_. - -featureset override -------------------- - -In TripleO CI, we test each patchset using different jobs. These jobs -are defined using `featureset config files -`_. -Each featureset config file is mapped to a job template that is defined in -`tripleo-ci `_. 
-Tempest tests are basically triggered in scenario jobs in order to post validate the -a particular scenario deployment. -The set of tempest tests that run for a given TripleO CI job is defined in the -`featureset config files -`_. -You may want to run a popular TripleO CI job with a custom set of Tempest -tests and override the default Tempest run. This can be accomplished through -adding the `featureset_overrides` var to zuul job config `vars:` section. -The allowed featureset_override are defined in the `tripleo-ci run-test role -`_. -This setting allows projects to override featureset post deployment configuration. -Some of the overridable settings are: - - - `run_tempest`: To run tempest or not (true|false). - - `tempest_whitelist`: List of tests you want to be executed. - - `test_black_regex`: Set of tempest tests to skip. - - `tempest_format`: To run tempest using different format (packages, containers, venv). - - `tempest_extra_config`: A dict of additional tempest config to be overridden. - - `tempest_plugins`: A list of tempest plugins needs to be installed. - - `standalone_environment_files`: List of environment files to be overridden - by the featureset configuration on standalone deployment. The environment - file should exist in tripleo-heat-templates repo. - - `test_white_regex`: Regex to be used by tempest - - `tempest_workers`: Numbers of parallel workers to run - - `standalone_container_cli`: Container cli to use - - `tempest_private_net_provider_type`: The Neutron type driver that should be - used by tempest tests. - -For a given job `tripleo-ci-centos-7-scenario001-multinode-oooq-container`, you -can create a new abstract layer job and overrides the tempest tests:: - - - job: - name: scn001-multinode-oooq-container-custom-tempest - parent: tripleo-ci-centos-7-scenario001-multinode-oooq-container - ... - vars: - featureset_override: - run_tempest: true - tempest_whitelist: - - 'tempest.scenario.test_volume_boot_pattern.TestVolumeBootPattern.test_volume_boot_pattern' - test_black_regex: - - 'keystone_tempest_plugin' - tempest_format: 'containers' - tempest_extra_config: {'compute-feature-enabled.attach_encrypted_volume': 'True', - 'auth.tempest_roles': '"Member"'} - tempest_plugins: - - 'python2-keystone-tests-tempest' - - 'python2-cinder-tests-tempest' - tempest_workers: 1 - test_white_regex: - - 'tempest.api.identity' - - 'keystone_tempest_plugin' - standalone_environment_files: - - 'environments/low-memory-usage.yaml' - - 'ci/environments/scenario003-standalone.yaml' - standalone_container_cli: docker - -In a similar way, for skipping Tempest run for the scenario001 job, you can do -something like:: - - - job: - name: scn001-multinode-oooq-container-skip-tempest - parent: tripleo-ci-centos-7-scenario001-multinode-oooq-container - ... 
- vars: - featureset_override: - run_tempest: false - -Below is the list of jobs based on `tripleo-puppet-ci-centos-7-standalone` which uses -featureset_override and run specific tempest tests against puppet projects: - -* puppet-nova - - - job name: puppet-nova-tripleo-standalone - - tempest_test: compute - -* puppet-horizon - - - job name: puppet-horizon-tripleo-standalone - - tempest_test: horizon - -* puppet-keystone - - - job name: puppet-keystone-tripleo-standalone - - tempest_test: keystone_tempest_plugin & identity - -* puppet-glance - - - job name: puppet-glance-tripleo-standalone - - tempest_test: image - -* puppet-cinder - - - job name: puppet-cinder-tripleo-standalone - - tempest_test: volume & cinder_tempest_tests - -* puppet-neutron - - - job name: puppet-neutron-tripleo-standalone - - tempest_test: neutron_tempest_tests & network - -* puppet-swift - - - job name: puppet-swift-tripleo-standalone - - tempest_test: object_storage diff --git a/doc/source/ci/ci_primer.rst b/doc/source/ci/ci_primer.rst deleted file mode 100644 index 34444961..00000000 --- a/doc/source/ci/ci_primer.rst +++ /dev/null @@ -1,159 +0,0 @@ -TripleO CI jobs primer -====================== - -This primer aims to demonstrate where the Triple ci jobs are defined and -illustrate the difference between the check and gate queues and how jobs -are executed in them. Which queue a job is executed in also affects whether the -job is defined as voting or not. Generally: - -* new jobs are run in check and are non voting -* once a job is voting in check, it needs to be added to gate too. -* once a job is voting in check and gate you should add it to the promotion - jobs so that tripleo promotions (i.e. from tripleo-testing - to current-tripleo) will depend on successful execution of that job. - -Once a job becomes voting it must be added to the gate queue too. If it isn't -then we may end up with a situation where something passes the voting -check job and merges without being run in the gate queue. It could be that for -some reason it would have failed in the gate and thus not have merged. A common -occurrence is the check jobs run on a particular submission and pass on one day but -then not actually merge (and so run in the gate) until much later perhaps even after -some days.In the meantime some unrelated change merges in another project which would -cause the job to fail in the gate, but since we're not running it there the code -submission merges. This then means that the job is broken in subsequent check runs. - -Non tripleo-projects are not gated in tripleo. The promotion jobs -represent the point at which we take the latest built tripleo packages and the -latest built non-tripleo projects packages (like nova, neutron etc) and test these together. -For more information about promotions refer to :doc:`Promotion Stages` - -Where do tripleo-ci jobs live ------------------------------ - -.. note:: - - If you ever need to search for a particular job to see which file it is defined - in or which tripleo project repos it is running for you can search by name in - the openstack-codesearch_ (e.g. that is a search for the - tripleo-ci-centos-7-scenario003-standalone job). - -.. note:: - - If you ever want to see the status for a particular job with respect to how - often it is failing or passing, you can check the zuul_builds_ status and - search by job name (again the linked example is for scenario003-standalone). 
- -The tripleo ci jobs live in the tripleo-ci repo and specifically in various -files defined under the zuul.d_ directory. As an example we can examine one of -the scenario-standalone-jobs_:: - - - job: - name: tripleo-ci-centos-7-scenario001-standalone - voting: true - parent: tripleo-ci-base-standalone - nodeset: single-centos-7-node - branches: ^(?!stable/(newton|ocata|pike|queens|rocky)).*$ - vars: - featureset: '052' - standalone_ceph: true - featureset_override: - standalone_container_cli: docker - standalone_environment_files: - - 'ci/environments/scenario001-standalone.yaml' - - 'environments/low-memory-usage.yaml' - tempest_plugins: - - python-telemetry-tests-tempest - - python-heat-tests-tempest - test_white_regex: '' - tempest_workers: 1 - tempest_extra_config: {'telemetry.alarm_granularity': '60'} - tempest_whitelist: - - 'tempest.api.identity.v3' - - 'tempest.scenario.test_volume_boot_pattern.TestVolumeBootPattern.test_volume_boot_pattern' - - 'telemetry_tempest_plugin.scenario.test_telemetry_integration.TestTelemetryIntegration' - -As you can see the job definition consists of the unique job name followed by -the rest of the zuul variables, including whether the job is voting and which -node layout (nodeset) should be used for that job. The unique job name is then -used in the zuul layout (discussed in the next section) to determine if the job -is run in check or gate or both. Since the job shown above is set as voting -we can expect it to be defined in both gate and check. - -.. _zuul.d: https://github.com/openstack-infra/tripleo-ci/tree/master/zuul.d -.. _scenario-standalone-jobs: https://github.com/openstack-infra/tripleo-ci/blob/101074b2e804f97880440a3e62351844f390b2f2/zuul.d/standalone-jobs.yaml#L86-L88 -.. _openstack-codesearch: http://codesearch.openstack.org/?q=tripleo-ci-centos-7-scenario003-standalone&i=nope&files=&repos= -.. _zuul_builds: http://zuul.opendev.org/builds?job_name=tripleo-ci-centos-7-scenario003-standalone - -Zuul queues - gate vs check ---------------------------- - -As with all OpenStack projects there are two zuul queues to which jobs are -scheduled - the check jobs which are run each time a change is submitted and -then the gate jobs which are run before a change is merged. There is also -an experimental queue but that is invoked manually. - -Which queue a given job is run in is determined by the zuul layout file for the -given project - e.g. here is tripleo-heat-templates-zuul-layout_. The layout -file has the following general format:: - - - project: - templates: - .. list of templates - check: - jobs: - .. list of job names and any options for each - gate: - queue: tripleo - jobs: - .. list of job names and any options for each - -The templates: section in the outline above is significant because the layout -can also be defined in one of the included templates. For example the -scenario-standalone-layout_ defines the check/gate layout for the -tripleo-standalone-scenarios-full template which is then included by the -projects that want the jobs defined in that template to execute in the manner -it specifies. - -.. _tripleo-heat-templates-zuul-layout: https://github.com/openstack/tripleo-heat-templates/blob/efe9b8fa1fff7ef1828777a95eee9fe4d901f9b9/zuul.d/layout.yaml#L9 -.. _scenario-standalone-layout: https://github.com/openstack-infra/tripleo-ci/blob/7333a6fc8ff3990a971a661a817e30ae25e06374/zuul.d/standalone-jobs.yaml#L77-L79 - -Where do tripleo promotion jobs live ------------------------------------- - -.. 
note:: - If you even need to find the definition for a particular promotion job you can - search for it by name using the rdo-codesearch_. - -The tripleo promotions jobs are not defined in the tripleo-ci but instead live -in the rdo-jobs_ repository. For more information about the promotion pipeline -in TripleO refer to the :doc:`Promotion Stages` - -Similar to the tripleo-ci jobs, they are defined in various files under the -rdo-jobs-zuul.d_ directory and the job definitions look very similar to the -tripleo-ci ones - for example the -periodic-tripleo-ci-centos-7-multinode-1ctlr-featureset010-master_:: - - - job: - name: periodic-tripleo-ci-centos-7-multinode-1ctlr-featureset010-master - parent: tripleo-ci-base-multinode-periodic - vars: - nodes: 1ctlr - featureset: '010' - release: master - -If you even need to find the definition for a particular promotion job you can -search for it by name using the rdo-codesearch_. - -.. _rdo-jobs: https://github.com/rdo-infra/rdo-jobs -.. _rdo-jobs-zuul.d: https://github.com/rdo-infra/rdo-jobs/tree/master/zuul.d -.. _periodic-tripleo-ci-centos-7-multinode-1ctlr-featureset010-master: https://github.com/rdo-infra/rdo-jobs/blob/76daaff19a464614a002655bc85db4080607f1bf/zuul.d/multinode-jobs.yaml#L148 -.. _rdo-codesearch: https://codesearch.rdoproject.org/?q=periodic-tripleo-ci-centos-7-multinode-1ctlr-featureset010-master&i=nope&files=&repos= - -Contacting CI team ------------------- - -When in need you can contact the TripleO CI team members on one of the two -irc channels on OFTC #tripleo by mentioning ``@oooq`` keyword in your -message as team members get notified about such messages. It is good to -remember that those nicknames with ``|ruck`` and ``|rover`` suffix are on duty -to look for CI status. diff --git a/doc/source/ci/component_integration_pipelines.mmd b/doc/source/ci/component_integration_pipelines.mmd deleted file mode 100644 index da0605da..00000000 --- a/doc/source/ci/component_integration_pipelines.mmd +++ /dev/null @@ -1,5 +0,0 @@ -graph TD - compute/consistent-->compute/component-ci-testing-->compute/promoted-components-->promoted-components - cinder/consistent-->cinder/component-ci-testing-->cinder/promoted-components-->promoted-components - security/consistent-->security/component-ci-testing-->security/promoted-components-->promoted-components - promoted-components-->tripleo-ci-testing-->current-tripleo diff --git a/doc/source/ci/component_pipeline_tags_flow.mmd b/doc/source/ci/component_pipeline_tags_flow.mmd deleted file mode 100644 index 01345573..00000000 --- a/doc/source/ci/component_pipeline_tags_flow.mmd +++ /dev/null @@ -1,6 +0,0 @@ -graph TD - A[consistent] -->|periodically promote to| B[component-ci-testing] - B -->|Run periodic promotion jobs| C{criteria jobs passing?} - C -->|no| E[reject content] - C -->|yes| D[promoted-components] - D --> F[start of integration pipeline] diff --git a/doc/source/ci/content_provider_jobs.rst b/doc/source/ci/content_provider_jobs.rst deleted file mode 100644 index a57b634b..00000000 --- a/doc/source/ci/content_provider_jobs.rst +++ /dev/null @@ -1,178 +0,0 @@ -Content Provider Jobs -===================== - -This section gives an overview and some details about the 'content provider' -zuul jobs. They are so called because they consist of a parent job that builds -containers which are then consumed by any number of child jobs. Thus the parent -jobs are the 'content provider' for the child jobs. - -Why Do We Need Content Providers? 
---------------------------------- - -The content provider jobs were added by the Tripleo CI squad during the -Victoria development cycle. Prior to this `check and gate tripleo-ci jobs`_ -running on review.opendev.org code submissions were pulling the promoted -'current-tripleo' containers from docker.io. - -Having all jobs pull directly from a remote registry obviously puts a strain on -resources; consider multiple jobs per code submission with tens of -container pulls for each. We have over time been affected by a number of issues -related to the container pulls (such as timeouts) that would cause jobs to fail -and block the gates. Furthermore, `docker has recently announced`_ that requests -will be rate limited to one or two hundred pull requests per six hours (without and -with authentication respectively) on the free plan effective 01 November 2020. - -In anticipation of this the TripleO CI squad has moved all jobs to the new -content provider architecture. - -The Content Provider --------------------- - -The main task executed by the content provider job is to build the containers -needed to deploy TripleO. This is achieved with a collection of ansible plays -defined in the `multinode-standalone-pre.yml`_ tripleo-quickstart-extras -playbook. - -Once built, the content provider then needs to make those containers available -for use by the child jobs. The `build-container role itself`_ as invoked in -`multinode-standalone-pre.yml`_ ensures containers are pushed to the -a local registry on the content provider node. However the child job will need -to know the IP address on which they can reach that registry. - -To achieve this we use the `zuul_return module`_ that allows for a parent -job to return data for consumption within child jobs. We set the required -zuul_return data in the `run-provider.yml playbook`_:: - - - name: Set registry IP address - zuul_return: - data: - zuul: - pause: true - registry_ip_address: "{{ hostvars[groups.all[0]].ansible_host }}" - provider_dlrn_hash: "{{ dlrn_hash|default('') }}" - provider_dlrn_hash_tag: "{{ dlrn_hash_tag|default('') }}" - provider_job_branch: "{{ provider_job_branch }}" - registry_ip_address_branch: "{{ registry_ip_address_branch }}" - provider_dlrn_hash_branch: "{{ provider_dlrn_hash_branch }}" - tags: - - skip_ansible_lint - -Child jobs retrieve the IP address for the content provider container -registry via the `registry_ip_address_branch dictionary`_. This contains a -mapping between the release (master, victoria, ussuri etc) and the IP address -of the content provider container registry with images for that release. -For example:: - - registry_ip_address_branch: - master: 38.145.33.72 - -Most jobs will only ever have one release in this dictionary but upgrade jobs -will require two (more on that later). Note that besides setting the -zuul_return data the task above sets the `zuul pause: true`_. As the name -suggests, this allows the parent content provider job to be paused until all -children have executed. 
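On the consumer side, picking the right registry amounts to a lookup in that returned dictionary keyed on the release being deployed. A minimal sketch of the idea is shown below; the authoritative logic lives in the extras-common role linked later in this document and also covers fallbacks when no content provider data is present::

    # illustrative only: resolve the content provider registry for this release
    docker_registry_host: "{{ job.registry_ip_address_branch[release] }}"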
- -Given all the above, it should be of little surprise ;) that the -`content provider zuul job definition`_ is as follows (at time of writing):: - - - job: - name: tripleo-ci-centos-8-content-provider - parent: tripleo-ci-base-standalone-centos-8 - branches: ^(?!stable/(newton|ocata|pike|queens|rocky|stein)).*$ - run: - - playbooks/tripleo-ci/run-v3.yaml - - playbooks/tripleo-ci/run-provider.yml - vars: - featureset: '052' - provider_job: true - build_container_images: true - ib_create_web_repo: true - playbooks: - - quickstart.yml - - multinode-standalone-pre.yml - -It uses the `same featureset as the standalone job`_. Notice the -`multinode-standalone-pre.yml`_ passed to tripleo-quickstart for execution. -The `run-provider.yml playbook`_ is executed as the last of the zuul `run` plays. - -Finally, one other important task performed by the content provider job is to -build any dependent changes (i.e. depends-on in the code submission). This is -done with `build-test-packages`_ invoked in the `multinode-standalone-pre.yml`_. -We ensure that the built repo is available to child jobs by setting the -`ib_create_web_repo variable`_ when built-test-packages is invoked by a -provider job. This `makes the repo available via a HTTP server`_ on the -content provider node that consumers then retrieve as described below. - -The Content Consumers ---------------------- - -The child jobs or content consumers must use the container registry available -from the content provider. To do this `we set the docker_registry_host`_ -variable using the `job.registry_ip_address_branch` zuul_data returned from -the parent content provider. - -Any dependent changes built by `build-test-packages`_ are installed into -consumer jobs using the `install-built-repo`_ playbook. This has been added -into the `appropriate base job definitions`_ as a *pre-run:* play. - -Finally, in order to make a given zuul job a *consumer* job we must set the -content provider as dependency and pass the relevant variables. For example -in order to `run tripleo-ci-centos-8-scenario001-standalone as a consumer job`_:: - - - tripleo-ci-centos-8-content-provider - - tripleo-ci-centos-8-scenario001-standalone: - files: *scen1_files - vars: &consumer_vars - consumer_job: true - build_container_images: false - tags: - - standalone - dependencies: - - tripleo-ci-centos-8-content-provider - - -Upgrade Jobs ------------- - -Upgrade jobs are a special case because they require content from more than -one release. For instance tripleo-ci-centos-8-standalone-upgrade-ussuri will -deploy train containers and then upgrade to ussuri containers. - -To achieve this we use two content provider jobs as dependencies for the upgrade -jobs that require them (not all do):: - - - tripleo-ci-centos-8-standalone-upgrade: - vars: *consumer_vars - dependencies: - - tripleo-ci-centos-8-content-provider - - tripleo-ci-centos-8-content-provider-ussuri - -As shown earlier in this document the `registry_ip_address_branch dictionary`_ -maps release to the appropriate registry. This is set by each of the two parent -jobs and once both have executed the dictionary will contain more than one -entry. For example:: - - registry_ip_address_branch: - master: 213.32.75.192 - ussuri: 158.69.75.154 - -The consumer upgrade jobs then use the appropriate registry for the deployment -or upgrade part of the test. - -.. _`check and gate tripleo-ci jobs`: ci_primer.html -.. 
_`docker has recently announced`: https://www.docker.com/blog/scaling-docker-to-serve-millions-more-developers-network-egress/ -.. _`content provider zuul job definition`: https://opendev.org/openstack/tripleo-ci/src/commit/fbaaa3324712b9a718ce17c82bb190d09cca95be/zuul.d/standalone-jobs.yaml#L1032 -.. _`multinode-standalone-pre.yml`: https://opendev.org/openstack/tripleo-quickstart-extras/src/commit/e61200fec8acccb3d5fe20f68b64156a3daadb8a/playbooks/multinode-standalone-pre.yml -.. _`build-container role itself`: https://opendev.org/openstack/tripleo-ci/src/commit/fbaaa3324712b9a718ce17c82bb190d09cca95be/roles/build-containers/tasks/main.yaml#L265-L270 -.. _`zuul_return module`: https://zuul-ci.org/docs/zuul/reference/jobs.html?highlight=zuul_return#return-values -.. _`run-provider.yml playbook`: https://opendev.org/openstack/tripleo-ci/src/commit/fbaaa3324712b9a718ce17c82bb190d09cca95be/playbooks/tripleo-ci/run-provider.yml#L56 -.. _`zuul pause: true`: https://zuul-ci.org/docs/zuul/reference/jobs.html?highlight=pause#pausing-the-job -.. _`we set the docker_registry_host`: https://opendev.org/openstack/tripleo-quickstart-extras/src/commit/e61200fec8acccb3d5fe20f68b64156a3daadb8a/roles/extras-common/defaults/main.yml#L44 -.. _`build-test-packages`: https://opendev.org/openstack/tripleo-quickstart-extras/src/branch/master/roles/build-test-packages/ -.. _`ib_create_web_repo variable`: https://opendev.org/openstack/tripleo-quickstart-extras/src/commit/e61200fec8acccb3d5fe20f68b64156a3daadb8a/roles/install-built-repo/defaults/main.yml#L11 -.. _`makes the repo available via a HTTP server`: https://opendev.org/openstack/tripleo-quickstart-extras/src/commit/e61200fec8acccb3d5fe20f68b64156a3daadb8a/roles/install-built-repo/templates/install-built-repo.sh.j2#L17-L23 -.. _`install-built-repo`: https://opendev.org/openstack/tripleo-ci/src/commit/fbaaa3324712b9a718ce17c82bb190d09cca95be/playbooks/tripleo-ci/install-built-repo.yml#L16-L27 -.. _`appropriate base job definitions`: https://opendev.org/openstack/tripleo-ci/src/commit/fbaaa3324712b9a718ce17c82bb190d09cca95be/zuul.d/base.yaml#L184 -.. _`run tripleo-ci-centos-8-scenario001-standalone as a consumer job`: https://opendev.org/openstack/tripleo-ci/src/commit/fbaaa3324712b9a718ce17c82bb190d09cca95be/zuul.d/standalone-jobs.yaml#L483-L492 -.. _`registry_ip_address_branch dictionary`: https://opendev.org/openstack/tripleo-ci/src/commit/fbaaa3324712b9a718ce17c82bb190d09cca95be/playbooks/tripleo-ci/run-provider.yml#L26 -.. _`same featureset as the standalone job`: https://github.com/openstack/tripleo-quickstart/blob/671893a60467ad76359eaaf2199c55b64cc20702/config/general_config/featureset052.yml#L2 diff --git a/doc/source/ci/dlrn-promoter-overview.rst b/doc/source/ci/dlrn-promoter-overview.rst deleted file mode 100644 index b345acf1..00000000 --- a/doc/source/ci/dlrn-promoter-overview.rst +++ /dev/null @@ -1,141 +0,0 @@ -How the TripleO-RDO Pipelines' Promotions Work -============================================== - -Building consumable RDO repos and images involves various stages. -Each stage takes inputs and outputs artifacts. This document explains the -stages comprising the promotion pipelines, and the tools used to create -and manage the resulting artifacts. - -What is DLRN? -------------- - -DLRN is a tool to build RPM packages from each commit to a set of -OpenStack-related git repositories that are included in RDO. -DLRN builds are run through CI and to detect packaging issues with the -upstream branches of these Openstack projects. 
- -DLRN Artifacts - Hashes and Repos ---------------------------------- - -When a DLRN build completes, it produces a new hash and related repo version. -For example, the Pike builds on CentOS are available at: -https://trunk.rdoproject.org/centos7-pike/. -The builds are placed in directories by DLRN hash. Each directory contains -the RPMs as well as a repo file -https://trunk.rdoproject.org/centos7-pike/current-tripleo/delorean.repo -and a ``commit.yaml`` file -https://trunk.rdoproject.org/centos7-pike/current-tripleo/commit.yaml. - -There are some standard links that are updated as the builds complete and pass -stages of CI. Examples are these links are: - -- https://trunk.rdoproject.org/centos7-pike/current/ -- https://trunk.rdoproject.org/centos7-pike/consistent/ -- https://trunk.rdoproject.org/centos7-pike/current-tripleo/ -- https://trunk.rdoproject.org/centos7-pike/current-tripleo-rdo/ -- https://trunk.rdoproject.org/centos7-pike/current-tripleo-rdo-internal/ -- https://trunk.rdoproject.org/centos7-pike/tripleo-ci-testing/ - -The above links will be referenced in the sections below. - -Promoting through the Stages - DLRN API ---------------------------------------- - -DLRN API Client -``````````````` - -`The DLRN API -`_ -`client `_ -enables users to query repo status, upload new hashes and create promotions. -Calls to the dlrnapi_client retrieve the inputs to stages and upload artifacts -after stages. - -For example: - -:: - - $ dlrnapi --url https://trunk.rdoproject.org/api-centos-master-uc \ - promotion-get --promote-name tripleo-ci-testing - - [{'commit_hash': 'ec650aa2c8ce952e4a33651190301494178ac562', - 'distro_hash': '9a7acc684265872ff288a11610614c3b5739939b', - 'promote_name': 'tripleo-ci-testing', - 'timestamp': 1506427440}, - {'commit_hash': 'ec650aa2c8ce952e4a33651190301494178ac562', - [..] - - - $ dlrnapi --url https://trunk.rdoproject.org/api-centos-master-uc \ - repo-status --commit-hash ec650aa2c8ce952e4a33651190301494178ac562 \ - --distro-hash 9a7acc684265872ff288a11610614c3b5739939b - - [{'commit_hash': 'ec650aa2c8ce952e4a33651190301494178ac562', - 'distro_hash': '9a7acc684265872ff288a11610614c3b5739939b', - 'in_progress': False, - 'job_id': 'consistent', - 'notes': '', - 'success': True, - 'timestamp': 1506409403, - 'url': ''}, - {'commit_hash': 'ec650aa2c8ce952e4a33651190301494178ac562', - 'distro_hash': '9a7acc684265872ff288a11610614c3b5739939b', - 'in_progress': False, - 'job_id': 'periodic-singlenode-featureset023', - 'notes': '', - 'success': True, - 'timestamp': 1506414726, - 'url': 'https://logs.rdoproject.org/openstack-periodic-4hr/periodic-tripleo-centos-7-master-containers-build/8a76883'}, - {'commit_hash': 'ec650aa2c8ce952e4a33651190301494178ac562', - [..] - - -DLRN API Promoter -````````````````` - -`The DLRN API Promoter script -`_ -is a Python script that, based on the information in an input config file, -will promote an existing DLRN link to another link, provided the required tests -return successful results. - -For example, -`the master ini config file -`_ -is passed to the `promoter script -`_ -to promote the ``current-tripleo`` link to ``current-tripleo-rdo``. See the -sections above where both these links (for Pike) were shown. - -In the RDO Phase 1 pipeline, the tests listed under the ``[current-tripleo-rdo]`` -are run with the ``current-tripleo`` hash. Each test reports its ``success`` status to the -DLRN API endpoint for the Master release, ``api-centos-master-uc``. 
- -If each test reports ``SUCCESS: true``, the content of the ``current-tripleo`` -will become the new content of the ``current-tripleo-rdo`` hash. - -For complete documentation on how to run the Promoter script see: -https://github.com/rdo-infra/ci-config/blob/master/ci-scripts/dlrnapi_promoter/README.md - - -Pushing RDO containers to ``docker.io`` -``````````````````````````````````````` - -The DLRN Promoter script calls the `container push playbook -`_ -to push the RDO containers at each stage to `docker.io -`_. -Note that the above ``docker.io`` link shows containers tagged with ``tripleo-ci-testing``, -``current-tripleo`` and ``current-tripleo-rdo``. - - -DLRN API Promoter Server -```````````````````````` - -It is recommended that the Promoter script is run from a dedicated server. -`The promoter-setup repo -`_ -contains the Ansible playbook used to setup the promoter-server in the RDO -Cloud environment. This playbook allows the promoter script server to be -rebuilt as required. - diff --git a/doc/source/ci/emit_releases_file.rst b/doc/source/ci/emit_releases_file.rst deleted file mode 100644 index 84d32c34..00000000 --- a/doc/source/ci/emit_releases_file.rst +++ /dev/null @@ -1,93 +0,0 @@ -emit-releases-file and releases.sh -================================== - -The emit-releases-file tool is a python script that lives in the tripleo-ci -repo under the `scripts/emit_releases_file`_ directory. This script produces -an output file called `releases.sh` containing shell variable export commands. -These shell variables set the release **name** and **hash** for the -installation and target (versions) of a given job. For example, installing -latest stable branch (currently stein) and upgrading to master. The **hash** -is the delorean repo hash from which the packages used in the job are to be -installed. - -The contents of `releases.sh` will differ depending on the type of upgrade or -update operation being performed by a given job and this is ultimately -determined by the featureset. Each upgrade or update related featureset sets -boolean variables that signal the type of upgrade performed. For example -featureset050_ is used for undercloud upgrade and it sets:: - - undercloud_upgrade: true - -The `releases.sh` for an undercloud upgrade job looks like:: - - #!/bin/env bash - export UNDERCLOUD_INSTALL_RELEASE="stein" - export UNDERCLOUD_INSTALL_HASH="c5b283cab4999921135b3815cd4e051b43999bce_5b53d5ba" - export UNDERCLOUD_TARGET_RELEASE="master" - export UNDERCLOUD_TARGET_HASH="be90d93c3c5f77f428d12a9a8a2ef97b9dada8f3_5b53d5ba" - export OVERCLOUD_DEPLOY_RELEASE="master" - export OVERCLOUD_DEPLOY_HASH="be90d93c3c5f77f428d12a9a8a2ef97b9dada8f3_5b53d5ba" - export OVERCLOUD_TARGET_RELEASE="master" - export OVERCLOUD_TARGET_HASH="be90d93c3c5f77f428d12a9a8a2ef97b9dada8f3_5b53d5ba" - export STANDALONE_DEPLOY_RELEASE="master" - export STANDALONE_DEPLOY_HASH="be90d93c3c5f77f428d12a9a8a2ef97b9dada8f3_5b53d5ba" - export STANDALONE_DEPLOY_NEWEST_HASH="b4c2270cc6bec2aaa3018e55173017c6428237a5_3eee5076" - export STANDALONE_TARGET_RELEASE="master" - export STANDALONE_TARGET_NEWEST_HASH="b4c2270cc6bec2aaa3018e55173017c6428237a5_3eee5076" - export STANDALONE_TARGET_HASH="be90d93c3c5f77f428d12a9a8a2ef97b9dada8f3_5b53d5ba" - -As can be seen there are three different groups of keys set: -`UNDERCLOUD_INSTALL` and `UNDERCLOUD_TARGET` is one group, then -`OVERCLOUD_DEPLOY` and `OVERCLOUD_TARGET`, and finally `STANDALONE_DEPLOY` and -`STANDALONE_TARGET`. 
For each of those groups we have the `_RELEASE` name and -delorean `_HASH`. Since the example above is generated from an undercloud -upgrade job/featureset only the undercloud related values are set correctly. -The values for `OVERCLOUD_` and `STANDALONE_` are set to the default values -with both `_DEPLOY` and `_TARGET` referring to `master`. - -Where is releases.sh used -------------------------- - -The releases script is not used for all CI jobs or even for all upgrades -related jobs. There is a conditional in the -`tripleo-ci run-test role which determines`_ -the list of jobs for which we `use emit-releases-file`. In future we may remove -this conditional altogether. - -Once it is determined that the releases.sh file will be used, a list of extra -`RELEASE_ARGS is compiled`_ to be passed into the subsequent -`quickstart playbook invocations`_. An example of what these `RELEASE_ARGS` -looks like is:: - - --extra-vars @/home/zuul/workspace/.quickstart/config/release/tripleo-ci/CentOS-7/master.yml -e dlrn_hash=be90d93c3c5f77f428d12a9a8a2ef97b9dada8f3_5b53d5ba -e get_build_command=be90d93c3c5f77f428d12a9a8a2ef97b9dada8f3_5b53d5ba - -The `RELEASE_ARGS` are resolved by a helper function -get_extra_vars_from_release_. As you can see this function uses the release -name passed in via the `_RELEASE` value from the `releases.sh` to set the right -release configuration file from the tripleo-quickstart `config/release/`_ -directory which sets variables for the ansible execution. It also sets the -`dlrn_hash` which is used to setup the right repo and thus versions of packages -and finally the get_build_command is used to make sure we have the right -containers for the job. - -As you can see in the list of compiled `RELEASE_ARGS` the `INSTALL` or `TARGET` -are passed in to the get_extra_vars_from_release function, depending on the -playbook:: - - declare -A RELEASE_ARGS=( - ["multinode-undercloud.yml"]=$(get_extra_vars_from_release \ - $UNDERCLOUD_INSTALL_RELEASE $UNDERCLOUD_INSTALL_HASH) - ["multinode-undercloud-upgrade.yml"]=$(get_extra_vars_from_release \ - $UNDERCLOUD_TARGET_RELEASE $UNDERCLOUD_TARGET_HASH) - -So for the multinode-undercloud.yml use INSTALL_RELEASE but for -multinode-undercloud-upgrade.yml use TARGET_RELEASE and HASH. - -.. _`scripts/emit_releases_file`: https://opendev.org/openstack/tripleo-ci/src/commit/91c836da76f6f28a5c7545b6a96bf6a9c0d2289e/scripts/emit_releases_file -.. _featureset050: https://opendev.org/openstack/tripleo-quickstart/src/commit/b90b5a51df5104da35adf42a7d7fb5f7bc603eca/config/general_config/featureset050.yml#L18 -.. _releases_jobs: https://opendev.org/openstack/tripleo-ci/src/commit/91c836da76f6f28a5c7545b6a96bf6a9c0d2289e/roles/run-test/templates/toci_gate_test.sh.j2#L120 -.. _`tripleo-ci run-test role which determines`: https://opendev.org/openstack/tripleo-ci/src/commit/93768b46eec9cf3767fef23c186806e660f69395/roles/run-test/templates/toci_gate_test.sh.j2#L124 -.. _get_extra_vars_from_release: https://opendev.org/openstack/tripleo-ci/src/commit/91c836da76f6f28a5c7545b6a96bf6a9c0d2289e/roles/run-test/templates/oooq_common_functions.sh.j2#L155 -.. _`RELEASE_ARGS is compiled`: https://opendev.org/openstack/tripleo-ci/src/commit/91c836da76f6f28a5c7545b6a96bf6a9c0d2289e/roles/run-test/templates/toci_quickstart.sh.j2#L66 -.. _`quickstart playbook invocations`: https://opendev.org/openstack/tripleo-ci/src/commit/91c836da76f6f28a5c7545b6a96bf6a9c0d2289e/roles/run-test/templates/toci_quickstart.sh.j2#L130 -.. 
_`config/release/`: https://opendev.org/openstack/tripleo-quickstart/src/commit/b90b5a51df5104da35adf42a7d7fb5f7bc603eca/config/release diff --git a/doc/source/ci/index.rst b/doc/source/ci/index.rst deleted file mode 100644 index 9b331a0d..00000000 --- a/doc/source/ci/index.rst +++ /dev/null @@ -1,21 +0,0 @@ -TripleO CI Guide -================ - -.. toctree:: - :maxdepth: 2 - :includehidden: - - ci_primer - reproduce-ci - check_gates - standalone_scenario_jobs - baremetal_jobs - dlrn-promoter-overview - stages-overview - emit_releases_file - ruck_rover_primer - chasing_promotions - third_party_dependencies_ci - content_provider_jobs - tripleo_dependencies_pipelines - tripleo_ci_job_parenting diff --git a/doc/source/ci/promotions.mmd b/doc/source/ci/promotions.mmd deleted file mode 100644 index e6441776..00000000 --- a/doc/source/ci/promotions.mmd +++ /dev/null @@ -1,6 +0,0 @@ -graph TD - A[promoted-components] --> |periodically promote to| B[tripleo-ci-testing] - B --> |Build images & containers| C[images in RDO cloud] - C --> |Run periodic promotion jobs| D{criteria jobs passing?} - D-->|no| F[reject content] - D-->|yes| E[current-tripleo] diff --git a/doc/source/ci/reproduce-ci.rst b/doc/source/ci/reproduce-ci.rst deleted file mode 100644 index 89c0a3f2..00000000 --- a/doc/source/ci/reproduce-ci.rst +++ /dev/null @@ -1,127 +0,0 @@ -Reproduce CI jobs for debugging and development -=============================================== - -Knowing that at times ( perhaps always ) manipulating zuul jobs to do -your bidding can be frustrating. Perhaps you are trying to reproduce a -bug, test a patch, or just bored on a Sunday afternoon. I wanted to -briefly remind folks of their options. - -`RDO's zuul: `__ ---------------------------------------------------------------- - -RDO's zuul is setup to directly inherit from upstream zuul. Any TripleO -job that executes upstream should be re-runnable in RDO's zuul. A distinct -advantage here is that you can ask RDO admins to hold the job for you, -get your ssh keys on the box and debug the live environment. It's good -stuff. To hold a node, ask your friends in #rhos-ops - -Use testproject: Some documentation can be found -`here `__: - -upstream job example: -^^^^^^^^^^^^^^^^^^^^^ - -.. code-block:: yaml - - - project: - name: testproject - check: - jobs: - - tripleo-ci-centos-8-content-provider - - tripleo-ci-centos-8-containers-multinode: - dependencies: - - tripleo-ci-centos-8-content-provider - - gate: - jobs: [] - -periodic job, perhaps recreating a CIX issue example: -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -.. code-block:: yaml - - - project: - name: testproject - check: - jobs: - - tripleo-ci-centos-8-scenario002-standalone: - vars: - timeout: 22000 - - periodic-tripleo-ci-centos-8-standalone-full-tempest-scenario-master: - vars: - timeout: 22000 - force_periodic: true - - periodic-tripleo-ci-centos-8-standalone-full-tempest-scenario-victoria: - vars: - timeout: 22000 - force_periodic: true - - periodic-tripleo-ci-centos-8-standalone-full-tempest-scenario-ussuri: - vars: - timeout: 22000 - force_periodic: true - - gate: - jobs: [] - - - - -Remember that depends-on can bring in any upstream changes. -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -- Here is an example commit message: - -Test jobs with new ovn package - -.. 
code-block:: yaml - - Test jobs with new ovn package - - Depends-On: https://review.opendev.org/c/openstack/openstack-tempest-skiplist/+/775493 - - Change-Id: I7b392acc4690199caa78cac90956e717105f4c6e - -`Local zuul: `__ ---------------------------------------------------------------------------------- - -Setting up zuul and friends locally is a much heavier lift than your -first option. Instructions and scripts to help you are available in any upstream -TripleO job, and -`here `__ - -A basic readme for the logs can be found directly in the logs directory -of any tripleo job. - -- `Basic - Readme `__ -- `Job - reproduce `__ - -If you are familiar w/ zuul and friends, containers, etc.. this could be -a good option for you and your team. There are a lot of moving parts and -it's complicated, well because it's complicated. A good way to become -more familiar with zuul would be to try out zuul's tutorial - -`zuul-runner: `__ -------------------------------------------------------------------------------------------- - -A long hard fought battle of persuasion and influence has been fought -with the maintainers of the zuul project. The blueprints and specs have -merged. The project's status is not complete as there are many -unmerged patches to date. - -Other Options: --------------- - -Finally, if you are not attempting to recreate, test, play with an -upstream tripleo job and just want to develop code there is another -option. A lot of developers find `tripleo-lab `__ to be quite useful. Many -devels have their own patterns as well, what works for you is fine. - -Summary: --------- - -For what it's worth imho using testproject jobs is an efficient, low -barrier to getting things done with upstream TripleO jobs. I'll be -updating the documentation and references to try and help over the next -few days, patches are welcome :) diff --git a/doc/source/ci/ruck_rover_primer.rst b/doc/source/ci/ruck_rover_primer.rst deleted file mode 100644 index 41d0c0fc..00000000 --- a/doc/source/ci/ruck_rover_primer.rst +++ /dev/null @@ -1,201 +0,0 @@ -TripleO CI ruck|rover primer -============================ - -The tripleo-squad divides their work across 3 week sprints. During sprint -planning 2 members of the team are nominated as the 'ruck' and 'rover'. -You can easily identify these unfortunate souls in OFTC #oooq with "ruck" or -"rover" in their irc nick. - -In short the "ruck" and "rover" are tasked with keeping the lights on for a -given TripleO CI sprint. This means: - -* ensuring `gate queues`_ are green to keep TripleO patches merging. -* ensuring promotion_ jobs are green to keep TripleO up to date with - the rest of OpenStack and everything else that isn't TripleO! Target - is bugs filed + escalated + fixed for promotion at *least* once a week. - -The "ruck|rover" concept adopted by Tripleo CI are taken from -`Australian Rules Football`_. The ruck monitors 'the queue' and files bugs, -and the rover picks up those bugs and runs with them until they're fixed. - -This document is a primer for anyone new to the TripleO CI squad or otherwise -interested in how the ruck|rover of the TripleO CI squad operate. See the -`CI Team Structure`_ document for general information about how the (rest of -the) TripleO CI team is organised and operates in a given sprint. - -Ruck ----- - -The ruck monitors the various jobs across the various tripleo related repos -both upstream tripleo-ci and rdo-infra jobs and periodics for promotions. 
The -grafana dashboard at -`http://cockpit-ci.tripleo.org/`_ is one of the -tools used by the ruck to monitor jobs (and many other things, more info on -grafana below). - -Any new issues are triaged by collecting logs from multiple instances of the -error (i.e. same error in different jobs or different runs of the same job). -The ruck monitors the failing jobs and files bugs for all known or confirmed -things currently affecting TripleO CI. - -Launchpad is used as the bug tracker - here is a list of recently created -`Tripleo launchpad bugs`_. When filing a new bug, the ruck will add the correct -milestone, change the status to "Triaged" add the appropriate tag(s): - -* ci: a general tag for all ci related bugs - any bug about a failing CI job - should have this. -* alert: critical bugs e.g. something that affects a great number of jobs. This - tag causes the bug to be advertised in irc OFTC #tripleo. -* tempest: bug is tempest related - failing tests or other tempest related error. -* ci-reproducer: related to the `zuul based job reproducer`_ -* promotion-blocker: this is used when the failing job(s) is in the promotion - criteria (more on that below). Bugs with this tag are picked up by a script - running periodically and converted to a CIX card which are tracked twice a week - in a CI Escalation Status meeting. -* ovb: bug is related to ovb (openstack-virtual-baremetal) jobs. - -For the periodic promotion jobs the ruck must ensure that the jobs defined as -being in 'promotion criteria' are passing. The criteria is simply a list of -jobs which must pass for a promotion to occur (see the promotion_ -docs for more info on the promotion stages in TripleO). This list is maintained -in a file per branch in the ci-config-dlrnapi-promoter-config_ directory. -For tripleo-ci promotions we are interested in promotions from current to -current-tripleo (see promotion_). Thus, looking at master.yaml_ at time of -writing for example:: - - promotions: - current-tripleo: - candidate_label: tripleo-ci-testing - criteria: - # Jobs to be added as they are defined and qualified - - periodic-tripleo-ci-build-containers-ubi-8-push - - periodic-tripleo-centos-8-buildimage-overcloud-full-master - - periodic-tripleo-centos-8-buildimage-overcloud-hardened-uefi-full-master - - periodic-tripleo-centos-8-buildimage-ironic-python-agent-master - - periodic-tripleo-ci-centos-8-standalone-master - ... - -The above means that for a promotion to happen all the jobs defined under -"current-tripleo" must pass. Obviously this list changes over time as jobs -are created and retired. It is sometimes necessary to temporarily skip a job -from that list (which is why you may see some jobs commented out with #). - -Rover ------ -The rover then takes the bugs filed by the ruck and tries to fix them. That is -*not* to say that the rover is expected or indeed able to fix all encountered -things! Really the expectation is that the rover has a root cause, or at least -understands where the bug is coming from (e.g. which service). - -In some cases bugs are fixed once a new version of some service is released -(and in tripleo-ci jobs after a promotion_ if it is a non tripleo -service/project). In this case the rover is expected to know what that fix is -and do everything they can to make it available in the jobs. This will range -from posting gerrit reviews to bump some service version in requirements.txt -through to simply harassing the 'right folks' ;) in the relevant `TripleO Squad`_. 
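-
-When triaging it can help to quickly check whether a failing periodic job is
-actually part of the promotion criteria. A minimal sketch, assuming a local
-clone of the rdo-infra/ci-config repo linked above (the file is the same
-per-branch criteria file described in the Ruck section)::
-
-    $ git clone https://github.com/rdo-infra/ci-config
-    $ grep -A 30 'criteria:' \
-        ci-config/ci-scripts/dlrnapi_promoter/config_environments/rdo/CentOS-8/master.yaml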
- -In other cases bugs may be deprioritized - for example if the job is non voting -or is not in the promotion criteria then any related bugs are less likely to -be getting the rover's attention. If you are interested in such jobs or bugs -then you should go to #OFTC oooq channel and find the folks with "ruck" or -"rover" in their nick and harass them about it! - -Of course for other cases there are bona fide bugs with the `TripleO CI code`_ -that the rover is expected to fix. To avoid being overwhelmed time management -is hugely important for the rover especially under high load. As a general rule -the rover should not spend any more than half a day (or four hours) on any -particular bug. Once this threshold is passed the rover should reach out and -escalate to any component experts. - -Under lighter load the rover is encouraged to help with any open bugs perhaps -those ongoing issues with lower priority (e.g. non-voting jobs) and even non -CI bugs in TripleO or any other relevant OpenStack component. - -Tools ------ - -The TripleO squad has developed two main tools to help the ruck and rover -do their job efficiently. They are known within the squad as 'grafana' and -'sova' (the names of the underlying code in each case): - -* grafana: `http://cockpit-ci.tripleo.org/`_ -* sova: `http://cistatus.tripleo.org/`_ -* etherpad: $varies -* ci health: `http://ci-health.tripleo.org/`_ - -The ruck|rover are encouraged to use an etherpad that is kept up to date for -any ongoing issues actively being worked on. Besides allowing coordination -between ruck and rover themselves (the TripleO CI team is distributed across -a number of time zones) one other use case is to allow tripleo-devs to check -if the reason a particular job is failing on their code review is 'known' or if -they need to go harrass the ruck|rover about it in OFTC #oooq. The location -of the current ruck|rover etherpad is given in grafana (see below). - -Sova -^^^^ - -In sova you can see for each of "check", "gate", and "promotions" a list of all -jobs, grouped by functionality ('ovb' or 'containers') as well as by branch in -the case of promotion jobs. By clicking on a particular job you can see the -most recent failures and successes with link to logs for more investigation. -Sova tries to determine where and how the a job fails and reports that -accordingly as shown below. - -.. image:: ./_images/sova.png - :align: left - :scale: 40 % - -Grafana -^^^^^^^ - -Grafana is used to track many things and is also constantly evolving so we -highlight only a few main data points here. The top of the dashboard has some -meters showing the overall 'health' of CI. - -.. figure:: ./_images/grafana1.png - :align: center - :scale: 40 % - :alt: grafana overall ci health - - As can be seen left to right - the "Upstream Zuul queue" gives the time a - review waits before being picked up by zuul for jobs to run against it, the - "Upstream gate jobs" shows the number of failing gate jobs in the last 24 hours, - "Upstream CI stats" shows the ratio of passing to failing jobs as a Pie chart - (anything above 80% pass is good) and finally a list of the latest failing gate - jobs with links. At the bottom left there is a link to the current ruck rover - etherpad. - - -Grafana is also useful for tracking promotions across branches. - -.. figure:: ./_images/grafana2.png - :align: center - :scale: 40 % - - As seen above on the left hand side and from top to bottom - the latest - promotions for master, stein, rocky, queens and pike as bar charts. 
The bars - represent promotions and height shows the number of promotions on that day. - - -Finally grafana tracks a list of all running jobs highlighting the failures in -red. - -.. image:: ./_images/grafana3.png - :align: left - :scale: 40 % - - -.. _`gate queues`: https://docs.openstack.org/tripleo-docs/latest/ci/ci_primer.html -.. _`Australian Rules Football`: https://en.wikipedia.org/wiki/Follower_(Australian_rules_football) -.. _promotion: https://docs.openstack.org/tripleo-docs/latest/ci/stages-overview.html -.. _`TripleO Squad`: https://docs.openstack.org/tripleo-docs/latest/contributor/index.html#squads -.. _`TripleO launchpad bugs`: https://bugs.launchpad.net/tripleo/+bugs?orderby=-datecreated&start=0 -.. _ci-config-dlrnapi-promoter-config: https://github.com/rdo-infra/ci-config/tree/master/ci-scripts/dlrnapi_promoter/config_environments/rdo/CentOS-8 -.. _master.yaml: https://github.com/rdo-infra/ci-config/blob/cc3999a3fb29736769a8c497f0069e90c035b82c/ci-scripts/dlrnapi_promoter/config_environments/rdo/CentOS-8/master.yaml#L24-L51 -.. _`http://cockpit-ci.tripleo.org/`: http://cockpit-ci.tripleo.org/ -.. _`http://cistatus.tripleo.org/`: http://cistatus.tripleo.org/ -.. _`http://ci-health.tripleo.org/`: http://ci-health.tripleo.org/ -.. _`CI Team Structure`: https://specs.openstack.org/openstack/tripleo-specs/specs/policy/ci-team-structure.html -.. _`zuul based job reproducer`: https://opendev.org/openstack/tripleo-quickstart-extras/src/branch/master/roles/create-zuul-based-reproducer/README.md -.. _`TripleO CI code`: https://opendev.org/openstack/tripleo-ci/src/branch/master/README.rst - diff --git a/doc/source/ci/stages-overview.rst b/doc/source/ci/stages-overview.rst deleted file mode 100644 index 06ac4bbc..00000000 --- a/doc/source/ci/stages-overview.rst +++ /dev/null @@ -1,266 +0,0 @@ -TripleO CI Promotions -===================== - -This section introduces the concept of promotions in TripleO. -In short, a promotion happens when we can certify the latest version of all -packages required for a TripleO deployment of OpenStack as being in a good -state and without regressions. - -The certification consists of running Zuul CI jobs with the latest packages -built from source for TripleO code (list of TripleO repos at [1]_) and -the latest packages built from source for non-tripleo code. If the tests are -successful, then the result is certified as **current-tripleo**, ready to be -consumed by the TripleO CI check and gate jobs (see [2]_ for more information -about check and gate). - -This process is continuous as new code is merged into the various repos. Every -time we get a successful completion of the promotion CI jobs, the tested content -is 'promoted' to be the new **current-tripleo**, hence the name this workflow -is known by. At a given time, the latest **current-tripleo** is the baseline by -which we test all new code submissions to the TripleO project. - -TripleO vs non-tripleo repos ----------------------------- - -All proposed code submissions across the various tripleo repos are gated by the -TripleO community which owns and manages the zuul check and gate jobs for those -repos. - -However, we cannot gate changes to anything outside TripleO, including all -the OpenStack projects used by TripleO as well as any dependencies such -as Open vSwitch or Pacemaker. - -Even though we cannot gate on those external repos, the promotion process -allows us to test our TripleO code with their latest versions. 
If there are -regressions or any other bugs (and assuming ideal test coverage) the promotion -jobs will fail accordingly allowing the TripleO CI team to investigate and file -launchpad bugs so the issue(s) can be addressed. - -RDO DLRN & Promotion Criteria ------------------------------ - -TripleO CI jobs consume packages built by the RDO DLRN service ('delorean') so -we first introduce it here. An overview is given on the RDO project site at -[3]_. - -In short, RDO DLRN builds RPMs from source and publishes the resulting packages -and repos. Each build or repo is identifiable using a unique build ID. - -RDO DLRN assigns named tags to particular build IDs. You can see all of these -named tags by browsing at the RDO DLRN package root, for example for Centos8 -master branch at [4]_. Of particular importance to the TripleO promotion -workflow are:: - -* current -* consistent -* component-ci-testing -* promoted-components -* tripleo-ci-testing -* current-tripleo - -The list of tags in the order given above gives the logical progression -through the TripleO promotion workflow. - -The build ID referenced by each of those named tags is constantly updated as -new content is 'promoted' to become the new named tag. - -A general pattern in DLRN is that **current** is applied to the very latest -build, that is, the latest commits to a particular repo. A new **current** -build is generated periodically (e.g. every half hour). The **consistent** tag -represents the latest version of packages where there were no errors -encountered during the build for any of those (i.e. all packages were built -successfully). The **consistent** build is what TripleO consumes as the entry -point to the TripleO promotion workflow. - -One last point to be made about RDO DLRN is that after the TripleO promotion -CI jobs are executed against a particular DLRN build ID, the results are -reported back to DLRN. For example, you can query using the build ID at [5]_ -to get the list of jobs that were executed -against that specific content, together with the results for each. - -The list of jobs that are required to pass before we can promote a particular -build is known as the 'promotion criteria'. In order to promote, TripleO -queries the DLRN API to get the results for a particular build and compares the -passing jobs to the promotion criteria, before promoting or rejecting that -content accordingly. You can find the master centos8 promotion criteria at [6]_ -for example. - -The TripleO Promotion Pipelines -------------------------------- - -A pipeline refers to a series of Zuul CI jobs and what we refer to as the -TripleO promotion workflow is actually a number of interconnected pipelines. -At the highest level conceptually these are grouped into either *Component* -or *Integration* pipelines. The output of the Component pipeline serves as -input to the Integration pipeline. - -A Component is a conceptual grouping of packages related by functional area -(with respect to an OpenStack deployment). This grouping is enforced in -practice by the RDO DLRN server and the current list of all components can be -found at [7]_. For example, you can expect to find the 'openstack-nova-' -packages within the Compute component. - -The Component pipeline actually consists of a number of individual -pipelines, one for each of the components. The starting point for each of these -is the latest **consistent** build of the component packages and we will go -into more detail about the flow inside the component pipelines in the following -section. 
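-
-To see what would currently feed into a component pipeline, the repo file that
-the **consistent** tag points to can be fetched directly from the DLRN server.
-A minimal sketch, using the compute component as an example; the URL layout is
-an assumption based on the component paths shown later in this section::
-
-    $ # the baseurl reveals the unique DLRN build the tag currently points to
-    $ curl -s https://trunk.rdoproject.org/centos8/component/compute/consistent/delorean.repo | grep baseurl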
- -A successful run of the jobs for the given component allows us to certify that -content as being the new **promoted-components**, ready to be used as input to -the Integration pipeline. The Integration pipeline qualifies the result of the -components tested together and when that is successful we can promote to a new -current-tripleo. This is shown conceptually for a subset of components here: - -.. only:: html - - .. mermaid:: component_integration_pipelines.mmd - -In the diagram above, you can see the component pipeline at the top with the -compute, cinder and security components. This feeds into the integration -pipeline in the bottom half of the diagram where promoted-components will be -tested together and if successful produce the new **current-tripleo**. - -The Component Promotion Pipeline --------------------------------- - -As noted above, the "Component pipeline" is actually a series of individual -pipelines, one for each component. While these all operate and promote -in the same way, they do so independently of each other. -So the latest **compute/promoted-components** may be much newer than the latest -**security/promoted-components**, if the latter is failing to promote for -example. The following flowchart shows the progression of the RDO DLRN tags -through a single component pipeline while in practice this flow is repeated in -parallel per component. - -.. only:: html - - .. mermaid:: component_pipeline_tags_flow.mmd - - -As illustrated above, the entry point to the component pipelines -is the latest **consistent** build from RDO DLRN. Once a day a periodic job -tags the latest **consistent** build as **component-ci-testing**. For example -you can see the history for the baremetal component job at [8]_ descriptively -named -**periodic-tripleo-centos-8-master-component-baremetal-promote-consistent-to-component-ci-testing**. - -After this job has completed the content marked as **component-ci-testing** -becomes the new candidate for promotion to be passed through the component CI -jobs. The **component-ci-testing** repo content is tested with the latest -**current-tripleo** repos of everything else. Remember that at a given time -**current-tripleo** is the known good baseline by which we test all new -content and the same applies to new content tested in the component pipelines. - -As an example of the component CI jobs, you can see the history for the -baremetal component standalone job at [9]_. If you navigate to the -*logs/undercloud/etc/yum.repos.d/* -directory for one of those job runs you will see (at least) the following -repos: - -* delorean.repo - which provides the latest current-tripleo content -* baremetal-component.repo - which provides the 'component-ci-testing' content - that we are trying to promote. - -You may notice that the trick allowing the baremetal-component.repo to have -precedence for the packages it provides is to set the repo priority accordingly -(*1* for the component and *20* for delorean.repo). - -Another periodic job checks the result of the **component-ci-testing** job runs -and if the component promotion criteria is satisfied the candidate content is -promoted and tagged as the new **promoted-components**. You can find the -promotion criteria for Centos8 master components at [10]_. - -As an example the history for the zuul job that handles promotion to -promoted-components for the cinder component can be found at [11]_ - -You can explore the latest content tagged as **promoted-components** for the -compute component at [12]_. 
All the component **promoted-components** are -aggregated into one repo that can be found at [13]_ and looks -like the following:: - - [delorean-component-baremetal] - name=delorean-openstack-ironic-9999119f737cd39206df3d73e23e5f47933a6f32 - baseurl=https://trunk.rdoproject.org/centos8/component/baremetal/99/99/9999119f737cd39206df3d73e23e5f47933a6f32_1b0aff0d - enabled=1 - gpgcheck=0 - priority=1 - - [delorean-component-cinder] - name=delorean-openstack-cinder-482e6a3cc5cca697b54ee1d853a4eca6e6f3cfc7 - baseurl=https://trunk.rdoproject.org/centos8/component/cinder/48/2e/482e6a3cc5cca697b54ee1d853a4eca6e6f3cfc7_ae00ff8c - enabled=1 - gpgcheck=0 - priority=1 - -Every time a component promotes a new **component/promoted-components** the -aggregated **promoted-components** delorean.repo on the RDO DLRN server is -updated with the new content. - -This **promoted-components** repo is used as the starting point for the TripleO -Integration promotion pipeline. - -The Integration Promotion Pipeline ----------------------------------- - -The Integration pipeline as the name suggests is the integration point where -we test new content from all components together. The consolidated -**promoted-components** delorean.repo produced by the component pipeline -is tested with a series of CI jobs. If the jobs listed in the promotion -criteria pass, we promote that content and tag it as **current-tripleo**. - -.. only:: html - - .. mermaid:: promotions.mmd - -As can be seen in the flowchart above, the **promoted-components** content -is periodically promoted (pinned) to **tripleo-ci-testing**, which becomes the -new promotion candidate to be tested. You can find the build history -for the job that promotes to **tripleo-ci-testing** for Centos 8 master, -descriptively named -**periodic-tripleo-centos-8-master-promote-promoted-components-to-tripleo-ci-testing**, -at [14]_. - -First the **tripleo-ci-testing** content is used to build containers and -overcloud deployment images and these are pushed to RDO cloud to be used by -the rest of the jobs in the integration pipeline. - -The periodic promotion jobs are then executed with the results being reported -back to DLRN. If the right jobs pass according to the promotion criteria -then the **tripleo-ci-testing** content is promoted and tagged to become the -new **current-tripleo**. - -An important distinction in the integration pipeline compared to the promotion -pipeline is in the final promotion of content. In the component pipeline -the **promoted-components** content is tagged by a periodic Zuul job as -described above. For the Integration pipeline however, the promotion to -**current-tripleo** happens with the use of a dedicated service. This service -is known to the tripleo-ci squad by a few names including -'the promotion server', 'the promoter server' and 'the promoter'. - -In short the promoter periodically queries delorean for the results of the last -few tripleo-ci-testing runs. It compares the results to the promotion criteria -and if successful it re-tags the container and overcloud deployment images as -**current-tripleo** and pushes back to RDO cloud (as well as to the quay.io and -docker registries). It also talks to the DLRN server and retags the -successful **tripleo-ci-testing** repo as the new **current-tripleo**. -You can read more about the promoter with links to the code at [15]_. - -References -~~~~~~~~~~ - -.. [1] `List of TripleO repos `_ -.. [2] `TripleO Check and Gate jobs `_ -.. [3] `RDO DLRN Overview @ rdoproject.org `_ -.. 
[4] `Index of RDO DLRN builds for Centos 8 master @ rdoproject.org `_ -.. [5] `Query RDO DLRN by build ID @ rdoproject.org `_ -.. [6] `Centos8 current-tripleo promotion criteria at time of writing `_ -.. [7] `Centos8 RDO DLRN components @ rdoproject.org `_ -.. [8] `Zuul job history "periodic-tripleo-centos-8-master-component-baremetal-promote-consistent-to-component-ci-testing" `_ -.. [9] `Zuul job history "periodic-tripleo-ci-centos-8-standalone-baremetal-master" `_ -.. [10] `Centos8 master promoted-components promotion critiera at time of writing `_ -.. [11] `Zuul job history "periodic-tripleo-centos-8-master-component-cinder-promote-to-promoted-components" `_ -.. [12] `Compute promoted-components @ rdoproject.org `_ -.. [13] `Centos8 master promoted-components delorean.repo @ rdoproject.org `_ -.. [14] `Zuul job history "periodic-tripleo-centos-8-master-promote-promoted-components-to-tripleo-ci-testing" `_ -.. [15] `TripleO CI docs "Promotion Server and Criteria" `_ diff --git a/doc/source/ci/standalone_scenario_jobs.rst b/doc/source/ci/standalone_scenario_jobs.rst deleted file mode 100644 index 72639d39..00000000 --- a/doc/source/ci/standalone_scenario_jobs.rst +++ /dev/null @@ -1,100 +0,0 @@ -Standalone Scenario jobs -======================== - -This section gives an overview and some details on the standalone scenario ci -jobs. The standalone deployment is intended as a one node development -environment for TripleO. - see the `Standalone Deploy Guide `_ -for more information on setting up a standalone environment. - -A 'scenario' is a concept used in TripleO -to describe a collection of services - see the service-testing-matrix_ for more -information about each scenario and the services deployed there. We combine the -two to define the standalone scenario jobs. - -These are intended to give developers faster feedback (the jobs are relatively -fast to complete) and allow us to have better coverage across services by defining a -number of scenarios. Crucially the standalone scenario jobs allow us to increase -coverage without further increasing our resource usage footprint with eachjob only taking -a single node. See this openstack-dev-thread_ for background around the move from -the multinode jobs to the more resource friendly standalone versions. - -.. _service-testing-matrix: https://github.com/openstack/tripleo-heat-templates/blob/master/README.rst#service-testing-matrix -.. _openstack-dev-thread: http://lists.openstack.org/pipermail/openstack-dev/2018-October/136192.html -.. _standalone_deploy_guide: https://docs.openstack.org/project-deploy-guide/tripleo-docs/latest/deployment/standalone.html - - -Where ------ - -The standalone scenario jobs (hereafter referred to as just 'standalone' in this -document), are defined in the `tripleo-ci/zuul.d/standalone.yaml`_ file. Besides -the definitions for each of the scenario00X-standalone jobs, this file also -carries the tripleo-standalone-scenarios-full_project-template_ which defines -the zuul layout and files: sections for the standalone jobs in a central location. - -Thus, the jobs are consumed by other projects across tripleo by inclusion of -the template in their respective zuul layout file, for example -tripleo-heat-templates_ and tripleo-common_. - -Besides the job definitions in the tripleo-ci repo, the other main part of the -standalone jobs is a service environment file, which lives in the -`tripleo-heat-templates-ci/environments`_. 
As you can see in scenario001-env_, -scenario002-env_, scenario003-env_ and scenario004-env_ that is where we define the -services and parameters that are part of a given scenario. - -.. _`tripleo-ci/zuul.d/standalone.yaml`: https://github.com/openstack-infra/tripleo-ci/blob/master/zuul.d/standalone-jobs.yaml -.. _tripleo-standalone-scenarios-full_project-template: https://github.com/openstack-infra/tripleo-ci/blob/75ff68608baab31f6ac9e5395a9841c08c62e092/zuul.d/standalone-jobs.yaml#L78-L80 -.. _tripleo-heat-templates: https://github.com/openstack/tripleo-heat-templates/blob/d5298e2f7936bcb5ca7d41466d024fe6958ce177/zuul.d/layout.yaml#L8 -.. _tripleo-common: https://github.com/openstack/tripleo-common/blob/026ed7d9e041c92956aa9db59e881f6632eed2f2/zuul.d/layout.yaml#L14 -.. _`tripleo-heat-templates-ci/environments`: https://github.com/openstack/tripleo-heat-templates/tree/master/ci/environments -.. _scenario001-env: https://github.com/openstack/tripleo-heat-templates/blob/1c46d1850a8de89daeecd96f2f5288336e3778f8/ci/environments/scenario001-standalone.yaml#L1 -.. _scenario002-env: https://github.com/openstack/tripleo-heat-templates/blob/1c46d1850a8de89daeecd96f2f5288336e3778f8/ci/environments/scenario002-standalone.yaml#L1 -.. _scenario003-env: https://github.com/openstack/tripleo-heat-templates/blob/1c46d1850a8de89daeecd96f2f5288336e3778f8/ci/environments/scenario003-standalone.yaml#L1 -.. _scenario004-env: https://github.com/openstack/tripleo-heat-templates/blob/1c46d1850a8de89daeecd96f2f5288336e3778f8/ci/environments/scenario004-standalone.yaml#L1 - -How ---- - -The standalone jobs are special in that they differ from 'traditional' multinode -jobs by having a shared featureset rather than requiring a dedicated featureset -for each job. Some of the standalone scenarios, notably scenario012_ will end up -having a dedicated-featureset_ however in most cases the base standalone-featureset052_ -can be re-used for the different scenarios. Notably you can see that scenario001-job_, -scenario002-job_, scenario003-job_ and scenario004-job_ job definitions are all -using the same standalone-featureset052_. - -Given that we use the same featureset the main differentiator between these -standalone jobs is the scenario environment file, which we pass using -featureset_override (see :doc:`../ci/check_gates`). -For example in the scenario001 job we point to the scenario001-standalone.yaml -(scenario001-env_):: - - - job: - name: tripleo-ci-centos-7-scenario001-standalone - voting: true - parent: tripleo-ci-base-standalone - nodeset: single-centos-7-node - branches: ^(?!stable/(newton|ocata|pike|queens|rocky)).*$ - vars: - featureset: '052' - standalone_ceph: true - featureset_override: - standalone_container_cli: docker - standalone_environment_files: - - 'environments/low-memory-usage.yaml' - - 'ci/environments/scenario001-standalone.yaml' - ... - -Finally we use a task in the tripleo-ci-run-test-role_ to pass the scenario -environment file into the standalone deployment command using the standalone -role standalone_custom_env_files_ parameter. - -.. _scenario012: https://review.opendev.org/634723 -.. _dedicated-featureset: https://review.opendev.org/636355 -.. _standalone-featureset052: https://github.com/openstack/tripleo-quickstart/blob/6585d6320ca4f0c37ae62dfc60fe2eb0cd42647c/config/general_config/featureset052.yml#L2 -.. _scenario001-job: https://github.com/openstack-infra/tripleo-ci/blob/1d890565feeeea6ce637cf0384da822926480f07/zuul.d/standalone-jobs.yaml#L376 -.. 
_scenario002-job: https://github.com/openstack-infra/tripleo-ci/blob/1d890565feeeea6ce637cf0384da822926480f07/zuul.d/standalone-jobs.yaml#L401 -.. _scenario003-job: https://github.com/openstack-infra/tripleo-ci/blob/1d890565feeeea6ce637cf0384da822926480f07/zuul.d/standalone-jobs.yaml#L426 -.. _scenario004-job: https://github.com/openstack-infra/tripleo-ci/blob/1d890565feeeea6ce637cf0384da822926480f07/zuul.d/standalone-jobs.yaml#L448 -.. _tripleo-ci-run-test-role: https://github.com/openstack-infra/tripleo-ci/blob/1d890565feeeea6ce637cf0384da822926480f07/roles/run-test/tasks/main.yaml#L26-L36 -.. _standalone_custom_env_files: https://github.com/openstack/tripleo-quickstart-extras/blob/def233448d2ae8ed5bcc6d286f5cf8378f7cf7ec/roles/standalone/templates/standalone.sh.j2#L9 diff --git a/doc/source/ci/third_party_dependencies_ci.rst b/doc/source/ci/third_party_dependencies_ci.rst deleted file mode 100644 index eb4d3940..00000000 --- a/doc/source/ci/third_party_dependencies_ci.rst +++ /dev/null @@ -1,144 +0,0 @@ -Gating github projects using TripleO CI jobs -============================================ - -In TripleO deployment, we consume OpenStack and non-openstack projects. -In order to catch issues early, every patchset of the OpenStack projects -is gated with TripleO CI jobs using Zuul. - -With the help of an RDO software factory instance, we can also now gate -non-openstack projects hosted on Github. - -ceph-ansible and podman are the two non-openstack projects which are heavily -used in TripleO deployments and are hosted on github and for which we have -enabled TripleO CI jobs via github pull requests as described below. - -Jobs running against ceph-ansible ---------------------------------- - -ceph-ansible_ is used to deploy Ceph in standalone scenario 1 and 4 jobs. -These jobs are defined in rdo-jobs_ repo. - -On any ceph-ansible pull request, A user can trigger these jobs by leaving a -comment with 'check-rdo' on a pull request. It is currently done manually by -the OpenStack developers. - -Then, those jobs will appear in the RDO software factory Zuul_ status page -under `github-check` pipeline. - -On merged patches, periodic jobs are also triggered in -`openstack-periodic-weekend` pipeline_. - -.. _ceph-ansible: https://github.com/ceph/ceph-ansible -.. _rdo-jobs: https://github.com/rdo-infra/rdo-jobs/blob/master/zuul.d/ceph-ansible.yaml -.. _Zuul: https://review.rdoproject.org/zuul/status -.. _pipeline: https://review.rdoproject.org/zuul/builds?pipeline=openstack-periodic-weekend&project=ceph%2Fceph-ansible - -Jobs running against podman ---------------------------- - -In TripleO, OpenStack services are running in containers. -The container lifecycle, healthcheck and execution is managed via systemd using -paunch. Paunch under the hood uses podman. - -The `podman` utility comes from libpod_ project. - -Currently on each libpod pull request, tripleo ci based jobs get triggered -automatically and get queued in `github-check` pipeline in RDO software factory -Zuul instance. - -TripleO jobs related to podman are defined in rdo-jobs-repo_. - -For gating libpod project, we run keystone based scenario000 minimal tripleo -deployment job which tests the functionality of podman with keystone services. -It takes 30 mins to finish the tripleo deployment. 
-
-Below is the example job definition for scenario000-job_::
-
-    - job:
-        name: tripleo-podman-integration-rhel-8-scenario000-standalone
-        parent: tripleo-ci-base-standalone-periodic
-        nodeset: single-rhel-8-node
-        branches: ^master$
-        run: playbooks/podman/install-podman-rpm.yaml
-        required-projects:
-          - name: github.com/containers/libpod
-        vars:
-          featureset: '052'
-          release: master
-          registry_login_enabled: false
-          featureset_override:
-            standalone_environment_files:
-              - 'environments/low-memory-usage.yaml'
-              - 'ci/environments/scenario000-standalone.yaml'
-              - 'environments/podman.yaml'
-            run_tempest: false
-            use_os_tempest: false
-
-To re-run the TripleO jobs on a libpod pull request, we can add a
-`check-github` comment on the pull request itself.
-
-On merged patches, periodic jobs also get triggered in the
-`openstack-regular` rdo-job-pipeline_.
-
-.. _libpod: https://github.com/containers/libpod
-.. _rdo-jobs-repo: https://github.com/rdo-infra/rdo-jobs/blob/master/zuul.d/podman.yaml
-.. _scenario000-job: https://github.com/rdo-infra/rdo-jobs/blob/0186d637063c7e410ab9e0afc91b266c19e92473/zuul.d/podman.yaml#L50-L67
-.. _rdo-job-pipeline: https://review.rdoproject.org/zuul/builds?pipeline=openstack-regular&project=containers%2Flibpod
-
-
-Report bugs when jobs start failing
------------------------------------
-
-TripleO jobs running against the libpod and ceph-ansible projects might fail
-due to an issue in libpod/ceph-ansible or in TripleO itself.
-
-Once the status of any job is *FAILED*, *POST_FAILURE* or *RETRY_LIMIT*,
-click on the job link and it will open the build result page. Then click on
-`log_url` and open `job-output.txt`. It contains the results of the
-ansible playbook runs.
-Look for *ERROR* or failed messages.
-If the failure looks like something obvious,
-please go ahead and create a bug on launchpad_ against the tripleo project with
-all the information.
-
-Once the bug is created, please add the `depcheck` tag to the filed launchpad
-bug. This tag is explicitly used for listing bugs related to TripleO CI job
-failures against the ceph-ansible and podman projects.
-
-.. _launchpad: https://bugs.launchpad.net/tripleo/+filebug
-
-`check-rdo` vs `check-github`
------------------------------
-
-`check-rdo` and `check-github` comments are used to trigger TripleO based zuul
-jobs against pull requests of the github projects (ceph-ansible/podman).
-
-.. note::
-
-   On commenting `check-rdo` or `check-github`, not all jobs will appear in the
-   github-manual pipeline. It depends on whether the jobs are configured to be
-   triggered in that particular pipeline. If the jobs are not defined there,
-   nothing will happen.
-
-check-rdo
-*********
-
-It is used mainly against ceph-ansible pull requests. The jobs will get
-triggered and land in the `github-check` pipeline.
-
-check-github
-************
-
-If a TripleO job fails against ceph-ansible or podman PRs, then it can be
-relaunched using a `check-github` comment. The job will appear in the
-`github-manual` pipeline.
-
-Using `Depends-On` on ceph-ansible/podman pull requests
--------------------------------------------------------
-
-One can also make OpenStack or RDO gerrit reviews depend on
-ceph-ansible/podman pull requests by putting
-`Depends-On: ` in the first message
-of the github pull request_.
-
-.. 
_request: https://github.com/ceph/ceph-ansible/pull/3576 diff --git a/doc/source/ci/tripleo_ci_job_parenting.rst b/doc/source/ci/tripleo_ci_job_parenting.rst deleted file mode 100644 index b3f094a8..00000000 --- a/doc/source/ci/tripleo_ci_job_parenting.rst +++ /dev/null @@ -1,422 +0,0 @@ -TripleO CI Zuul Jobs Parenting -============================== - -When a developer submits a patch to TripleO repositories, their code is -tested against a series of different TripleO CI jobs. -Each job creates a different scenario for testing purposes. - -The TripleO CI jobs are Zuul jobs, defined within TripleO projects under -one of several locations: `zuul.d`_ directory, .zuul.yaml or zuul.yaml. - -A Zuul job can be inherited in various child jobs as `parent`_. - - -Zuul Job Parenting -++++++++++++++++++ - -In order to re-use a particular Zuul job, we create -a set of standard base jobs, which contain -ansible variables, required projects, pre-run, run, -post-run steps and Zuul related variables. - -These base job definitions are used as `parent`_ in various tripleo-ci -jobs. The child job inherits attributes from the parent unless -these are overridden by the child. - -A child job can override the variable which is also defined -in parent job. - -TripleO CI Base jobs -++++++++++++++++++++ - -TripleO CI base jobs are defined in `zuul.d/base.yaml`_ file -in tripleo-ci repo. - -Below is the list of base jobs and each is explained in a little more detail -in subsequent sections: - -* tripleo-ci-base-common-required-projects -* tripleo-ci-base-standard -* tripleo-ci-base-multinode-standard -* tripleo-ci-base-singlenode-standard -* tripleo-ci-base-standalone-standard -* tripleo-ci-base-standalone-upgrade-standard -* tripleo-ci-base-ovb-standard -* tripleo-ci-base-containers-standard -* tripleo-ci-base-images-standard -* tripleo-ci-content-provider-standard - -tripleo-ci-base-common-required-projects ----------------------------------------- - -It contains a list of common required projects and ansible roles -which are needed to start the deployment. It is used in -upstream, RDO and Downstream. -If a new project is needed in all types of deployment -(upstream, RDO and Downstream) it can be added here. - -tripleo-ci-base-standard ------------------------- - -It contains a set of ansible variables and playbooks used in -most deployments. - -tripleo-ci-base-multinode-standard ----------------------------------- -It contains a set of ansible variables and playbooks used in -most containers multinode and scenarios job. - -It is used in those jobs where the user needs to deploy -OpenStack using one undercloud and one controller. - -tripleo-ci-base-singlenode-standard ------------------------------------ -It contains a set of ansible variables and playbooks used in -most single node jobs. - -It is used in those jobs where user needs to build containers -and overcloud images which later can be used in another deployment. - -It can also be used for undercloud deployment. - -tripleo-ci-base-standalone-standard ------------------------------------ -It contains a set of ansible variables and playbooks used in -vanilla standalone and standalone based scenario jobs. - -The standalone job consists of single node overcloud deployment. - -tripleo-ci-base-standalone-upgrade-standard -------------------------------------------- -It contains a set of ansible variables and playbooks used in -the standalone upgrade job. 
-
-The standalone upgrade job consists of a single node overcloud deployment
-where we upgrade the deployment from an older release to a newer one.
-
-tripleo-ci-base-ovb-standard
-----------------------------
-It contains a set of ansible variables and playbooks used in
-the virtual baremetal deployment.
-
-The ovb job consists of one undercloud and four overcloud
-nodes (one compute and multiple controllers) deployed as
-virtual baremetal nodes. It is a replica of
-real world customer deployments.
-
-It is used in RDO and downstream jobs.
-
-tripleo-ci-base-containers-standard
------------------------------------
-It contains a set of ansible variables and playbooks used
-while building containers and pushing them to a specific registry.
-
-tripleo-ci-base-images-standard
--------------------------------
-It contains a set of ansible variables and playbooks used
-while building overcloud images and pushing them to the image server.
-
-tripleo-ci-content-provider-standard
-------------------------------------
-It contains a set of ansible variables and playbooks used for
-building containers and pushing them to a local registry.
-Depends-On patches are built into their respective rpm packages via DLRN and
-served by local yum repos.
-
-The job is `paused`_ to serve the container registry and yum repos which
-can be used later in dependent jobs.
-
-Currently these jobs are running in Upstream and Downstream.
-
-Required Project Jobs
-+++++++++++++++++++++
-
-It contains the list of required projects needed for a specific type
-of deployment.
-
-The Upstream job `tripleo-ci-build-containers-required-projects-upstream`_
-requires projects like ansible-role-container-registry,
-kolla, python-tripleoclient and tripleo-ansible to build containers.
-
-In the case of RDO, `tripleo-ci-build-containers-required-projects-rdo`_ serves
-the same purpose.
-
-Many Upstream OpenStack projects are forked downstream and have different
-branches.
-
-To accommodate the downstream namespace and branches we use the downstream
-specific required project job (*required-projects-downstream*)
-as a base job with proper branches and override-checkout.
-
-The tripleo-ci-base-required-projects-multinode-internal job defined in the
-examples below is a perfect example of this.
-
-Below is one of the examples of a container multinode required-projects job.
- -`Upstream`_ :: - - - job: - name: tripleo-ci-base-required-projects-multinode-upstream - description: | - Base abstract job to add required-projects for Upstream Multinode Jobs - abstract: true - parent: tripleo-ci-base-multinode-standard - required-projects: - - opendev.org/openstack/tripleo-ansible - - opendev.org/openstack/tripleo-common - - opendev.org/openstack/tripleo-operator-ansible - - name: opendev.org/openstack/ansible-config_template - override-checkout: master - -`RDO`_ :: - - - job: - name: tripleo-ci-base-required-projects-multinode-rdo - abstract: true - description: | - Base abstract job for multinode in RDO CI zuulv3 jobs - parent: tripleo-ci-base-multinode-standard - pre-run: - - playbooks/tripleo-rdo-base/pre.yaml - - playbooks/tripleo-rdo-base/container-login.yaml - roles: - - zuul: opendev.org/openstack/ansible-role-container-registry - - zuul: opendev.org/openstack/tripleo-ansible - required-projects: - - opendev.org/openstack/ansible-role-container-registry - - opendev.org/openstack/tripleo-ansible - secrets: - - rdo_registry - vars: - registry_login_enabled: true - - -Downstream :: - - - job: - name: tripleo-ci-base-required-projects-multinode-internal - description: | - Base abstract job to add required-projects for multinode downstream job - abstract: true - override-checkout: - parent: tripleo-ci-base-multinode-standard - required-projects: - - name: tripleo-ansible - branch: - - ansible-config_template - - tripleo-operator-ansible - - rdo-jobs - - tripleo-environments - roles: - - zuul: rdo-jobs - pre-run: - - playbooks/configure-mirrors.yaml - - playbooks/tripleo-rdo-base/cert-install.yaml - - playbooks/tripleo-rdo-base/pre-keys.yaml - vars: - mirror_locn: - featureset_override: - artg_repos_dir: /home/zuul/src//openstack - -Distribution Jobs -+++++++++++++++++ - -The TripleO deployment is supported on multiple distro versions. -Here is the current supported matrix in RDO, Downstream and Upstream. - -+----------+------------------------------+-------------+ -| Release | CentOS/CentOS Stream Version |RHEL Version | -+==========+==============================+=============+ -| Master | 9-Stream |- | -+----------+------------------------------+-------------+ -| Wallaby | 8-Stream, 9-Stream |8.x, 9 | -+----------+------------------------------+-------------+ -| Victoria | 8-Stream |- | -+----------+------------------------------+-------------+ -| Ussuri | 8-Stream |- | -+----------+------------------------------+-------------+ -| Train | 7, 8-Stream |8.x | -+----------+------------------------------+-------------+ - -Each of these distros have different settings which are used in deployment. -It's easier to maintain separate variables based on distributions. - -Below is an example of distro jobs for containers multinode at different levels. 
- -`Upstream Distro Jobs`_ :: - - - - job: - name: tripleo-ci-base-multinode - abstract: true - description: | - Base abstract job for multinode TripleO CI C7 zuulv3 jobs - parent: tripleo-ci-base-required-projects-multinode-upstream - nodeset: two-centos-7-nodes - - - - job: - name: tripleo-ci-base-multinode-centos-8 - abstract: true - description: | - Base abstract job for multinode TripleO CI centos-8 zuulv3 jobs - parent: tripleo-ci-base-required-projects-multinode-upstream - nodeset: two-centos-8-nodes - - - job: - name: tripleo-ci-base-multinode-centos-9 - abstract: true - description: | - Base abstract job for multinode TripleO CI centos-9 zuulv3 jobs - parent: tripleo-ci-base-required-projects-multinode-upstream - nodeset: two-centos-9-nodes - -`RDO Distro Jobs`_ :: - - - job: - name: tripleo-ci-base-multinode-periodic - parent: tripleo-ci-base-multinode-rdo - pre-run: playbooks/tripleo-ci-periodic-base/pre.yaml - post-run: playbooks/tripleo-ci-periodic-base/post.yaml - required-projects: - - config - - rdo-infra/ci-config - roles: - - zuul: rdo-infra/ci-config - secrets: - - dlrnapi - - - job: - name: tripleo-ci-base-multinode-periodic-centos-8 - parent: tripleo-ci-base-multinode-rdo-centos-8 - pre-run: playbooks/tripleo-ci-periodic-base/pre.yaml - post-run: playbooks/tripleo-ci-periodic-base/post.yaml - required-projects: - - config - - rdo-infra/ci-config - roles: - - zuul: rdo-infra/ci-config - vars: - promote_source: tripleo-ci-testing - secrets: - - dlrnapi - - - job: - name: tripleo-ci-base-multinode-periodic-centos-9 - parent: tripleo-ci-base-multinode-rdo-centos-9 - pre-run: playbooks/tripleo-ci-periodic-base/pre.yaml - post-run: playbooks/tripleo-ci-periodic-base/post.yaml - required-projects: - - config - - rdo-infra/ci-config - roles: - - zuul: rdo-infra/ci-config - vars: - promote_source: tripleo-ci-testing - secrets: - - dlrnapi - -Zuul Job Inheritance Order -++++++++++++++++++++++++++ - -Here is an example of Upstream inheritance of tripleo-ci-centos-9-containers-multinode_ job.:: - - tripleo-ci-base-common-required-projects - | - v - tripleo-ci-base-standard - | - v - tripleo-ci-base-multinode-standard - | - v - tripleo-ci-base-required-projects-multinode-upstream - | - v - tripleo-ci-base-multinode-centos-9 - | - v - tripleo-ci-centos-9-containers-multinode - - -Here is the another example of RDO job periodic-tripleo-ci-centos-8-containers-multinode-master_ :: - - tripleo-ci-base-multinode-standard - | - v - tripleo-ci-base-required-projects-multinode-rdo - | - v - tripleo-ci-base-multinode-rdo-centos-8 - | - v - tripleo-ci-base-multinode-periodic-centos-8 - | - v - periodic-tripleo-ci-centos-8-containers-multinode-master - - -TripleO CI Zuul Job Repos -+++++++++++++++++++++++++ - -Below is the list of repos where tripleo-ci related Zuul jobs are defined. - -Upstream --------- -* `tripleo-ci `_ - -RDO ---- -* `config `_: Jobs which needs secrets are defined here. -* `rdo-jobs `_ - -FAQs regarding TripleO CI jobs -++++++++++++++++++++++++++++++ - -* If we have a new project, which needs to be tested at all places - and installed from source but - - - cloned from upstream source, then it must be added under required-projects - at tripleo-ci-base-common-required-projects job. - - - the project namespace is different in Upstream and downstream, then it must be - added under required-projects at - Downstream (tripleo-ci-base-required-projects-multinode-internal) or - Upstream (tripleo-ci-base-required-projects-multinode-upstream) specific - required-projects parent job. 
- - - if the project is only developed at downstream or RDO or Upstream, then it must - be added under required project at downstream or RDO or Upstream required-projects - parent job. - -* In order to add support for new distros, please use required-projects job as a - parent and then create distro version specific child job with required nodeset. - -* If a project with different branch is re-added in child job required-projects, - then the child job project will be used in the deployment. - -* If a playbook (which calls another role, exists in different repo) is called at - pre-run step in Zuul job, then role specific required projects and roles needs - to be added at that job level. For example: In `tripleo-ci-containers-rdo-upstream-pre`_ - job, ansible-role-container-registry and triple-ansible is needed for pre.yaml playbook. - So both projects are added in roles and required-projects. - -* If a job having pre/post run playbook needs zuul secrets and playbook depends on - distros, then the job needs to be defined in config repo. - -* We should not use branches `attributes`_ in Zuul Distro jobs or options jobs. - -.. _`zuul.d`: https://opendev.org/openstack/tripleo-ci/src/branch/master/zuul.d -.. _`parent`: https://zuul-ci.org/docs/zuul/latest/config/job.html#attr-job.parent -.. _`zuul.d/base.yaml`: https://opendev.org/openstack/tripleo-ci/src/branch/master/zuul.d/base.yaml -.. _`tripleo-ci-build-containers-required-projects-rdo`: https://github.com/rdo-infra/rdo-jobs/commit/86e7e63ce6da27c2815afa845a6878cf96acdb47#diff-4897e02c92e2979a54f09d6eb383dba74c9a9211b065a52f9ecc4efbcce19637R17 -.. _`paused`: https://zuul-ci.org/docs/zuul/latest/job-content.html#pausing-the-job -.. _`tripleo-ci-build-containers-required-projects-upstream`: https://opendev.org/openstack/tripleo-ci/commit/1d640d09fd808caa33b82f0bdd5622120cebef09 -.. _`Upstream`: https://opendev.org/openstack/tripleo-ci/src/commit/9e270ea7f8c19fc3902a38d87a7ea4ace8219cd9/zuul.d/multinode-jobs.yaml#L17 -.. _`RDO`: https://github.com/rdo-infra/review.rdoproject.org-config/commit/b96b916fb2446171f5040ba8168c470a79f1befa#diff-80b60a19d10a7b56e22da7bfc1926e4e8d2143670b3ec3f26d009bda8e8910bfR527 -.. _`Upstream Distro Jobs`: https://github.com/openstack/tripleo-ci/commit/9e270ea7f8c19fc3902a38d87a7ea4ace8219cd9#diff-7653508e44c2cd8de8b5140648d7583c5efb27f0012155ff21f83c22edad69a3R29-R57 -.. _`RDO Distro Jobs`: https://github.com/rdo-infra/review.rdoproject.org-config/commit/b96b916fb2446171f5040ba8168c470a79f1befa#diff-80b60a19d10a7b56e22da7bfc1926e4e8d2143670b3ec3f26d009bda8e8910bfR574-R616 -.. _`periodic-tripleo-ci-centos-8-containers-multinode-master`: https://review.rdoproject.org/zuul/job/periodic-tripleo-ci-centos-8-containers-multinode-master -.. _`tripleo-ci-centos-9-containers-multinode`: https://zuul.openstack.org/job/tripleo-ci-centos-9-containers-multinode -.. _`tripleo-ci-containers-rdo-upstream-pre`: https://opendev.org/openstack/tripleo-ci/commit/05366af2930d76b4791a0fcb1f8ed9fddb132721 -.. _`attributes`: https://opendev.org/openstack/tripleo-ci/commit/bda6e1a61a846890c9cc39d0bc91952e9c6deb8f diff --git a/doc/source/ci/tripleo_dependencies_pipelines.rst b/doc/source/ci/tripleo_dependencies_pipelines.rst deleted file mode 100644 index 2bee8e45..00000000 --- a/doc/source/ci/tripleo_dependencies_pipelines.rst +++ /dev/null @@ -1,188 +0,0 @@ -TripleO Dependency Pipeline -+++++++++++++++++++++++++++++ - -This section introduces the TripleO Dependency Pipeline. 
The dependency pipeline is what the TripleO CI team calls the series of Zuul CI
jobs that aim to catch problems in deployment *dependencies*.

A dependency is any package that is not directly related to the deployment of
OpenStack itself, such as OpenvSwitch, podman, buildah, pacemaker and ansible.
Each time one of these projects releases a newer version, it can break the
OpenStack deployment and CI.

Currently we have the `promotion and component pipeline`_ set up to detect
OpenStack project related issues early.

The TripleO dependency pipeline exists to detect breakages coming from
non-OpenStack projects. Currently we have a single type of pipeline enabled:

* packages coming from a specific repo

The configuration for each pipeline can be found under
tripleo-quickstart/src/branch/master/config/release/dependency_ci//repo_config.yaml.

Current OpenStack Dependency Pipeline jobs
--------------------------------------------

* openstack-dependencies-openvswitch - for testing OVS and OVN dependencies coming from the NFV sig repo.
* openstack-dependencies-centos-compose - for testing jobs pinned to a specific CentOS compose build.

.. note::
    The following pipelines were deprecated in favor of CentOS Stream 9 adoption:

    - openstack-dependencies-containertools - for testing container tools dependencies
    - openstack-dependencies-centos8stream - for testing base operating system dependencies coming from the CentOS-8 stream repo.

Understanding Package Dependency Pipeline
-------------------------------------------

openstack-dependencies-openvswitch is a package dependency pipeline where we
test OVS and OVN packages coming from the NFV sig.

Here is the config for the `openvswitch dependency pipeline`_:

.. code-block:: yaml

    add_repos:
      - type: generic
        reponame: openvswitch-next
        filename: "openvswitch-next.repo"
        baseurl: "https://buildlogs.centos.org/centos/8/nfv/x86_64/openvswitch-2/"
        update_container: false
    dependency_override_repos:
      - centos-nfv-openvswitch,http://mirror.centos.org/centos/8/nfv/x86_64/openvswitch-2/
    dep_repo_cmd_after: |
        {% if dependency_override_repos is defined %}
        {% for item in dependency_override_repos %}
        sudo dnf config-manager --set-disabled {{ item.split(',')[0] }}
        {% endfor %}
        sudo dnf clean metadata;
        sudo dnf clean all;
        sudo dnf update -y;
        {% endif %}

What do the above terms mean?

* `add_repos`: the 'test' repo, i.e. the one that brings in a newer than
  'normal' version of the package we are testing, OpenvSwitch in this case.
* `dependency_override_repos`: used to disable or override a particular repo.

In the above case, the openvswitch-next.repo repo will be generated by the repo
setup and will disable the centos-nfv-openvswitch repo.

Before the deployment, the `rdo-jobs/dependency/get-dependency-repo-content.yaml`_
playbook is used to set the particular release file (in this case
config/release/dependency_ci/openvswitch/repo_config.yaml) and then to generate a
diff of the packages coming from the dependency_override_repos and the new repos
added by the add_repos option.

Below are the jobs running in the `openstack-dependencies-openvswitch`_ pipeline
on review.rdoproject.org.

.. code-block:: yaml

    openstack-dependencies-openvswitch:
      jobs:
        - periodic-tripleo-ci-centos-8-standalone-openvswitch-container-build-master:
            dependencies:
              - periodic-tripleo-ci-centos-8-standalone-master
        - periodic-tripleo-ci-centos-8-scenario007-standalone-openvswitch-container-build-master:
            dependencies:
              - periodic-tripleo-ci-centos-8-scenario007-standalone-master
        - periodic-tripleo-ci-centos-8-standalone-master:
            vars:
              force_periodic: false
        - periodic-tripleo-ci-centos-8-scenario007-standalone-master:
            vars:
              force_periodic: false

Understanding CentOS Compose Pinning Dependency
-------------------------------------------------

The dependency `compose-repos`_ works along the same lines as the package
dependency jobs, with the difference that instead of setting up a single
repository at a time, it consumes metadata from the provided compose URL and
generates a set of repos as specified in the configuration snippet below:

.. code-block:: yaml

    ...
    add_repos:
      - type: compose_repos
        compose_url: "https://odcs.stream.centos.org/production/latest-CentOS-Stream/compose/"
        release: centos-stream-9
        disable_conflicting: true
        variants:
          - AppStream
          - BaseOS
          - HighAvailability
          - CRB
        disable_repos:
          - quickstart-centos-appstreams.repo
          - quickstart-centos-base.repo
          - quickstart-centos-highavailability.repo
          - quickstart-centos-crb.repo

The `compose_repos` repo type was created to generate a set of repos based on a
compose repos URL and information about variants and conflicting repos. The
`variants` define which repos should be created from the compose metadata, while
`disable_conflicting` and `disable_repos` guarantee that conflicting repos are
disabled in favor of the new ones.
For more details on how the repos are generated, please check `yum-config-compose`_
in the repo-setup role and the `yum-config`_ tool.

.. note::
    The process of setting up compose-repos starts earlier in the job, before any
    call to `repo-setup`, in one of the pre playbooks defined in the base jobs. You
    will see the `centos-compose-repos.yml`_ playbook running in jobs that have
    `dependency` set to *centos-compose*, which sets up those repos using the same
    tools mentioned above. The purpose of the dependency config here is to keep
    those repos enabled when any other playbook or role calls `repo-setup`.

Testing Jobs Using Compose Pinning Dependency
-----------------------------------------------

In order to test any job against a CentOS compose build, which can be a compose
newer or older than the one available on the CentOS mirrors, you will need to set
up a new job definition and provide the following job variables:

.. code-block:: yaml

    - job:
        name: tripleo-ci-centos-9-standalone-compose-pinning
        parent: tripleo-ci-centos-9-standalone
        vars:
          dependency: centos-compose
          centos_compose_url: https://odcs.stream.centos.org/production/latest-CentOS-Stream/compose/
          build_container_images: true
          containers_base_image: quay.io/centos/centos:stream9

* `dependency`: needs to be set to *centos-compose*.
* `centos_compose_url`: the CentOS compose URL to be tested. Note that the full URL
  ends with `compose`, because that is where the compose `metadata` lives, which the
  `yum-config` tool requires to generate the repos. The default value is set to the
  latest compose, which might be ahead of the mirror's compose.

.. note::
    In the example above, the `build_container_images` flag is enabled. It means
    that the process of building containers will also use the compose repositories.
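Defining the job is only half of the wiring: it also has to be attached to a
project pipeline before Zuul will run it. A minimal sketch, assuming the job
above is defined in a repository read by the relevant Zuul tenant and that you
want it in the ``check`` pipeline (the pipeline name and placement here are
illustrative, not prescribed by this guide):

.. code-block:: yaml

    - project:
        check:
          jobs:
            - tripleo-ci-centos-9-standalone-compose-pinning

The same stanza can point at a periodic pipeline instead if the compose-pinned
job should run on a schedule rather than on every proposed change.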
- -Ensuring Correct Module or Repo is Used ---------------------------------------- - -Once a jobs runs and finishes in the dependency pipeline, we need to navigate -to job log url. Under `logs/undercloud/home/zuul` directory, we can see -two log files: - -* control_repoquery_list.log.txt.gz - Contains a list of new packages coming from newly added repos. -* control_test_diff_table.log.txt.gz - contains a diff of the packages coming from new repo and overridden repo - -All the above operation is done `rdo-jobs/playbooks/dependency/diff-control-test.yaml`_ playbook which uses -`compare_rpms`_ project from ci-config/ci-scripts/infra-setup/roles/rrcockpit/files. - -.. note:: - The dependency `compose-repos` doesn't support rpm diff control test yet. - -.. _`promotion and component pipeline`: https://docs.openstack.org/tripleo-docs/latest/ci/stages-overview.html -.. _`openvswitch dependency pipeline`: https://opendev.org/openstack/tripleo-quickstart/src/branch/master/config/release/dependency_ci/openvswitch/repo_config.yaml -.. _`openstack-dependencies-containertools`: https://review.rdoproject.org/zuul/builds?pipeline=openstack-dependencies-containertools -.. _`openstack-dependencies-openvswitch`: https://review.rdoproject.org/zuul/builds?pipeline=openstack-dependencies-openvswitch -.. _`rdo-jobs/zuul.d/dependencies-jobs.yaml`: https://github.com/rdo-infra/rdo-jobs/blob/master/zuul.d/dependencies-jobs.yaml -.. _`rdo-jobs/zuul.d/project-templates-dependencies.yaml`: https://github.com/rdo-infra/rdo-jobs/blob/master/zuul.d/project-templates-dependencies.yaml -.. _`rdo-jobs/playbooks/dependency/diff-control-test.yaml`: https://github.com/rdo-infra/rdo-jobs/blob/master/playbooks/dependency/diff-control-test.yaml -.. _`get-dependency-module-content.yaml`: https://github.com/rdo-infra/rdo-jobs/blob/master/playbooks/dependency/get-dependency-module-content.yaml -.. _`rdo-jobs/dependency/get-dependency-repo-content.yaml`: https://github.com/rdo-infra/rdo-jobs/blob/master/playbooks/dependency/get-dependency-repo-content.yaml -.. _`compare_rpms`: https://github.com/rdo-infra/ci-config/tree/master/ci-scripts/infra-setup/roles/rrcockpit/files/compare_rpms -.. _`compose-repos`: https://github.com/openstack/tripleo-quickstart/blob/3f3f93da95c8531a4542c9a9aaa6424f2f6364c6/config/release/dependency_ci/centos-compose/repo_config.yaml -.. _`yum-config-compose`: https://github.com/openstack/tripleo-quickstart/blob/3f3f93da95c8531a4542c9a9aaa6424f2f6364c6/roles/repo-setup/tasks/yum-config-compose.yml -.. _`yum-config`: https://github.com/openstack/tripleo-repos/blob/cbbdde6cb6c73692b3ce9d0f6931f1b6e6fe6c91/plugins/modules/yum_config.py -.. _`centos-compose-repos.yml`: https://github.com/openstack/tripleo-ci/blob/5246cc282819f0248d997db79bc16c0f00a9e2f8/playbooks/tripleo-ci/centos-compose-repos.yml \ No newline at end of file diff --git a/doc/source/conf.py b/doc/source/conf.py deleted file mode 100644 index fb8028f1..00000000 --- a/doc/source/conf.py +++ /dev/null @@ -1,195 +0,0 @@ -# instack-undercloud documentation build configuration file, created by -# sphinx-quickstart on Wed Feb 25 10:56:57 2015. -# -# This file is execfile()d with the current directory set to its containing -# dir. -# -# Note that not all possible configuration values are present in this -# autogenerated file. -# -# All configuration values have a default; values that are commented out -# serve to show the default. 
- -# import os -# import sys - -from pyquery import PyQuery -import requests - -# If extensions (or modules to document with autodoc) are in another directory, -# add these directories to sys.path here. If the directory is relative to the -# documentation root, use os.path.abspath to make it absolute, like shown here. -# sys.path.insert(0, os.path.abspath('.')) - -# -- General configuration --------------------------------------------------- - -# If your documentation needs a minimal Sphinx version, state it here. -# needs_sphinx = '1.0' - -# Add any Sphinx extension module names here, as strings. They can be -# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones. -extensions = [ - 'sphinx.ext.autodoc', - 'sphinx.ext.intersphinx', - 'sphinxcontrib.mermaid', - 'openstackdocstheme', - 'sphinxcontrib.rsvgconverter', -] - -# Disable usage of xindy https://bugzilla.redhat.com/show_bug.cgi?id=1643664 -latex_use_xindy = False - - -# The suffix of source filenames. -source_suffix = '.rst' - -# The encoding of source files. -# source_encoding = 'utf-8-sig' - -# The master toctree document. -master_doc = 'index' - -# General information about the project. -project = u'TripleO' -copyright = u'2015, OpenStack Foundation' -bug_tracker = u'Launchpad' -bug_tracker_url = u'https://launchpad.net/tripleo' - -# The version info for the project you're documenting, acts as replacement for -# |version| and |release|, also used in various other places throughout the -# built documents. -# -# The short X.Y version. -# version = '3.0.0' -# The full version, including alpha/beta/rc tags. -# release = '3.0.0' - -# The language for content autogenerated by Sphinx. Refer to documentation -# for a list of supported languages. -# language = None - -# There are two options for replacing |today|: either, you set today to some -# non-false value, then it is used: -# today = '' -# Else, today_fmt is used as the format for a strftime call. -# today_fmt = '%B %d, %Y' - -# List of patterns, relative to source directory, that match files and -# directories to ignore when looking for source files. -exclude_patterns = [] - -# The reST default role (used for this markup: `text`) to use for all -# documents. -# default_role = None - -# If true, '()' will be appended to :func: etc. cross-reference text. -# add_function_parentheses = True - -# If true, the current module name will be prepended to all description -# unit titles (such as .. function::). -# add_module_names = True - -# If true, sectionauthor and moduleauthor directives will be shown in the -# output. They are ignored by default. -# show_authors = False - -# The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'native' - -# A list of ignored prefixes for module index sorting. -# modindex_common_prefix = [] - - -# -- Options for HTML output ------------------------------------------------- - -html_static_path = ['../../_custom'] -# html_style = 'custom.css' -templates_path = ['../../_templates'] - -# Output file base name for HTML help builder. -htmlhelp_basename = '%sdoc' % project - -# The theme to use for HTML and HTML Help pages. See the documentation for -# a list of builtin themes. 
-html_theme = 'openstackdocs' - -# -- Options for LaTeX output ------------------------------------------------ - - -def _get_name_version(index=1): - response = requests.get('https://releases.openstack.org/') - release_list = PyQuery(response.content) - all_tr = release_list('tr') - release = all_tr('td:first')[index] - - return release.text_content() - - -def get_oldest_version_name(): - return _get_name_version(index=4) - - -def get_before_oldest_version_name(): - return _get_name_version(index=3) - - -def get_before_latest_version_name(): - return _get_name_version(index=2) - - -def get_latest_version_name(): - """Get the name of the last stable version""" - return _get_name_version() - - -# Grouping the document tree into LaTeX files. List of tuples -# (source start file, target name, title, author, documentclass -# [howto/manual]). -latex_documents = [ - ('index', - 'doc-tripleo-docs.tex', - u'TripleO Documentation', - u'OpenStack Foundation', 'manual'), -] - -# Allow deeper levels of nesting for \begin...\end stanzas -latex_elements = {'maxlistdepth': 10, 'extraclassoptions': 'openany,oneside'} - -oldest_version_name = get_oldest_version_name() -oldest_version_name_lower = oldest_version_name.lower() -before_oldest_version_name = get_before_oldest_version_name() -before_oldest_version_name_lower = before_oldest_version_name.lower() -before_latest_version_name = get_before_latest_version_name() -before_latest_version_name_lower = before_latest_version_name.lower() -latest_version_name = get_latest_version_name() -latest_version_name_lower = latest_version_name.lower() -rst_prolog = """ -.. |project| replace:: {project} -.. |bug_tracker| replace:: {bug_tracker} -.. |bug_tracker_url| replace:: {bug_tracker_url} -.. |oldest_version_name| replace:: {oldest_version_name} -.. |oldest_version_name_lower| replace:: {oldest_version_name_lower} -.. |before_oldest_version_name| replace:: {before_oldest_version_name} -.. |before_oldest_version_name_lower| replace:: {b_oldest_version_name_lower} -.. |before_latest_version_name| replace:: {before_latest_version_name} -.. |before_latest_version_name_lower| replace:: {b_latest_version_name_lower} -.. |latest_version_name| replace:: {latest_version_name} -.. |latest_version_name_lower| replace:: {latest_version_name_lower} -""".format( - project=project, bug_tracker=bug_tracker, bug_tracker_url=bug_tracker_url, - oldest_version_name=oldest_version_name, - oldest_version_name_lower=oldest_version_name_lower, - before_oldest_version_name=before_oldest_version_name, - b_oldest_version_name_lower=before_oldest_version_name_lower, - before_latest_version_name=before_latest_version_name, - b_latest_version_name_lower=before_latest_version_name_lower, - latest_version_name=latest_version_name, - latest_version_name_lower=latest_version_name_lower -) - -# openstackdocstheme options -openstackdocs_repo_name = 'openstack/tripleo-docs' -openstackdocs_pdf_link = True -openstackdocs_auto_name = False -openstackdocs_bug_project = 'tripleo' -openstackdocs_bug_tag = 'documentation' diff --git a/doc/source/contributor/contributions.rst b/doc/source/contributor/contributions.rst deleted file mode 100644 index 5e2b3dd3..00000000 --- a/doc/source/contributor/contributions.rst +++ /dev/null @@ -1,77 +0,0 @@ -How to Contribute -================= - -|project| source code is publicly available. You can contribute code to -individual projects, documentation, report bugs and vulnerabilities, request -features. 
- -Contributing Code ------------------ -As long as |project| is a set of integrated OpenStack projects, all -development is happening in OpenStack upstream. - -Learn `how to contribute into OpenStack's upstream `_. - -See :doc:`../install/introduction/components` to find out how to contribute into -individual projects. - -Contacting the Core Team ------------------------- -Please refer to the `TripleO Core Team -`_ contacts. - -For upgrade specific contacts, refer to `TripleO Upgrade Core -`_ contacts - -For TripleO Ansible specific contacts, refer to `TripleO Ansible Core -`_ contacts - -For Shared TripleO CI role contacts, refer to `TripleO Shared CI Core -`_ contacts - - -Contributing to this Documentation ------------------------------------ - -|project| User Documentation lives on -`git.opendev.org `_ -and is mirrored on -`GitHub under the OpenStack organization `_. - -Learn `how to contribute into TripleO Docs -`_. - -Reporting Bugs --------------- - -**OpenStack Upstream**: If you find bugs or vulnerabilities which affect -upstream projects, please follow OpenStack's process of filing bugs. - -* Learn `how to report bugs in OpenStack - `_. - -* If you want to file a bug against upstream project, you can find useful links - in our list of :doc:`../install/introduction/components`. - - -**TripleO** If the bug impacts the |project| project as a whole, you can file a -bug in |bug_tracker|: - -#. Go to |bug_tracker_url| - -#. Fill in needed information (If you filed also upstream bug, please provide - its URL in advanced fields) - -#. Submit bug - -Requesting Features -------------------- -**OpenStack Upstream**: Since we are developing projects in OpenStack community, -all the features are being requested upstream via Blueprints. - -* Learn `how to create Blueprints in OpenStack - `_. - -* If you want to file a bug against upstream project, you can find useful links - in our list of :doc:`../install/introduction/components`. diff --git a/doc/source/contributor/core.rst b/doc/source/contributor/core.rst deleted file mode 100644 index 5b02edc0..00000000 --- a/doc/source/contributor/core.rst +++ /dev/null @@ -1,118 +0,0 @@ -Core maintainers -================ - -The intention of this document is to give developers some information -regarding what is expected from core maintainers and hopefully provide some -guidance to those aiming for this role. - -Teams ------ - -The TripleO Core team is responsible for reviewing all changes proposed to -repositories that are under the `governance of TripleO `_. - -.. _tripleo_governance: https://governance.openstack.org/tc/reference/projects/tripleo.html - -The TripleO Upgrade core reviewers maintain the `tripleo_upgrade`_ project. - -.. _tripleo_upgrade: https://opendev.org/openstack/tripleo-upgrade - -The TripleO Validation team maintains the Validation Framework in TripleO. - -The TripleO CI team maintains the TripleO CI related projects (tripleo-ci, -tripleo-quickstart, tripleo-quickstart-extras, etc). - -We also have contributors with a specific area of expertise who have been -granted core reviews on their area. Example: a Ceph integration expert would -have core review on the Ceph related patches in TripleO. - -Because Gerrit doesn't allow such granularity, we trust people to understand -which patches they can use their core reviewer status or not. -If one is granted core review access on an area, there is an expectation that -it'll only be used in this specific area. 
-The grant is usually done for all the TripleO repositories but we expect -SME cores to use +/- 2 for their area of expertise otherwise the regular +/- 1. - -.. note:: - Everyone is warmly encouraged to review incoming patches in TripleO, even - if you're not (yet) a member of these teams. - Participating in the review process will be a major task on the road to join - the core maintainer teams. - -Adding new members ------------------- - -Each team mentioned above should be aware of who is active in their respective -project(s). - -In order to add someone in one of these groups, it has to be discussed -between other cores and the TripleO PTL. - -It is a good practice to reach out to the nominee before proposing the -candidate, to make sure about their willingness to accept this position and its -responsibilities. - -In real life, it usually happens by informal discussions, but the official -proposals have to be sent with an email to the openstack-discuss mailing list. -It is strongly recommended to have this initial informal agreement before -going public, in case there are some disagreements which could cause -unpleasant discussions which could harm the nominee. - -This discussion can be initiated by any core, and only the existing cores votes -will weight into whether or not the proposal is granted. -Of course anyone is welcome to share their feedback and opinions. - -Removing members ----------------- - -It is normal for developers to reduce their activity and work on something -else. If they don't reach out by themselves, it is the responsibility of the -teams to remove them from the core list and inform about the change on the -mailing-list and privately when possible. - -Also if someone doesn't respect the TripleO rules or doesn't use the core -permission correctly, this person will be removed from the core list with -a private notice at least. - -Core membership expectations ----------------------------- - -Becoming a core member is a serious commitment and it is not granted easily. -Here are a non-exhaustive list of things that are expected: - -* The time invested on the project is consistent. - -* (Nearly) Daily participation in core reviews. - -.. note:: - Core reviewers are expected to provide thoroughly reviews on the code, - which doesn't only mean +1/-1, but also comments the code that confirm - that the patch is ready (or not) to be merged into the repository. - This capacity to provide these kind of reviews is strongly evaluated when - recruiting new core reviewers. It is preferred to provide quality reviews - over quantity. A negative review needs productive feedback and harmful - comments won't help to build credibility within the team. - -* Quality of technical contributions: bug reports, code, commit messages, - specs, e-mails, etc. - -* Awareness of discussions happening within the project (mailing-list, specs). - -* Best effort participation on IRC #tripleo (when timezone permits), - to provide support to our dear users and developers. - -* Gain trust with other core members, engage collaboration and be nice with - people. While mainly maintained by Red Hat, TripleO remains a friendly - project where we hope people can have fun while maintaining a project which - meets business needs for the OpenStack community. - -* Understand the `Expedited Approvals `_ policy. - -.. _expedited_approvals: https://specs.openstack.org/openstack/tripleo-specs/specs/policy/expedited-approvals.html - -Final note ----------- - -The goal of becoming core must not be intimidating. 
It should be reachable to -anyone well involved in our project with has good intents and enough technical -level. One should never hesitate to ask for help and mentorship when needed. diff --git a/doc/source/contributor/index.rst b/doc/source/contributor/index.rst deleted file mode 100644 index a10b9010..00000000 --- a/doc/source/contributor/index.rst +++ /dev/null @@ -1,11 +0,0 @@ -TripleO Contributor Guide -========================= - -.. toctree:: - :maxdepth: 2 - :includehidden: - - new_developers - contributions - core -.. include:: ./squads.rst diff --git a/doc/source/contributor/new_developers.rst b/doc/source/contributor/new_developers.rst deleted file mode 100644 index 93b04c81..00000000 --- a/doc/source/contributor/new_developers.rst +++ /dev/null @@ -1,117 +0,0 @@ -Information for New Developers -============================== - -The intention of this document is to give new developers some information -regarding how to get started with TripleO as well as some best practices that -the TripleO community has settled on. - -In general TripleO is a very complex chunk of software. It uses numerous -technologies to implement an OpenStack installer. The premise of TripleO was -to use the OpenStack platform itself as the installer and API for user -interfaces. As such the first step to installing TripleO is to create what is -called an `undercloud`. We use almost similar architecture for both -`undercloud` and `overcloud` that leverages same set of Heat templates found -in `tripleo-heat-templates` repository, with a few minor differences. The -`undercloud` services are deployed in containers and can be managed by the -same tool chain used for `overcloud`. - -Once the `undercloud` is deployed, we use a combination of Ansible playbooks -and a set of Heat templates, to drive the deployment of an overcloud. Ironic -is used to provision hardware and boot an operating system either on baremetal -(for real deployments) or on VMs (for development). All services are deployed -in containers on the overcloud like undercloud. - -Repositories that are part of TripleO -------------------------------------- - -* `tripleo-common `_: - This is intended to be for TripleO libraries of common code. - Unfortunately it has become a bit overrun with unrelated bits. Work - is ongoing to clean this up and split this into separate repositories. - -* `tripleo-ansible `_: - Contains Ansible playbooks, roles, plugins, modules, filters for use with - TripleO deployments. - -* `tripleo-heat-templates `_: - This contains all the Heat templates necessary to deploy the overcloud (and - hopefully soon the undercloud as well). - -* `python-tripleoclient `_: - The CLI for deploying TripleO. This contains some logic but remember that we - want to call Mistral actions from here where needed so that the logic can be - shared with the UI. - -* `tripleo-docs `_: - Where these docs are kept. - -* `tripleo-image-elements `_: - Image elements (snippets of puppet that prepare specific parts of the - image) for building the undercloud and overcloud disk images. - -* `tripleo-puppet-elements `_: - Puppet elements used to configure and deploy the overcloud. These - used during installation to set up the services. - -* `puppet-tripleo `_: - Puppet is used to configure the services in TripleO. This repository - contains various puppet modules for doing this. - -* `tripleo-quickstart `_: - Quickstart is an Ansible driven deployment for TripleO used in CI. Most - developers also use this to stand up instances for development as well. 
- -* `tripleo-quickstart-extras `_: - Extended functionality for tripleo-quickstart allowing for end-to-end - deployment and testing. - -* `tripleo-ui `_: - The web based graphical user interface for deploying TripleO. - -* `kolla `_: - We use the containers built by the Kolla project for services in TripleO. - Any new containers or additions to existing containers should be submitted - here. - -* `diskimage-builder `_: - Disk image builder is used to build our base images for the TripleO - deployment. - -Definition of Done ------------------- - -This is basically a check list of things that you want to think about when -implementing a new feature. - -- Ensure that the continuous integration (CI) is in place and passing, adding - coverage to tests if required. See - http://specs.openstack.org/openstack/tripleo-specs/specs/policy/adding-ci-jobs.html - for more information. -- Ensure there are unit tests where possible. -- Maintain backwards compatibility with our existing template interfaces from - tripleo-heat-templates. -- New features should be reviewed by cores who have knowledge in that area of - the codebase. -- One should consider logging and support implications. If you have new logs, - would they be available via sosreport. -- Error messages are easy to understand and work their way back to the user - (stack traces are not sufficient). -- Documentation should be updated if necessary. New features need a - tripleo-docs patch. -- If any new dependencies are used for your feature, be sure they are properly - packaged and available in RDO. You can ask on #rdo (on OFTC server) for - help with this. - - -Using TripleO Standalone for Development ----------------------------------------- - -The Standalone container based deployment can be used for development purposes. -This reuses the existing TripleO Heat Templates, allowing you to do the -development using this framework instead of a complete overcloud. -This is very useful if you are developing Heat templates or containerized -services. - -Please see `Standalone Deployment Guide `_ -on how to set up a Standalone OpenStack node. - diff --git a/doc/source/contributor/squads.rst b/doc/source/contributor/squads.rst deleted file mode 100644 index 4fa73dd0..00000000 --- a/doc/source/contributor/squads.rst +++ /dev/null @@ -1,45 +0,0 @@ -Squads ------- - -Work in TripleO is divided in Squads. For more information the `project policy -`_. - -The list tends to be dynamic over the cycles, depending on which topics -the team is working on. The list below is subject to change as squads change. 
- -+-------------------------------+----------------------------------------------------------------------------+ -| Squad | Description | -+===============================+============================================================================+ -| CI | Group of people focusing on Continuous Integration tooling and system | -| | https://etherpad.openstack.org/p/tripleo-ci-squad-meeting | -+-------------------------------+----------------------------------------------------------------------------+ -| UI/CLI | Group of people focusing on TripleO UI and CLI | -| | https://etherpad.openstack.org/p/tripleo-ui-cli-squad-status | -+-------------------------------+----------------------------------------------------------------------------+ -| Upgrade | Group of people focusing on TripleO upgrades | -| | https://etherpad.openstack.org/p/tripleo-upgrade-squad-status | -+-------------------------------+----------------------------------------------------------------------------+ -| Validations | Group of people focusing on TripleO validations tooling | -| | https://etherpad.openstack.org/p/tripleo-validations-squad-status | -+-------------------------------+----------------------------------------------------------------------------+ -| Workflows | Group of people focusing on TripleO Workflows | -| | https://etherpad.openstack.org/p/tripleo-workflows-squad-status | -+-------------------------------+----------------------------------------------------------------------------+ -| Containers | Group of people focusing on TripleO deployed in containers | -| | https://etherpad.openstack.org/p/tripleo-containers-squad-status | -+-------------------------------+----------------------------------------------------------------------------+ -| Networking | Group of people focusing on networking bits in TripleO | -| | https://etherpad.openstack.org/p/tripleo-networking-squad-status | -+-------------------------------+----------------------------------------------------------------------------+ -| Integration | Group of people focusing on configuration management (eg: services) | -| | https://etherpad.openstack.org/p/tripleo-integration-squad-status | -+-------------------------------+----------------------------------------------------------------------------+ -| Edge | Group of people focusing on Edge/multi-site/multi-cloud | -| | https://etherpad.openstack.org/p/tripleo-edge-squad-status | -+-------------------------------+----------------------------------------------------------------------------+ - -.. note:: - - Note about CI: the squad is about working together on the tooling used - by OpenStack Infra to test TripleO, though every squad has in charge of - maintaining the good shape of their tests. diff --git a/doc/source/developer/index.rst b/doc/source/developer/index.rst deleted file mode 100644 index 2ea5953a..00000000 --- a/doc/source/developer/index.rst +++ /dev/null @@ -1,12 +0,0 @@ -Developer Documentation -======================= - -Documentation of developer-specific options in |project|. - - -.. 
toctree:: - - tht_walkthrough/tht_walkthrough - release - tripleoclient_primer - ../upgrade/developer/upgrades/upgrades diff --git a/doc/source/developer/release.rst b/doc/source/developer/release.rst deleted file mode 100644 index 29c64d96..00000000 --- a/doc/source/developer/release.rst +++ /dev/null @@ -1,160 +0,0 @@ -Release Management -================== - -Releases Overview ------------------ - -Before reading this document and being involved in TripleO release management, -it's suggested to read the OpenStack Release Management guide_. - -.. _guide: https://docs.openstack.org/project-team-guide/release-management.html - -Most of TripleO projects follows the independent_ release model. -We will be creating stable branches based on our long term supported releases -going forward. The details can be found on the releases repository_. - -.. _repository: https://opendev.org/openstack/releases/src/branch/master/deliverables/_independent - -.. _independent: https://releases.openstack.org/reference/release_models.html#independent - -All information about previous releases can be found on https://releases.openstack.org. -This page will document the process of releasing TripleO projects. - -The tagging convention can be discussed with the PTL or the Release Liaison of TripleO. - -For puppet-tripleo, we also need to update metadata.json file:: - - "version": "X.Y.Z", - -For other projects, there is no need to update anything since the release will be ready by pbr. - -.. Note:: - Puppet OpenStack modules release management is documented here: - https://docs.openstack.org/puppet-openstack-guide/releases.html#how-to-release-puppet-modules - -Once this is done, you can submit a patch in openstack/releases and per project to modify the YAML. -The openstack/releases project provides tooling to update these files. See the new-release_ command. -You can also update the yaml files manually as necessary. -Example with tripleo-heat-templates, edit deliverables/pike/tripleo-heat-templates.yaml:: - - --- - launchpad: tripleo - release-type: python-pypi - team: tripleo - type: other - repository-settings: - openstack/tripleo-heat-templates: {} - releases: - - version: 15.0.0 - projects: - - repo: openstack/tripleo-heat-templates - hash: 1ffbc6cf70c8f79cb3a1e251c9b1e366843ab97c - - version: 15.1.0 - projects: - - repo: openstack/tripleo-heat-templates - hash: ec8955c26a15f3c9e659b7ae08223c544820af03 - - version: 16.0.0 - projects: - - repo: openstack/tripleo-heat-template - hash: - -.. _new-release: https://releases.openstack.org/reference/using.html#using-new-release-command - -Once the file is edited, you can submit it and OpenStack release team will review it. Note that the patch -requires +1 from TripleO PTL or TripleO Release Liaison_. - -.. _Liaison: https://wiki.openstack.org/wiki/CrossProjectLiaisons#Release_management - - -The process of branching is also done by Release tools, and you need to change the YAML to -specify where we want to branch. 
Example with tripleo-heat-templates, edit deliverables/ocata/tripleo-heat-templates.yaml::

    ---
    launchpad: tripleo
    release-type: python-pypi
    team: tripleo
    type: other
    repository-settings:
      openstack/tripleo-heat-templates: {}
    branches:
      - name: stable/xena
        location: 16.0.0
    releases:
      - version: 15.0.0
        projects:
          - repo: openstack/tripleo-heat-templates
            hash: 1ffbc6cf70c8f79cb3a1e251c9b1e366843ab97c
      - version: 15.1.0
        projects:
          - repo: openstack/tripleo-heat-templates
            hash: ec8955c26a15f3c9e659b7ae08223c544820af03
      - version: 16.0.0
        projects:
          - repo: openstack/tripleo-heat-templates
            hash:

Keep in mind that tags, branches, release notes and announcements are generated by
the tooling and nothing has to be done manually, except what is documented here.


Releases for RDO
------------------

Due to TripleO's switch_ to the independent model, the TripleO project needs to
cut tags at the end of cycles that will not be supported in the long term. These
tags are used by the RDO release process to include a build of the TripleO
rpms in the RDO release. The process to create the intermediate release is
as follows.

.. _switch: https://specs.openstack.org/openstack/tripleo-specs/specs/xena/tripleo-independent-release.html

Update required metadata
^^^^^^^^^^^^^^^^^^^^^^^^^^

Some projects, like puppet-tripleo and puppet-pacemaker, require the metadata to
be updated in the repository prior to cutting a tag. If the metadata is
not updated, the tagging patch to openstack/releases will fail CI.

For puppet-tripleo_ and puppet-pacemaker_, update the version information to
represent the next tag version (e.g. 16.1.0).

.. _puppet-tripleo: https://review.opendev.org/c/openstack/puppet-tripleo/+/813847
.. _puppet-pacemaker: https://review.opendev.org/c/openstack/puppet-pacemaker/+/813854


Get latest promoted content
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

After the previous metadata updates are available in the latest promoted content,
fetch the version information from RDO, which contains the git repository hashes.

An example of where this can be found is::

    https://trunk.rdoproject.org/centos8-master/current-tripleo/versions.csv

.. Note::
    You will need to adjust centos8 to centos9 as necessary.


Prepare version tags
^^^^^^^^^^^^^^^^^^^^^^

Based on the versions.csv data, an openstack/releases patch needs to be created
to tag the release with the provided hashes. You can determine which TripleO
projects are needed by finding the projects tagged with "team: tripleo_".
See `An example review`_. Please be aware of changes between versions and create
the appropriate version number as necessary (e.g. major, feature, or bugfix).

.. _tripleo: https://opendev.org/openstack/releases/src/commit/fcdb1f5b556e99f25f248d38f16ad812489c9be0/deliverables/_independent/tripleo-heat-templates.yaml
.. _An example review: https://review.opendev.org/c/openstack/releases/+/813852

.. Note::
    If this is a long term release, this patch should include a stable branch.


Notify RDO team of tags
^^^^^^^^^^^^^^^^^^^^^^^^^

Once the release has been created, make sure the RDO team has been notified
of the new tags. They will update the RDO release items to ensure that the
given OpenStack release will contain the pinned content.
diff --git a/doc/source/developer/tht_walkthrough/changes-puppet-tripleo.rst b/doc/source/developer/tht_walkthrough/changes-puppet-tripleo.rst deleted file mode 100644 index 7feb09b5..00000000 --- a/doc/source/developer/tht_walkthrough/changes-puppet-tripleo.rst +++ /dev/null @@ -1,56 +0,0 @@ -Updating puppet-tripleo ------------------------ - -.. include:: ../../links.rst - -The puppet manifests that currently define overcloud node configuration are -moved from the tripleo-heat-templates to new puppet-tripleo class definitions -as part of the composable services approach. In next iterations, all service -configuration should be moved also to puppet-tripleo. -This section considers the addition of the ntp definition to puppet-tripleo. - -Folder structure convention -~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Services should be defined in the services folder, depending on the service -purpose. -:: - - manifests - profile/base ---> To host all services not using pacemaker. - time ---> Specific folder for time services (NTP, timezone, Chrony among others). - ntp.pp ---> Puppet manifest to configure the service. - -.. note:: - - For further information related to the current folders manifests structure - refer to the `puppet-tripleo repository`_. - -Adding the puppet manifest -~~~~~~~~~~~~~~~~~~~~~~~~~~ - -This step will reference how the puppet logic should be organized in -puppet-tripleo. - -Inside the manifests folder, add the service manifest following the folder -structure (``manifests/profile/base/time/ntp.pp``) as: -:: - - class tripleo::profile::base::time::ntp ( - #We get the configuration step in which we can choose which steps to execute - $step = hiera('step'), - ) { - #step assigned for core modules. - #(Check for further references about the configuration steps) - #https://opendev.org/openstack/tripleo-heat-templates/src/branch/master/puppet/services/README.rst - if ($step >= 2){ - #We will call the NTP puppet class and assign our configuration values. - #If needed additional Puppet packages can be added/installed by using the repo tripleo-puppet-elements - if count($ntpservers) > 0 { - include ::ntp - } - } - } - -If users have followed all the previous steps, they should be able to configure -their services using the composable services within roles guidelines. diff --git a/doc/source/developer/tht_walkthrough/changes-tht.rst b/doc/source/developer/tht_walkthrough/changes-tht.rst deleted file mode 100644 index 60c6d77e..00000000 --- a/doc/source/developer/tht_walkthrough/changes-tht.rst +++ /dev/null @@ -1,241 +0,0 @@ -Updating tripleo-heat-templates -------------------------------- - -.. include:: ../../links.rst - -This section will describe the changes needed for tripleo-heat-templates. - -Folder structure convention for tripleo-heat-templates -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Services should be defined in the services folder, depending on the service -purpose. -:: - - puppet - services ---> To host all services. - ---> Folder to store a specific type services (If time, will store time based services like: NTP, timezone, Chrony among others). - .yaml ---> Heat template defining per-service configuration. - -base.yaml ---> Heat template defining common service configuration. - -.. note:: - - No puppet manifests may be defined in the `THT repository`_, they - should go to the `puppet-tripleo repository`_ instead. - -.. note:: - - The use of a base heat template (-base.yaml) is necessary in cases where - a given 'service' (e.g. 
"heat") is comprised of a number of individual - component services (e.g. heat-api, heat-engine) which need to share some - of the base configuration (such as rabbit credentials). - Using a base template in those cases means we don't need to - duplicate that configuration. - Refer to: https://review.opendev.org/#/c/313577/ for further details. - Also, refer to :ref:`duplicated-parameters` for an use-case description. - -Changes list -~~~~~~~~~~~~ - -The list of changes in THT are: - -- If there is any configuration of the given feature/service - in any of the ``tripleo-heat-templates/puppet/manifests/*.pp`` - files, then this will need to be removed and migrated to the - puppet-tripleo repository. - -- Create a service type specific folder in the root services folder - (``deployment/time``). - -- Create a heat template for the service inside the deployment/time folder - (``deployment/time/ntp-baremetal-puppet.yaml``). - -- Optionally, create a common heat template to reuse common configuration - data, which is referenced from each per-service heat template. - -Step 1 - Updating puppet references -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Remove all puppet references for the composable service from the current -manifests (\*.pp). All the puppet logic will live in the puppet-tripleo -repository based on a configuration step, so it is mandatory to remove all the -puppet references from tripleo-heat-templates. - -The updated .pp files for the NTP example were: - -- ``puppet/manifests/overcloud_cephstorage.pp`` - -- ``puppet/manifests/overcloud_compute.pp`` - -- ``puppet/manifests/overcloud_controller.pp`` - -- ``puppet/manifests/overcloud_controller_pacemaker.pp`` - -- ``puppet/manifests/overcloud_object.pp`` - -- ``puppet/manifests/overcloud_volume.pp`` - - - -Step 2 - overcloud-resource-registry-puppet.j2.yaml resource registry changes -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The resource ``OS::TripleO::Services::Timesync`` must be defined in the resource -registry (``overcloud-resource-registry-puppet.j2.yaml``) - -Create a new resource type alias which references the per-service -heat template file, as described above. - -By updating the resource registry we are forcing to use a nested template to -configure our resources. In the example case the created resource -(OS::TripleO::Services::Timesync), will point to the corresponding service yaml file -(deployment/time/ntp-baremetal-puppet.yaml). - - -Step 3 - roles_data.yaml initial changes -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The default roles are defined here. They are then iterated and the respective -values of each section are rendered into the overcloud.j2.yaml. - -Mandatory services should be added to the roles' ServicesDefault value, -which defines all the services enabled by default in the role(s). - -From ``roles_data.yaml`` find:: - - - name: Controller - CountDefault: 1 - ServicesDefault: - - OS::TripleO::Services::CACerts - - OS::TripleO::Services::CertmongerUser - - OS::TripleO::Services::CephMds - - OS::TripleO::Services::Keystone - - OS::TripleO::Services::GlanceApi - - OS::TripleO::Services::GlanceRegistry - ... - - OS::TripleO::Services::Timesync ---> New service deployed in the controller overcloud - - -Update this section with your new service to be deployed to the controllers in -the overcloud. 
- -These values will be used by the controller roles' ServiceChain resource as -follows:: - - {% for role in roles %} - # Resources generated for {{role.name}} Role - {{role.name}}ServiceChain: - type: OS::TripleO::Services - properties: - Services: - get_param: {{role.name}}Services - ServiceNetMap: {get_attr: [ServiceNetMap, service_net_map]} - EndpointMap: {get_attr: [EndpointMap, endpoint_map]} - - ... - {% endfor %} - -THT changes for all the different roles are covered in: - -- https://review.opendev.org/#/c/310421/ (tripleo-heat-templates controller) - -- https://review.opendev.org/#/c/330916/ (tripleo-heat-templates compute) - -- https://review.opendev.org/#/c/330921/ (tripleo-heat-templates cephstorage) - -- https://review.opendev.org/#/c/330923/ (tripleo-heat-templates objectstorage) - -.. note:: - - In the case of the controller services, they are defined as part of the - roles' ServiceChain resource. If it is needed to add optional services, they - need to be appended to the current services list defined by the default - value of the role's ServicesDefault parameter. - - -Step 4 - Create the services yaml files -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Create: ``deployment/time/ntp-baremetal-puppet.yaml`` - -This file will have all the configuration details for the service to be -configured. -:: - - heat_template_version: rocky - description: > - NTP service deployment using puppet, this YAML file - creates the interface between the HOT template - and the puppet manifest that actually installs - and configure NTP. - parameters: - EndpointMap: - default: {} - description: Mapping of service endpoint -> protocol. Typically set - via parameter_defaults in the resource registry. - type: json - NtpServers: - default: ['0.pool.ntp.org', '1.pool.ntp.org'] - description: NTP servers - type: comma_delimited_list - NtpInterfaces: - default: ['0.0.0.0'] - description: Listening interfaces - type: comma_delimited_list - outputs: - role_data: - description: Role ntp using composable services. - value: - config_settings: - ntp::ntpservers: {get_param: NtpServers} - ntp::ntpinterfaces: {get_param: NtpInterfaces} - step_config: | - include ::tripleo::profile::base::time::ntp - -.. note:: - - All role-specific parameters have to be tagged:: - - ExampleParameter: - description: This is an example. - type: json - default: {} - tags: - - role_specific - -.. note:: - - It is required for all service templates to accept the EndpointMap parameter, - all other parameters are optional and may be defined per-service. Care should - be taken to avoid naming collisions between service parameters, e.g via using - the service name as a prefix, "Ntp" in this example. - - Service templates should output a role_data value, which is a mapping containing - "config_settings" which is a mapping of hiera key/value pairs required to configure - the service, and "step_config", which is a puppet manifest fragment that references - the puppet-tripleo profile that configures the service. - - If it is needed, the templates can be decomposed to remove - duplicated parameters among different deployment environments - (i.e. using pacemaker). To do this see - section :ref:`duplicated-parameters`. - - If your service has configuration that affects another service and should - only be run on nodes (roles) that contain that service, you can use - "service_config_settings". You then have to specify the hieradata inside this - section by using the name of the service as the key. 
So, if you want to - output hieradata related to your service, on nodes that deploy keystone, you - would do this:: - - role_data: - ... - step_config: - ... - ... - service_config_settings: - keystone: - # Here goes the hieradata - - This is useful for things such as creating the keystone endpoints for your - service, since one usually wants these commands to only be run on the - keystone node. diff --git a/doc/source/developer/tht_walkthrough/design-patterns.rst b/doc/source/developer/tht_walkthrough/design-patterns.rst deleted file mode 100644 index f4fe738e..00000000 --- a/doc/source/developer/tht_walkthrough/design-patterns.rst +++ /dev/null @@ -1,105 +0,0 @@ -THT design patterns -------------------- - -.. _duplicated-parameters: - -Duplicated parameters -~~~~~~~~~~~~~~~~~~~~~ - -Problem: When defining multiple related services, it can be necessary -to define the same parameters (such as rabbit or DB credentials) in -multiple service templates. To avoid this, it is possible to define a -"base" heat template that contains the common parameters and config_settings -mapping for those services that require it. - -This pattern will describe how to avoid duplicated parameters in the THT yaml -files. - -``mongodb-base.yaml``: This file should have all the common parameters between -the different environments (With pacemaker and without pacemaker). -:: - - heat_template_version: rocky - description: > - Configuration details for MongoDB service using composable roles - parameters: - MongoDbNoJournal: - default: false - description: Should MongoDb journaling be disabled - type: boolean - MongoDbIPv6: - default: false - description: Enable IPv6 if MongoDB VIP is IPv6 - type: boolean - MongoDbReplset: - type: string - default: "tripleo" - outputs: - role_data: - description: Role data for the MongoDB base service. - value: - config_settings: - mongodb::server::nojournal: {get_param: MongoDbNoJournal} - mongodb::server::ipv6: {get_param: MongoDbIPv6} - mongodb::server::replset: {get_param: MongoDbReplset} - -In this way we will be able to reuse the common parameter among all the -template files requiring it. - -Referencing the common parameter: - -``mongodb.yaml``: Will have specific parameters to deploy mongodb without -pacemaker. -:: - - heat_template_version: rocky - description: > - MongoDb service deployment using puppet - parameters: - #Parameters not used EndpointMap - EndpointMap: - default: {} - description: Mapping of service endpoint -> protocol. Typically set - via parameter_defaults in the resource registry. - type: json - resources: - MongoDbBase: - type: ./mongodb-base.yaml - outputs: - role_data: - description: Service mongodb using composable services. - value: - config_settings: - map_merge: - - get_attr: [MongoDbBase, role_data, config_settings] - - mongodb::server::service_manage: True - step_config: | - include ::tripleo::profile::base::database::mongodb - -In this case mongodb.yaml is using all the common parameter added in the -MongoDbBase resource. - -If using the parameter 'EndpointMap' in the base template, you must the pass it from the service file, -and even if it is not used in the service template, it must still be defined. - -In the service file: -:: - - parameters: - EndpointMap: - default: {} - description: Mapping of service endpoint -> protocol. Typically set - via parameter_defaults in the resource registry. 
- type: json - resources: - ServiceBase: - type: ./-base.yaml - properties: - EndpointMap: {get_param: EndpointMap} - -This will pass the endpoint information to the base config file. - -.. note:: - - Even if the EndpointMap parameter is optional in the base template, - for consistency is advised always using it in all service templates. diff --git a/doc/source/developer/tht_walkthrough/introduction.rst b/doc/source/developer/tht_walkthrough/introduction.rst deleted file mode 100644 index 666c1973..00000000 --- a/doc/source/developer/tht_walkthrough/introduction.rst +++ /dev/null @@ -1,67 +0,0 @@ -Introduction ------------- - -.. include:: ../../links.rst - -The initial scope of this tutorial is to create a brief walkthrough with some -guidelines and naming conventions for future modules and features aligned with -the composable services architecture. Regarding the example described in this -tutorial, which leads to align an _existing_ 'non-composable' service implementation -with the composable roles approach, it is important to notice that a similar approach would be -followed if a user needed to add an entirely new service to a tripleo deployment. - -.. _puppet/manifests: https://github.com/openstack/tripleo-heat-templates/tree/3d01f650f18b9e4f1892a6d9aa17f1bfc99b5091/puppet/manifests - -The puppet manifests used to configure services on overcloud nodes currently -reside in the tripleo-heat-templates repository, in the folder `puppet/manifests`_. -In order to properly organize and structure the code, all -manifests will be re-defined in the puppet-tripleo repository, and adapted to -the `composable services architecture`_. - -The use case for this example uses NTP as a service installed by default among -the OpenStack deployment. So the profile needs to be added to all the roles in -roles_data.yaml. - -Which means that NTP will be installed everywhere in the overcloud, so the -tutorial will describe the process of refactoring the code from those files -in order move it to the puppet-tripleo repository. - -This tutorial is divided into several steps, according to different changes -that need to be added to the structure of tripleo-heat-templates and -puppet-tripleo. - -Relevant repositories in this guide -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -- tripleo-heat-templates: All the tripleo-heat-templates (aka THT) logic. - -- puppet-tripleo: TripleO puppet manifests used to deploy the overcloud services. - -- tripleo-puppet-elements: References puppet modules used by TripleO to deploy the overcloud services. - (Not used in this tutorial) - -Gerrit patches used in this example -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The gerrit patches used to describe this walkthrough are: - -- https://review.opendev.org/#/c/310725/ (puppet-tripleo) - -- https://review.opendev.org/#/c/310421/ (tripleo-heat-templates controller) - -- https://review.opendev.org/#/c/330916/ (tripleo-heat-templates compute) - -- https://review.opendev.org/#/c/330921/ (tripleo-heat-templates cephstorage) - -- https://review.opendev.org/#/c/330923/ (tripleo-heat-templates objectstorage) - - -Change prerequisites -~~~~~~~~~~~~~~~~~~~~~ - -The controller services are defined and configured via Heat resource chains. In -the proposed patch (https://review.opendev.org/#/c/259568) controller -services will be wired to a new Heat feature that allows it to dynamically include -a set of nested stacks representing individual services via a Heat resource -chain. 
The current example will use this interface to decompose the controller -role into isolated services. diff --git a/doc/source/developer/tht_walkthrough/service_template_sections.rst b/doc/source/developer/tht_walkthrough/service_template_sections.rst deleted file mode 100644 index e16fe900..00000000 --- a/doc/source/developer/tht_walkthrough/service_template_sections.rst +++ /dev/null @@ -1,513 +0,0 @@ -Service template sections description -===================================== - -As mentioned in the previous sections of the developer guide, there are several -sections of the template's output that need to be filled out for creating a -service in TripleO. - -In this document we will attempt to enumerate all of them and explain the -reasoning behind them. - -Note that you can also find useful information in the `tht deployment readme`_. - -What's the bare-minimum? ------------------------- - -Before, digging into details, it's always good to know what the bare-minimum -is. So lets look at a very minimal service template:: - - heat_template_version: rocky - - description: Configure Red Hat Subscription Management. - - parameters: - RoleNetIpMap: - default: {} - type: json - ServiceData: - default: {} - description: Dictionary packing service data - type: json - ServiceNetMap: - default: {} - description: Mapping of service_name -> network name. Typically set - via parameter_defaults in the resource registry. This - mapping overrides those in ServiceNetMapDefaults. - type: json - RoleName: - default: '' - description: Role name on which the service is applied - type: string - RoleParameters: - default: {} - description: Parameters specific to the role - type: json - EndpointMap: - default: {} - description: Mapping of service endpoint -> protocol. Typically set - via parameter_defaults in the resource registry. - type: json - RhsmVars: - default: {} - description: Hash of ansible-role-redhat-subscription variables - used to configure RHSM. - # The parameters contains sensible data like activation key or password. - hidden: true - tags: - - role_specific - type: json - - resources: - # Merging role-specific parameters (RoleParameters) with the default parameters. - # RoleParameters will have the precedence over the default parameters. - RoleParametersValue: - type: OS::Heat::Value - properties: - type: json - value: - map_replace: - - map_replace: - - vars: RhsmVars - - values: {get_param: [RoleParameters]} - - values: - RhsmVars: {get_param: RhsmVars} - - outputs: - role_data: - description: Role data for the RHSM service. - value: - service_name: rhsm - config_settings: - tripleo::rhsm::firewall_rules: {} - upgrade_tasks: [] - step_config: '' - host_prep_tasks: - - name: Red Hat Subscription Management configuration - vars: {get_attr: [RoleParametersValue, value, vars]} - block: - - include_role: - name: redhat-subscription - -Lets go piece by piece and explain what's going on. - -Version and description -^^^^^^^^^^^^^^^^^^^^^^^ - -As with any other heat template, you do need to specify the -``heat_template_version``, and preferably give a description of what the -stack/template does. - -Parameters -^^^^^^^^^^ - -You'll notice that there are a bunch of heat parameters defined in this -template that are not necessarily used. This is because service templates are -created in the form of a `heat resource chain object`_. This -type of objects can create a "chain" or a set of objects with the same -parameters, and gather the outputs of them. 
So, eventually we pass the same -mandatory parameters to the chain. This happens in the -`common/services.yaml`_ file. Lets take a look and see how -this is called:: - - ServiceChain: - type: OS::Heat::ResourceChain - properties: - resources: {get_param: Services} - concurrent: true - resource_properties: - ServiceData: {get_param: ServiceData} - ServiceNetMap: {get_param: ServiceNetMap} - EndpointMap: {get_param: EndpointMap} - RoleName: {get_param: RoleName} - RoleParameters: {get_param: RoleParameters} - -Here we can see that the mandatory parameters for the services are the -following: - -* **ServiceData**: Contains an entry called ``net_cidr_map``, which is a map - that has the CIDRs for each network in your deployment. - -* **ServiceNetMap**: Contains a mapping that tells you what network is each - service configured at. Typical entries will look like: - ``BarbicanApiNetwork: internal_api``. - -* **EndpointMap**: Contains the keystone endpoints for each service. With this - you'll be able to get what port, what protocol, and even different entries - for the public, internal and admin endpoints. - -* **RoleName**: This is the name of the role on which the service is applied. - It could be one of the default roles (e.g. "Controller" or "Compute"), or a - custom role, depending on how you're deploying. - -* **RoleParameters**: A Map containing parameters to be applied to the specific - role. - -So, if you're writing a service template yourself, these are the parameters -you have to copy into your template. - -Aside from these parameters, you can define any other parameter yourself for -the service, and in order for your service to consume the parameter, you need -to pass them via ``parameter_defaults``. - -The ``role_data`` output -^^^^^^^^^^^^^^^^^^^^^^^^ - -This is the sole output that will be read and parsed in order to get the -relevant information needed from your service. It's value must be a map, and -from the aforementioned example, it minimally contains the following: - -* ``service_name``: This is the name of the service you're configuring. The - format is lower case letters and underscores. Setting this is quite - important, since this is how TripleO reports what services are enabled, and - generates appropriate hieradata, such as a list of all services enabled, and - flags that say that your service is enabled on a certain node. - -* ``config_settings``: This will contain a map of key value pairs; the map will - be written to the hosts in the form of hieradata, which puppet can then run - and use to configure your service. Note that the hieradata will only be - written on hosts that are tagged with a role that enables your service. - -* ``upgrade_tasks``: These are ansible tasks that run when TripleO is running - an upgrade with your service enabled. If you don't have any upgrade tasks to - do, you still have to specify this output, but it's enough to set it as an - empty list. - -* ``step_config``: This defines what puppet manifest should be run to configure - your service. It typically is a string with the specific ``include`` - statement that puppet will run. If you're not configuring your service with - puppet, then you need to set this value as an empty string. There is an - exception, however: When you're configuring a containerized service. We'll - dig into that later. - -These are the bare-minimum sections of ``role_data`` you need to set up. -However, you might have noticed that the example we linked above has another -section called ``host_prep_data``. 
This section is not mandatory, but it is one -of the several ways you can execute Ansible tasks on the host in order to -configure your service. - -Ansible-related parameters --------------------------- - -The following are sections of the service template that allow you to use -Ansible to execute actions or configure your service. - -Host prep deployment (or ``host_prep_tasks``) -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -This is seen as ``host_prep_tasks`` in the deployment service templates. -These are Ansible tasks that run before the configuration steps start, and -before any major services are configured (such as pacemaker). Here you would -put actions such as wiping out your disk, or migrating log files. - -Lets look at the output section of the example from the previous blog post:: - - outputs: - role_data: - description: Role data for the RHSM service. - value: - service_name: rhsm - config_settings: - tripleo::rhsm::firewall_rules: {} - upgrade_tasks: [] - step_config: '' - host_prep_tasks: - - name: Red Hat Subscription Management configuration - vars: {get_attr: [RoleParametersValue, value, vars]} - block: - - include_role: - name: redhat-subscription - -Here we see that an Ansible role is called directly from the -``host_prep_tasks`` section. In this case, we're setting up the Red Hat -subscription for the node where this is running. We would definitely want this -to happen in the very beginning of the deployment, so ``host_prep_tasks`` is an -appropriate place to put it. - -Pre Deploy Step tasks (or ``pre_deploy_step_tasks``) -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -These are Ansible tasks that take place in the overcloud nodes. They are run -after the network is completely setup, after the bits to prepare for containers -running are completed (TCIB/Kolla files, container engine installation and configuration). -They are also run before any External deploy tasks. - -External deploy tasks -^^^^^^^^^^^^^^^^^^^^^ - -These are Ansible tasks that take place in the node where you executed the -"overcloud deploy". You'll find these in the service templates in the -``external_deploy_tasks`` section. These actions are also ran as part of the -deployment steps, so you'll have the ``step`` fact available in order to limit -the ansible tasks to only run on a specific step. Note that this runs on each -step before the "deploy steps tasks", the puppet run, and the container -deployment. - -Typically you'll see this used when, to configure a service, you need to -execute an Ansible role that has special requirements for the Ansible -inventory. - -Such is the case for deploying OpenShift on baremetal via TripleO. 
The Ansible -role for deploying OpenShift requires several hosts and groups to exist in the -inventory, so we set those up in ``external_deploy_tasks``:: - - - name: generate openshift inventory for openshift_master service - copy: - dest: "{{playbook_dir}}/openshift/inventory/{{tripleo_role_name}}_openshift_master.yml" - content: | - {% if master_nodes | count > 0%} - masters: - hosts: - {% for host in master_nodes %} - {{host.hostname}}: - {{host | combine(openshift_master_node_vars) | to_nice_yaml() | indent(6)}} - {% endfor %} - {% endif %} - - {% if new_masters | count > 0 %} - new_masters: - hosts: - {% for host in new_masters %} - {{host.hostname}}: - {{host | combine(openshift_master_node_vars) | to_nice_yaml() | indent(6)}} - {% endfor %} - - new_etcd: - children: - new_masters: {} - {% endif %} - - etcd: - children: - masters: {} - - OSEv3: - children: - masters: {} - nodes: {} - new_masters: {} - new_nodes: {} - {% if groups['openshift_glusterfs'] | default([]) %}glusterfs: {}{% endif %} - -In the case of OpenShift, Ansible itself is also called as a command from here, -using variables and the inventory that's generated in this section. This way we -don't need to mix the inventory that the overcloud deployment itself is using -with the inventory that the OpenShift deployment uses. - -Deploy steps tasks -^^^^^^^^^^^^^^^^^^ - -These are Ansible tasks that take place in the overcloud nodes. Note that like -any other service, these tasks will only execute on the nodes whose role has -this service enabled. You'll find this as the ``deploy_steps_tasks`` section in -the service templates. These actions are also ran as part of the deployment -steps, so you'll have the ``step`` fact available in order to limit the -ansible tasks to only run on a specific step. Note that on each step, this runs -after the "external deploy tasks", but before the puppet run and the container -deployment. - -Typically you'll run quite simple tasks in this section, such as setting the -boot parameters for the nodes. Although, you can also run more complex roles, -such as the IPSec service deployment for TripleO:: - - - name: IPSEC configuration on step 1 - when: step == '1' - block: - - include_role: - name: tripleo-ipsec - vars: - map_merge: - - ipsec_configure_vips: false - ipsec_skip_firewall_rules: false - - {get_param: IpsecVars} - -This type of deployment applies for services that are better tied to TripleO's -Ansible inventory or that don't require a specific inventory to run. - -Container-related parameters ----------------------------- - -This covers the sections that allow you to write a containerized service for -TripleO. - -Containerized services brought a big change to TripleO. From packaging puppet -manifests and relying on them for configuration, we now have to package -containers, make sure the configuration ends up in the container somehow, then -run the containers. Here I won't describe the whole workflow of how we -containerized OpenStack services, but instead I'll describe what you need to -know to deploy a containerized service with TripleO. - -``puppet_config`` section -^^^^^^^^^^^^^^^^^^^^^^^^^ - -Before getting into the deployment steps where TripleO starts running services -and containers, there is a step where puppet is ran in containers and all the -needed configurations are created. The ``puppet_config`` section controls this -step. 
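For orientation, before going through each option, a minimal ``puppet_config``
entry for the etcd example used later in this document might look roughly like
the following sketch (the ``ContainerEtcdConfigImage`` parameter name and the
manifest path are illustrative and may differ between releases)::

    puppet_config:
      config_volume: etcd
      config_image: {get_param: ContainerEtcdConfigImage}
      step_config: |
        include ::tripleo::profile::base::etcd
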
- -There are several options we can pass here: - -* ``puppet_tags``: This describes the puppet resources that will be allowed to - run in puppet when generating the configuration files. Note that deeper - knowledge of your manifests and what runs in puppet is required for this. - Else, it might be better to generate the configuration files with Ansible - with the mechanisms described in previous sections of this document. - Any service that specifies tags will have the default tags of - ``'file,concat,file_line,augeas,cron'`` appended to the setting. - To know what settings to set here, as mentioned, you need to know your puppet - manifests. But, for instance, for keystone, an appropriate setting would be: - ``keystone_config``. For our etcd example, no tags are needed, since the - default tags we set here are enough. - -* ``config_volume``: The name of the directory where configuration files - will be generated for this service. You'll eventually use this to know what - location to bind-mount into the container to get the configuration. So, the - configuration will be persisted in: - ``/var/lib/config-data/puppet-generated/`` - -* ``config_image``: The name of the container image that will be used for - generating configuration files. This is often the same container - that the runtime service uses. Some services share a common set of - config files which are generated in a common base container. Typically - you'll get this from a parameter you pass to the template, e.g. - ``Image`` or ``ConfigImage``. Dealing with these - images requires dealing with the `container image prepare workflow`_. - The parameter should point to the specific image to be used, and it'll be - pulled from the registry as part of the - deployment. - -* ``step_config``: Similarly to the ``step_config`` that's described earlier in - this document, this setting controls the puppet manifest that is ran for this - service. The aforementioned puppet tags are used along with this manifest to - generate a config directory for this container. - -One important thing to note is that, if you're creating a containerized -service, you don't need to output a ``step_config`` section from the -``roles_data`` output. TripleO figured out if you're creating a containerized -service by checking for the existence of the ``docker_config`` section in the -``roles_data`` output. - -``kolla_config`` section -^^^^^^^^^^^^^^^^^^^^^^^^ - -As you might know, TripleO uses kolla to build the container images. Kolla, -however, not only provides the container definitions, but provides a rich -framework to extend and configure your containers. Part of this is the fact -that it provides an entry point that receives a configuration file, with which -you can modify several things from the container on start-up. We take advantage -of this in TripleO, and it's exactly what the ``kolla_config`` represents. - -For each container we create, we have a relevant ``kolla_config`` entry, with a -mapping key that has the following format:: - - /var/lib/kolla/config_files/.json - -This, contains YAML that represents how to map config files into the container. -In the container, this typically ends up mapped as -``/var/lib/kolla/config_files/config.json`` which kolla will end up reading. - -The typical configuration settings we use with this setting are the following: - -* ``command``: This defines the command we'll be running on the container. - Typically it'll be the command that runs the "server". 
So, in the example you - see ``/usr/bin/etcd ...``, which will be the main process running. - -* ``config_files``: This tells kolla where to read the configuration files - from, and where to persist them to. Typically what this is used for is that - the configuration generated by puppet is read from the host as "read-only", - and mounted on ``/var/lib/kolla/config_files/src``. Subsequently, it is - copied on to the right location by the kolla mechanisms. This way we make - sure that the container has the right permissions for the right user, given - we'll typically be in another user namespace in the container. - -* ``permissions``: As you would expect, this sets up the appropriate - permissions for a file or set of files in the container. - -``docker_config`` section -^^^^^^^^^^^^^^^^^^^^^^^^^ - -This is the section where we tell TripleO what containers to start. Here, we -explicitly write on which step to start which container. Steps are set as keys -with the ``step_`` format. Inside these, we should set up keys -with the specific container names. In our example, we're running only the etcd -container, so we use a key called ``etcd`` to give it such a name. -`Paunch`_ or tripleo_container_manage_ Ansible role will read these parameters, -and start the containers with those settings. - -Here's an example of the container definition:: - - step_2: - etcd: - image: {get_param: ContainerEtcdImage} - net: host - privileged: false - restart: always - healthcheck: - test: /openstack/healthcheck - volumes: - - /var/lib/etcd:/var/lib/etcd - - /etc/localtime:/etc/localtime:ro - - /var/lib/kolla/config_files/etcd.json:/var/lib/kolla/config_files/config.json:ro - - /var/lib/config-data/puppet-generated/etcd/:/var/lib/kolla/config_files/src:ro - environment: - - KOLLA_CONFIG_STRATEGY=COPY_ALWAYS - -This is what we're telling TripleO to do: - -* Start the container on step 2 - -* Use the container image coming from the ``ContainerEtcdImage`` heat parameter. - -* For the container, use the host's network. - -* The container is not `privileged`_. - -* The container will use the ``/openstack/healthcheck`` endpoint for healthchecking - -* We tell it what volumes to mount - - - Aside from the necessary mounts, note that we're bind-mounting the - file ``/var/lib/kolla/config_files/etcd.json`` on to - ``/var/lib/kolla/config_files/config.json``. This will be read by kolla - in order for the container to execute the actions we configured in the - ``kolla_config`` section. - - - We also bind-mount ``/var/lib/config-data/puppet-generated/etcd/``, which - is where the puppet ran (which was ran inside a container) persisted the - needed configuration files. We bind-mounted this to - ``/var/lib/kolla/config_files/src`` since we told kolla to copy this to - the correct location inside the container on the ``config_files`` section - that's part of ``kolla_config``. - -* Environment tells the container engine which environment variables to set - - - We set ``KOLLA_CONFIG_STRATEGY=COPY_ALWAYS`` in the example, since this - tells kolla to always execute the ``config_files`` and ``permissions`` - directives as part of the kolla entry point. If we don't set this, it - will only be executed the first time we run the container. - -``container_puppet_tasks`` section -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -These are containerized puppet executions that are meant as bootstrapping -tasks. They typically run on a "bootstrap node", meaning, they only run on one -relevant node in the cluster. 
And are meant for actions that you should only -execute once. Examples of this are: creating keystone endpoints, creating -keystone domains, creating the database users, etc. - -The format for this is quite similar to the one described in ``puppet_config`` -section, except for the fact that you can set several of these, and they also -run as part of the steps (you can specify several of these, divided by the -``step_`` keys). - -.. note:: This was docker_puppet_tasks prior to the Train cycle. - - -.. References - -.. _tht deployment readme: https://opendev.org/openstack/tripleo-heat-templates/src/branch/master/deployment/README.rst -.. _heat resource chain object: https://docs.openstack.org/heat/pike/template_guide/openstack.html#OS::Heat::ResourceChain -.. _common/services.yaml: https://github.com/openstack/tripleo-heat-templates/blob/stable/queens/common/services.yaml#L44 -.. _container image prepare workflow: https://docs.openstack.org/tripleo-docs/latest/install/containers_deployment/overcloud.html#preparing-overcloud-images -.. _Paunch: https://docs.openstack.org/paunch/readme.html -.. _tripleo_container_manage: https://docs.openstack.org/tripleo-ansible/latest/roles/role-tripleo_container_manage.html -.. _privileged: https://www.linux.com/blog/learn/sysadmin/2017/5/lazy-privileged-docker-containers diff --git a/doc/source/developer/tht_walkthrough/summary.rst b/doc/source/developer/tht_walkthrough/summary.rst deleted file mode 100644 index af42f8af..00000000 --- a/doc/source/developer/tht_walkthrough/summary.rst +++ /dev/null @@ -1,18 +0,0 @@ -Summary -------- - -References: - -#. https://etherpad.openstack.org/p/tripleo-composable-roles-work - -#. https://review.opendev.org/#/c/245804/2/specs/mitaka/composable-services-within-roles.rst - -#. https://review.opendev.org/#/q/topic:composable_service - -#. https://docs.openstack.org/tripleo-docs/latest/install/advanced_deployment/template_deploy.html - -#. http://hardysteven.blogspot.com.es/2015/05/tripleo-heat-templates-part-1-roles-and.html - -#. http://hardysteven.blogspot.com.es/2015/05/tripleo-heat-templates-part-2-node.html - -#. http://hardysteven.blogspot.com.es/2015/05/tripleo-heat-templates-part-3-cluster.html diff --git a/doc/source/developer/tht_walkthrough/tht_walkthrough.rst b/doc/source/developer/tht_walkthrough/tht_walkthrough.rst deleted file mode 100644 index b65f5a06..00000000 --- a/doc/source/developer/tht_walkthrough/tht_walkthrough.rst +++ /dev/null @@ -1,24 +0,0 @@ -Composable services tutorial -============================ - -.. include:: ../../links.rst - -This guide will be a walkthrough related to how to add new services to a TripleO -deployment through additions to the tripleo-heat-templates and puppet-tripleo -repositories, using part of the architecture defined in the `composable services architecture`_. - -.. note:: - - No puppet manifests may be defined in the `THT repository`_, they - should go to the `puppet-tripleo repository`_ instead. - -.. toctree:: - :maxdepth: 2 - - introduction - changes-tht - changes-puppet-tripleo - design-patterns - tls_for_services - summary - service_template_sections diff --git a/doc/source/developer/tht_walkthrough/tls_for_services.rst b/doc/source/developer/tht_walkthrough/tls_for_services.rst deleted file mode 100644 index 3cbefd7b..00000000 --- a/doc/source/developer/tht_walkthrough/tls_for_services.rst +++ /dev/null @@ -1,849 +0,0 @@ -TLS support for services -======================== - -.. 
_public-tls-dev: - -Public TLS ----------- - -If you're adding a REST service to TripleO, chances are that you'll need your -service to be terminated by HAProxy. Unfortunately, adding your service to -HAProxy needs extra changes to existing modules. Fortunately, it's not that -hard to do. - -You can add your service to be terminated by HAproxy by modifying the -`manifests/haproxy.pp`_ file. - -First off, we need a flag to tell the HAProxy module to write the frontend for -your service in the HAProxy configuration file if your service is deployed. For -this, we will add a parameter for the manifest. If you have followed the -walk-through, you may have noticed that the `tripleo-heat-templates`_ yaml -template requires you to set a name for your service in the ``role_data`` -output:: - - ... - outputs: - role_data: - description: Description of your service - value: - service_name: my_service - ... - -The overcloud stack generated from the tripleo-heat-templates will use this -name and automatically generate several hieradata entries that are quite -useful. One of this entries is a global flag that can tell if your service is -enabled at all or not. So we'll use this flag and fetch it from hiera to set -the parameter we need in haproxy.pp:: - - ... - $keystone_admin = hiera('keystone_enabled', false), - $keystone_public = hiera('keystone_enabled', false), - $neutron = hiera('neutron_api_enabled', false), - $cinder = hiera('cinder_api_enabled', false), - $glance_api = hiera('glance_api_enabled', false), - ... - $my_service = hiera('my_service_enabled', false), - ... - -Note that the name of the hiera key matches the following format -"_enabled" and defaults to ``false``. - -Next, you need to add a parameter that tells HAProxy which network your service -is listening on:: - - ... - $barbican_network = hiera('barbican_api_network', false), - $ceilometer_network = hiera('ceilometer_api_network', undef), - $cinder_network = hiera('cinder_api_network', undef), - $glance_api_network = hiera('glance_api_network', undef), - $heat_api_network = hiera('heat_api_network', undef), - ... - $my_service_network = hiera('my_service_network', undef), - ... - -Tripleo-heat-templates will also autogenerate this key for you. However for it -to do this, you need to specify the network for your service in the templates. -The file where this needs to be set is `network/service_net_map.j2.yaml`_, and -you'll be looking for a parameter called ``ServiceNetMapDefaults``. It will -look like this:: - - # Note that the key in this map must match the service_name - # see the description above about conversion from CamelCase to - # snake_case - the names must still match when converted - ServiceNetMapDefaults: - default: - # Note the values in this map are replaced by *NetName - # to allow for sane defaults when the network names are - # overridden. - ... - NeutronTenantNetwork: tenant - CeilometerApiNetwork: internal_api - BarbicanApiNetwork: internal_api - CinderApiNetwork: internal_api - GlanceApiNetwork: storage - ... - MyServiceNetwork: - -Now, having added this, you'll have access to the aforementioned hiera key and -several others. - -Note that the network is used by HAProxy to terminate TLS for your service. -This is used when Internal TLS is enabled and you'll learn more about it in the -:ref:`internal-tls-dev` section. - -Then, you need to add the ports that HAProxy will listen on. 
There is a list -with the defaults which is called ``default_service_ports``, and you need to -add your service here:: - - $default_service_ports = { - ... - neutron_api_port => 9696, - neutron_api_ssl_port => 13696, - nova_api_port => 8774, - nova_api_ssl_port => 13774, - nova_placement_port => 8778, - nova_placement_ssl_port => 13778, - nova_metadata_port => 8775, - nova_novnc_port => 6080, - nova_novnc_ssl_port => 13080, - ... - my_service_port => 5123, - my_service_ssl_port => 13123, - ... - } - -You are specifying two ports here, one that is the standard port, and another -one that is used for SSL in the public VIP/host. This was done initially to -address deployments without network isolation. In these cases, deploying TLS -would effectively take over the other interfaces, so HAProxy would be listening -with TLS everywhere accidentally if only using one port, and further -configuration for the services would need to happen to address this. However, -this is not really an issue in network isolated deployments, since they would -be using different IP addresses. So this extra port might not be needed in the -future if network isolation becomes the standard mode of deploying. - -.. note:: The SSL port is not needed if your service is only internal and - doesn't listen on the public VIP. - -.. note:: These ports can be overwritten by using the ``$service_ports`` - parameter from this manifest. Once could pass it via hieradata through the - ``ExtraConfig`` tripleo-heat-templates parameter, and setting something - like this as the value:: - - tripleo::haproxy::service_ports: - my_service_ssl_port: 5123 - my_service_2_ssl_port: 5124 - - Please consider that this will overwrite any entry from the list of - defaults, so you have to be careful to update all the relevant entries in - tripleo-heat-templates if you want to change port (be it SSL port or - non-SSL port). - -Finally, you need to add the actual endpoint to HAproxy which will configure -the listen directive (or frontend and backend) in the haproxy configuration. -For this, we have a helper class called ``::tripleo::haproxy::endpoint`` that -sets the relevant bits for you. All we need to do is pass in all the -information that class needs. And we need to make sure that this only happens -if the service is enabled, so we'll enclose it with the flag we mentioned -above. So here's a code snippet that demonstrates what you need to add:: - - if $my_service { - ::tripleo::haproxy::endpoint { 'my_service': - public_virtual_ip => $public_virtual_ip, - internal_ip => hiera('my_service_vip', $controller_virtual_ip), - service_port => $ports[my_service_port], - ip_addresses => hiera('my_service_node_ips', $controller_hosts_real), - server_names => hiera('my_service_node_names', $controller_hosts_names_real), - mode => 'http', - listen_options => { - 'http-request' => [ - 'set-header X-Forwarded-Proto https if { ssl_fc }', - 'set-header X-Forwarded-Proto http if !{ ssl_fc }'], - }, - public_ssl_port => $ports[my_service_ssl_port], - service_network => $my_service_network, - } - } - -* The ``public_virtual_ip`` variable contains the public IP address that's used - for your cloud, and it's the one that people will usually have access to - externally. - -* The hiera keys ``my_service_node_ips``, ``my_service_vip``, - ``my_service_node_names`` are automatically generated by - tripleo-heat-templates. These are other keys that you'll get access to once - you add the network for your service in ``ServiceNetMapDefaults``. 
- -* ``my_service_vip`` is, as mentioned, automatically generated, and will point - HAProxy to the non-public VIP where other services will be able to access - your service. This will usually be the Internal API network, but it depends - on your use-case. - -* ``my_service_node_ips`` is, as mentioned, automatically generated, and will - tell HAProxy which nodes are hosting your service, so it will point to them. - The address depends on the network your service is listening on. - -* ``my_service_node_names`` is, as mentioned, automatically generated, and will - be the names that HAProxy will use for the nodes. These are the FQDNs of the - nodes that are hosting your service. - -* This example is an HTTP service, so note that we set the mode to ``http``, - and that we set the option for HAProxy to detect if TLS was used for the - request, and set an appropriate value for the ``X-Forwarded-Proto`` HTTP - header if that's the case. Not all services can read this HTTP header, so - this depends on your service. For more information on the available options - and the mode, consult the `haproxy documentation`_. - -.. note:: If your service is only internal and doesn't listen on the public - VIP, you don't need all of the parameters listed above, and you would - instead do something like this:: - - if $my_service { - ::tripleo::haproxy::endpoint { 'my_service': - internal_ip => hiera('my_service_vip', $controller_virtual_ip), - service_port => $ports[my_service_port], - ip_addresses => hiera('my_service_node_ips', $controller_hosts_real), - server_names => hiera('my_service_node_names', $controller_hosts_names_real), - service_network => $my_service_network, - } - } - - The most relevant bits are that we omitted the SSL port and the - ``public_virtual_ip``, since these won't be used. - - -Having added this to the manifest, you should be covered for both getting your -service to be proxied by HAProxy, and letting it to TLS in the public interface -for you. - -.. _internal-tls-dev: - -Internal TLS ------------- - -How it works -~~~~~~~~~~~~ - -If you haven't read the section `TLS Everywhere `_ -it is highly recommended you read that first before continuing. - -As mentioned, the default CA is FreeIPA, which issues the certificates that the -nodes request, and they do the requests via certmonger. - -FreeIPA needs to have the nodes registered in its database and those nodes need -to be enrolled in order to authenticate to the CA. This is already being -handled for us, so there's nothing you need to do for your service on this -side. - -In order to issue certificates, FreeIPA also needs to have registered a -Kerberos principal for the service (or service principal). This way it knows -what service is using what certificate. The service principal will look -something like this:: - - /. - -We assume that the domain matches the kerberos realm, so specifying it is -redundant. - -Fortunately, one doesn't need to do much but fill in some boilerplate code in -tripleo-heat-templates to get this service principal. And this will be covered -in subsequent sections. - -So, with this one can finally request certificates for the service and use -them. - -.. _internal-tls-for-your-service: - -Enabling internal TLS for your service -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Aside from the actual certificate request, if your service is a RESTful -service, getting TLS to work with the current solution requires usually two -fronts: - -* To get your service to actually serve with TLS. 
- -* To tell HAProxy to try to access your service using TLS. - -This can be different for other types of services. For instance, at the time of -writing this, RabbitMQ isn't proxied by HAProxy, so there wasn't a need to -configure anything in HAProxy. Another example is MariaDB: Even though it is -proxied by HAProxy, TLS is handled on the MariaDB side and HAProxy doesn't do -TLS termination, so there was no need to configure HAProxy. - -Also, for services in general, there are two options for the Subject -Alternative Name (SAN) for the certificate: - -1) It should be a hostname that points to a specific interface in the node. - -2) It should be a hostname that points to a VIP (or a Virtual IP Address). - -The usual case for a RESTful service will be the first option. HAProxy will do -TLS termination, listening on the cloud's VIPs, and will then forward the -request to your service trying to access it via the node's internal network -interface (not the VIP). So for this case (#1), your service should be serving -a TLS certificate with the nodes' interface as the SAN. RabbitMQ has a similar -situation even if it's not proxied by HAProxy. Services try to access the -RabbitMQ cluster through the individual nodes, so each broker server has a -certificate with the node's hostname for a specific network interface as the -SAN. On the other hand, MariaDB follows the SAN pattern #2. It's terminated by -HAProxy, so the services access it through a VIP. However, MariaDB handles TLS -by itself, so it ultimately serves certificates with the hostname pointing to a -VIP interface as the SAN. This way, the hostname validation works as expected. - -If you're not sure how to go forward with your service, consult the TripleO -team. - -.. _services-over-httpd-internal-tls: - -Services that run over httpd -^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Good news! Certificates are already requested for you and there is a hash where -you can fetch the path to the certificates and use them for your service. - -In `puppet-tripleo`_ you need to go to the manifest that deploys the API for -your service. Here, you will add the following parameters to the class:: - - class tripleo::profile::base::my_service::api ( - ... - $my_service_network = hiera('my_service_network', undef), - $certificates_specs = hiera('apache_certificates_specs', {}), - $enable_internal_tls = hiera('enable_internal_tls', false), - ... - ) { - -* ``my_service_network`` is a hiera key that's already generated by - tripleo-heat-templates and it references the name of the network your service - is listening on. This was referenced in the :ref:`public-tls-dev` section. - Where it mentioned the addition of your service's network to the - ``ServiceNetMapDefaults`` parameter. So, if this was done, you'll get this - key autogenerated. - -* ``apache_certificates_specs`` is a hash containing the specifications for all - the certificates requested for services running over httpd. These are - network-dependant, which is why we needed the network name. Note that this - also contains the paths where the keys are located in the filesystem. - -* ``enable_internal_tls`` is a flag that tells TripleO if TLS for the internal - network is enabled. We should base the usage of the certificates for your - service on this. 
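To make the shape of ``apache_certificates_specs`` more concrete: the hash is
keyed by ``httpd-<network name>``, and each entry carries the certificate and
key paths along with the hostname and service principal used for the request.
A rough, illustrative sketch (the network name and file paths below are
examples only, not authoritative)::

    apache_certificates_specs:
      httpd-internal_api:
        service_certificate: '/etc/pki/tls/certs/httpd-internal_api.crt'
        service_key: '/etc/pki/tls/private/httpd-internal_api.key'
        hostname: "%{hiera('fqdn_internal_api')}"
        principal: "HTTP/%{hiera('fqdn_internal_api')}"
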
- -In order to get the certificate and key for your application you can use the -following boilerplate code:: - - if $enable_internal_tls { - if !$my_service_network { - fail('my_service_network is not set in the hieradata.') - } - $tls_certfile = $certificates_specs["httpd-${my_service_network}"]['service_certificate'] - $tls_keyfile = $certificates_specs["httpd-${my_service_network}"]['service_key'] - } else { - $tls_certfile = undef - $tls_keyfile = undef - } - -If internal TLS is not enabled, we set the variables for the certificate and -key to ``undef``, this way TLS won't be enabled. If it's enabled, we get the -certificate and key from the hash. - -Now, having done this, we can pass in the variables to the class that deploys -your service over httpd:: - - class { '::my_service::wsgi::apache': - ssl_cert => $tls_certfile, - ssl_key => $tls_keyfile, - } - -Now, in `tripleo-heat-templates`_, hopefully the template for your service's -API already uses the base profile for apache services. To verify this, you need -to look in the ``resources`` section of your template for something like this:: - - ApacheServiceBase: - type: ./apache.yaml - properties: - ServiceNetMap: {get_param: ServiceNetMap} - EndpointMap: {get_param: EndpointMap} - -Note that this is of type ./apache.yaml which is the template that contains the -common configurations for httpd based services. - -You will also need to make sure that the ssl hieradata is set correctly. You -will find it usually like this:: - - my_service::wsgi::apache::ssl: {get_param: EnableInternalTLS} - -Where, EnableInternalTLS should be defined in the ``parameters`` section of the -template. - -Finally, you also need to add the ``metadata_settings`` to the output of the -template. This section will be in the same level as ``config_settings`` and -``step_config``, and will contain the following:: - - metadata_settings: - get_attr: [ApacheServiceBase, role_data, metadata_settings] - -Note that it merely outputs the metadata_settings section that the apache base -stack already outputs. This will give the appropriate parameters to a hook that -sets the nova metadata, which in turn will be taken by the *novajoin* service -generate the service principals for httpd for the host. - -See the `TLS Everywhere Deploy Guide `_ - -.. _tls_everywhere_deploy_guide: https://docs.openstack.org/project-deploy-guide/tripleo-docs/latest/features/ssl.html#tls-everywhere-for-the-overcloud -.. _configuring-haproxy-internal-tls: - -Configuring HAProxy to use TLS for your service -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Now that your service will be serving with TLS enabled, we go back to the -`manifests/haproxy.pp`_ file. You already have added the HAProxy endpoint -resource for your service, so for this, you need to add now the option to tell -it to use TLS to communicate with the server backend nodes. This is done by -adding this:: - - if $my_service { - ::tripleo::haproxy::endpoint { 'my_service': - ... - member_options => union($haproxy_member_options, $internal_tls_member_options), - } - } - -This adds the TLS options to the default member options we use in TripleO for -HAProxy. It will tell HAProxy to require TLS for your service if internal TLS -is enabled; if it's not enabled, then it won't use TLS. - -This was all the extra configuration you needed to do for HAProxy. 
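Keep in mind that the ``$internal_tls_member_options`` only take effect when
internal TLS is actually enabled for the deployment. From the
tripleo-heat-templates side this is normally driven by an environment file
that, at a minimum, sets the relevant parameter; a minimal sketch (the actual
environment file shipped with TripleO typically sets further parameters and
resource registry entries as well)::

    parameter_defaults:
      EnableInternalTLS: true
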
- -Internal TLS for services that don't run over httpd -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -If your service supports being run with TLS enabled, and it's not -python/eventlet-based (see :ref:`internal-tls-via-proxy`). This section is for -you. - -In `tripleo-heat-templates`_ we'll need to specify the specs for doing the -certificate request, and we'll need to get the appropriate information to -generate a service principal. To make this optional, you should add the -following to your service's base template:: - - parameters: - ... - EnableInternalTLS: - type: boolean - default: false - - conditions: - - internal_tls_enabled: {equals: [{get_param: EnableInternalTLS}, true]} - ... - ... - -* ``EnableInternalTLS`` is a parameter that's passed via ``parameter_defaults`` - which tells the templates that we want to use TLS in the internal network. - -* ``internal_tls_enabled`` is a condition that we'll furtherly use to add the - relevant bits to the output. - -The next thing to do is to add the certificate specs, the relevant hieradata -and the required metadata to the output. In the ``roles_data`` output, lets -modify the ``config_settings`` to add what we need:: - - config_settings: - map_merge: - - - # The regular hieradata for your service goes here. - ... - - - if: - - internal_tls_enabled - - generate_service_certificates: true - my_service_certificate_specs: - service_certificate: '/etc/pki/tls/certs/my_service.crt' - service_key: '/etc/pki/tls/private/my_service.key' - hostname: - str_replace: - template: "%{hiera('fqdn_NETWORK')}" - params: - NETWORK: {get_param: [ServiceNetMap, MyServiceNetwork]} - principal: - str_replace: - template: "my_service/%{hiera('fqdn_NETWORK')}" - params: - NETWORK: {get_param: [ServiceNetMap, MyServiceNetwork]} - - {} - ... - metadata_settings: - if: - - internal_tls_enabled - - - - service: my_service - network: {get_param: [ServiceNetMap, MyServiceNetwork]} - type: node - - null - -* The conditional mentioned above is used in the ``config_settings``. So, if - ``internal_tls_enabled`` evaluates to ``true``, the hieradata necessary to - enable TLS in the internal network for your service will be added. Else, we - output ``{}``, which won't affect the ``map_merge`` and won't add anything - to the regular hieradata for your service. - -* For this case, we are only requesting one certificate for the service. - -* The service will be terminated by HAProxy in a conventional way, which means - that the SAN will be case #1 as described in - :ref:`internal-tls-for-your-service`. So the SAN will point to the specific - node's network interface, and not the VIP. - -* The ``ServiceNetMap`` contains the references to the networks every service - is listening on, and the key to get the network is the name of your service - but using camelCase instead of underscores. This value is the name of the - network and if used under the ``config_settings`` section, it will be - replaced by the actual IP. Else, it will just be the network name. - -* tripleo-heat-templates automatically generates hieradata that contains the - different network-dependant hostnames. They keys are in the following - format:: - - fqdn_ - -* The ``my_service_certificate_specs`` key will contain the specifications for - the certificate we'll request. They need to follow some conventions: - - * ``service_certificate`` will specify the path to the certificate file. It - should be an absolute path. 
- - * ``service_key`` will specify the path to the private key file that will be - used for the certificate. It should be an absolute path. - - * ``hostname`` is the name that will be used both in the Common Name (CN) and - the Subject Alternative Name (SAN) of the certificate. We can get this - value by using the hiera key described above. So we first get the name of - the network the service is listening on from the ``ServiceNetMap`` and we - then use ``str_replace`` to place that in a hiera call in the appropriate - format. - - * ``principal`` is the service principal that will be the one used for the - certificate request. We can get this in a similar manner as we got the - hostname, and prepending an identifying name for your service. The format - will be as follows:: - - < service identifier >/< network-based hostname > - - * These are the names used by convention, and will eventually be passed to - the ``certmonger_certificate`` resource from `puppet-certmonger`_. - -* The ``metadata_settings`` section will pass some information to a metadata - hook that will create the service principal before the certificate request is - done. The format as follows: - - * ``service``: This contains the service identifier to be used in the - kerberos service principal. It should match the identifier you put in the - ``principal`` section of the certificate specs. - - * ``network``: Tells the hook what network to use for the service. This will - be used for the hook and novajoin to use an appropriate hostname for the - kerberos principal. - - * ``type``: Will tell the hook what type of case is this service. The - available options are ``node`` and ``vip``. These are the cases mentioned - in the :ref:`internal-tls-for-your-service` for the SANs. - - Note that this is a list, which can be useful if we'll be creating several - service principals (which is not the case for our example). Also, if - ``internal_tls_enabled`` evaluates to ``false``, we then output ``null``. - -* Remember to set any relevant flags or parameters that your service might - need as hieradata in ``config_settings``. These might be things that - explicitly enable TLS such as flags or paths. But these details depend on the - puppet module that deploys your service. - -.. note:: **VIP-based hostname case** - - If your service requires the certificate to contain a VIP-based hostname, as - is the case for MariaDB. It would instead look like the following:: - - metadata_settings: - if: - - internal_tls_enabled - - - - service: my_service - network: {get_param: [ServiceNetMap, MyServiceNetwork]} - type: vip - - null - - * One can get the hostname for the VIP in a similar fashion as we got the - hostname for the node. The VIP hostnames are also network based, and one - can get them from a hiera key as well. It has the following format:: - - cloud_name_< network name > - - * The ``type`` in the ``metadata_settings`` entry is ``vip``. - -In `puppet-tripleo`_ We'll create a class that does the actual certificate -request and add it to the resource that gets the certificates for all the -services. 
- -Lets create a class to do the request:: - - class tripleo::certmonger::my_service ( - $hostname, - $service_certificate, - $service_key, - $certmonger_ca = hiera('certmonger_ca', 'local'), - $principal = undef, - ) { - include ::my_service::params - - $postsave_cmd = "systemctl restart ${::my_service::params::service_name}" - certmonger_certificate { 'my_service' : - ensure => 'present', - certfile => $service_certificate, - keyfile => $service_key, - hostname => $hostname, - dnsname => $hostname, - principal => $principal, - postsave_cmd => $postsave_cmd, - ca => $certmonger_ca, - wait => true, - require => Class['::certmonger'], - } - - file { $service_certificate : - owner => $::my_service::params::user, - group => $::my_service::params::group, - require => Certmonger_certificate['my_service'], - } - file { $service_key : - owner => $::my_service::params::user, - group => $::my_service::params::group, - require => Certmonger_certificate['my_service'], - } - - File[$service_certificate] ~> Service<| title == $::my_service::params::service_name |> - File[$service_key] ~> Service<| title == $::my_service::params::service_name |> - } - -* You'll note that the parameters mostly match the certificate specs that we - created before in tripleo-heat-templates. - -* By convention, we'll add this class in the **manifests/certmonger** folder. - -* ``certmonger_ca`` is a value that comes from tripleo-heat-templates and tells - certmonger which CA to use. - -* If it's available, by convention, many puppet modules contain a manifest - called *params*. This usually contains the name and group that the service - runs with, as well as the name of the service in a specific distribution. - So we include this. - -* We do then the actual certificate request by using the - ``certmonger_certificate`` provider and passing all the relevant data for the - request. - - * The post-save command which is specified via the ``postsave_cmd`` is a - command that will be ran after the certificate is saved. This is useful for - when certmonger has to resubmit the request to get an updated certificate, - since this way we can reload or restart the service so it can serve the new - certificate. - -* Using the ``file`` resource from puppet, we set the appropriate user and - group for the certificate and keys. Fortunately, certmonger has sane defaults - for the file modes, so we didn't set those here. - -Having this class, we now need to add to the `certmonger_user`_ resource. This -resource is in charge of making all the certificate requests and should be -available on all roles (or at least it should be added). You would add the -certificate specs as a parameter to this class:: - - class tripleo::profile::base::certmonger_user ( - ... - $my_service_certificate_specs = hiera('my_service_certificate_specs', {}), - ... - ) { - -And finally, we call the class that does the request:: - - ... - unless empty($my_service_certificate_specs) { - ensure_resource('class', 'tripleo::certmonger::my_service', $my_service_certificate_specs) - } - ... - -.. note:: - It is also possible to do several requests for your service. See the - `certmonger_user`_ source code for examples. - -Finally, you can do the same steps described in -`configuring-haproxy-internal-tls`_ to make HAProxy connect to your service -using TLS. - -.. 
_internal-tls-via-proxy: - -Internal TLS via a TLS-proxy -^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -If you have a RESTful service that runs over python (most likely using -eventlet) or if your service requires a TLS proxy in order to have TLS in the -internal network, there are extra steps to be done. - -For python-based services, due to performance issues with eventlet, the best -thing you can do is try to move your service to run over httpd, and let it -handle crypto instead. Then you'll be able to follow the instructions from the -:ref:`services-over-httpd-internal-tls` section above. If for any reason this -can't be done at the moment, we could still use httpd to service as a TLS proxy -in the node. It would then listen on the service's port and forward all the -requests to the service, which would then be listening on localhost. - -In `puppet-tripleo`_ you need to go to the manifest that deploys the API for -your service, and add the following parameters:: - - class tripleo::profile::base::my_service::api ( - ... - $certificates_specs = hiera('apache_certificates_specs', {}), - $enable_internal_tls = hiera('enable_internal_tls', false), - $my_service_network = hiera('my_service_api_network', undef), - $tls_proxy_bind_ip = undef, - $tls_proxy_fqdn = undef, - $tls_proxy_port = 5123, - ... - ) { - ... - -* ``certificates_specs``, ``enable_internal_tls`` and ``my_service_network`` - have already been mentioned in the :ref:`services-over-httpd-internal-tls` - section. - -* ``tls_proxy_bind_ip``, ``tls_proxy_fqdn`` and ``tls_proxy_port`` are - parameters that will be used by the httpd-based TLS proxy. They will tell it - where what IP to listen on, the FQDN (which will be used as the servername) - and the port it will use. Usually the port will match your service's port. - These values are expected to be set from tripleo-heat-templates. - -Next comes the code for the actual proxy:: - - ... - if $enable_internal_tls { - if !$my_service_network { - fail('my_service_network is not set in the hieradata.') - } - $tls_certfile = $certificates_specs["httpd-${my_service_network}"]['service_certificate'] - $tls_keyfile = $certificates_specs["httpd-${my_service_network}"]['service_key'] - - ::tripleo::tls_proxy { 'my_service_proxy': - servername => $tls_proxy_fqdn, - ip => $tls_proxy_bind_ip, - port => $tls_proxy_port, - tls_cert => $tls_certfile, - tls_key => $tls_keyfile, - notify => Class['::my_service::api'], - } - } - ... - -* The ``::tripleo::tls_proxy`` is the resource that will configure the TLS - proxy for your service. As you can see, it receives the certificates that - come from the ``certificates_specs`` which contain the specification - for the certificates, including the paths for the keys. - -* The notify is added here since we want the proxy to be set before the - service. - -In `tripleo-heat-templates`_, you should modify your service's template and add -the following:: - - parameters: - ... - EnableInternalTLS: - type: boolean - default: false - ... - conditions: - ... - use_tls_proxy: {equals : [{get_param: EnableInternalTLS}, true]} - ... - resources: - ... - TLSProxyBase: - type: OS::TripleO::Services::TLSProxyBase - properties: - ServiceNetMap: {get_param: ServiceNetMap} - EndpointMap: {get_param: EndpointMap} - EnableInternalTLS: {get_param: EnableInternalTLS} - - -* ``EnableInternalTLS`` is a parameter that's passed via ``parameter_defaults`` - which tells the templates that we want to use TLS in the internal network. 
- -* ``use_tls_proxy`` is a condition that we'll use to modify the behaviour of - the template depending on whether TLS in the internal network is enabled or - not. - -* ``TLSProxyBase`` will make the default values from the proxy's template - available to where our service is deployed. We should make sure that we - combine our service's hieradata with the hieradata coming from that resource - by doing a ``map_merge`` with the ``config_settings``:: - - ... - config_settings: - map_merge: - - get_attr: [TLSProxyBase, role_data, config_settings] - - # Here goes our service's metadata - ... - -So, with this, we can tell the service to bind on localhost instead of the -default interface depending if TLS in the internal network is enabled or not. -Lets now set the hieradata that the puppet module needs in our service's -hieradata, which is in the ``config_settings`` section:: - - tripleo::profile::base::my_service::api::tls_proxy_bind_ip: - get_param: [ServiceNetMap, MyServiceNetwork] - tripleo::profile::base::my_service::api::tls_proxy_fqdn: - str_replace: - template: - "%{hiera('fqdn_$NETWORK')}" - params: - $NETWORK: {get_param: [ServiceNetMap, MyServiceNetwork]} - tripleo::profile::base::my_service::api::tls_proxy_port: - get_param: [EndpointMap, NeutronInternal, port] - my_service::bind_host: - if: - - use_tls_proxy - - 'localhost' - - {get_param: [ServiceNetMap, MyServiceNetwork]} - -* The ``ServiceNetMap`` contains the references to the networks every service - is listening on, and the key to get the network is the name of your service - but using camelCase instead of underscores. This value will be automatically - replaced by the actual IP. - -* tripleo-heat-templates generates automatically hieradata that contains the - different network-dependant hostnames. They keys are in the following - format:: - - fqdn_ - - So, to get it, we get the network name from the ``ServiceNetMap``, and do a - ``str_replace`` in heat that will use that network name and add it to a hiera - call that will then gets us the FQDN we need. - -* The port we can easily get from the ``EndpointMap``. - -* The conditional uses the actual IP if there's no TLS in the internal network - enabled and localhost if it is. - -Finally, we add the ``metadata_settings`` section to make sure we get a -kerberos service principal:: - - metadata_settings: - get_attr: [TLSProxyBase, role_data, metadata_settings] - -.. References - -.. _certmonger_user: https://github.com/openstack/puppet-tripleo/blob/master/manifests/profile/base/certmonger_user.pp -.. _haproxy documentation: http://www.haproxy.org/ -.. _manifests/haproxy.pp: https://github.com/openstack/puppet-tripleo/blob/master/manifests/haproxy.pp -.. _network/service_net_map.j2.yaml: https://github.com/openstack/tripleo-heat-templates/blob/master/network/service_net_map.j2.yaml -.. _puppet-certmonger: https://github.com/earsdown/puppet-certmonger -.. _puppet-tripleo: https://github.com/openstack/puppet-tripleo -.. _tripleo-heat-templates: https://github.com/openstack/tripleo-heat-templates diff --git a/doc/source/developer/tripleoclient_primer.rst b/doc/source/developer/tripleoclient_primer.rst deleted file mode 100644 index e930df88..00000000 --- a/doc/source/developer/tripleoclient_primer.rst +++ /dev/null @@ -1,139 +0,0 @@ -Primer python-tripleoclient and tripleo-common -============================================== - -This document gives an overview of how python-tripleoclient_ provides the -cli interface for TripleO. 
In particular it focuses on two key aspects of -TripleO commands: where they are defined and how they (very basically) work. - -Whilst python-tripleoclient provides the CLI for TripleO, it is in -tripleo-common_ that the logic behind a given command resides. So interfacing -with OpenStack services such as Heat, Nova or Mistral typically happens in -tripleo-common. - -For this primer we will use a specific example command but the same applies to -any TripleO cli command to be found in the TripleO documentation or in any -local deployment (or even in TripleO CI) logfiles. - -The example used here is:: - - openstack overcloud container image build - -This command is used to build the container images listed in the -tripleo-common file overcloud_containers.yaml_ using Kolla_. - -See the `Building Containers Deploy Guide `_ for more information on -how to use this command as an operator. - -.. _building_containers_deploy_guide: https://docs.openstack.org/project-deploy-guide/tripleo-docs/latest/deployment/3rd_party.html - -One of the TripleO CI jobs that executes this command is the -tripleo-build-containers-centos-7_ job. This job invokes the overcloud container -image build command in the build.sh.j2_ template:: - - openstack overcloud container image build \ - --config-file $TRIPLEO_COMMON_PATH/container-images/overcloud_containers.yaml \ - --kolla-config-file {{ workspace }}/kolla-build.conf \ - -The relevance of showing this is simply to serve as an example in the following -sections. First we see how to identify *where* in the tripleoclient code a given -command is defined, and then *how* the command works, highlighting a recurring -pattern common to all TripleO commands. - -.. _python-tripleoclient: https://opendev.org/openstack/python-tripleoclient/ -.. _tripleo-common: https://opendev.org/openstack/tripleo-common/ -.. _overcloud_containers.yaml: https://opendev.org/openstack/tripleo-common/src/branch/master/container-images/overcloud_containers.yaml?id=827af753884e15326863ff2207b2ac95d4ad595b#n1 -.. _Kolla: https://opendev.org/openstack/kolla -.. _tripleo-build-containers-centos-7: http://zuul.opendev.org/builds?job_name=tripleo-build-containers-centos-7 -.. _build.sh.j2: https://opendev.org/openstack-infra/tripleo-ci/src/branch/master/playbooks/tripleo-buildcontainers/templates/build.sh.j2?id=69212e1cd8726396c232b493f1aec79480459666#n5 -.. _setup.cfg: https://opendev.org/openstack/python-tripleoclient/src/branch/master/setup.cfg?id=73cc43898cfcc8b99ce736f734fc5b514f5bc6e9#n46 - - -TripleO commands: *where* -------------------------- - -Luckily the location of all TripleO commands is given in the list of -``entry_points`` in the python-tripleoclient_ setup.cfg_ file. Each *key=value* -pair has a key derived from the TripleO command. Taking the command, omit -the initial *openstack* and link subcommands with underscore instead of -whitespace. That is, for the -**openstack overcloud container image build** command the equivalent entry is -**overcloud_container_image_build**:: - - [entry_points] - openstack.cli.extension = - tripleoclient = tripleoclient.plugin - - openstack.tripleoclient.v1 = - ... - overcloud_container_image_build = tripleoclient.v1.container_image:BuildImage - -The value in each *key=value* pair provides us with the file and class name -used in the tripleoclient namespace for this command. 
For **overcloud_container_image_build** we have -**tripleoclient.v1.container_image:BuildImage**, which means this command is -defined in a class called **BuildImage** inside the `tripleoclient/v1/container_image.py`_ -file. - -.. _`tripleoclient/v1/container_image.py`: https://opendev.org/openstack/python-tripleoclient/src/branch/master/tripleoclient/v1/container_image.py?id=0132e7d08240d8a9d5839cc4345574d44ec2b278#n100 - -TripleO commands: *how* ------------------------ - -Obviously each TripleO command 'works' differently in that they are doing -different things - deploy vs upgrade the undercloud vs overcloud etc. -However there **is** at least one commonality which we highlight in this section. -Each TripleO command class defines a get_parser_ function and a take_action_ -function. - -The get_parser_ is where all command line arguments are defined and -take_action_ is where tripleo-common is invoked to perform the task at hand, -building container images in this case. - -Looking inside the **BuildImage** class we find:: - - def get_parser(self, prog_name): - ... - parser.add_argument( - "--config-file", - dest="config_files", - metavar='', - default=[], - action="append", - help=_("YAML config file specifying the images to build. May be " - "specified multiple times. Order is preserved, and later " - "files will override some options in previous files. " - "Other options will append. If not specified, the default " - "set of containers will be built."), - ) - parser.add_argument( - "--kolla-config-file", - -Here we can see where the two arguments shown in the introduction above are -defined: **--config-file** and **--kolla-config-file**. You can see the default -values and all other attributes for each of the command parameters there. - -Finally we can look for the take_action_ function to learn more about how the -command actually 'works'. Typically the take_action function will have some -validation of the provided arguments before calling out to tripleo-common to -actually 'do' the work (build container images in this case):: - - from tripleo_common.image import kolla_builder - ... - def take_action(self, parsed_args): - ... - try: - builder = kolla_builder.KollaImageBuilder(parsed_args.config_files) - result = builder.build_images(kolla_config_files, - -Here we can see the actual image build is done by the **kolla_builder.KollaImageBuilder** -class **build_images** function. Looking in tripleo-common we can follow that -python namespace to find the definition of **build_images** in the -`tripleo_common/image/kolla_builder.py`_ file:: - - def build_images(self, kolla_config_files=None, excludes=[], - template_only=False, kolla_tmp_dir=None): - cmd = ['kolla-build'] - ... - -.. _get_parser: https://opendev.org/openstack/python-tripleoclient/src/branch/master/tripleoclient/v1/container_image.py?id=0132e7d08240d8a9d5839cc4345574d44ec2b278#n119 -.. _take_action: https://opendev.org/openstack/python-tripleoclient/src/branch/master/tripleoclient/v1/container_image.py?id=0132e7d08240d8a9d5839cc4345574d44ec2b278#n184 -.. 
_`tripleo_common/image/kolla_builder.py`: https://opendev.org/openstack/tripleo-common/src/branch/master/tripleo_common/image/kolla_builder.py?id=3db41939a370ef3bbd2c6b60ca24e6e8e4b6e30a#n441 diff --git a/doc/source/index.rst b/doc/source/index.rst deleted file mode 100644 index 347b74fe..00000000 --- a/doc/source/index.rst +++ /dev/null @@ -1,147 +0,0 @@ -Welcome to |project| documentation -==================================== - -TripleO is a project aimed at installing, upgrading and operating OpenStack -clouds using OpenStack's own cloud facilities as the foundation - building on -Nova, Ironic, Neutron and Heat to automate cloud management at datacenter -scale - -Contributor Guide ------------------ - -.. toctree:: - :maxdepth: 3 - :includehidden: - - contributor/index - developer/index - -|project| Architecture ----------------------- - -.. toctree:: - :maxdepth: 3 - :includehidden: - - install/introduction/architecture.rst - -|project| Components ----------------------- - -.. toctree:: - :maxdepth: 2 - :includehidden: - - install/introduction/components.rst - -Tripleo CI Guide ----------------- - -.. toctree:: - :maxdepth: 3 - :includehidden: - - ci/index - -Install Guide -------------- - -.. toctree:: - :maxdepth: 3 - :includehidden: - - install/index - -Upgrades/Updates/FFWD-Upgrade ------------------------------ - -.. toctree:: - :maxdepth: 3 - :includehidden: - - upgrade/index - -Documentation Conventions -========================= - -Some steps in the following instructions only apply to certain environments, -such as deployments to real baremetal and deployments using Red Hat Enterprise -Linux (RHEL). These steps are marked as follows: - -.. admonition:: RHEL - :class: rhel - - Step that should only be run when using RHEL - -.. admonition:: RHEL Portal Registration - :class: portal - - Step that should only be run when using RHEL Portal Registration - -.. admonition:: RHEL Satellite Registration - :class: satellite - - Step that should only be run when using RHEL Satellite Registration - -.. admonition:: CentOS - :class: centos - - Step that should only be run when using CentOS - -.. admonition:: Baremetal - :class: baremetal - - Step that should only be run when deploying to baremetal - -.. admonition:: Virtual - :class: virtual - - Step that should only be run when deploying to virtual machines - -.. admonition:: Ceph - :class: ceph - - Step that should only be run when deploying Ceph for use by the Overcloud - -.. admonition:: Stable Branch - :class: stable - - Step that should only be run when choosing to use components from their - stable branches rather than using packages/source based on current master. - -.. admonition:: |oldest_version_name| - :class: |oldest_version_name_lower| - - Step that should only be run when installing from the |oldest_version_name| - stable branch. - -.. admonition:: |before_oldest_version_name| - - Step that should only be run when installing from the - |before_oldest_version_name| stable branch. - -.. admonition:: |before_latest_version_name| - :class: |before_latest_version_name_lower| - - Step that should only be run when installing from the - |before_latest_version_name| stable branch. - -.. admonition:: |latest_version_name| - :class: |latest_version_name_lower| - - Step that should only be run when installing from the |latest_version_name| - stable branch. - -.. admonition:: Validations - :class: validations - - Steps that will run the pre and post-deployment validations - -.. 
admonition:: Optional Feature - :class: optional - - Step that is optional. A deployment can be done without these steps, but they - may provide useful additional functionality. - -Any such steps should *not* be run if the target environment does not match -the section marking. diff --git a/doc/source/install/_images/TripleO_Network_Diagram_.jpg b/doc/source/install/_images/TripleO_Network_Diagram_.jpg deleted file mode 100644 index 72d9f3a9..00000000 Binary files a/doc/source/install/_images/TripleO_Network_Diagram_.jpg and /dev/null differ diff --git a/doc/source/install/_images/introspection_diagram.png b/doc/source/install/_images/introspection_diagram.png deleted file mode 100644 index 9506168e..00000000 Binary files a/doc/source/install/_images/introspection_diagram.png and /dev/null differ diff --git a/doc/source/install/_images/introspection_diagram.svg b/doc/source/install/_images/introspection_diagram.svg deleted file mode 100644 index 9112b0d8..00000000 --- a/doc/source/install/_images/introspection_diagram.svg +++ /dev/null @@ -1,717 +0,0 @@ - - - -image/svg+xmlClient -Ironic -Introspection Ramdisk -Ironic-inspector -Register nodes’power management details -Send nodes for introspection - Reboot nodes -> PXE boot generic -introspection ramdisk image -Facts checking and registration of hardware details -Post hardware metrics -Nodes registered -Nodes are fully registeredwth full stack of hardware attributes - \ No newline at end of file diff --git a/doc/source/install/_images/logical_view.png b/doc/source/install/_images/logical_view.png deleted file mode 100644 index e8941c23..00000000 Binary files a/doc/source/install/_images/logical_view.png and /dev/null differ diff --git a/doc/source/install/_images/logical_view.svg b/doc/source/install/_images/logical_view.svg deleted file mode 100644 index 1498034f..00000000 --- a/doc/source/install/_images/logical_view.svg +++ /dev/null @@ -1,2383 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Deploys - Updates - Monitors - - - - - -Undercloud - -Overcloud - -Your overcloud can contain as few or as many predefined overcloud roles (OpenStack components) as you want. 
- diff --git a/doc/source/install/_images/overview.png b/doc/source/install/_images/overview.png deleted file mode 100644 index 60e23f53..00000000 Binary files a/doc/source/install/_images/overview.png and /dev/null differ diff --git a/doc/source/install/_images/overview.svg b/doc/source/install/_images/overview.svg deleted file mode 100644 index f0aa9114..00000000 --- a/doc/source/install/_images/overview.svg +++ /dev/null @@ -1,32 +0,0 @@ - - - - - - - - - - - - - -Undercloud - -Production OpenStack Cloud (Overcloud) - -Deploys -Updates -Monitors - diff --git a/doc/source/install/_images/physical_view.png b/doc/source/install/_images/physical_view.png deleted file mode 100644 index 59a9e9f9..00000000 Binary files a/doc/source/install/_images/physical_view.png and /dev/null differ diff --git a/doc/source/install/_images/physical_view.svg b/doc/source/install/_images/physical_view.svg deleted file mode 100644 index 7f5b1e21..00000000 --- a/doc/source/install/_images/physical_view.svg +++ /dev/null @@ -1,2243 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Controller Node - - Object - Storage - Node - - Block - Storage - Node - - Compute - Node - - - - - - Ceph - Storage - Node - - - - - - -Undercloud - -Overcloud - -Undercloud Node -Ceph-Mon -Ceph-OSD - diff --git a/doc/source/install/_images/tripleo_ansible_arch.png b/doc/source/install/_images/tripleo_ansible_arch.png deleted file mode 100644 index 413373dc..00000000 Binary files a/doc/source/install/_images/tripleo_ansible_arch.png and /dev/null differ diff --git a/doc/source/install/_images/tripleo_ui.png b/doc/source/install/_images/tripleo_ui.png deleted file mode 100644 index e49d93ed..00000000 Binary files a/doc/source/install/_images/tripleo_ui.png and /dev/null differ diff --git a/doc/source/install/advanced_deployment/custom.rst b/doc/source/install/advanced_deployment/custom.rst deleted file mode 100644 index bda8855a..00000000 --- a/doc/source/install/advanced_deployment/custom.rst +++ /dev/null @@ -1,9 +0,0 @@ -Custom Configurations -===================== - -Documentation on how to deploy custom configurations with |project|. - - -.. toctree:: - deploy_openshift - features diff --git a/doc/source/install/advanced_deployment/deploy_openshift.rst b/doc/source/install/advanced_deployment/deploy_openshift.rst deleted file mode 100644 index e0eba46b..00000000 --- a/doc/source/install/advanced_deployment/deploy_openshift.rst +++ /dev/null @@ -1,452 +0,0 @@ -(DEPRECATED) Deploying OpenShift -================================ - -.. note:: This functionality was removed as of Train. - -You can use TripleO to deploy OpenShift clusters onto baremetal nodes. -TripleO deploys the operating system onto the nodes and uses -`openshift-ansible` to then configure OpenShift. TripleO can also be used -to manage the baremetal nodes. - -Define the OpenShift roles -************************** - -TripleO installs OpenShift services using composable roles for -`OpenShiftMaster`, `OpenShiftWorker`, and `OpenShiftInfra`. When you import -a baremetal node using `instackenv.json`, you can tag it to use a certain -composable role. See `Custom Roles `_ for more information. - -.. 
_custom_roles_deploy_guide: https://docs.openstack.org/project-deploy-guide/tripleo-docs/latest/features/custom_roles.html - -1. Generate the OpenShift roles: - -.. code-block:: bash - - openstack overcloud roles generate -o /home/stack/openshift_roles_data.yaml \ - OpenShiftMaster OpenShiftWorker OpenShiftInfra - -2. View the OpenShift roles: - -.. code-block:: bash - - openstack overcloud role list - -The result should include entries for `OpenShiftMaster`, `OpenShiftWorker`, and -`OpenShiftInfra`. - -3. See more information on the `OpenShiftMaster` role: - -.. code-block:: bash - - openstack overcloud role show OpenShiftMaster - -.. note:: - For development or PoC environments that are more resource-constrained, it is - possible to use the `OpenShiftAllInOne` role to collocate the different - OpenShift services on the same node. The all-in-one role is not recommended - for production. - -Create the OpenShift profiles -***************************** - -This procedure describes how to enroll a physical node as an OpenShift node. - -1. Create a flavor for each OpenShift role. You will need to adjust this - values to suit your requirements: - -.. code-block:: bash - - openstack flavor create --id auto --ram 4096 --disk 40 --vcpus 1 --swap 500 m1.OpenShiftMaster - openstack flavor create --id auto --ram 4096 --disk 40 --vcpus 1 --swap 500 m1.OpenShiftWorker - openstack flavor create --id auto --ram 4096 --disk 40 --vcpus 1 --swap 500 m1.OpenShiftInfra - -2. Map the flavors to the required profile: - -.. code-block:: bash - - openstack flavor set --property "capabilities:profile"="OpenShiftMaster" \ - --property "capabilities:boot_option"="local" m1.OpenShiftMaster - openstack flavor set --property "capabilities:profile"="OpenShiftWorker" \ - --property "capabilities:boot_option"="local" m1.OpenShiftWorker - openstack flavor set --property "capabilities:profile"="OpenShiftInfra" \ - --property "capabilities:boot_option"="local" m1.OpenShiftInfra - -3. Add your nodes to `instackenv.json`. You will need to define them to use the - `capabilities` field. For example: - -.. code-block:: json - - [{ - "arch":"x86_64", - "cpu":"4", - "disk":"60", - "mac":[ - "00:0c:29:9f:5f:05" - ], - "memory":"16384", - "pm_type":"ipmi", - "capabilities":"profile:OpenShiftMaster", - "name": "OpenShiftMaster_1" - }, - { - "arch":"x86_64", - "cpu":"4", - "disk":"60", - "mac":[ - "00:0c:29:91:b9:2d" - ], - "memory":"16384", - "pm_type":"ipmi", - "capabilities":"profile:OpenShiftWorker", - "name": "OpenShiftWorker_1" - }, - { - "arch":"x86_64", - "cpu":"4", - "disk":"60", - "mac":[ - "00:0c:29:91:b9:6a" - ], - "memory":"16384", - "pm_type":"ipmi", - "capabilities":"profile:OpenShiftInfra", - "name": "OpenShiftInfra_1" - }] - -.. TOOD(aschultz): include reference to deploy guide - -4. Import and introspect the TripleO nodes as you normally would for your - deployment. For example: - -.. code-block:: bash - - openstack overcloud node import ~/instackenv.json - openstack overcloud node introspect --all-manageable --provide - -5. Verify the overcloud nodes have assigned the correct profile - -.. 
code-block:: bash - - openstack overcloud profiles list - +--------------------------------------+--------------------+-----------------+-----------------+-------------------+ - | Node UUID | Node Name | Provision State | Current Profile | Possible Profiles | - +--------------------------------------+--------------------+-----------------+-----------------+-------------------+ - | 72b2b1fc-6ba4-4779-aac8-cc47f126424d | openshift-worker01 | available | OpenShiftWorker | | - | d64dc690-a84d-42dd-a88d-2c588d2ee67f | openshift-worker02 | available | OpenShiftWorker | | - | 74d2fd8b-a336-40bb-97a1-adda531286d9 | openshift-worker03 | available | OpenShiftWorker | | - | 0eb17ec6-4e5d-4776-a080-ca2fdcd38e37 | openshift-infra02 | available | OpenShiftInfra | | - | 92603094-ba7c-4294-a6ac-81f8271ce83e | openshift-infra03 | available | OpenShiftInfra | | - | b925469f-72ec-45fb-a403-b7debfcf59d3 | openshift-master01 | available | OpenShiftMaster | | - | 7e9e80f4-ad65-46e1-b6b4-4cbfa2eb7ea7 | openshift-master02 | available | OpenShiftMaster | | - | c2bcdd3f-38c3-491b-b971-134cab9c4171 | openshift-master03 | available | OpenShiftMaster | | - | ece0ef2f-6cc8-4912-bc00-ffb3561e0e00 | openshift-infra01 | available | OpenShiftInfra | | - | d3a17110-88cf-4930-ad9a-2b955477aa6c | openshift-custom01 | available | None | | - | 07041e7f-a101-4edb-bae1-06d9964fc215 | openshift-custom02 | available | None | | - +--------------------------------------+--------------------+-----------------+-----------------+-------------------+ - -Configure the container registry -******************************** - -Follow `container image preparation `_ to configure TripleO for the -container image preparation. - -.. _container_image_preparation_deploy_guide: https://docs.openstack.org/project-deploy-guide/tripleo-docs/latest/deployment/container_image_prepare.html - -This generally means generating a `/home/stack/containers-prepare-parameter.yaml` file: - -.. code-block:: bash - - openstack tripleo container image prepare default \ - --local-push-destination \ - --output-env-file containers-prepare-parameter.yaml - -Define the OpenShift environment -******************************** - -Create the `openshift_env.yaml` file. This file will define the -OpenShift-related settings that TripleO will later apply as part of the -`openstack overcloud deploy` procedure. You will need to update these values -to suit your deployment: - -.. 
code-block:: yaml - - Parameter_defaults: - # by default TripleO assigns the VIP random from the allocation pool - # by using the FixedIPs we can set the VIPs to predictable IPs before starting the deployment - CloudName: public.openshift.localdomain - PublicVirtualFixedIPs: [{'ip_address':'10.0.0.200'}] - - CloudNameInternal: internal.openshift.localdomain - InternalApiVirtualFixedIPs: [{'ip_address':'172.17.1.200'}] - - CloudDomain: openshift.localdomain - - ## Required for CNS deployments only - OpenShiftInfraParameters: - OpenShiftGlusterDisks: - - /dev/sdb - - ## Required for CNS deployments only - OpenShiftWorkerParameters: - OpenShiftGlusterDisks: - - /dev/sdb - - /dev/sdc - - ControlPlaneDefaultRoute: 192.168.24.1 - EC2MetadataIp: 192.168.24.1 - ControlPlaneSubnetCidr: 24 - - # The DNS server below should have entries for resolving - # {internal,public,apps}.openshift.localdomain names - DnsServers: - - 10.0.0.90 - - OpenShiftGlobalVariables: - - openshift_master_identity_providers: - - name: 'htpasswd_auth' - login: 'true' - challenge: 'true' - kind: 'HTPasswdPasswordIdentityProvider' - openshift_master_htpasswd_users: - sysadmin: '$apr1$jpBOUqeU$X4jUsMyCHOOp8TFYtPq0v1' - - #openshift_master_cluster_hostname should match the CloudNameInternal parameter - openshift_master_cluster_hostname: internal.openshift.localdomain - - #openshift_master_cluster_public_hostname should match the CloudName parameter - openshift_master_cluster_public_hostname: public.openshift.localdomain - - openshift_master_default_subdomain: apps.openshift.localdomain - -For custom networks or customer interfaces, it is necessary to use custom -network interface templates: - -.. code-block:: yaml - - resource_registry: - OS::TripleO::OpenShiftMaster::Net::SoftwareConfig: /home/stack/master-nic.yaml - OS::TripleO::OpenShiftWorker::Net::SoftwareConfig: /home/stack/worker-nic.yaml - OS::TripleO::OpenShiftInfra::Net::SoftwareConfig: /home/stack/infra-nic.yaml - -Deploy OpenShift nodes -********************** - -As a result of the previous steps, you will have three new YAML files: - -* `openshift_env.yaml` -* `openshift_roles_data.yaml` -* `containers-default-parameters.yaml` - -For a custom network deployments, maybe it is necessary NICs and network -templates like: - -* `master-nic.yaml` -* `infra-nic.yaml` -* `worker-nic.yaml` -* `network_data_openshift.yaml` - -Add these YAML files to your `openstack overcloud deploy` command. - -An example for CNS deployments: - -.. code-block:: bash - - openstack overcloud deploy \ - --stack openshift \ - --templates \ - -r /home/stack/openshift_roles_data.yaml \ - -n /usr/share/openstack-tripleo-heat-templates/network_data_openshift.yaml \ - -e /usr/share/openstack-tripleo-heat-templates/environments/network-isolation.yaml \ - -e /usr/share/openstack-tripleo-heat-templates/environments/openshift.yaml \ - -e /usr/share/openstack-tripleo-heat-templates/environments/openshift-cns.yaml \ - -e /home/stack/openshift_env.yaml \ - -e /home/stack/containers-prepare-parameter.yaml - -An example for non-CNS deployments: - -.. 
code-block:: bash - - openstack overcloud deploy \ - --stack openshift \ - --templates \ - -r /home/stack/openshift_roles_data.yaml \ - -n /usr/share/openstack-tripleo-heat-templates/network_data_openshift.yaml \ - -e /usr/share/openstack-tripleo-heat-templates/environments/network-isolation.yaml \ - -e /usr/share/openstack-tripleo-heat-templates/environments/openshift.yaml \ - -e /home/stack/openshift_env.yaml \ - -e /home/stack/containers-prepare-parameter.yaml - -Deployment for custom networks or interfaces, it is necessary to specify them. -For example: - -.. code-block:: bash - - openstack overcloud deploy \ - --stack openshift \ - --templates \ - -r /home/stack/openshift_roles_data.yaml \ - -n /home/stack/network_data_openshift.yaml \ - -e /usr/share/openstack-tripleo-heat-templates/environments/network-isolation.yaml \ - -e /usr/share/openstack-tripleo-heat-templates/environments/openshift.yaml \ - -e /usr/share/openstack-tripleo-heat-templates/environments/openshift-cns.yaml \ - -e /home/stack/openshift_env.yaml \ - -e /home/stack/containers-prepare-parameter.yaml \ - -e /home/stack/custom-nics.yaml - -Review the OpenShift deployment -******************************* - -Once the overcloud deploy procedure has completed, you can review the state -of your OpenShift nodes. - -1. List all your baremetal nodes. You should expect to see your master, infra, - and worker nodes. - - .. code-block:: bash - - baremetal node list - -2. Locate the OpenShift node: - - .. code-block:: bash - - openstack server list - -3. SSH to the OpenShift node. For example: - - .. code-block:: bash - - ssh heat-admin@192.168.122.43 - -4. Change to root user: - - .. code-block:: bash - - sudo -i - -5. Review the container orchestration configuration: - - .. code-block:: bash - - cat .kube/config - -6. Login to OpenShift: - - .. code-block:: bash - - oc login -u admin - -7. Review any existing projects: - - .. code-block:: bash - - oc get projects - -8. Review the OpenShift status: - - .. code-block:: bash - - oc status - -9. Logout from OpenShift: - - .. code-block:: bash - - oc logout - -Deploy a test app using OpenShift -********************************* - -This procedure describes how to create a test application in your new -OpenShift deployment. - -1. Login as a developer: - - .. code-block:: bash - - $ oc login -u developer - Logged into "https://192.168.64.3:8443" as "developer" using existing credentials. - You have one project on this server: "myproject" - Using project "myproject". - -2. Create a new project: - - .. code-block:: bash - - $ oc new-project test-project - Now using project "test-project" on server "https://192.168.64.3:8443". - - You can add applications to this project with the 'new-app' command. - For example, to build a new example application in Ruby try: - - .. code-block:: bash - - $ oc new-app centos/ruby-22-centos7~https://github.com/openshift/ruby-ex.git - -3. Create a new app. This example creates a CakePHP application: - - .. code-block:: bash - - $ oc new-app https://github.com/sclorg/cakephp-ex - --> Found image 9dd8c80 (29 hours old) in image stream "openshift/php" under tag "7.1" for "php" - - Apache 2.4 with PHP 7.1 - ----------------------- - PHP 7.1 available as container is a base platform for building and running various PHP 7.1 applications and frameworks. PHP is an HTML-embedded scripting language. PHP attempts to make it easy for developers to write dynamically generated web pages. 
PHP also offers built-in database integration for several commercial and non-commercial database management systems, so writing a database-enabled webpage with PHP is fairly simple. The most common use of PHP coding is probably as a replacement for CGI scripts. - - Tags: builder, php, php71, rh-php71 - - * The source repository appears to match: php - * A source build using source code from https://github.com/sclorg/cakephp-ex will be created - * The resulting image will be pushed to image stream "cakephp-ex:latest" - * Use 'start-build' to trigger a new build - * This image will be deployed in deployment config "cakephp-ex" - * Ports 8080/tcp, 8443/tcp will be load balanced by service "cakephp-ex" - * Other containers can access this service through the hostname "cakephp-ex" - - --> Creating resources ... - imagestream "cakephp-ex" created - buildconfig "cakephp-ex" created - deploymentconfig "cakephp-ex" created - service "cakephp-ex" created - --> Success - Build scheduled, use 'oc logs -f bc/cakephp-ex' to track its progress. - Application is not exposed. You can expose services to the outside world by executing one or more of the commands below: - 'oc expose svc/cakephp-ex' - Run 'oc status' to view your app. - -4. Review the new app: - - .. code-block:: bash - - $ oc status --suggest - In project test-project on server https://192.168.64.3:8443 - - svc/cakephp-ex - 172.30.171.214 ports 8080, 8443 - dc/cakephp-ex deploys istag/cakephp-ex:latest <- - bc/cakephp-ex source builds https://github.com/sclorg/cakephp-ex on openshift/php:7.1 - build #1 running for 52 seconds - e0f0247: Merge pull request #105 from jeffdyoung/ppc64le (Honza Horak ) - deployment #1 waiting on image or update - - Info: - * dc/cakephp-ex has no readiness probe to verify pods are ready to accept traffic or ensure deployment is successful. - try: oc set probe dc/cakephp-ex --readiness ... - * dc/cakephp-ex has no liveness probe to verify pods are still running. - try: oc set probe dc/cakephp-ex --liveness ... - - View details with 'oc describe /' or list everything with 'oc get all'. - -5. Review the pods: - - .. code-block:: bash - - $ oc get pods - NAME READY STATUS RESTARTS AGE - cakephp-ex-1-build 1/1 Running 0 1m - -6. Logout from OpenShift: - - .. code-block:: bash - - $ oc logout diff --git a/doc/source/install/advanced_deployment/features.rst b/doc/source/install/advanced_deployment/features.rst deleted file mode 100644 index 521576e5..00000000 --- a/doc/source/install/advanced_deployment/features.rst +++ /dev/null @@ -1,11 +0,0 @@ -Feature Configuration -===================== - -Documentation on how to enable and configure various features available in -|project|. - - -.. toctree:: - - - deploy_openshift diff --git a/doc/source/install/deploy-guide.rst b/doc/source/install/deploy-guide.rst deleted file mode 100644 index f1f571ed..00000000 --- a/doc/source/install/deploy-guide.rst +++ /dev/null @@ -1,6 +0,0 @@ -Deploy Guide -============ - -The installation instructions have been moved to the `TripleO Deploy Guide `_. - -.. _tripleo_deploy_guide: https://docs.openstack.org/project-deploy-guide/tripleo-docs/latest/ diff --git a/doc/source/install/deprecated/basic_deployment_ui.rst b/doc/source/install/deprecated/basic_deployment_ui.rst deleted file mode 100644 index d1d78fe0..00000000 --- a/doc/source/install/deprecated/basic_deployment_ui.rst +++ /dev/null @@ -1,290 +0,0 @@ -.. _basic-deployment-ui: - -(DEPRECATED) Basic Deployment (UI) -================================== - -.. 
note:: The tripleo-ui is no longer available as of Stein. This documentation - is deprecated. - -This document will show you how to access the |project| UI and perform -a simple deployment with some customizations. Validations are -automatically run at every step to help uncover potential issues early. - -.. image:: ../_images/tripleo_ui.png - :width: 768px - :height: 439px - -Prepare Your Environment ------------------------- - -The UI is installed by default with the undercloud. You can confirm -this by opening ``undercloud.conf`` and checking for:: - - enable_ui = true - enable_validations = true - -The validations are optional but strongly recommended. - -#. Make sure you have your environment ready and undercloud running: - - * `Environment Setup `_ - * `Undercloud Installation `_ - -.. _environment_setup_deploy_guide: https://docs.openstack.org/project-deploy-guide/tripleo-docs/latest/environments/index.html -.. _undercloud_installation_deploy_guide: https://docs.openstack.org/project-deploy-guide/tripleo-docs/latest/deployment/install_undercloud.html - -#. Make sure the images are uploaded in Glance on the undercloud: - - * `Get Images `_ - * `Upload Images `_ - -.. _get_images_deploy_guide: https://docs.openstack.org/project-deploy-guide/tripleo-docs/latest/deployment/install_overcloud.html#get-images -.. _upload_images_deploy_guide: https://docs.openstack.org/project-deploy-guide/tripleo-docs/latest/deployment/install_overcloud.html#upload-images - -Access the UI -------------- - -The UI is accessible on the undercloud URL. With the default settings -the URLs may look like the following, depending on whether the -undercloud was set up with `SSL `_: - -.. _undercloud_ssl: https://docs.openstack.org/project-deploy-guide/tripleo-docs/latest/features/ssl.html#undercloud-ssl - -* http://192.168.24.1:3000 if it was not -* https://192.168.24.2 if set up with SSL - -The username is ``admin`` and the password can be obtained by running -the following command on the undercloud:: - - $ sudo hiera keystone::admin_password - -.. note:: On an undercloud deployed without SSL, the UI and API - endpoints are deployed on the control plane which may not be - routable. In this case you will need to create a tunnel or use a - tool such as sshuttle to be able to use the UI from a local - browser:: - - sshuttle -r user@undercloud 192.168.24.0/24 - -.. admonition:: Virtual - :class: virtual - - If you cannot directly access the undercloud (for example because - the undercloud is installed in a VM on a remote lab host), you will - need to create a tunnel and make some configuration changes in order - to access the UI locally. - - #. Open the tunnel from the virt host, to the undercloud:: - - ssh -Nf user@undercloud -L 0.0.0.0:443:192.168.24.2:443 # If SSL - ssh -Nf user@undercloud -L 0.0.0.0:3000:192.168.24.1:3000 # If no SSL - - .. note:: Quickstart started creating the tunnel automatically - during Pike. If using an older version you will have to create - the tunnel manually, for example:: - - ssh -F /root/.quickstart/ssh.config.ansible undercloud -L 0.0.0.0:443:192.168.24.2:443 - - #. Edit the UI config on the undercloud to replace the undercloud IP - with your virt host name, for example:: - - sudo sed -i.bak s/192.168.24.2/virthost/ /var/www/openstack-tripleo-ui/dist/tripleo_ui_config.js - - Additionally, make sure all the API endpoints are commented out - in this file. - - .. note:: Changes to ``tripleo_ui_config.js`` are overwritten on - undercloud upgrades. - - #. 
You may have to open port 3000 or 443 on your virt host. - - .. admonition:: Stable Branch - :class: stable - - .. admonition:: Newton - :class: newton - - Starting in Ocata, all the API ports are proxied through - 3000 (non-SSL) or 443 (SSL). If using Newton, you will need - to ensure ports for all the API endpoints specified in - ``tripleo_ui_config.js`` are open and accessible. If using - SSL with self-signed certificates, Firefox will also - require a SSL certificate exception to be accepted for - every port. - - #. The UI should now be accessible at http://virthost:3000 or - https://virthost. - -Manage Plans ------------- - -A default plan named ``overcloud`` is created during the undercloud -installation, based on the default tripleo-heat-templates installed on -the system. This plan can be customized and deployed. - -It is also possible to create and manage additional plans in parallel, -in order to test different configurations. - -By clicking on "Manage Deployments" beside the deployment name, you can -perform actions on plans such as create, export, delete, etc. - -.. note:: - - There can be confusion with node assignments when switching between - plans, particularly in previous releases like Newton. If doing work - with multiple plans, ensure the Node counts are what you expect - before starting the deployment, for example by navigating to Edit - Configuration -> Parameters. - -Manage Nodes ------------- - -Register Nodes -^^^^^^^^^^^^^^ - -Navigate to the **Nodes** tab in the top bar and click on the -*Register Nodes* button. New nodes can be added in two ways: - - * Importing an `instackenv.json `_ file - -.. _instackenv_file: https://docs.openstack.org/project-deploy-guide/tripleo-docs/latest/environments/baremetal.html#instackenv-json - -* Importing an instackenv.json file -* Manually defining nodes via the *Add New* button. - -Introspect Nodes -^^^^^^^^^^^^^^^^ - -Introspection is a required step when deploying from the UI. Once the -nodes are registered and in the ``manageable`` provision state, select -the nodes and click on the *Introspect Nodes* button. - -Provide Nodes -^^^^^^^^^^^^^ - -Once introspection is completed, nodes need to be "provided" in order -to move to the ``available`` state and be available for -deployments. Select the nodes and click on the *Provide Nodes* button. - -.. note:: For more information about node states, see `Node States `_. - -.. _node_states: https://docs.openstack.org/project-deploy-guide/tripleo-docs/latest/provisioning/node_states.html - -Tag Nodes -^^^^^^^^^ - -Nodes need to be tagged to match a specific profile/role before they -can be used in a deployment. Select the nodes you want to assign a -profile to, then click on *Tag Nodes* (the option may be in a dropdown -menu). - -.. admonition:: Stable Branch - :class: stable - - In Newton and Ocata, node assignment and node tagging are done at - the same time when assigning nodes on the **Deployment Plan** page. - -Configure the Deployment ------------------------- - -On the **Deployment Plan** tab, you can configure both the overall -deployment, as well as specific roles. - -Clicking on the *Edit Configuration* link displays the list of -environments available and their description, based on the templates -provided in the plan. After enabling environments as desired, click on -*Save Changes* and navigate to the **Parameters** tab. Once saved, the -enabled environments will also be configurable on this tab. 
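When working from the command line instead of the UI, enabling an environment on this screen roughly corresponds to passing that environment file to the deploy command with ``-e``. A minimal sketch of the CLI form (the ``~/my-environment.yaml`` path is purely illustrative):

.. code-block:: bash

    # Deploy the default "overcloud" plan with network isolation enabled,
    # plus a local environment file carrying site-specific settings
    openstack overcloud deploy \
      --templates \
      -e /usr/share/openstack-tripleo-heat-templates/environments/network-isolation.yaml \
      -e ~/my-environment.yaml

The values edited on the **Parameters** tab play the same role as the ``parameter_defaults`` section of such an environment file.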
- -The **Parameters** tab lets you set configuration options for the -deployment in general, as well as for each individual environment. - -.. admonition:: Stable Branch - :class: stable - - .. admonition:: Newton - :class: newton - - In Newton it was not possible to configure individual - environments. The environment templates should be updated - directly with the required parameters before uploading a new - plan. - -Individual roles can also be configured by clicking on the Pencil icon -beside the role name on each card. - -.. admonition:: Stable Branch - :class: stable - - .. admonition:: Newton - :class: newton - - In Newton, you may need to assign at least one node to the role - before the related configuration options are loaded. - - -Assign Nodes ------------- - -The second number on each card indicates the number of nodes tagged -with this particular profile. The number of nodes manually assigned via -the number picker will be deployed. - -.. admonition:: Stable Branch - :class: stable - - In Newton and Ocata, nodes are tagged as part of assigning a node to - a profile. This can cause issues when switching deployment plans, as - the node counts displayed on the card may not match the value - actually stored in the plan. You can correct this by clicking on - Edit Configuration -> Parameters and checking/updating the node - counts for ControllerCount, ComputeCount, etc. - - Additionally, when using custom roles you should make sure to - unassign the nodes associated with these roles before deleting the - plan, as the role cards are displayed based on the roles in the - current plan only. Therefore it is not possible to unassign a node - which is associated with a role that does not exist in the current - plan. - -Deploy the Overcloud --------------------- - -Click on the *Deploy* button. - -You may see a warning if not all validations passed. While this is -expected in resources-constrained virtual environments, it is -recommended to check the failed validations carefully before -proceeding. - -The ``View detailed information`` link shows the details for all the -Heat resources being deployed. - -Post-Deployment ---------------- - -Once the deployment completes, the progress bar will be replaced with -information about the overcloud such as the IP address and login -credentials. - -If the deployment failed, information about the failure will be -displayed. - -.. admonition:: Virtual - :class: virtual - - To access the overcloud, you will need to update your tunnel in - order to access the new URL. For example, if your overcloud - information is as such:: - - Overcloud IP address: 192.168.24.12 - Username: admin - Password: zzzzzz - - Assuming you deployed the overcloud with SSL enabled, you could - create the following tunnel from your virt host to the undercloud:: - - ssh -Nf user@undercloud -L 0.0.0.0:1234:192.168.24.12:443 - - After opening port 1234 on your virt host, you should be able to - access the overcloud by navigating to https://virthost:1234. diff --git a/doc/source/install/index.rst b/doc/source/install/index.rst deleted file mode 100644 index 68e3776c..00000000 --- a/doc/source/install/index.rst +++ /dev/null @@ -1,13 +0,0 @@ -TripleO Install Guide -========================= - -.. 
toctree:: - :maxdepth: 2 - :includehidden: - - introduction/introduction - deploy-guide - deprecated/basic_deployment_ui - advanced_deployment/features - advanced_deployment/custom - diff --git a/doc/source/install/introduction/architecture.rst b/doc/source/install/introduction/architecture.rst deleted file mode 100644 index 4783679d..00000000 --- a/doc/source/install/introduction/architecture.rst +++ /dev/null @@ -1,384 +0,0 @@ -|project| Architecture -======================== - -This document lists the main components of |project|, and gives some -description of how each component is used. There are links to additional sources -of information throughout the document. - -.. only:: html - - .. contents:: - :depth: 3 - :backlinks: none - - -Architecture Overview ---------------------- - -|project| is a community developed approach and set of tools for deploying, -and managing an OpenStack cloud. - - -TripleO -^^^^^^^ - -TripleO is the friendly name for “OpenStack on OpenStack”. It is an official -OpenStack project with the goal of allowing you to deploy and manage a -production cloud onto bare metal hardware using a subset of existing OpenStack -components. - -.. image:: ../_images/overview.png - -With TripleO, you start by creating an "undercloud" (a deployment cloud) -that will contain the necessary OpenStack components to deploy and manage an -"overcloud" (a workload cloud). The overcloud is the deployed solution -and can represent a cloud for any purpose (e.g. production, staging, test, etc). - -.. image:: ../_images/logical_view.png - -TripleO leverages several existing core components of OpenStack including Nova, -Ironic, Neutron, Heat, Glance and Ceilometer to deploy OpenStack on baremetal -hardware. Nova and Ironic are used in the undercloud to manage baremetal -instances that comprise the infrastructure for the overcloud. Neutron is -utilized to provide a networking environment in which to deploy the overcloud, -machine images are stored in Glance, and Ceilometer collects metrics about your -overcloud. - -The following diagram illustrates a physical view of how the undercloud may be -hosted on one physical server and the overcloud distributed across many physical -servers. - -.. image:: ../_images/physical_view.png - - -SpinalStack's Inspiration -^^^^^^^^^^^^^^^^^^^^^^^^^ - -Some key aspects of SpinalStack workflow have been incorporated into -|project|, providing options to perform introspection, benchmarking and role -matching of your hardware prior to deploying OpenStack. - -Hardware introspection features enable you to collect data about the properties -of your hardware prior to deployment, such that specific classes of hardware may -be matched to specific roles (e.g. a special hardware configuration for Compute -or Storage roles). There is also the option to enable performance benchmarking -during this phase, such that outliers which do not match the expected -performance profile may be excluded from the deployment. - -|project| also configures servers in a similar way to SpinalStack, using -stable community puppet implementations, applied in a series of steps, such -that granular control and validation of the deployment is possible - - -Benefits --------- - -Using |project|'s combination of OpenStack components, and their APIs, as the -infrastructure to deploy and operate OpenStack itself delivers several benefits: - -* |project|'s APIs are the OpenStack APIs. They're well maintained, well - documented, and come with client libraries and command line tools. 
Users who - invest time in learning about |project|'s APIs are also learning about - OpenStack itself, and users who are already familiar with OpenStack will find - a great deal in |project| that they already understand. -* Using the OpenStack components allows more rapid feature development of - |project| than might otherwise be the case; |project| automatically - inherits all the new features which are added to Glance, Heat etc., even when - the developer of the new feature didn't explicitly have |project| in mind. -* The same applies to bug fixes and security updates. When OpenStack developers - fix bugs in the common components, those fixes are inherited by |project|. -* Users' can invest time in integrating their own scripts and utilities with - |project|'s APIs with some confidence. Those APIs are cooperatively - maintained and developed by the OpenStack community. They're not at risk of - being suddenly changed or retired by a single controlling vendor. -* For developers, tight integration with the OpenStack APIs provides a solid - architecture, which has gone through extensive community review. - -It should be noted that not everything in |project| is a reused OpenStack -element. - - -Deployment Workflow Overview ----------------------------- - -#. Environment Preparation - - * Prepare your environment (baremetal or virtual) - * Install undercloud - - -#. Undercloud Data Preparation - - * Create images to establish the overcloud - * Register hardware nodes with undercloud - * Introspect hardware - * Create flavors (node profiles) - - -#. Deployment Planning - - * Configure overcloud roles - - * Assign flavor (node profile to match desired hardware specs) - * Assign image (provisioning image) - * Size the role (how many instances to deploy) - - * Configure service parameters - * Create a Heat template describing the overcloud (auto-generated from above) - - -#. Deployment - - * Use Heat to deploy your template - * Heat will use Nova to identify and reserve the appropriate nodes - * Nova will use Ironic to startup nodes and install the correct images - - -#. Per-node Setup - - * When each node of the overcloud starts it will gather its configuration - metadata from Heat Template configuration files - * Hiera files are distributed across all nodes and Heat applies puppet - manifests to configure the services on the nodes - * Puppet runs in multiple steps, so that after each step there can be tests - triggered to check progress of the deployment and allow easier debugging. - - -#. Overcloud Initialization - - * Services on nodes of the overcloud are registered with Keystone - - -Deployment Workflow Detail --------------------------- - -Environment Preparation -^^^^^^^^^^^^^^^^^^^^^^^ - -In the first place, you need to check that your environment is ready. -|project| can deploy OpenStack into baremetal as well as virtual environments. -You need to make sure that your environment satisfies minimum requirements for -given environment type and that networking is correctly set up. - -Next step is to install the undercloud. We install undercloud using `Instack -`_'s script and it calls -puppet scripts in the background. - -For development or proof of concept (PoC) environments, `Quickstart -`_ -can also be used. - - -Undercloud Data Preparation -^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Images -"""""" - -Before deploying the overcloud, you must first download or build images which -will be installed on each of the nodes of the overcloud. 
TripleO uses -`diskimage-builder `_ for -building these so called "Golden Images". The diskimage-builder tool takes a -base image e.g. `CentOS 7 `_ and then layers additional software via -configuration scripts (called elements) on top of that. The final result is a -qcow2 formatted image with software installed but not configured. - -While the diskimage-builder repository provides operating-system specific -elements, ones specific to OpenStack, e.g. nova-api, are found in -`tripleo-image-elements `_. -You can add different elements to an image to provide specific applications and -services. Once all the images required to deploy the overcloud are built, they -are stored in Glance running on the undercloud. - - -Nodes -""""" - -Deploying the overcloud requires suitable hardware. The first task is to -register the available hardware with Ironic, OpenStack's equivalent of a -hypervisor for managing baremetal servers. Users can define the hardware -attributes (such as number of CPUs, RAM, disk) manually or he can leave the -fields out and run introspection of the nodes afterwards. - -The sequence of events is pictured below: - -.. image:: ../_images/introspection_diagram.png - -* The user, via the command-line tools, or through direct API calls, - registers the power management credentials for a node with Ironic. -* The user then instructs Ironic to reboot the node. -* Because the node is new, and not already fully registered, there are no - specific PXE-boot instructions for it. In that case, the default action is to - boot into an introspection ramdisk -* The introspection ramdisk probes the hardware on the node and gathers facts, - including the number of CPU cores, the local disk size and the amount of RAM. -* The ramdisk posts the facts to the ironic-inspector API. -* All facts are passed and stored in the Ironic database. -* There can be performed advanced role matching via the ''ahc-match'' tool, - which simply adds an additional role categorization to Ironic based on - introspected node facts and specified conditions. - - -Flavors -""""""" - -When users are creating virtual machines (VMs) in an OpenStack cloud, the flavor -that they choose specifies the capacity of the VM which should be created. The -flavor defines the CPU count, the amount of RAM, the amount of disk space etc. -As long as the cloud has enough capacity to grant the user's wish, and the user -hasn't reached their quota limit, the flavor acts as a set of instructions on -exactly what kind of VM to create on the user's behalf. - -In the undercloud, where the machines are usually physical rather than virtual -(or, at least, pre-existing, rather than created on demand), flavors have -a slightly different effect. Essentially, they act as a constraint. Of all of -the introspected hardware, only nodes which match a specified flavor are -suitable for a particular role. This can be used to ensure that the large -machines with a great deal of RAM and CPU capacity are used to run Nova in the -overcloud, and the smaller machines run less demanding services, such as -Keystone. - -|project| is capable of handling flavors in two different modes. - -The simpler PoC (Proof of Concept) mode is intended to enable new users to -experiment, without worrying about matching hardware profiles. In this mode, -there's one single, global flavor, and any hardware can match it. That -effectively removes flavor matching. Users can use whatever hardware they wish. 
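As a concrete sketch of how a flavor acts as a constraint, the commands below create a flavor and tag it with a profile capability so that only nodes carrying the matching profile can be picked for that role. The flavor and profile name ``control`` are illustrative; the same pattern is used for the OpenShift roles earlier in this document:

.. code-block:: bash

    # Describe the minimum capacity expected from nodes filling this role
    openstack flavor create --id auto --ram 4096 --disk 40 --vcpus 1 control

    # Restrict the flavor to nodes tagged with the "control" profile
    openstack flavor set \
      --property "capabilities:profile"="control" \
      --property "capabilities:boot_option"="local" control

This profile matching is the basis of the second mode described below; in the PoC mode a single flavor with no profile property is enough.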
- -For the second mode, named Scale because it is suited to larger scale overcloud -deployments, flavor matching is in full effect. A node will only be considered -suitable for a given role if the role is associated with a flavor which matches -the capacity of the node. Nodes without a matching flavor are effectively -unusable. - -This second mode allows users to ensure that their different hardware types end -up running their intended role, though requires either manual node tagging or -using introspection rules to tag nodes (see `Profile Matching `_). - -.. _profile_matching_deploy_guide: https://docs.openstack.org/project-deploy-guide/tripleo-docs/latest/provisioning/profile_matching.html - - -Deployment Planning -^^^^^^^^^^^^^^^^^^^ - -Whole part of planning your deployment is based on concept of **overcloud -roles**. A role brings together following things: - -* An image; the software to be installed on a node -* A flavor; the size of node suited to the role -* A size; number of instances which should be deployed having given role -* A set of heat templates; instructions on how to configure the node for its - task - - -In the case of the "Compute" role: - -* the image must contain all the required software to boot an OS and then run - the KVM hypervisor and the Nova compute service -* the flavor (at least for a deployment which isn't a simple proof of concept), - should specify that the machine has enough CPU capacity and RAM to host - several VMs concurrently -* the Heat templates will take care of ensuring that the Nova service is - correctly configured on each node when it first boots. - - -Currently, the roles in |project| are very prescriptive, and in particular -individual services cannot easily be scaled independently of the Controller role -(other than storage nodes). More flexibility in this regard is planned in a -future release. - -Customizable things during deployment planning are: - -* Number of nodes for each role -* Service parameters configuration -* Network configuration (NIC configuration options, isolated vs. single overlay) -* Ceph rbd backend options and defaults -* Ways to pass in extra configuration, e.g site-specific customizations - - -Deployment -^^^^^^^^^^ - -Deployment to physical servers happens through a collaboration of -Heat, Nova, Neutron, Glance and Ironic. - -The Heat templates and environments are served to Heat which will -orchestrate the whole deployment and it will create a stack. Stack is -Heat's own term for the applications that it creates. The overcloud, -in Heat terms, is a particularly complex instance of a stack. - -In order for the stack to be deployed, Heat makes successive calls to Nova, -OpenStack's compute service controller. Nova depends upon Ironic, which, as -described above has acquired an inventory of introspected hardware by this -stage in the process. - -At this point, Nova flavors may act as a constraint, influencing the -range of machines which may be picked for deployment by the Nova -scheduler. For each request to deploy a new node with a specific role, -Nova filters the list of available nodes, ensuring that the selected -nodes meet the hardware requirements. 
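While Heat is bringing the stack up, progress can be followed from the undercloud with the regular Heat client commands. A minimal sketch, assuming the default stack name ``overcloud``:

.. code-block:: bash

    # Overall status of the overcloud stack
    openstack stack list

    # Per-resource status, descending into nested stacks
    openstack stack resource list --nested-depth 5 overcloud

    # Recent events, useful when a resource ends up in CREATE_FAILED
    openstack stack event list overcloud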
- -Once the target node has been selected, Ironic does the actual provisioning of -the node, Ironic retrieves the OS image associated with the role from Glance, -causes the node to boot a deployment ramdisk and then, in the typical case, -exports the node's local disk over iSCSI so that the disk can be partitioned and -the have the OS image written onto it by the Ironic Conductor. - -See Ironic's `Understanding Baremetal Deployment `_ -for further details. - - -Per-node Setup -^^^^^^^^^^^^^^ - -TBD - Puppet - - - -High Availability (HA) ----------------------- - -|project| will use Pacemaker to achieve high-availability. - -Reference architecture document: https://github.com/beekhof/osp-ha-deploy - -.. note:: **Current HA solution is being developed by our community.** - - - -Managing the Deployment ------------------------ - -After the overcloud deployment is completed, it will be possible to monitor, -scale it out or perform basic maintenance operations via the CLI. - - -Monitoring the Overcloud -^^^^^^^^^^^^^^^^^^^^^^^^ - -When the overcloud is deployed, Ceilometer can be configured to track a set of -OS metrics for each node (system load, CPU utilization, swap usage etc.) - -Additionally, Ironic exports IPMI metrics for nodes, which can also be stored in -Ceilometer. This enables checks on hardware state such as fan operation/failure -and internal chassis temperatures. - -The metrics which Ceilometer gathers can be queried for Ceilometer's REST API, -or by using the command line client. - -.. Note:: - There are plans to add more operational tooling to the future release. - - -Scaling-out the Overcloud -^^^^^^^^^^^^^^^^^^^^^^^^^ - -The process of scaling out the overcloud by adding new nodes involves these -stages: - -* Making sure you have enough nodes to deploy on (or register new nodes as - described in the "Undercloud Data Preparation" section above). -* Calling Heat to update the stack which will apply the set of changes to the - overcloud. diff --git a/doc/source/install/introduction/components.rst b/doc/source/install/introduction/components.rst deleted file mode 100644 index f023ebb9..00000000 --- a/doc/source/install/introduction/components.rst +++ /dev/null @@ -1,455 +0,0 @@ -|project| Components -====================== - -.. only:: html - - .. contents:: - :depth: 2 - :backlinks: none - -This section contains a list of components that |project| uses. The components -are organized in categories, and include a basic description, useful links, and -contribution information. - -.. - [Example Category Name] - ----------------------- - - [Example Component Name] - ^^^^^^^^^^^^^^^^^^^^^^^^ - This is short description what the project is about and how |project| uses - this project. Three sentences max. - - **How to contribute** - - * Instructions to prepare development environment. Should be mostly pointing to - upstream docs. If upstream docs doesn't exist, please, create one. Add tips - how to test the feature in |project| + other useful information. - - - **Useful links** - - * Upstream Project: `link <#>`_ - * Bugs: `link <#>`_ - * Blueprints: `link <#>`_ - - -Shared Libraries ----------------- -diskimage-builder -^^^^^^^^^^^^^^^^^ - -diskimage-builder is an image building tool. It is used by -``openstack overcloud image build``. - -**How to contribute** - -See the diskimage-builder `README.rst -`_ -for a further explanation of the tooling. Submit your changes via -OpenStack Gerrit (see `OpenStack Developer's Guide -`_). 
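To get a feel for the tool itself, an image is built by naming a base element and any additional elements on the command line. A minimal sketch (the element names and output name are illustrative; ``openstack overcloud image build`` drives this same machinery with the TripleO-specific elements):

.. code-block:: bash

    # Build a qcow2 image from the CentOS 7 base element plus the "vm"
    # element; the result is written to my-image.qcow2
    disk-image-create centos7 vm -o my-image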
- -**Useful links** - -* Upstream Project Documentation: https://docs.openstack.org/diskimage-builder/ -* Bugs: https://bugs.launchpad.net/diskimage-builder -* Git repository: https://opendev.org/openstack/diskimage-builder/ - - -dib-utils -^^^^^^^^^ - -dib-utils contains tools that are used by diskimage-builder. - -**How to contribute** - -Submit your changes via OpenStack Gerrit (see `OpenStack Developer's Guide -`_). - -**Useful links** - -* Bugs: https://bugs.launchpad.net/diskimage-builder -* Git repository: https://opendev.org/openstack/dib-utils/ - - -os-\*-config -^^^^^^^^^^^^ - -The os-\*-config projects are a suite of tools used to configure instances -deployed via TripleO. They include: - -* os-collect-config -* os-refresh-config -* os-apply-config -* os-net-config - -**How to contribute** - -Each tool uses `tox `_ to manage the -development environment. Submit your changes via OpenStack Gerrit (see -`OpenStack Developer's Guide -`_). - -**Useful links** - -* Bugs: - - * os-collect-config: https://bugs.launchpad.net/os-collect-config - * os-refresh-config: https://bugs.launchpad.net/os-refresh-config - * os-apply-config: https://bugs.launchpad.net/os-apply-config - * os-net-config: https://bugs.launchpad.net/os-net-config - -* Git repositories: - - * os-collect-config: https://opendev.org/openstack/os-collect-config - * os-refresh-config https://opendev.org/openstack/os-refresh-config - * os-apply-config https://opendev.org/openstack/os-apply-config - * os-net-config https://opendev.org/openstack/os-net-config - -tripleo-image-elements -^^^^^^^^^^^^^^^^^^^^^^ - -tripleo-image-elements is a repository of diskimage-builder style elements used -for installing various software components. - -**How to contribute** - -Submit your changes via OpenStack Gerrit (see -`OpenStack Developer's Guide -`_). - -**Useful links** - -* Git repository: https://opendev.org/openstack/tripleo-image-elements - - -Installer ---------- - -instack -^^^^^^^ -instack executes diskimage-builder style elements on the current system. This -enables a current running system to have an element applied in the same way -that diskimage-builder applies the element to an image build. - -instack, in its current form, should be considered low level tooling. It is -meant to be used by higher level scripting that understands what elements and -hook scripts need execution. Using instack requires a rather in depth knowledge -of the elements within diskimage-builder and tripleo-image-elements. - -**How to contribute** - -Submit your changes via OpenStack Gerrit (see -`OpenStack Developer's Guide -`_). - -**Useful links** - -* Git repository: https://opendev.org/openstack/instack -* Bugs: https://launchpad.net/tripleo - -instack-undercloud -^^^^^^^^^^^^^^^^^^ -instack-undercloud is a TripleO style undercloud installer based around -instack. - -**How to contribute** - -Submit your changes via OpenStack Gerrit (see -`OpenStack Developer's Guide -`_). - -**Useful links** - -* Git repository: https://opendev.org/openstack/instack-undercloud -* Bugs: https://launchpad.net/tripleo - -Node Management ---------------- -ironic -^^^^^^ - -Ironic project is responsible for provisioning and managing bare metal -instances. - -For testing purposes Ironic can also be used for provisioning and managing -virtual machines which act as bare metal nodes via special driver ``pxe_ssh``. 
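-
-As a hedged example of such a virtual bare metal node (all addresses,
-credentials and image UUIDs below are placeholders), a node using the
-``pxe_ssh`` driver can be enrolled directly with the Ironic client::
-
-    openstack baremetal node create \
-        --driver pxe_ssh \
-        --driver-info ssh_address=192.168.122.1 \
-        --driver-info ssh_username=stack \
-        --driver-info ssh_key_filename=/home/stack/.ssh/id_rsa \
-        --driver-info ssh_virt_type=virsh \
-        --driver-info deploy_kernel=<deploy-kernel-glance-uuid> \
-        --driver-info deploy_ramdisk=<deploy-ramdisk-glance-uuid>
-
-In a TripleO environment nodes are normally registered in bulk from an
-``instackenv.json`` file rather than one by one.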
- -**How to contribute** - -Ironic uses `tox `_ to manage the -development environment, see the `Developer Quick-Start -`_, -`Ironic Developer Guidelines -`_ -and `OpenStack Developer's Guide`_ for details. - -**Useful links** - -* Upstream Project: https://docs.openstack.org/ironic/index.html -* Bugs: https://bugs.launchpad.net/ironic -* Blueprints: https://blueprints.launchpad.net/ironic - - * `Specs process `_ - should be followed for suggesting new features. - * Approved Specs: http://specs.openstack.org/openstack/ironic-specs/ - - -ironic inspector (former ironic-discoverd) -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Ironic Inspector project is responsible for inspection of hardware properties -for newly enrolled nodes (see also ironic_). - -**How to contribute** - -Ironic Inspector uses `tox `_ to manage -the development environment, see `upstream documentation -`_ -for details. - -**Useful links** - -* Upstream Project: https://github.com/openstack/ironic-inspector -* PyPI: https://pypi.org/project/ironic-inspector -* Bugs: https://bugs.launchpad.net/ironic-inspector - -VirtualBMC -^^^^^^^^^^ - -A helper command to translate IPMI calls into libvirt calls. Used for testing -bare metal provisioning on virtual environments. - -**How to contribute** - -VirtualBMC uses `tox `_ to manage the -development environment in a similar way to Ironic. - -**Useful links** - -* Source: https://opendev.org/openstack/virtualbmc -* Bugs: https://bugs.launchpad.net/virtualbmc - - -Deployment & Orchestration --------------------------- -heat -^^^^ - -Heat is OpenStack's orchestration tool. It reads YAML files describing -the OpenStack deployment's resources (machines, their configurations -etc.) and gets those resources into the desired state, often by -talking to other components (e.g. Nova). - -**How to contribute** - -* Use `devstack with Heat - `_ - to set up a development environment. Submit your changes via - OpenStack Gerrit (see `OpenStack Developer's Guide - `_). - -**Useful links** - -* Upstream Project: https://wiki.openstack.org/wiki/Heat -* Bugs: https://bugs.launchpad.net/heat -* Blueprints: https://blueprints.launchpad.net/heat - -heat-templates -^^^^^^^^^^^^^^ - -The heat-templates repository contains additional image elements for -producing disk images ready to be configured by Puppet via Heat. - -**How to contribute** - -* Use `devtest with Puppet - `_ - to set up a development environment. Submit your changes via - OpenStack Gerrit (see `OpenStack Developer's Guide - `_). - -**Useful links** - -* Upstream Project: https://opendev.org/openstack/heat-templates -* Bugs: https://bugs.launchpad.net/heat-templates -* Blueprints: https://blueprints.launchpad.net/heat-templates - -tripleo-heat-templates -^^^^^^^^^^^^^^^^^^^^^^ - -The tripleo-heat-templates describe the OpenStack deployment in Heat -Orchestration Template YAML files and Puppet manifests. The templates -are deployed via Heat. - -**How to contribute** - -* Use `devtest with Puppet - `_ - to set up a development environment. Submit your changes via - OpenStack Gerrit (see `OpenStack Developer's Guide - `_). - -**Useful links** - -* Upstream Project: https://opendev.org/openstack/tripleo-heat-templates -* Bugs: https://bugs.launchpad.net/tripleo -* Blueprints: https://blueprints.launchpad.net/tripleo - -nova -^^^^ - -nova provides a cloud computing fabric controller. - -**How to contribute** - -* Read the - `Development Quickstart `_ - to set up a development environment. 
Submit your changes via OpenStack - Gerrit (see - `OpenStack Developer's Guide `_). - -**Useful links** - -* Git repository: https://opendev.org/openstack/nova -* Bugs: https://bugs.launchpad.net/nova -* Blueprints: https://blueprints.launchpad.net/nova - -puppet-\* -^^^^^^^^^ - -The OpenStack Puppet modules are used to configure the OpenStack -deployment (write configuration, start services etc.). They are used -via the tripleo-heat-templates. - -**How to contribute** - -* Use `devtest with Puppet - `_ - to set up a development environment. Submit your changes via - OpenStack Gerrit (see `OpenStack Developer's Guide - `_). - -**Useful links** - -* Upstream Project: https://wiki.openstack.org/wiki/Puppet - - -tripleo-puppet-elements -^^^^^^^^^^^^^^^^^^^^^^^ - -The tripleo-puppet-elements describe the contents of disk images which -|project| uses to deploy OpenStack. It's the same kind of elements -as in tripleo-image-elements, but tripleo-puppet-elements are specific -for Puppet-enabled images. - -**How to contribute** - -* Use `devtest with Puppet - `_ - to set up a development environment. Submit your changes via - OpenStack Gerrit (see `OpenStack Developer's Guide`_). - -**Useful links** - -* Upstream Project: https://opendev.org/openstack/tripleo-puppet-elements -* Bugs: https://bugs.launchpad.net/tripleo -* Blueprints: https://blueprints.launchpad.net/tripleo - - -User Interfaces ---------------- - -python-openstackclient -^^^^^^^^^^^^^^^^^^^^^^ -The python-openstackclient is an upstream CLI tool which can manage multiple -openstack services. It wraps openstack clients like glance, nova, etc. and maps -them under intuitive names like openstack image, compute, etc. - -The main value is that all services can be controlled by a single (openstack) -command with consistent syntax and behaviour. - -**How to contribute** - -* python-openstackclient uses `tox `_ - to manage the development environment, see the - `python-openstackclient documentation - `_ - for details. Submit your changes via OpenStack Gerrit - (see `OpenStack Developer's Guide`_). - -**Useful links** - -* Upstream Project: https://opendev.org/openstack/python-openstackclient -* Bugs: https://bugs.launchpad.net/python-openstackclient -* Blueprints: https://blueprints.launchpad.net/python-openstackclient -* Human interface guide: https://docs.openstack.org/python-openstackclient/humaninterfaceguide.html - -python-tripleoclient -^^^^^^^^^^^^^^^^^^^^ -The python-tripleoclient is a CLI tool embedded into python-openstackclient. It -provides functions related to instack installation and initial configuration -like node introspection, overcloud image building and uploading, etc. - -**How to contribute** - -* python-tripleoclient uses `tox `_ - to manage the development environment, see the - `python-tripleoclient documentation - `_ - for details. Submit your changes via - `Gerrit `_. - -**Useful links** - -* Project: https://opendev.org/openstack/python-tripleoclient - -.. - - -.. _OpenStack Developer's Guide: http://docs.openstack.org/infra/manual/developers.html - -tripleo-ui -^^^^^^^^^^ - -TripleO UI is the web interface for TripleO. - -**How to contribute** - -* See the `documentation `_ - for details. - - -**Useful links** - -* Bugs: https://bugs.launchpad.net/tripleo-ui -* Blueprints: https://blueprints.launchpad.net/tripleo-ui - -tripleo-validations -------------------- - -Pre and post-deployment validations for the deployment workflow. 
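-
-As a rough sketch (the validation name and install paths are illustrative and
-may differ between environments), an individual validation can be run by hand
-with Ansible against the dynamic TripleO inventory::
-
-    source ~/stackrc
-    ansible-playbook -i /usr/bin/tripleo-ansible-inventory \
-        /usr/share/openstack-tripleo-validations/validations/undercloud-ram.yaml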
- -**Useful links** - -* Upstream Project: https://opendev.org/openstack/tripleo-validations/ -* Bugs: https://bugs.launchpad.net/tripleo/+bugs?field.tag=validations -* Documentation for individual validations: https://docs.openstack.org/tripleo-validations/latest/readme.html#existing-validations - -.. note:: When reporting an issue, make sure you add the - ``validations`` tag. - -Deprecated ----------- -Tuskar -^^^^^^ - -The Tuskar project was responsible for planning the deployments and -generating the corresponding Heat templates. This is no longer -necessary as Heat supports this composability out of the box. - -The source code is available below, but please note that it should not -be used for new deployments. - -https://github.com/openstack/tuskar diff --git a/doc/source/install/introduction/introduction.rst b/doc/source/install/introduction/introduction.rst deleted file mode 100644 index ea687e84..00000000 --- a/doc/source/install/introduction/introduction.rst +++ /dev/null @@ -1,38 +0,0 @@ -|project| Introduction -======================== - -|project| is an OpenStack Deployment & Management tool. - - -**Architecture** - -With |project|, you start by creating an **undercloud** (an actual operator -facing deployment cloud) that will contain the necessary OpenStack components to -deploy and manage an **overcloud** (an actual tenant facing workload cloud). The -overcloud is the deployed solution and can represent a cloud for any purpose -(e.g. production, staging, test, etc). The operator can choose any of available -Overcloud Roles (controller, compute, etc.) they want to deploy to the environment. - -Go to :doc:`architecture` to learn more. - -| - -**Components** - -|project| is composed of set of official OpenStack components accompanied by -few other open source plugins which increase |project|'s capabilities. - -Go to :doc:`components` to learn more. - - -**Deployment Guide** - -See additional information about how to deploy TripleO in the `Deploy Guide `_. - -.. _tripleo_deploy_guide: https://docs.openstack.org/project-deploy-guide/tripleo-docs/latest/ - -.. toctree:: - :hidden: - - architecture - components diff --git a/doc/source/links.rst b/doc/source/links.rst deleted file mode 100644 index 9c74b0b5..00000000 --- a/doc/source/links.rst +++ /dev/null @@ -1,7 +0,0 @@ -.. Links, citations, etc. -.. _composable services architecture: - https://blueprints.launchpad.net/tripleo/+spec/composable-services-within-roles -.. _THT repository: - https://github.com/openstack/tripleo-heat-templates/tree/master/puppet/services -.. _puppet-tripleo repository: - https://github.com/openstack/puppet-tripleo/tree/master/manifests/profile diff --git a/doc/source/upgrade/developer/upgrades/ci_upgrades.rst b/doc/source/upgrade/developer/upgrades/ci_upgrades.rst deleted file mode 100644 index ee383d1a..00000000 --- a/doc/source/upgrade/developer/upgrades/ci_upgrades.rst +++ /dev/null @@ -1,267 +0,0 @@ -.. TODO: This is a template which is being - completed. The subsections stated - here might differ from the ones in the - final version. - -Major upgrades & Minor updates CI coverage ------------------------------------------- - -.. include:: links.rst - -This document tries to give a detailed overview of the current -CI coverage for upgrades/updates jobs. Also, it is intended as -a guideline to understand how these jobs work, as well as giving -some tips for debugging. 
-
-Upgrades/Updates CI jobs
-~~~~~~~~~~~~~~~~~~~~~~~~~
-
-At the moment most of the upgrade jobs have been moved from upstream
-infrastructure to `RDO Software Factory job definition`_ due to
-runtime constraints of the OpenStack infra jobs.
-
-Each of these jobs is defined by a `featureset file`_ and a `scenario file`_.
-The featureset used by a job is given in the last part of the job type value,
-which can be found in the CI job definition::
-
-    - '{trigger}-tripleo-ci-{jobname}-{release}{suffix}':
-        jobname: 'centos-7-containers-multinode-upgrades'
-        release:
-          - pike
-          - master
-        suffix: ''
-        type: 'multinode-1ctlr-featureset011'
-        node: upstream-centos-7-2-node
-        trigger: gate
-
-The scenario used is referenced in the featureset file; in the example above
-`featureset011`_ makes use of the following scenarios::
-
-    composable_scenario: multinode.yaml
-    upgrade_composable_scenario: multinode-containers.yaml
-
-As this job covers the upgrade from one release to another, we need to
-specify two scenario files: the one used during deployment and the one
-used when upgrading. Each of these scenario files defines the services
-deployed on the nodes.
-
-.. note::
-   There is a matrix with the different features deployed per feature set
-   here: `featureset matrix`_
-
-Currently, two types of upgrade jobs exist:
-
-- multinode-upgrade (mixed-version): In this job, an undercloud with
-  release N+1 is deployed, while the overcloud is deployed with an N
-  release. Execution time is reduced by not upgrading the undercloud;
-  instead, the Heat templates from the (N+1) undercloud are used when
-  performing the overcloud upgrade.
-
-  .. note::
-     If you want your patch to be tested against this job you need
-     to add *RDO Third Party CI* as a reviewer or reply with the comment
-     *check-rdo experimental*.
-
-- undercloud-upgrade: This job tests the undercloud upgrade from one
-  major release to another. The undercloud is deployed with release
-  N and upgraded to the N+1 release. This job does not deploy an overcloud.
-
-.. note::
-   There is an effort to `integrate`_ the new `tripleo-upgrade`_ role into
-   tripleo-quickstart that defines a unified way to upgrade and update.
-
-Upgrade/Update CI jobs, where to look
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-The best place to check the current CI job status is the `CI Status`_
-page. This page contains a log of all the TripleO CI jobs, their result
-status, links to the logs, the git patch that triggered each run, and
-statistics about the pass/fail rates.
-
-To check the status of the Upgrades/Updates jobs, you need to click the
-`TripleO CI promotion jobs`_ link from `CI Status`_, where you will find
-the RDO cloud upgrades section:
-
-.. image:: rdo_upgrades_jobs.png
-
-In this section the CI jobs are color coded to show their
-current status at a glance::
-
-   - Red: CI job constantly failing.
-   - Yellow: Unstable job, frequent failures.
-   - Green: CI job passing consistently.
-
-If you scroll down after clicking one of the jobs in the section
-you will find the CI job statistics and the last 100 (or fewer; the
-number can be edited) job executions. Each of the job executions contains::
-
-    - Date: Time and date the CI job was triggered
-    - Length: Job duration
-    - Reason: CI job result or failure reason.
-    - Patch: Git ref of the patch that triggered the job.
-    - Logs: Link to the logs.
-    - Branch: Release branch used to run the job.
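-
-Before moving on to debugging, note that a job can be related back to its
-featureset and scenario files locally; for example (paths assume a
-tripleo-quickstart checkout)::
-
-    git clone https://github.com/openstack/tripleo-quickstart.git
-    grep -E 'composable_scenario|upgrade_composable_scenario' \
-        tripleo-quickstart/config/general_config/featureset011.yml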
- - -Debugging Upgrade/Update CI jobs -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -When opening the logs from a CI job it might look a little chaotic -(mainly when it is for the first time). It's good to have an idea -where you can find the logs you need, so you will be able to identify -the cause of a failure or debug some issue. - -.. _logs directory: - -The first thing to have a look at when debugging a CI job is the -console output or full log. When clicking in the job, the following -folder structure appears:: - - job-output.json.gz - job-output.txt.gz - logs/ - zuul-info/ - -The job execution log is located in the *job-output.txt.gz* file. Once -opened, a huge log will appear in front of you. What should you look -for? - -(1) Find the job result - - A good string to search is *PLAY RECAP*. At this point, all the - playbooks have been executed and a summary of the runs per node - is displayed:: - - PLAY RECAP ********************************************************************* - 127.0.0.2 : ok=9 changed=0 unreachable=0 failed=0 - localhost : ok=10 changed=3 unreachable=0 failed=0 - subnode-2 : ok=3 changed=1 unreachable=0 failed=0 - undercloud : ok=120 changed=78 unreachable=0 failed=1 - - In this case, one of the playbooks executed in the undercloud has - failed. To identify which one, we can look for the string **fatal**.:: - - fatal: [undercloud]: FAILED! => {"changed": true, "cmd": "set -o pipefail && /home/zuul/overcloud-upgrade.sh 2>&1 - | awk '{ print strftime(\"%Y-%m-%d %H:%M:%S |\"), $0; fflush(); }' > overcloud_upgrade_console.log", - "delta": "0:00:39.175219", "end": "2017-11-14 16:55:47.124998", "failed": true, "rc": 1, - "start": "2017-11-14 16:55:07.949779", "stderr": "", "stdout": "", "stdout_lines": [], "warnings": []} - - From this task, we can guess that something went wrong during the - overcloud upgrading process. But, where can I find the log - *overcloud_upgrade_console.log* referenced in the task? - -(2) Undercloud logs - - From the `logs directory`_ , you need to open the *logs/* - folder. All undercloud logs are located inside the *undercloud/* - folder. Opening it will display the following:: - - etc/ *configuration files* - home/ *job execution logs from the playbooks* - var/ *system/services logs* - - The log we look for is located in */home/zuul/*. Most of the tasks - executed in tripleo-quickstart will store the full script as well as - the execution log in this directory. So, this is a good place to - have a better understanding of what went wrong. - - If the overcloud deployment or upgrade failed, you will also find - two log files named:: - - failed_upgrade.log.txt.gz - failed_upgrade_list.log.txt.gz - - The first one stores the output from the debugging command:: - - openstack stack failures list --long overcloud - - Which prints out the reason why the deployment or upgrade - failed. Although sometimes, this information is not enough - to find the root cause for the problem. The *stack failures* - can give you a clue of which service is causing the problem, - but then you'll need to investigate the OpenStack service logs. 
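-
-    As a quick, hedged example of narrowing a failure down from the downloaded
-    logs (file names follow the layout described above; adjust paths and
-    compression suffixes as needed)::
-
-      zgrep -n 'fatal:' job-output.txt.gz
-      # then read the script log referenced by the failing task
-      zless logs/undercloud/home/zuul/overcloud_upgrade_console.log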
- -(3) Overcloud logs - - From the *logs/* folder, you can find a folder named *subnode-2* - which contains most of the overcloud logs.:: - - apache/ - ceph_conf.txt.gz - deprecations.txt.gz - devstack.journal.gz - df.txt.gz - etc/ - home/ - iptables.txt.gz - libvirt/ - listen53.txt.gz - openvswitch/ - pip2-freeze.txt.gz - ps.txt.gz - resolv_conf.txt.gz - rpm-qa.txt.gz - sudoers.d/ - var/ - - To access the OpenStack services logs, you need to go to - *subnode-2/var/log/* when deploying a baremetal overcloud. If the - overcloud is containerized, the service logs are stored under - *subnode-2/var/log/containers*. - - -Replicating CI jobs -~~~~~~~~~~~~~~~~~~~ - -Thanks to `James Slagle`_ there is now a way to reproduce TripleO CI jobs in -any OpenStack cloud. Everything is enabled by the `traas`_ project, -a set of Heat templates and scripts that reproduce the TripleO CI jobs -in the same way they are being run in the Zuul gate. - -When cloning the repo, you just need to set some configuration parameters. A -set of sample templates have been located under -`templates/example-environments`_. The parameters defined in this -template are:: - - parameters: - overcloud_flavor: [*flavor used for the overcloud instance*] - overcloud_image: [*overcloud OS image (available in cloud images)*] - key_name: [*private key used to access cloud instances*] - private_net: [*network name (it must exist and match)*] - overcloud_node_count:[*number of overcloud nodes*] - public_net: [*public net in CIDR notation*] - undercloud_image: [*undercloud OS image (available in cloud images)*] - undercloud_flavor: [*flavor used for the undercloud instance*] - toci_jobtype: [*CI job type*] - zuul_changes: [*List of patches to retrieve*] - -.. note:: The CI job type toci_jobtype can be found in the job definition - under `tripleo-ci/zuul.d`_. - -A good example to deploy a multinode job in RDO Cloud is this -`sample template`_. You can test your out patches by appending -the refs patch linked with the ^ character:: - - zuul_changes: ::[^::]* - -This allows you also to test any patch in a local environment without -consuming CI resources. Or when you want to debug an environment after -a job execution. - -Once the template parameters are defined, you just need to create the stack. -If we would like to deploy the *rdo-cloud-env-config-download.yaml* -`sample template`_ we would need to run:: - - cd traas/ - openstack stack create traas -t templates/traas.yaml \ - -e templates/traas-resource-registry.yaml \ - -e templates/example-environments/rdo-cloud-env-config-download.yaml - -This stack will create two instances in your cloud tenant, one for undercloud -and another for the overcloud. Once created, the stack will directly call -the `traas/scripts/traas.sh`_ script which downloads all required repositories -to start executing the job. - -If you want to follow up the job execution, you can ssh to the undercloud -instance and tail the content from the *$HOME/tripleo-root/traas.log*. All -the execution will be logged in that file. 
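-
-For example (the login user depends on the cloud image used)::
-
-    openstack server list          # find the undercloud instance address
-    ssh centos@<undercloud-instance-ip>
-    tail -f ~/tripleo-root/traas.log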
diff --git a/doc/source/upgrade/developer/upgrades/fast_fw_upgrade.plantuml b/doc/source/upgrade/developer/upgrades/fast_fw_upgrade.plantuml deleted file mode 100644 index d6b36818..00000000 --- a/doc/source/upgrade/developer/upgrades/fast_fw_upgrade.plantuml +++ /dev/null @@ -1,190 +0,0 @@ -' The png image can be generated by running: -' -' plantuml fast_fw_upgrade.plantuml - -@startuml - -actor User -participant Mistral -participant Heat -participant Ansible -participant Nodes -' in newer versions of plantuml we should use: collections Nodes - - -' === FFWD-UPGRADE PREPARE === - -User -> Mistral : openstack overcloud\nffwd-upgrade prepare -activate Mistral - -Mistral -> Mistral : plan update - -Mistral -> Heat : stack update -activate Heat - -Heat --> Mistral -deactivate Heat - -Mistral --> User -deactivate Mistral - - -' === FFWD-UPGRADE RUN === - -User -> Mistral : openstack overcloud\nffwd-upgrade run -activate Mistral -note right of Heat - * Operates on all overcloud nodes. - * Intended usage: - * On bootstrap nodes it shuts down services and - performs upgrade to N+1 and N+2. - * On other nodes it just shuts down services. -end note - -Mistral -> Heat : query stack outputs -activate Heat - -Heat --> Mistral : stack outputs -deactivate Heat - -Mistral -> Mistral : generate playbooks - -Mistral -> Ansible : fast_forward_upgrade_playbook.yaml -activate Ansible - -Ansible -> Nodes : fast_forward_upgrade_tasks\n(once per each release) -activate Nodes - -Nodes --> Ansible -deactivate Nodes - -Ansible -> Nodes : fast_forward_post_upgrade_tasks -activate Nodes - -Nodes --> Ansible -deactivate Nodes - -Ansible --> Mistral -deactivate Ansible - -Mistral --> User : -deactivate Mistral - - -' === UPGRADE RUN === - -User -> Mistral : openstack overcloud\nupgrade run -activate Mistral -note right of Heat - * Reuse of the normal upgrade command. - * Operates on all selected nodes in parallel. - * Note the separate ansible-playbook invocations: - facts aren't carried over between playbooks. - * Intended usage: - * Upgrades bootstrap nodes from N+2 to N+3. - * Upgrades other nodes from N to N+3. -end note - -Mistral -> Heat : query stack outputs -activate Heat - -Heat --> Mistral : stack outputs -deactivate Heat - -Mistral -> Mistral : generate playbooks - -Mistral -> Ansible : upgrade_steps_playbook.yaml -activate Ansible - -Ansible -> Nodes : upgrade_tasks all steps -activate Nodes - -Nodes --> Ansible -deactivate Nodes - -Ansible --> Mistral -deactivate Ansible - -Mistral -> Ansible : deploy_steps_playbook.yaml -activate Ansible - -Ansible -> Nodes : host_prep_tasks -activate Nodes - -Nodes --> Ansible -deactivate Nodes - -Ansible -> Nodes : deploy_tasks all steps -activate Nodes - -Nodes --> Ansible -deactivate Nodes - -Ansible --> Mistral -deactivate Ansible - -Mistral -> Ansible : post_upgrade_steps_playbook.yaml -activate Ansible - -Ansible -> Nodes : post_upgrade_tasks all steps -activate Nodes - -Nodes --> Ansible -deactivate Nodes - -Ansible --> Mistral -deactivate Ansible - -Mistral --> User : -deactivate Mistral - - -' === FFWD-UPGRADE CONVERGE === - -User -> Mistral : openstack overcloud\nffwd-upgrade converge -activate Mistral -note right of Heat - * Essentially the same as `overcloud deploy`, - asserts that the state of overcloud matches - the latest templates. 
-end note - -Mistral -> Mistral : plan update - -Mistral -> Heat : stack update -activate Heat - -Heat --> Mistral -deactivate Heat - -Mistral -> Heat : query stack outputs -activate Heat - -Heat --> Mistral : stack outputs -deactivate Heat - -Mistral -> Mistral : generate playbooks - -Mistral -> Ansible : deploy_steps_playbook.yaml -activate Ansible - -Ansible -> Nodes : host_prep_tasks -activate Nodes - -Nodes --> Ansible -deactivate Nodes - -Ansible -> Nodes : deploy_tasks and external_deploy_tasks\nall steps (interleaved) -activate Nodes - -Nodes --> Ansible -deactivate Nodes - -Ansible --> Mistral -deactivate Ansible - -Mistral --> User : -deactivate Mistral - - -@enduml diff --git a/doc/source/upgrade/developer/upgrades/fast_fw_upgrade.png b/doc/source/upgrade/developer/upgrades/fast_fw_upgrade.png deleted file mode 100644 index 550a2d1d..00000000 Binary files a/doc/source/upgrade/developer/upgrades/fast_fw_upgrade.png and /dev/null differ diff --git a/doc/source/upgrade/developer/upgrades/fast_fw_upgrade.rst b/doc/source/upgrade/developer/upgrades/fast_fw_upgrade.rst deleted file mode 100644 index 04290b53..00000000 --- a/doc/source/upgrade/developer/upgrades/fast_fw_upgrade.rst +++ /dev/null @@ -1,169 +0,0 @@ -TripleO Fast Forward Upgrade (FFU) N -> N+3 ----------------------------------------------------- - -For a detailed background on how the Fast Forward Upgrade (FFU) workflow was -proposed please refer to the relevant spec_. - -For a guide on running the FFU in your environment see the `FFU Deploy Guide `_. - -.. _ffu_deploy_guide: https://docs.openstack.org/project-deploy-guide/tripleo-docs/latest/post_deployment/upgrade/fast_forward_upgrade.html - -This document will explore some -of the technical details of the Newton to Queens FFU specifically. - -You might find it helpful to consult this high-level diagram as you -read on: - -.. image:: fast_fw_upgrade.png - :scale: 20 % - :alt: Fast forward upgrade workflow diagram - :target: ../../../_images/fast_fw_upgrade.png - -At a high level the FFU workflow consists of the following steps: - -1. Perform a `Minor update`_ on the environment (both undercloud and overcloud) - to bring it to the latest Newton. This will include OS level updates, including kernel - and openvswitch. As usual for minor update the operator will reboot each - node as necessary and so doing this first means the FFU workflow doesn't - (also) have to deal with node reboots later on in the process. - -2. Perform 3 consecutive major upgrades of the undercloud to bring it to - Queens. The undercloud will crucially then have the target version - of the tripleo-heat-templates including the fast_forward_upgrade_tasks - that will deliver the next stages of the workflow. - -3. Generate and then run the fast_forward_upgrade_playbook on the overcloud. This will: - - 3.1 First bring down the controlplane services on **all nodes**. - - 3.2 Then update packages, migrate databases and any other version specific - tasks from Newton to Ocata then Ocata to Pike. This happens only - on a **single node of each role**. - -4. Finally run the Pike to Queens upgrade on all nodes including the Queens - upgrade tasks and service configurations. - -Step 3 above is started by first performing a Heat stack update using the Queens -tripleo-heat-templates from the Queens upgraded undercloud, but without applying any -configuration. 
This stack update is only used to collect the fast_forward_upgrade_tasks -(ffu_tasks) from each of the services deployed in the given environment and -generate a fast_forward_upgrade_playbook_ ansible playbook. This playbook is -then executed to deliver steps 3.1 and 3.2 above. See below for more information -about how the ffu_tasks are compiled into the fast_forward_upgrade_playbook. - -A notable exception worthy of mention is the configuration of Ceph services -which is managed by ceph-ansible_. That is, for Ceph services there is no -collection of fast_forward_upgrade_tasks from the ceph related service manifests -in the tripleo-heat-templates and so Ceph is not managed by the generated -fast_forward_upgrade_playbook_. Instead ceph-ansible_ will be invoked by -the Queens deployment and service configuration in step 4 above. - -The Heat stack update performed at the start of step 3 also generates the Queens -upgrade_steps_playbook_ and deploy_steps_playbook_ ansible playbooks. One -notable exception is the configuration of Ceph services which is managed -by ceph-ansible_ -Step 4 above (Pike to Queens upgrade tasks and Queens services configuration) -is delivered through execution of these Heat stack update generated playbooks. -Ceph related upgrade and deployment will be applied here with calls to -ceph-ansible_. - -Amongst other things, the P..Q upgrade_tasks stop and disable those systemd -services that are being migrated to run in containers. The Queens `deploy steps playbook `_ -will then apply the required puppet and docker configuration to start the -containers for those services. For this to be possible the Heat stack update -which starts step 3 and that generates the ansible playbooks must include the -required `docker configuration and environment`_ files, including the latest -container images and making sure to set the to-be containerized services to refer -to the equivalent `docker templates`_ for the Heat resource registry. - -.. _Minor update: https://docs.openstack.org/tripleo-docs/latest/install/post_deployment/package_update.html -.. _upgrade_steps_playbook: https://github.com/openstack/tripleo-heat-templates/blob/82f128f15b1b1eb7bf6ac7df0c6d01e5619309eb/common/deploy-steps.j2#L528 -.. _deploy_steps_playbook: https://github.com/openstack/tripleo-heat-templates/blob/82f128f15b1b1eb7bf6ac7df0c6d01e5619309eb/common/deploy-steps.j2#L382 -.. _fast_forward_upgrade_playbook: https://review.opendev.org/#/c/499221/20/common/deploy-steps.j2@541 -.. _docker configuration and environment: https://docs.openstack.org/tripleo-docs/latest/install/containers_deployment/overcloud.html#preparing-the-environment -.. _docker templates: https://github.com/openstack/tripleo-heat-templates/blob/750fa306ce41c949928d5a3a7253aff99dd1af8f/environments/docker.yaml#L7-L58 -.. _ceph-ansible: https://github.com/ceph/ceph-ansible - -FFU and tripleo-heat-templates -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -This section will present an overview of how the fast_forward_upgrade_playbook.yaml -is generated from the tripleo-heat-templates. - -FFU uses *fast_forward_upgrade_tasks* (ffu_tasks) to define the upgrade -workflow. These are 'normal' ansible tasks and they are carried as a list in -the outputs section of a given service manifest, see containerized -`neutron-api`_ for an example. - -The ffu_tasks for those services that are enabled in a given deployment are -collected in the outputs of the deploy-steps.j2_ into a -*fast_forward_upgrade_playbook* output. 
This is then retrieved using the -config-download_ mechanism and written to disk as an ansible playbook. - -The *fast_forward_upgrade_tasks* defined for a given service can use the -**step** and **release** variables to specify when a given task should be -executed. At a high level the fast_forward_upgrade_playbook consists of two -loops - there is a very good explanation in `/#/c/499221 `_ -commit message, but an outer loop for the release (first Ocata tasks then Pike -tasks) and then an inner loop for the steps within each release. - -The *ffu_tasks* which are set to run in steps 0 to 3 are designated the -*fast_forward_upgrade_prep_role_tasks* and these are executed on all nodes for -a given role. Then the *ffu_tasks* which have steps 4 to max (currently 9) are -designated the *fast_forward_upgrade_bootstrap_role_tasks* and these are only -executed on a single node for each role (one controller, one compute etc). - -The top level fast_forward_upgrade_playbook.yaml looks like:: - - - hosts: overcloud - become: true - tasks: - - include_tasks: fast_forward_upgrade_release_tasks.yaml - loop_control: - loop_var: release - with_items: {get_param: [FastForwardUpgradeReleases]} - -The *fast_forward_upgrade_release_tasks.yaml* in turn looks like:: - - - include_tasks: fast_forward_upgrade_prep_tasks.yaml - - include_tasks: fast_forward_upgrade_bootstrap_tasks.yaml - -The *fast_forward_upgrade_prep_tasks.yaml* specifies the loop with -sequence 0 to 3 as explained above:: - - - include_tasks: fast_forward_upgrade_prep_role_tasks.yaml - with_sequence: start=0 end=3 - loop_control: - loop_var: step - -And where the *fast_forward_upgrade_prep_role_tasks.yaml* includes the -*ffu_tasks* on all nodes for each role:: - - - include_tasks: Controller/fast_forward_upgrade_tasks.yaml - when: role_name == 'Controller' - - include_tasks: Compute/fast_forward_upgrade_tasks.yaml - when: role_name == 'Compute' - ...etc - -Similarly for the *fast_forward_upgrade_bootstrap_tasks.yaml* it specifies -the loop sequence for the step variable to be 4 to 9:: - - - include_tasks: fast_forward_upgrade_bootstrap_role_tasks.yaml - with_sequence: start=4 end=9 - loop_control: - loop_var: step - -And where the *fast_forward_upgrade_bootstrap_role_tasks.yaml* include the -*ffu_tasks* only on a single node for each role type:: - - - include_tasks: Controller/fast_forward_upgrade_tasks.yaml - when: role_name == 'Controller' and ansible_hostname == Controller[0] - - include_tasks: Compute/fast_forward_upgrade_tasks.yaml - when: role_name == 'Compute' and ansible_hostname == Compute[0] - ...etc - -.. _neutron-api: https://github.com/openstack/tripleo-heat-templates/blob/master/deployment/neutron/neutron-api-container-puppet.yaml#L415 -.. _spec: https://github.com/openstack/tripleo-specs/blob/master/specs/queens/fast-forward-upgrades.rst -.. _deploy-steps.j2: https://github.com/openstack/tripleo-heat-templates/blob/master/common/deploy-steps.j2#L377 -.. _config-download: https://github.com/openstack/tripleo-common/blob/master/tripleo_common/utils/config.py - diff --git a/doc/source/upgrade/developer/upgrades/links.rst b/doc/source/upgrade/developer/upgrades/links.rst deleted file mode 100644 index b5898615..00000000 --- a/doc/source/upgrade/developer/upgrades/links.rst +++ /dev/null @@ -1,31 +0,0 @@ -.. Links, citations, and others... -.. _RDO Software Factory job definition: - https://github.com/rdo-infra/review.rdoproject.org-config/blob/9668021f655e53413108f8c15988f68caa8d31ba/jobs/tripleo-upstream.yml#L802 -.. 
_featureset file: - https://github.com/openstack/tripleo-quickstart/tree/master/config/general_config -.. _scenario file: - https://github.com/openstack/tripleo-heat-templates/tree/master/ci/environments -.. _featureset011: - https://github.com/openstack/tripleo-quickstart/blob/master/config/general_config/featureset011.yml -.. _featureset matrix: - https://docs.openstack.org/tripleo-quickstart/latest/feature-configuration.html -.. _tripleo-upgrade: - https://github.com/redhat-openstack/tripleo-upgrade -.. _integrate: - https://review.opendev.org/#/q/topic:link_tripleo_upgrade -.. _James Slagle: - http://lists.openstack.org/pipermail/openstack-dev/2017-February/112993.html -.. _traas: - https://github.com/slagle/traas -.. _templates/example-environments: - https://github.com/slagle/traas/tree/master/templates/example-environments -.. _tripleo-ci/zuul.d: - https://github.com/openstack-infra/tripleo-ci/blob/4042e9c225cf9dac917b8d4c3a245b8ff492056d/zuul.d/multinode-jobs.yaml#L82 -.. _sample template: - https://github.com/slagle/traas/blob/master/templates/example-environments/rdo-cloud-env-config-download.yaml -.. _traas/scripts/traas.sh: - https://github.com/slagle/traas/blob/fb447a585895dd783519dfec68a9728fa72b7609/scripts/traas.sh -.. _CI Status: - http://cistatus.tripleo.org/ -.. _TripleO CI promotion jobs: - http://38.145.34.234/ diff --git a/doc/source/upgrade/developer/upgrades/major_upgrade.plantuml b/doc/source/upgrade/developer/upgrades/major_upgrade.plantuml deleted file mode 100644 index 668505ef..00000000 --- a/doc/source/upgrade/developer/upgrades/major_upgrade.plantuml +++ /dev/null @@ -1,188 +0,0 @@ -' The png image can be generated by running: -' -' plantuml major_upgrade.plantuml - -@startuml - -actor User -participant Mistral -participant Heat -participant Ansible -participant Nodes -' in newer versions of plantuml we should use: collections Nodes - - -' === UPGRADE PREPARE === - -User -> Mistral : openstack overcloud\nupgrade prepare -activate Mistral - -Mistral -> Mistral : plan update - -Mistral -> Heat : stack update -activate Heat - -Heat --> Mistral -deactivate Heat - -Mistral --> User -deactivate Mistral - - -' === UPGRADE RUN === - -User -> Mistral : openstack overcloud\nupgrade run -activate Mistral -note right of Heat - * Operates on all selected nodes in parallel. - * Note the separate ansible-playbook invocations: - facts aren't carried over between playbooks. 
-end note - -Mistral -> Heat : query stack outputs -activate Heat - -Heat --> Mistral : stack outputs -deactivate Heat - -Mistral -> Mistral : generate playbooks - -Mistral -> Ansible : upgrade_steps_playbook.yaml -activate Ansible - -Ansible -> Nodes : upgrade_tasks all steps -activate Nodes - -Nodes --> Ansible -deactivate Nodes - -Ansible --> Mistral -deactivate Ansible - -Mistral -> Ansible : deploy_steps_playbook.yaml -activate Ansible - -Ansible -> Nodes : host_prep_tasks -activate Nodes - -Nodes --> Ansible -deactivate Nodes - -Ansible -> Nodes : deploy_tasks all steps -activate Nodes - -Nodes --> Ansible -deactivate Nodes - -Ansible --> Mistral -deactivate Ansible - -Mistral -> Ansible : post_upgrade_steps_playbook.yaml -activate Ansible - -Ansible -> Nodes : post_upgrade_tasks all steps -activate Nodes - -Nodes --> Ansible -deactivate Nodes - -Ansible --> Mistral -deactivate Ansible - -Mistral --> User : -deactivate Mistral - - -' === EXTERNAL UPGRADE RUN === - -User -> Mistral : openstack overcloud\nexternal-upgrade run -activate Mistral -note right of Heat - * Executes tasks on undercloud, but can affect - overcloud via delegation or nested Ansible process. - * Single play: facts are carried over between - upgrade and deploy tasks. - * Often limited via `--tags` to perform a particular - upgrade task or an upgrade of a particular service. - * Can be executed before `upgrade run` for some tasks. -end note - -Mistral -> Heat : query stack outputs -activate Heat - -Heat --> Mistral : stack outputs -deactivate Heat - -Mistral -> Mistral : generate playbooks - -Mistral -> Ansible : external_upgrade_steps_playbook.yaml -activate Ansible - -Ansible -> Nodes : external_upgrade_tasks all steps -activate Nodes - -Nodes --> Ansible -deactivate Nodes - -Ansible -> Nodes : external_deploy_tasks all steps -activate Nodes - -Nodes --> Ansible -deactivate Nodes - -Ansible --> Mistral -deactivate Ansible - -Mistral --> User : -deactivate Mistral - - -' === UPGRADE CONVERGE === - -User -> Mistral : openstack overcloud\nupgrade converge -activate Mistral -note right of Heat - * Essentially the same as `overcloud deploy`, - asserts that the state of overcloud matches - the latest templates. 
-end note - -Mistral -> Mistral : plan update - -Mistral -> Heat : stack update -activate Heat - -Heat --> Mistral -deactivate Heat - -Mistral -> Heat : query stack outputs -activate Heat - -Heat --> Mistral : stack outputs -deactivate Heat - -Mistral -> Mistral : generate playbooks - -Mistral -> Ansible : deploy_steps_playbook.yaml -activate Ansible - -Ansible -> Nodes : host_prep_tasks -activate Nodes - -Nodes --> Ansible -deactivate Nodes - -Ansible -> Nodes : deploy_tasks and external_deploy_tasks\nall steps (interleaved) -activate Nodes - -Nodes --> Ansible -deactivate Nodes - -Ansible --> Mistral -deactivate Ansible - -Mistral --> User : -deactivate Mistral - - -@enduml diff --git a/doc/source/upgrade/developer/upgrades/major_upgrade.png b/doc/source/upgrade/developer/upgrades/major_upgrade.png deleted file mode 100644 index e4943542..00000000 Binary files a/doc/source/upgrade/developer/upgrades/major_upgrade.png and /dev/null differ diff --git a/doc/source/upgrade/developer/upgrades/major_upgrade.rst b/doc/source/upgrade/developer/upgrades/major_upgrade.rst deleted file mode 100644 index c2fd8724..00000000 --- a/doc/source/upgrade/developer/upgrades/major_upgrade.rst +++ /dev/null @@ -1,392 +0,0 @@ -Overcloud Major Upgrade Workflow and CLI ----------------------------------------- - -The purpose of this documentation is to deep-dive into the code which -delivers the major upgrade workflow in TripleO. For information about -the steps an operator needs to perform when running this upgrade -please see the `operator docs `_. - -.. _major_upgrade_deploy_guide: https://docs.openstack.org/project-deploy-guide/tripleo-docs/latest/post_deployment/upgrade/major_upgrade.html - -The major upgrade workflow is delivered almost exclusively via Ansible -playbook invocations on the overcloud nodes. Heat is used to generate -the Ansible playbooks (during the 'prepare' command at the beginning, -and 'converge' command at the end of the upgrade). The -Queens_upgrade_spec_ may be of interest in describing the design of -the workflow. - -CLI code is in python-tripleoclient_, mistral workflows and actions in -tripleo-common_, and upgrade tasks in tripleo-heat-templates_. The -following sections dive into the details top-down per individual CLI -commands which are used to deliver the major upgrade: - -* `openstack overcloud upgrade prepare $ARGS`_ -* `openstack overcloud upgrade run $ARGS`_ -* `openstack overcloud external-upgrade run $ARGS`_ -* `openstack overcloud upgrade converge $ARGS`_ - -You might also find it helpful to consult this high-level diagram as -you read the following sections: - -.. image:: major_upgrade.png - :scale: 20 % - :alt: Major upgrade workflow diagram - :target: ../../../_images/major_upgrade.png - -.. _queens_upgrade_spec: https://github.com/openstack/tripleo-specs/blob/master/specs/queens/tripleo_ansible_upgrades_workflow.rst -.. _python-tripleoclient: https://github.com/openstack/python-tripleoclient/blob/master/tripleoclient/v1/overcloud_upgrade.py -.. _tripleo-common: https://github.com/openstack/tripleo-common/blob/master/workbooks/package_update.yaml -.. _tripleo-heat-templates: https://github.com/openstack/tripleo-heat-templates/blob/8277d675bc9496eb164f429fa265f79252166f2d/common/deploy-steps.j2#L604 - -openstack overcloud upgrade prepare $ARGS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The entry point for the upgrade CLI commands, *prepare*, *run* and -*converge*, is given in the python-tripleoclient setup.cfg_. 
All three -are also defined in the same file, overcloud-upgrade.py_. - -The 'prepare' Heat stack update does not apply any TripleO -configuration and is exclusively used to generate the Ansible -playbooks that are subsequently invoked to deliver the upgrade. - -As you can see the UpgradePrepare_ class inherits from DeployOvercloud_. The -reason for this is to prevent duplication of the logic concerned with validating -the configuration passed into the prepare command (all the -e env.yaml files), -as well as updating_the_swift_stored_plan_ with the overcloud configuration. - -The prepare_env_file_ is automatically prepended to the list of -environment files passed to Heat (as specified by -prepare_command_prepends_). It contains resource_registry and -parameter_defaults which are intended to be in effect during the -upgrade. - -As a result the UpgradePrepare class inherits all the Deploy_parser_arguments_, -including ``--stack`` and ``-e`` for the additional environment files. We explicitly -set the update_plan_only_ argument so that the Heat stack update does not get -executed by the parent class and returns after completing all the template -processing. - -Instead, the Heat stack update is performed by a mistral workflow. On the -client side the hook is in the update method defined in package_update.py_. -This invokes the package_update_plan_ mistral workflow in tripleo-common. -The package_update_plan_ workflow has a number of tasks, one of which invokes -the heat stack update using the update_stack_action_. - -Back on the tripleoclient side, we use base_wait_for_messages_ to listen -for messages on the Zaqar_queue_ that is used by the mistral workflow. - -The operator must include all environment files previously used with -the `overcloud deploy` command. It is especially important that the -operator includes the environment file containing the references for -the target version container images. - -See the `operator docs `_ for -pointers to how that file is generated and for reference it will look -something like - - .. code-block:: bash - - parameter_defaults: - DockerAodhApiImage: 192.168.24.1:8787/queens/centos-binary-aodh-api:current-tripleo-rdo - DockerAodhConfigImage: 192.168.24.1:8787/queens/centos-binary-aodh-api:current-tripleo-rdo - DockerAodhEvaluatorImage: 192.168.24.1:8787/queens/centos-binary-aodh-evaluator:current-tripleo-rdo - DockerAodhListenerImage: 192.168.24.1:8787/queens/centos-binary-aodh-listener:current-tripleo-rdo - -Once the Heat stack update has been completed successfully and the stack is -in UPDATE_COMPLETE state, you can download the configuration ansible playbooks -using the config download cli - - - .. code-block:: bash - - [stack@521-m--undercloud ~]$ source stackrc - (undercloud) [stack@521-m--undercloud ~]$ openstack overcloud config download --config-dir MYCONFIGDIR - The TripleO configuration has been successfully generated into: MYCONFIGDIR/tripleo-gep7gh-config - -and you can inspect the ansible playbooks which are used by the *upgrade run* -before executing them. - - -.. _setup.cfg: https://github.com/openstack/python-tripleoclient/blob/e9a68430400a6b99005c6aa675bf9bd27ed810a1/setup.cfg#L88-L90 -.. _overcloud-upgrade.py: https://github.com/openstack/python-tripleoclient/blob/f0110cdff0edcf40d8e94d4848c543310ea5c54e/tripleoclient/v1/overcloud_upgrade.py#L14 -.. _UpgradePrepare: https://github.com/openstack/python-tripleoclient/blob/c7b7b4e3dcd34f9e51686065e328e73556967bab/tripleoclient/v1/overcloud_upgrade.py#L29 -.. 
_DeployOvercloud: https://github.com/openstack/python-tripleoclient/blob/c7b7b4e3dcd34f9e51686065e328e73556967bab/tripleoclient/v1/overcloud_deploy.py#L44 -.. _updating_the_swift_stored_plan: https://github.com/openstack/python-tripleoclient/blob/c7b7b4e3dcd34f9e51686065e328e73556967bab/tripleoclient/v1/overcloud_deploy.py#L301 -.. _update_plan_only: https://github.com/openstack/python-tripleoclient/blob/c7b7b4e3dcd34f9e51686065e328e73556967bab/tripleoclient/v1/overcloud_upgrade.py#L72 -.. _deploy_parser_arguments: https://github.com/openstack/python-tripleoclient/blob/c7b7b4e3dcd34f9e51686065e328e73556967bab/tripleoclient/v1/overcloud_deploy.py#L689-L890 -.. _package_update.py: https://github.com/openstack/python-tripleoclient/blob/3d9183fc03aa96bce093e774ab4bf51655579a9c/tripleoclient/workflows/package_update.py#L34 -.. _package_update_plan: https://github.com/openstack/tripleo-common/blob/1d3aefbe2f0aac2828eba69ee9efc57a7b7bf385/workbooks/package_update.yaml#L9 -.. _update_stack_action: https://github.com/openstack/tripleo-common/blob/4d7258c2d8a521818146368568da07fd429e5a23/tripleo_common/actions/package_update.py#L100 -.. _base_wait_for_messages: https://github.com/openstack/python-tripleoclient/blob/3d9183fc03aa96bce093e774ab4bf51655579a9c/tripleoclient/workflows/package_update.py#L38 -.. _zaqar_queue: https://github.com/openstack/tripleo-common/blob/1d3aefbe2f0aac2828eba69ee9efc57a7b7bf385/workbooks/package_update.yaml#L17 -.. _prepare_command_prepends: https://github.com/openstack/python-tripleoclient/blob/3d9183fc03aa96bce093e774ab4bf51655579a9c/tripleoclient/v1/overcloud_upgrade.py#L76-L79 -.. _prepare_env_file: https://github.com/openstack/tripleo-heat-templates/blob/3ab23982a2fd3ffcad09e76f226bd4aab4040d4e/environments/lifecycle/upgrade-prepare.yaml#L4-L12 - -openstack overcloud upgrade run $ARGS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Unlike the first step in the workflow, the *upgrade prepare*, the UpgradeRun_ -class does not inherit from DeployOvercloud. There is no need for the operator -to pass all the environment files and configuration here. The template processing -and update of the stack and swift stored plan have already taken place. -The ansible playbooks are ready to be retrieved by config download as demonstrated -above. The upgrade run operation thus will simply execute those ansible playbooks -generated by the upgrade prepare command, against the nodes specified in the -parameters. - -Either ``--nodes`` or ``--roles`` parameters are used to limit the ansible -playbook execution to specific nodes. Both ``--roles`` and ``--nodes`` are -used by ansible with the tripleo-ansible-inventory_. This creates the -ansible inventory based on the Heat stack outputs, so that for example -``Controller`` and ``overcloud-controller-0`` are both valid values for -the ansible-playbook |--limit| parameter. - -See `overcloud upgrade run `_ for additional information. - -.. _overcloud_upgrade_run: https://docs.openstack.org/project-deploy-guide/tripleo-docs/latest/post_deployment/upgrade/major_upgrade.html#openstack-overcloud-upgrade-run - -As documented in the major upgrade documentation and the nodes_or_roles_helptext_, -the operator *must* use ``--roles`` for the controllers. Upgrading the -controlplane, one node at a time is currently not supported, mainly -due to limitations in the pacemaker cluster upgrade which needs to -occur across all nodes in the same operation. 
The operator may use -``--roles`` for non controlplane nodes or may prefer to specify one or -more specific nodes by name with ``--nodes``. In either case the value -specified by the operator is simply passed through to ansible as the -limit_hosts_ parameter. - -The ``--ssh-user`` and all other parameters are similarly -collected and passed to the ansible invocation which starts on the client side -in the run_update_ansible_action_ method call. The |--skip-tags| -parameter can be used to skip certain ansible tasks with the ansible-skip-tags_ -ansible-playbook parameter. The allowed ``--skip-tags`` values are restricted -to a predefined set, validated against -MAJOR_UPGRADE_SKIP_TAGS_. Finally, the |--playbook| parameter as the name -suggests is used to specify the ansible playbook(s) to run. By default and -as you can see in the definition, this defaults to a special value 'all' -which causes all-upgrade-playbooks-to-run_. The value of all_playbooks -in that previous reference, is stored in the MAJOR_UPGRADE_PLAYBOOKS_ constant. - -As with the *upgrade prepare*, for *upgrade run* a mistral workflow is used -to perform the 'main' operation, which in this case is execution of the -ansible playbooks. On the client side the update_nodes_workflow_invocation_ -is where mistral is invoked and takes as workflow input the various collected -parameters described above. You can see that the update_nodes_workflow_ which -lives in tripleo-common has parameters defined under the 'input:' section which -correspond to the *openstack overcloud upgrade run* parameters. - -There are two main tasks in the update_nodes_workflow_, the download-config_action_ -which is invoked in a first 'download_config' task, and the ansible-playbook_action_ -action which is invoked in the 'node_update' task. This is ultimately where -ansible-playbook-is-executed_ with processutils.execute. - -Finally back on the client side we listen for messages on the run_zaqar_queue_ -before declaring the upgrade-run-success_! - - -.. _UpgradeRun: https://github.com/openstack/python-tripleoclient/blob/c7b7b4e3dcd34f9e51686065e328e73556967bab/tripleoclient/v1/overcloud_upgrade.py#L94 -.. _tripleo-ansible-inventory: https://github.com/openstack/tripleo-common/blob/cef9c406514fd0b01b7984b89334d8e8abd7a244/tripleo_common/inventory.py#L1 -.. |--limit| replace:: ``--limit`` -.. _--limit: https://docs.ansible.com/ansible/2.4/ansible-playbook.html#cmdoption-ansible-playbook-l -.. _nodes_or_roles_helptext: https://github.com/openstack/python-tripleoclient/blob/c7b7b4e3dcd34f9e51686065e328e73556967bab/tripleoclient/v1/overcloud_upgrade.py#L111-L131 -.. _limit_hosts: https://github.com/openstack/python-tripleoclient/blob/c7b7b4e3dcd34f9e51686065e328e73556967bab/tripleoclient/v1/overcloud_upgrade.py#L207-L212 -.. _run_update_ansible_action: https://github.com/openstack/python-tripleoclient/blob/c7b7b4e3dcd34f9e51686065e328e73556967bab/tripleoclient/v1/overcloud_upgrade.py#L212-L217 -.. |--skip-tags| replace:: ``--skip-tags`` -.. _--skip-tags: https://github.com/openstack/python-tripleoclient/blob/c7b7b4e3dcd34f9e51686065e328e73556967bab/tripleoclient/v1/overcloud_upgrade.py#L211 -.. _ansible-skip-tags: https://docs.ansible.com/ansible/2.4/ansible-playbook.html#cmdoption-ansible-playbook-skip-tags -.. _MAJOR_UPGRADE_SKIP_TAGS: https://github.com/openstack/python-tripleoclient/blob/3931606423a17c40a4458eb4df3c47cc6a829dbb/tripleoclient/constants.py#L56 -.. |--playbook| replace:: ``--playbook`` -.. 
_--playbook: https://github.com/openstack/python-tripleoclient/blob/c7b7b4e3dcd34f9e51686065e328e73556967bab/tripleoclient/v1/overcloud_upgrade.py#L133-L150 -.. _all-upgrade-playbooks-to-run: https://github.com/openstack/python-tripleoclient/blob/3931606423a17c40a4458eb4df3c47cc6a829dbb/tripleoclient/utils.py#L946 -.. _MAJOR_UPGRADE_PLAYBOOKS: https://github.com/openstack/python-tripleoclient/blob/3931606423a17c40a4458eb4df3c47cc6a829dbb/tripleoclient/constants.py#L53 -.. _update_nodes_workflow_invocation: https://github.com/openstack/python-tripleoclient/blob/3931606423a17c40a4458eb4df3c47cc6a829dbb/tripleoclient/workflows/package_update.py#L85 -.. _update_nodes_workflow: https://github.com/openstack/tripleo-common/blob/cef9c406514fd0b01b7984b89334d8e8abd7a244/workbooks/package_update.yaml#L99-L114 -.. _download-config_action: https://github.com/openstack/tripleo-common/blob/cef9c406514fd0b01b7984b89334d8e8abd7a244/tripleo_common/actions/config.py#L65 -.. _ansible-playbook_action: https://github.com/openstack/tripleo-common/blob/cef9c406514fd0b01b7984b89334d8e8abd7a244/tripleo_common/actions/ansible.py#L243 -.. _ansible-playbook-is-executed: https://github.com/openstack/tripleo-common/blob/cef9c406514fd0b01b7984b89334d8e8abd7a244/tripleo_common/actions/ansible.py#L533-L535 -.. _run_zaqar_queue: https://github.com/openstack/python-tripleoclient/blob/3931606423a17c40a4458eb4df3c47cc6a829dbb/tripleoclient/workflows/package_update.py#L89 -.. _upgrade-run-success: https://github.com/openstack/python-tripleoclient/blob/c7b7b4e3dcd34f9e51686065e328e73556967bab/tripleoclient/v1/overcloud_upgrade.py#L219-L222 - -openstack overcloud external-upgrade run $ARGS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The `external-upgrade run` command is used to upgrade the services -whose deployment (and upgrade) procedure is not tied to execution on -particular overcloud nodes. The deployment/upgrade procedures are thus -executed from the undercloud, even though a full overcloud inventory -is available for use. - -The `external upgrade playbook` first executes -`external_upgrade_tasks` and then `external_deploy_tasks`. The -execution happens within the same Ansible play, so facts from -`external_upgrade_tasks` are carried over to -`external_deploy_tasks`. This is a mechanism which will allow you to -amend what your deploy tasks do based on whether an upgrade is being -run or not. - -Often it's not desirable to run the tasks for all services at the same -time, so `external-upgrade run` supports ``--tags`` argument to limit -which tasks are run. - -The mechanisms of `external-upgrade` and `external-update` commands -and Ansible tasks are the same, but two commands and task hooks are -provided because generally in OpenStack we distinguish minor update -vs. major upgrade workflows. If your service only has one type of -upgrade, you can make the `external_update_tasks` the same as -`external_upgrade_tasks` by using YAML anchors and references. - -.. _external upgrade playbook: https://github.com/openstack/tripleo-heat-templates/blob/8fd90c2d45e2680b018eae8387d86d420f738f5a/common/deploy-steps.j2#L767-L822 - -openstack overcloud upgrade converge $ARGS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The UpgradeConverge_ class like the UpgradePrepare class also inherits from -the DeployOvercloud_ class thus getting all of its parameters and template -processing. The operator needs to pass in all Heat environment files -used as part of the upgrade prepare including the container images file. 
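-
-As a rough sketch (the environment file names below are placeholders; pass the
-same ``-e`` files that were used for the deployment and for *upgrade prepare*),
-the converge invocation mirrors the original deploy command:
-
-   .. code-block:: bash
-
-      openstack overcloud upgrade converge --templates \
-        -e /home/stack/templates/my-overrides.yaml \
-        -e /home/stack/templates/overcloud-container-images.yaml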
- -The main objective of the upgrade converge operation is to unset the -upgrade specific parameters that have been set on the overcloud Heat -stack as part of prepare. These are unset using the converge_env_file_ -which is included in the list of client_converge_env_files_ passed to -the Heat stack update. - -The 'converge' applies all TripleO configuration against all overcloud -nodes and thus serves as a sanity check that the overcloud was -successfully upgraded, since the same configuration will already have -been applied. The 'converge' will also leave the Heat stack in a good -state for subsequent updates, for instance scaling to add nodes. - -As these values are set in parameter_defaults a Heat stack update is required -against the overcloud Heat stack to explicitly unset them. In particular and -as pointed out in the operator_converge_docs_ until converge has completed, -any operations that require a Heat stack update will likely fail, as the -'noop' of the DeploymentSteps in the prepare_env_file_ in particular means -none of the usual docker/puppet/* config is applied. Setting something with -parameter_defaults means it is used until explicitly unset via parameter_defaults -as that value will override any other default value specified via the -tripleo-heat-templates. - -Unlike the prepare command there is no mistral workflow here and instead -we rely on the parent DeployOvercloud_ class to invoke the -converge_heat_stack_update_ and so the implementation is also simpler. - -.. _UpgradeConverge: https://github.com/openstack/python-tripleoclient/blob/3931606423a17c40a4458eb4df3c47cc6a829dbb/tripleoclient/v1/overcloud_upgrade.py#L225 -.. _client_converge_env_files: https://github.com/openstack/python-tripleoclient/blob/c7b7b4e3dcd34f9e51686065e328e73556967bab/tripleoclient/v1/overcloud_upgrade.py#L253 -.. _operator_converge_docs: https://docs.openstack.org/tripleo-docs/latest/install/post_deployment/upgrade.html#openstack-overcloud-upgrade-converge -.. _converge_heat_stack_update: https://github.com/openstack/python-tripleoclient/blob/3931606423a17c40a4458eb4df3c47cc6a829dbb/tripleoclient/v1/overcloud_deploy.py#L223 -.. _converge_env_file: https://github.com/openstack/tripleo-heat-templates/blob/3ab23982a2fd3ffcad09e76f226bd4aab4040d4e/environments/lifecycle/upgrade-converge.yaml#L4-L7 - -Upgrade CLI developer workflow -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -This section will give some examples of a potential developer workflow for -testing fixes or in-progress gerrit reviews against python-tripleoclient, -tripleo-common or tripleo-heat-templates for the upgrade workflow. This -may be useful if you are working on an upgrades related bug for example. - -Making changes to the ansible playbooks -....................................... - -If there is a failure running one of the upgrades related ansible playbooks, -you might need to examine and if necessary fix the related ansible task. -The tasks themselves live in each of the tripleo-heat-templates service -manifests, under the upgrade_tasks section of the template outputs. For example -see the containerized rabbitmq_upgrade_tasks_. - -If you make a change in service upgrade_tasks, then to test it you will need to - -1. Patch the tripleo-heat-templates in your environment with the fix -2. Rerun `openstack overcloud upgrade prepare $ARGS`_, so that the resulting - ansible playbooks include your fix. -3. Finally run the playbooks with `openstack overcloud upgrade run $ARGS`_. 
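
Steps 2 and 3 are plain re-runs of the prepare and run commands described
earlier. As a hedged sketch only (the environment files and node name below
are placeholders for whatever your deployment actually uses):

 .. code-block:: bash

     # Step 2: regenerate the ansible playbooks from the patched templates
     openstack overcloud upgrade prepare --templates \
       -e /home/stack/container-images.yaml \
       -e /home/stack/my-extra-config.yaml
     # Step 3: execute the regenerated playbooks, limited to one node first
     openstack overcloud upgrade run --nodes controller-0

Step 1, patching the installed templates in place, is shown next.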

Assuming you are using the default /usr/share/openstack-tripleo-heat-templates
directory for the deployment templates, you can use the following as just one
example:

 .. code-block:: bash

     # backup tht in case you want to revert - or just yum re-install ;)
     sudo cp -r /usr/share/openstack-tripleo-heat-templates \
       /usr/share/openstack-tripleo-heat-templates.ORIG
     # Apply patch from gerrit e.g. https://review.opendev.org/#/c/563073/
     curl -4sSL 'https://review.opendev.org/changes/563073/revisions/current/patch?download' | \
       base64 -d | \
       sudo patch -d /usr/share/openstack-tripleo-heat-templates/ -p1

Making changes to the upgrades workflow
.......................................

If instead you need to add or fix something in the upgrades workflow itself,
for example to handle a new parameter that needs to be passed through to
ansible, or any other change, you will need to patch python-tripleoclient and
tripleo-common, depending on whether your fixes extend to the mistral workflow
too.

There are many ways to patch your environment, and the following is a
different approach to the one used for tripleo-heat-templates above, where we
patched the installed templates in place. In the following examples we instead
clone tripleo-common and tripleoclient, patch them using gerrit reviews and
then re-install from source.

 .. note::

    The following example commands include complete removal and replacement
    of the installed tripleoclient and tripleo-common!

Patching python-tripleoclient:

 .. code-block:: bash

     # python-tripleoclient - clone source, patch from gerrit and install
     git clone https://github.com/openstack/python-tripleoclient.git -b stable/queens ~/python-tripleoclient
     pushd ~/python-tripleoclient

     # Apply patches from gerrit e.g. https://review.opendev.org/#/c/564267
     curl "https://review.opendev.org/changes/564267/revisions/current/patch" | \
       base64 --decode > /home/stack/"564267.patch"
     patch -N -p1 -b -z .first < /home/stack/564267.patch
     # Remove current version and re-install
     sudo rm -rf /usr/lib/python2.7/site-packages/python_tripleoclient*
     sudo rm -rf /usr/lib/python2.7/site-packages/tripleoclient
     sudo python setup.py clean --all install
     popd

Patching tripleo-common:

 .. note::

    After switching to a containerized undercloud, local tripleo-common
    changes need to be applied in all Mistral containers.

 .. code-block:: bash

     # tripleo-common - clone from source, patch from gerrit and install
     git clone https://github.com/openstack/tripleo-common -b stable/queens ~/tripleo-common
     pushd ~/tripleo-common
     # Apply patches from gerrit e.g. https://review.opendev.org/#/c/562995
     curl "https://review.opendev.org/changes/562995/revisions/current/patch" | \
       base64 --decode > /home/stack/"562995.patch"
     patch -N -p1 -b -z .first < /home/stack/562995.patch
     # Remove current version and re-install
     sudo rm -rf /usr/lib/python2.7/site-packages/tripleo_common*
     sudo python setup.py clean --all install
     popd
     sudo cp /usr/share/tripleo-common/sudoers /etc/sudoers.d/tripleo-common

Finally you need to update the mistral workbooks with the newly installed
versions. In the code block above, the tripleo-common change at 562995_ has
changed package_update.yaml, so that is what we need to update here:

 ..
code-block:: bash - - mistral workbook-update /usr/share/tripleo-common/workbooks/package_update.yaml - # Since entry_points.txt is affected next steps are required: - # Re populate mistral db and restart services - sudo mistral-db-manage populate - sudo systemctl restart openstack-mistral-api.service - sudo systemctl restart openstack-mistral-engine.service - sudo systemctl restart openstack-mistral-executor.service - -.. _rabbitmq_upgrade_tasks: https://github.com/openstack/tripleo-heat-templates/blob/master/deployment/rabbitmq/rabbitmq-messaging-rpc-pacemaker-puppet.yaml#L305 -.. _562995: https://review.opendev.org/#/c/562995 diff --git a/doc/source/upgrade/developer/upgrades/major_upgrade_with_os.plantuml b/doc/source/upgrade/developer/upgrades/major_upgrade_with_os.plantuml deleted file mode 100644 index 3d88e3ff..00000000 --- a/doc/source/upgrade/developer/upgrades/major_upgrade_with_os.plantuml +++ /dev/null @@ -1,214 +0,0 @@ -' The png image can be generated by running: -' -' plantuml major_upgrade_with_os.plantuml - -@startuml - -actor User -participant "undercloud" -participant "controller-0" -participant "controller-1" -participant "controller-2" - -' === PREPARE === - -note left of undercloud #AAFFAA - If controllers == 1, env file - is given to prefer `persist/restore` - instead of `transfer` for MariaDB. -end note -User -> "undercloud" : openstack overcloud upgrade prepare -note right - Update heat outputs using latest templates. -end note -User <-- "undercloud" - -' === REPROVISION === - -User -> "controller-0" : openstack overcloud upgrade run\n --tags system-upgrade-prepare \n --playbook upgrade-playbook.yml \n --node controller-0 -note right - Persist data from the bootstrap node - while it is live. - (Fetch the data to the undercloud.) - - if controllers == 1: - mariadb and other data (composable) - - if controllers > 1: - only other data (composable) -end note - -"undercloud" <- "controller-0" -note left: /var/lib/mistral/tripleo-persist/ -User <-- "controller-0" - -User -> "controller-0" : openstack server rebuild controller-0 -note right - Reprovision to the new OS. -end note - -User <-- "controller-0" - -User -> "controller-0" : openstack overcloud upgrade prepare\n(or enable-ssh-admin.sh, will be improved upon) -note right - Install Mistral ssh keys -end note - -User <-- "controller-0" - -User -> "controller-0" : openstack overcloud upgrade run\n --tags system-upgrade-run \n --playbook upgrade-playbook.yml \n --node controller-0 -note right - Restore the data onto bootstrap node. - (Push the data from the undercloud.) - - if controllers == 1: - mariadb and other data (composable) - - if controllers > 1: - only other data (composable) -end note - -User <-- "controller-0" - -' === TRANSFER DATA === - -group Transfer the data to the freshly re-installed controller\n(only required in multi-controller environments) -note over "undercloud", "controller-2" #AAFFAA - Everything is composable, but diagram showcases MariaDB specifically -end note -User -> "undercloud" : openstack overcloud external-upgrade run -"undercloud" -> "controller-1" : pcs resource disable\ngalera-bundle -note right: Disable MariaDB -"controller-1" -> "controller-2" -note right: Disable MariaDB - -note over undercloud, "controller-2" #FFAAAA - control plane outage. 
-end note - - -"undercloud" -> "controller-1" : transfer module run in ansible -note right - Transfer data: - - if controllers == 1: - nothing - - if controllers > 1: - mysql data -end note - -"controller-1" -> "undercloud": get the data -note left: /var/lib/mistral/tripleo-transfer/ -User <-- "controller-1" - -"undercloud" -> "controller-0": push the data -note right - extract the data -end note - -User <-- "undercloud" -end - -' === START CONTROLLER 0 === - -"User" -> "controller-0": openstack overcloud upgrade run --nodes controller-0 -note right - Configure OpenStack, - start a new cluster. - (1 node total) -end note - -note over undercloud, "controller-2" #FFAAAA - controller-0 is a one node cluster running the latest version - of tripleo on the new os. - The controller plane is back. -end note - -User <-- "controller-0" - -' === ADD CONTROLLER 1 === - -User -> "controller-1" : openstack overcloud upgrade run\n --tags system-upgrade-prepare \n --playbook upgrade-playbook.yml \n --node controller-1 -note right - Persist data - (composable) -end note - -"undercloud" <- "controller-1" -note left: /var/lib/mistral/tripleo-persist/ - -User -> "controller-1" : openstack server rebuild controller-1 -note right - Reprovision to the new OS. -end note - -User <-- "controller-1" - -User -> "controller-1" : openstack overcloud upgrade prepare\n(os oc reprovision ssh-admin) -note right - Install Mistral ssh keys -end note - -User <-- "controller-1" - -User -> "controller-1" : openstack overcloud upgrade run\n --tags system-upgrade-run \n --playbook upgrade-playbook.yml \n --nodes controller-1 -note right - Restore data - (composable) -end note - -User <-- "controller-1" - -"User" -> "controller-1": openstack overcloud upgrade run --nodes controller-0,controller-1 -"User" -> "controller-0": -note right of "controller-1" - Configure OpenStack, - join the cluster. - (2 nodes total) -end note - -User <-- "controller-1" - -' === ADD CONTROLLER 2 === - -User -> "controller-2" : openstack overcloud upgrade run\n --tags system-upgrade-prepare \n --playbook upgrade-playbook.yml \n --node controller-2 -note right - Persist data - (composable) -end note - -"undercloud" <- "controller-2" -note left: /var/lib/mistral/tripleo-persist/ -User <-- "controller-2" - -User -> "controller-2" : openstack server rebuild controller-2 -note right - Reprovision - to the new OS. -end note - -User <-- "controller-2" - -User -> "controller-2" : openstack overcloud upgrade prepare\n(os oc reprovision ssh-admin) -note right - Install Mistral - ssh keys -end note - -User <-- "controller-2" - -User -> "controller-2" : openstack overcloud upgrade run\n --tags system-upgrade-run \n --playbook upgrade-playbook.yml \n --nodes controller-2 -note right - Restore data - (composable) -end note - -User <-- "controller-2" - -"User" -> "controller-2": openstack overcloud upgrade run --nodes controller-0,controller-1,controller-2 -"User" -> "controller-1": -"User" -> "controller-0": -note right of "controller-2" - Configure OpenStack, - join the cluster. 
- (3 nodes total) -end note - -User <-- "controller-2" - -@enduml diff --git a/doc/source/upgrade/developer/upgrades/major_upgrade_with_os.png b/doc/source/upgrade/developer/upgrades/major_upgrade_with_os.png deleted file mode 100644 index d6781287..00000000 Binary files a/doc/source/upgrade/developer/upgrades/major_upgrade_with_os.png and /dev/null differ diff --git a/doc/source/upgrade/developer/upgrades/minor_update.plantuml b/doc/source/upgrade/developer/upgrades/minor_update.plantuml deleted file mode 100644 index 52c6b27a..00000000 --- a/doc/source/upgrade/developer/upgrades/minor_update.plantuml +++ /dev/null @@ -1,174 +0,0 @@ -' The png image can be generated by running: -' -' plantuml minor_update.plantuml - -@startuml - -actor User -participant Mistral -participant Heat -participant Ansible -participant Nodes -' in newer versions of plantuml we should use: collections Nodes - - -' === UPDATE PREPARE === - -User -> Mistral : openstack overcloud\nupdate prepare -activate Mistral - -Mistral -> Mistral : plan update - -Mistral -> Heat : stack update -activate Heat - -Heat --> Mistral -deactivate Heat - -Mistral --> User -deactivate Mistral - - -' === UPDATE RUN === - -User -> Mistral : openstack overcloud\nupdate run -activate Mistral -note right of Heat - * Operates on all selected nodes one-by-one. -end note - -Mistral -> Heat : query stack outputs -activate Heat - -Heat --> Mistral : stack outputs -deactivate Heat - -Mistral -> Mistral : generate playbooks - -Mistral -> Ansible : update_steps_playbook.yaml -activate Ansible - -Ansible -> Nodes : update_tasks all steps -activate Nodes - -Nodes --> Ansible -deactivate Nodes - -Ansible -> Nodes : host_prep_tasks all steps -activate Nodes - -Nodes --> Ansible -deactivate Nodes - -Ansible -> Nodes : common_deploy_tasks all steps -activate Nodes - -Nodes --> Ansible -deactivate Nodes - -Ansible -> Nodes : post_update_tasks all steps -activate Nodes - -Nodes --> Ansible -deactivate Nodes - -Ansible --> Mistral -deactivate Ansible - -Mistral --> User : -deactivate Mistral - - -' === EXTERNAL UPDATE RUN === - -User -> Mistral : openstack overcloud\nexternal-update run -activate Mistral -note right of Heat - * Executes tasks on undercloud, but can affect - overcloud via delegation or nested Ansible process. - * Single play: facts are carried over between - update and deploy tasks. - * Often limited via `--tags` to perform a particular - update task or an update of a particular service. - * Can be executed before `update run` for some tasks. -end note - -Mistral -> Heat : query stack outputs -activate Heat - -Heat --> Mistral : stack outputs -deactivate Heat - -Mistral -> Mistral : generate playbooks - -Mistral -> Ansible : external_update_steps_playbook.yaml -activate Ansible - -Ansible -> Nodes : external_update_tasks all steps -activate Nodes - -Nodes --> Ansible -deactivate Nodes - -Ansible -> Nodes : external_deploy_tasks all steps -activate Nodes - -Nodes --> Ansible -deactivate Nodes - -Ansible --> Mistral -deactivate Ansible - -Mistral --> User : -deactivate Mistral - - -' === UPDATE CONVERGE === - -User -> Mistral : openstack overcloud\nupdate converge -activate Mistral -note right of Heat - * Essentially the same as `overcloud deploy`, - asserts that the state of overcloud matches - the latest templates. 
-end note - -Mistral -> Mistral : plan update - -Mistral -> Heat : stack update -activate Heat - -Heat --> Mistral -deactivate Heat - -Mistral -> Heat : query stack outputs -activate Heat - -Heat --> Mistral : stack outputs -deactivate Heat - -Mistral -> Mistral : generate playbooks - -Mistral -> Ansible : deploy_steps_playbook.yaml -activate Ansible - -Ansible -> Nodes : host_prep_tasks -activate Nodes - -Nodes --> Ansible -deactivate Nodes - -Ansible -> Nodes : deploy_tasks and external_deploy_tasks\nall steps (interleaved) -activate Nodes - -Nodes --> Ansible -deactivate Nodes - -Ansible --> Mistral -deactivate Ansible - -Mistral --> User : -deactivate Mistral - - -@enduml diff --git a/doc/source/upgrade/developer/upgrades/minor_update.png b/doc/source/upgrade/developer/upgrades/minor_update.png deleted file mode 100644 index b181dd46..00000000 Binary files a/doc/source/upgrade/developer/upgrades/minor_update.png and /dev/null differ diff --git a/doc/source/upgrade/developer/upgrades/minor_update.rst b/doc/source/upgrade/developer/upgrades/minor_update.rst deleted file mode 100644 index c4512ff9..00000000 --- a/doc/source/upgrade/developer/upgrades/minor_update.rst +++ /dev/null @@ -1,171 +0,0 @@ -==================== -Minor version update -==================== - -.. TOOD(aschultz): update to reference in the deploy guide -.. To get developer understanding of minor updates, first read the -.. :doc:`operator docs for minor updates <../../minor_update>` -.. and perhaps try to go through the update as an operator would, to get -.. the basic idea. - -Assuming operator-level familiarity with the minor updates, let's look -at individual pieces in more detail. - -How update commands work -======================== - -The following subsections describe the individual update commands: - -* `openstack overcloud update prepare`_ -* `openstack overcloud update run`_ -* `openstack overcloud external-update run`_ -* `openstack overcloud update converge`_ - -You might also find it helpful to consult this high-level diagram as -you read: - -.. image:: minor_update.png - :scale: 20 % - :alt: Minor update workflow diagram - :target: ../../../_images/minor_update.png - -`openstack overcloud update prepare` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The `update prepare` command performs a Heat stack update, mapping -some resources to ``OS::Heat::None`` in order to prevent the usual -deployment config management tasks being performed (running Puppet, -starting containers, running external installers like -ceph-ansible). See the `update prepare environment file`_. - -.. _`update prepare environment file`: https://github.com/openstack/tripleo-heat-templates/blob/4286727ae70b1fa4ca6656c3f035afeac6eb2a95/environments/lifecycle/update-prepare.yaml - -The purpose of this stack update is to regenerate fresh outputs of the -Heat stack. These outputs contain Ansible playbooks and task lists -which are then used in the later in the `update run` phase. - -`openstack overcloud update run` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The `update run` command utilizes the previously generated Heat stack -outputs. It downloads the playbook yamls and their included task list -yaml via the config-download mechanisms, and executes the -`update steps playbook`_. - -.. 
_`update steps playbook`: https://github.com/openstack/tripleo-heat-templates/blob/4286727ae70b1fa4ca6656c3f035afeac6eb2a95/common/deploy-steps.j2#L558-L592 - -The command accepts ``--nodes`` or ``--roles`` argument to limit which -nodes will be targeted during a particular `update run` -execution. Even if the limit matches multiple nodes (e.g. all nodes -within one role), the play is executed with ``serial: 1``, meaning -that all actions are finished on one node before starting the update -on another. - -The play first executes `update_steps_tasks.yaml` which are tasks -collected from the ``update_tasks`` entry in composable -services. - -After the update tasks are finished, deployment workflow is -performed on the node being updated. That means reusing -`host_prep_tasks.yaml` and `common_deploy_steps_tasks.yaml`, which are -executed like on a fresh deployment, except during minor update -they're within a play with the aforementioned ``serial: 1`` limiting. - -Finally, ``post_update_tasks`` are executed. They are utilized by -services which need to perform something *after* deployment workflow -during the minor update. The update of the node is complete and the -Ansible play continues to update another node. - -`openstack overcloud external-update run` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The `external-update run` command is used to update the services whose -deployment (and update) procedure is not tied to execution on -particular overcloud nodes. The deployment/update procedures are thus -executed from the undercloud, even though a full overcloud inventory -is available for use. - -The `external update playbook` first executes `external_update_tasks` -and then `external_deploy_tasks`. The execution happens within the -same Ansible play, so facts from `external_update_tasks` are carried -over to `external_deploy_tasks`. This is a mechanism which will allow -you to amend what your deploy tasks do based on whether an update is -being run or not. - -Often it's not desirable to run the tasks for all services at the same -time, so `external-update run` supports ``--tags`` argument to limit -which tasks are run. - -The mechanisms of `external-upgrade` and `external-update` commands -and Ansible tasks are the same, but two commands and task hooks are -provided because generally in OpenStack we distinguish minor update -vs. major upgrade workflows. If your service only has one type of -upgrade, you can make the `external_update_tasks` the same as -`external_upgrade_tasks` by using YAML anchors and references. - -.. _external update playbook: https://github.com/openstack/tripleo-heat-templates/blob/8fd90c2d45e2680b018eae8387d86d420f738f5a/common/deploy-steps.j2#L644-L699 - -`openstack overcloud update converge` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -.. note:: - Update Converge is only required for versions less than Wallaby. - Update Converge has been removed for Wallaby and beyond. - -The `update converge` command performs a Heat stack update, reverting -the previous ``OS::Heat::None`` resource mappings back to the values -used for regular deployments and configuration updates, and -potentially also resets some parameter values. For environments with -Ceph, majority of this already happened on `ceph-upgrade run`, so the -final `update converge` effectively just resets the -CephAnsiblePlaybook parameter. - -See the `update converge environment file`_. - -.. 
_`update converge environment file`: https://github.com/openstack/tripleo-heat-templates/blob/4286727ae70b1fa4ca6656c3f035afeac6eb2a95/environments/lifecycle/update-converge.yaml - -The purpose of this stack update is to re-run config management -mechanisms and assert that the overcloud state matches what is -provided by the templates and environment files. - -Writing update logic for a service -================================== - -Simple config/image replacement -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -If the service is managed by Paunch_ or tripleo_container_manage_ Ansible role, -it may be that there's no need to write any update tasks. Paunch or -tripleo_container_manage can automatically handle simple updates: change in -configuration or change of container image URL triggers automatic removal of -the old container and creation of new one with latest config and latest image. -If that's all the service needs for updates, you don't need to create any -``update_tasks``. - -Custom tasks during updates -~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -If the service is not managed by Paunch_ nor tripleo_container_manage_, or if -the simple container replacement done by Paunch is not sufficient for the -service update, you will need to include custom update logic. This is done via -providing these outputs in your composable service template: - -* ``update_tasks`` -- these are executed before deployment tasks on the - node being updated. - -* ``post_update_tasks`` -- these are executed after deployment tasks on - the node being updated. - -.. _Paunch: https://opendev.org/openstack/paunch/src/branch/master/README.rst -.. _tripleo_container_manage: https://docs.openstack.org/tripleo-ansible/latest/roles/role-tripleo_container_manage.html - -Update tasks are generally meant to bring the service into a stopped -state (sometimes with pre-fetched new images, this is necessary for -services managed by Pacemaker). Then the same workflow as during -deployment is used to bring the node back up into a running state, and -the post-update tasks can then perform any actions needed after the -deployment workflow. - -Similarly as deployment tasks, the update tasks and post-update tasks -are executed in steps_. - -.. _steps: https://github.com/openstack/tripleo-heat-templates/blob/4286727ae70b1fa4ca6656c3f035afeac6eb2a95/common/deploy-steps.j2#L17-L18 diff --git a/doc/source/upgrade/developer/upgrades/rdo_upgrades_jobs.png b/doc/source/upgrade/developer/upgrades/rdo_upgrades_jobs.png deleted file mode 100644 index 7663e185..00000000 Binary files a/doc/source/upgrade/developer/upgrades/rdo_upgrades_jobs.png and /dev/null differ diff --git a/doc/source/upgrade/developer/upgrades/upgrades.rst b/doc/source/upgrade/developer/upgrades/upgrades.rst deleted file mode 100644 index de8be9d7..00000000 --- a/doc/source/upgrade/developer/upgrades/upgrades.rst +++ /dev/null @@ -1,14 +0,0 @@ -Upgrades Development -==================== - -This section is intended to give a better understanding of the upgrade/update -process in TripleO. As well as a walkthrough for developers on the way upgrade -workflow enables OpenStack services upgrade. - -.. toctree:: - :maxdepth: 2 - - major_upgrade - minor_update - fast_fw_upgrade - ci_upgrades diff --git a/doc/source/upgrade/index.rst b/doc/source/upgrade/index.rst deleted file mode 100644 index c288ab7e..00000000 --- a/doc/source/upgrade/index.rst +++ /dev/null @@ -1,9 +0,0 @@ -Upgrade, Update, FFWD Upgrade Guide -=================================== - -.. 
toctree:: - :maxdepth: 3 - :includehidden: - - developer/upgrades/upgrades.rst - diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 570933f6..00000000 --- a/requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -# The order of packages is significant, because pip processes them in the order -# of appearance. Changing the order has an impact on the overall integration -# process, which may cause wedges in the gate later. - -openstackdocstheme>=2.2.1 # Apache-2.0 diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index feac6034..00000000 --- a/setup.cfg +++ /dev/null @@ -1,17 +0,0 @@ -[metadata] -name = tripleo-docs -summary = TripleO documentation -description_file = - README.rst -author = OpenStack -author_email = openstack-discuss@lists.openstack.org -home_page = https://docs.openstack.org/tripleo-docs/latest/ -classifier = - Environment :: OpenStack - Intended Audience :: Information Technology - Intended Audience :: System Administrators - License :: OSI Approved :: Apache Software License - Operating System :: POSIX :: Linux - Programming Language :: Python - Programming Language :: Python :: Implementation :: CPython - Programming Language :: Python :: 3 :: Only diff --git a/setup.py b/setup.py deleted file mode 100644 index cd35c3c3..00000000 --- a/setup.py +++ /dev/null @@ -1,20 +0,0 @@ -# Copyright (c) 2013 Hewlett-Packard Development Company, L.P. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or -# implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import setuptools - -setuptools.setup( - setup_requires=['pbr>=2.0.0'], - pbr=True) diff --git a/tox.ini b/tox.ini deleted file mode 100644 index c165f4c9..00000000 --- a/tox.ini +++ /dev/null @@ -1,35 +0,0 @@ -[tox] -minversion = 3.18.0 -envlist = docs,deploy-guide - -[testenv] -basepython = python3 -usedevelop = True -setenv = VIRTUAL_ENV={envdir} -deps = -r{toxinidir}/requirements.txt - - -[testenv:venv] -commands = {posargs} - -[testenv:docs] -deps = - -c{env:UPPER_CONSTRAINTS_FILE:https://releases.openstack.org/constraints/upper/master} - -r{toxinidir}/doc/requirements.txt -commands = - sphinx-build -a -E -W -d doc/build/doctrees --keep-going -b html doc/source doc/build/html - -[testenv:pdf-docs] -allowlist_externals = - make -description = - Build PDF documentation. -envdir = {toxworkdir}/docs -deps = {[testenv:docs]deps} -commands = - sphinx-build --keep-going -b latex doc/source doc/build/pdf - make -C doc/build/pdf - -[testenv:deploy-guide] -deps = {[testenv:docs]deps} -commands = sphinx-build -a -E -W -d deploy-guide/build/doctrees --keep-going -b html deploy-guide/source deploy-guide/build/html