From 6a26fff4d37ffa0c6398db5e56619a4f9abbd78c Mon Sep 17 00:00:00 2001
From: Pradeep Kilambi <pkilambi@redhat.com>
Date: Mon, 1 Jun 2015 16:49:09 -0400
Subject: [PATCH] Add telemetry best practices to the admin guide

Closes-Bug: #1460824

Change-Id: I73f60610450eb08a61b7150685901ca93065b6ec
---
 doc/admin-guide-cloud/ch_telemetry.xml        |   1 +
 .../section_telemetry-best-practices.xml      | 177 ++++++++++++++++++
 2 files changed, 178 insertions(+)
 create mode 100644 doc/admin-guide-cloud/telemetry/section_telemetry-best-practices.xml
diff --git a/doc/admin-guide-cloud/ch_telemetry.xml b/doc/admin-guide-cloud/ch_telemetry.xml
index 234d24330e..eb516bc026 100644
--- a/doc/admin-guide-cloud/ch_telemetry.xml
+++ b/doc/admin-guide-cloud/ch_telemetry.xml
@@ -51,4 +51,5 @@
   <xi:include href="telemetry/section_telemetry-measurements.xml"/>
   <xi:include href="telemetry/section_telemetry-events.xml"/>
   <xi:include href="telemetry/section_telemetry-troubleshooting-guide.xml"/>
+  <xi:include href="telemetry/section_telemetry-best-practices.xml"/>
 </chapter>
diff --git a/doc/admin-guide-cloud/telemetry/section_telemetry-best-practices.xml b/doc/admin-guide-cloud/telemetry/section_telemetry-best-practices.xml
new file mode 100644
index 0000000000..f5071c8192
--- /dev/null
+++ b/doc/admin-guide-cloud/telemetry/section_telemetry-best-practices.xml
@@ -0,0 +1,177 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<section xmlns="http://docbook.org/ns/docbook"
+  xmlns:xi="http://www.w3.org/2001/XInclude"
+  xmlns:xlink="http://www.w3.org/1999/xlink"
+  version="5.0"
+  xml:id="section_telemetry-best-practices">
+  <title>Telemetry best practices</title>
+    <para>
+        The following are some suggested best practices to follow when deploying
+        and configuring the Telemetry service. The best practices are divided into
+        data collection and storage.
+    </para>
+    <section xml:id="section_telemetry-data-collection-best-practices">
+      <title>Data collection</title>
+      <procedure>
+      <step>
+        <para>
+         The Telemetry module collects a continuously growing set of data.
+         Not all the data will be relevant for a cloud administrator to monitor.
+        </para>
+          <itemizedlist>
+          <listitem>
+            <para>Based on your needs, you can edit the <filename>pipeline.yaml</filename>
+         configuration file to include a selected number of meters while disregarding
+         the rest.</para>
+          </listitem>
+          <listitem>
+            <para>By default, Telemetry service polls the service APIs every 10 minutes.
+                You can change the polling interval on a per meter basis by editing the
+                <filename>pipeline.yaml</filename> configuration file.</para>
+              <warning>
+                  <para>
+                      If the polling interval is too short, it will likely cause increase of stored data
+                      and the stress on the service APIs.
+                  </para>
+              </warning>
+          </listitem>
+          <listitem>
+            <para>Expand the configuration to have greater control over different meter intervals.</para>
+          </listitem>
+        </itemizedlist>
+        <note>
+        <para>
+        For more information, see the <link xlink:href=
+        " http://docs.openstack.org/admin-guide-cloud/content/section_telemetry-pipeline-configuration.html">
+        Pipeline Configuration Options</link>.
+        </para>
+        </note>
+      </step>
+      <step>
+        <para>
+         If you are using the Kilo version of Telemetry, you can delay or adjust polling requests by enabling
+         the jitter support. This adds a random delay on how the polling agents send requests to the
+         service APIs. To enable jitter, set <option>shuffle_time_before_polling_task</option>
+         in the <filename>ceilometer.conf</filename> configuration file to an integer greater than 0.
+        </para>
+      </step>
+      <step>
+        <para>
+         If you are using Juno or later releases, based on the number of resources that will be polled,
+         you can add additional central and compute agents as necessary. The agents are designed to scale horizontally.
+        </para>
+        <note>
+        <para>
+            For more information see, <link xlink:href=
+            "http://docs.openstack.org/admin-guide-cloud/content/section_telemetry-cetral-compute-agent-ha.html">
+            HA deployment for Central and Compute Agents</link>.
+        </para>
+        </note>
+      </step>
+      <step>
+        <para>
+         If you are using Juno or later releases, use the <literal>notifier://</literal> publisher rather
+         than <literal>rpc://</literal> as there is a certain level of overhead that comes with RPC.
+        </para>
+        <note>
+        <para>
+            For more information on RPC overhead, see <link xlink:href=
+            "https://www.rabbitmq.com/tutorials/tutorial-six-python.html">
+            RPC overhead info</link>.
+        </para>
+        </note>
+      </step>
+      </procedure>
+    </section>
+    <section xml:id="section_telemetry-data-storage-best-practices">
+    <title>Data storage</title>
+     <procedure>
+      <step>
+        <para>
+            We recommend that you avoid open-ended queries. In order to get better performance you can use
+            reasonable time ranges and/or other query constraints for retrieving measurements.
+        </para>
+        <para>
+            For example, this open-ended query might return an unpredictable amount of data:
+            <screen><prompt>$</prompt><userinput>ceilometer sample-list --meter cpu -q 'resource_id=INSTANCE_ID_1'</userinput></screen>
+
+            Whereas, this well-formed query returns a more reasonable amount of data, hence better performance:
+             <screen><prompt>$</prompt><userinput>ceilometer sample-list --meter cpu -q 'resource_id=INSTANCE_ID_1;timestamp&gt;2015-05-01T00:00:00;timestamp&lt;2015-06-01T00:00:00'</userinput>
+            </screen>
+        </para>
+      </step>
+      <step>
+          <para>
+            You can install the API behind <literal>mod_wsgi</literal>, as it provides more settings to tweak,
+            like <literal>threads</literal> and <literal>processes</literal> in case of <literal>WSGIDaemon</literal>.
+          </para>
+          <note>
+          <para>
+              For more information on how to configure <literal>mod_wsgi</literal>, see the <link xlink:href=
+                "http://docs.openstack.org/developer/ceilometer/install/mod_wsgi.html">
+              Install Documentation</link>.
+          </para>
+          </note>
+      </step>
+      <step>
+        <para>
+            The collection service provided by the Telemetry project is not intended to be
+            an archival service. Set a Time to Live (TTL) value to expire data and minimize
+            the database size. If you would like to keep your data for longer time period,
+            you may consider storing it in a data warehouse outside of Telemetry.
+        </para>
+        <note>
+        <para>
+            For more information on how to set the TTL, see <link xlink:href=
+                "http://docs.openstack.org/admin-guide-cloud/content/section_telemetry-storing-data.html">
+              TTL support for various back ends</link>.
+        </para>
+        </note>
+      </step>
+      <step>
+        <para>
+            We recommend that you do not use SQLAlchemy back end prior to the Juno release, as it previously contained extraneous
+            relationships to handle deprecated data models. This resulted in extremely poor query
+            performance.
+        </para>
+      </step>
+      <step>
+        <para>
+            We recommend that you do not run MongoDB on the same node as the controller. Keep it on a separate
+            node optimized for fast storage for better performance. Also it is advisable for the MongoDB
+            node to have a lot of memory.
+        </para>
+        <note>
+            <para>
+             For more information on how much memory you need, see <link xlink:href=
+                "http://docs.mongodb.org/manual/faq/diagnostics/#how-do-i-calculate-how-much-ram-i-need-for-my-application">
+                MongoDB FAQ</link>.
+            </para>
+        </note>
+      </step>
+      <step>
+        <para>
+            Use replica sets in MongoDB. Replica sets provide high availability through
+            automatic failover. If your primary node fails, MongoDB will elect a secondary
+            node to replace the primary node, and your cluster will remain functional.
+        </para>
+        <para>
+            For more information on replica sets, see the <link xlink:href=
+            "http://docs.mongodb.org/manual/tutorial/deploy-replica-set/">
+            MongoDB replica sets docs</link>.
+        </para>
+      </step>
+      <step>
+        <para>
+            Use sharding in MongoDB. Sharding helps in storing data records across
+            multiple machines and is the MongoDB’s approach to meet the demands of data growth.
+        </para>
+        <para>
+            For more information on sharding, see the <link xlink:href=
+            "http://docs.mongodb.org/manual/sharding/">
+            MongoDB sharding docs</link>.
+        </para>
+      </step>
+     </procedure>
+    </section>
+</section>