Allow dynamic index settings

If searchlight's deployed in a shared ES cluster it's likely there'll
be server-wide default settings for indices that aren't beneficial to
Searchlight (logstash, for instance, recommends much larger indexing
latency than Searchlight since it's handling extremely high write
throughput). This patch allows index settings to be specified for indices
created by searchlight-manage. These settings can be specified in the
elasticsearch.index_settings config option as a comma-separated key:value
list, and can include any of the settings in
https://www.elastic.co/guide/en/elasticsearch/reference/current/index-modules.html

Change-Id: I11b128786045e7a6a8f7e46b216f059366ccc603
Implements: blueprint configurable-index-settings
This commit is contained in:
Steve McLellan 2016-06-23 17:00:37 -05:00
parent c6a46fd22a
commit bee851b372
11 changed files with 149 additions and 65 deletions

View File

@ -174,6 +174,9 @@ driver = messaging
[[post-config|$SEARCHLIGHT_CONF]]
[elasticsearch]
index_settings = number_of_shards:1,number_of_replicas:0
[listener]
#notifications_pool = searchlight-listener

View File

@ -97,6 +97,22 @@ following options control indexing behavior::
# is an explicit action
action.auto_create_index: false
Index settings
**************
In addition to server-wide index settings it's possible to configure
Searchlight to apply settings to indices it creates with
``searchlight-manage``. Index settings can be specified as follows in
``searchlight.conf``::
[elasticsearch]
index_settings = refresh_interval:2s,number_of_replicas:1
The ``index.`` prefix for settings is optional; Searchlight will prepend it if
it's not given (e.g. ``index.refresh_interval`` is also acceptable).
Index settings are applied at creation time and so are not limited to the
'dynamic' index settings. They are applied to all created indices.
See also:
* http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/docs-index\_.html

View File

@ -0,0 +1,11 @@
---
prelude: >
Allow dynamic index settings to allow Searchlight to
be tuned differently from other indices on a shared
Elasticsearch cluster.
features:
- Allows a deployer to override some index-level
Elasticsearch settings for all Searchlight-created
indices. This is useful when running Searchlight on
a shared Elasticsearch cluster. Settings are overridden
using the elasticsearch.index_settings config option.

View File

@ -156,7 +156,7 @@ class IndexCommands(object):
# Step #1: Create new indexes for each Resource Group Type.
# The index needs to be fully functional before it gets
# added to any aliases. This inclues all settings and
# added to any aliases. This includes all settings and
# mappings. Only then can we add it to the aliases. We first
# need to create all indexes. This is done by resource group.
# We cache and turn off new indexes' refresh intervals,
@ -171,6 +171,7 @@ class IndexCommands(object):
for group, search, listen in resource_groups:
index_name = es_utils.create_new_index(group)
index_names[group] = index_name
refresh_intervals[index_name] = \
es_utils.get_index_refresh_interval(index_name)
# Disable refresh interval by setting its value to -1

View File

@ -37,7 +37,10 @@ search_opts = [
cfg.StrOpt('index_gc_deletes', default='300s',
help='Time for which deleted documents are held in order to'
'prevent older, out-of-order updates causing them to be'
'created fresh in error.')
'created fresh in error.'),
cfg.DictOpt('index_settings', default={},
help='Dynamic index settings to be applied to new indices. '
'Format: opt1:val1,opt2:val2')
]
CONF = cfg.CONF

View File

@ -135,7 +135,6 @@ class IndexBase(plugin.Plugin):
child_plugin.check_mapping_sort_fields()
# Prepare the new index for this document type.
self.setup_index_settings(index_name=index_name)
self.setup_index_mapping(index_name=index_name)
def initial_indexing(self, index_name=None, setup_data=True):
@ -155,13 +154,6 @@ class IndexBase(plugin.Plugin):
if setup_data:
self.setup_data(index_name)
def setup_index_settings(self, index_name):
"""Update index settings. """
index_settings = self.get_settings()
if index_settings:
self.engine.indices.put_settings(body=index_settings,
index=index_name)
def setup_index_mapping(self, index_name):
"""Update index document mapping."""
# Using 'reversed' because in e-s 2.x, child mappings must precede
@ -447,10 +439,9 @@ class IndexBase(plugin.Plugin):
@abc.abstractmethod
def _get_rbac_field_filters(self, request_context):
"""Return any RBAC field filters to be injected into an indices
query. Document type will be added to this list.
"""Return any RBAC field filters in a list to be injected into an
indices query. Document type will be added.
"""
return []
def get_notification_handler(self):
"""Get the notification handler which implements NotificationBase."""
@ -465,17 +456,9 @@ class IndexBase(plugin.Plugin):
"""
pass
def get_settings(self):
"""Get an index settings."""
return {
"index": {
"gc_deletes": CONF.elasticsearch.index_gc_deletes
}
}
@abc.abstractmethod
def get_mapping(self):
"""Get an index mapping."""
return {}
def get_full_mapping(self):
"""Gets the full mapping doc for this type, including children. This

View File

@ -20,6 +20,7 @@ import logging
import oslo_utils
import six
from oslo_config import cfg
from oslo_utils import encodeutils
import searchlight.elasticsearch
from searchlight import i18n
@ -32,6 +33,8 @@ LOG = logging.getLogger(__name__)
_LW = i18n._LW
_LE = i18n._LE
CONF = cfg.CONF
VERSION_CONFLICT_MSG = 'version_conflict_engine_exception'
@ -39,7 +42,7 @@ def get_now_str():
"""Wrapping this to make testing easier (mocking utcnow's troublesome)
and keep it in one place in case oslo changes
"""
return oslo_utils.timeutils.isotime(datetime.datetime.utcnow())
return oslo_utils.timeutils.isotime(oslo_utils.timeutils.utcnow())
def timestamp_to_isotime(timestamp):
@ -152,12 +155,18 @@ def create_new_index(group):
"""
es_engine = searchlight.elasticsearch.get_api()
kwargs = {}
index_settings = _get_index_settings_from_config()
if index_settings:
kwargs = {'body': {'index': index_settings}}
index_name = None
while not index_name:
# Use utcnow() to ensure that the name is unique.
index_name = group + '-' + datetime.datetime.utcnow().strftime(FORMAT)
now = oslo_utils.timeutils.utcnow()
index_name = (group + '-' + now.strftime(FORMAT))
try:
es_engine.indices.create(index=index_name)
es_engine.indices.create(index=index_name, **kwargs)
except es_exc.TransportError as e:
if (e.error.startswith("IndexAlreadyExistsException") or
e.error.startswith("index_already_exists_exception")):
@ -169,6 +178,20 @@ def create_new_index(group):
return index_name
def _get_index_settings_from_config():
    """Build the index-level settings to apply to a newly created index.

    Settings are collected from the ``[elasticsearch]`` config group:

    * ``index_gc_deletes`` (when not ``None``) is emitted as ``gc_deletes``.
    * Every key/value pair in ``index_settings`` is copied over. A leading
      ``index.`` on a key is stripped, because the caller nests the
      returned dict under an ``index`` key in the create-index body.

    Entries from ``index_settings`` are applied last, so an explicit
    ``gc_deletes`` given there replaces the ``index_gc_deletes`` value.

    :returns: dict mapping setting names (without the ``index.`` prefix)
              to their configured values; empty if nothing is configured.
    """
    # Use one literal for both the prefix test and the slice length so the
    # two can never drift apart.
    index_prefix = 'index.'

    index_settings = {}
    if CONF.elasticsearch.index_gc_deletes is not None:
        index_settings['gc_deletes'] = CONF.elasticsearch.index_gc_deletes

    for setting, value in six.iteritems(CONF.elasticsearch.index_settings):
        if setting.startswith(index_prefix):
            setting = setting[len(index_prefix):]
        index_settings[setting] = value

    return index_settings
def add_extra_mappings(index_name, doc_type_info):
"""Add mappings for the specified doc_types if they already do not
exist in the index. This is to work around a "feature" in Elasticsearch.

View File

@ -13,11 +13,21 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import mock
from oslo_config import cfg
import oslo_utils
from searchlight.elasticsearch.plugins import utils as es_utils
from searchlight.elasticsearch import ROLE_USER_FIELD
from searchlight.tests import fake_plugins
from searchlight.tests import functional
now = oslo_utils.timeutils.utcnow()
now_str = now.strftime(es_utils.FORMAT)
class TestSearchLoad(functional.FunctionalTest):
def setUp(self):
super(TestSearchLoad, self).setUp()
@ -85,14 +95,26 @@ class TestSearchLoad(functional.FunctionalTest):
self.assertEqual(
['admin', 'user'], sorted(es_hits[0][ROLE_USER_FIELD]))
def test_gc_verify_setting(self):
images_plugin = self.initialized_plugins['OS::Glance::Image']
alias_name = images_plugin.alias_name_listener
settings = self.elastic_connection.indices.get_settings(alias_name)
# We are alias-based, not index-based. We pass in an alias to
# get_setings() but it returns a dict based on the indexes. We
# do not know the index name(s). We will verify the first index
# in the dict.
index_name = list(settings)[0]
self.assertEqual(
"300s", settings[index_name]['settings']['index']['gc_deletes'])
def test_index_settings(self):
"""Test the default gc_delete interval plus some other
dynamic index settings
"""
with mock.patch.object(cfg.CONF, 'elasticsearch') as mock_settings:
mock_settings.index_gc_deletes = '100s'
mock_settings.index_settings = {
'refresh_interval': '2s',
'index.number_of_replicas': 1
}
index_name = es_utils.create_new_index('test-index-settings')
try:
settings = self.elastic_connection.indices.get_settings(
index_name)
index_settings = settings[index_name]['settings']['index']
self.assertEqual("100s", index_settings['gc_deletes'])
self.assertEqual("2s", index_settings['refresh_interval'])
self.assertEqual("1", index_settings['number_of_replicas'])
finally:
es_utils.delete_index(index_name)

View File

@ -240,31 +240,9 @@ class TestPlugin(test_utils.BaseTestCase):
doc_type, mapping = six.next(plugin.get_full_mapping())
self.assertNotIn('doc_values', mapping['properties']['id'])
def test_setup_index_settings(self):
mock_engine = mock.Mock()
# Test #1: Use the default settings for the plugin.
plugin = fake_plugins.FakeSimplePlugin(es_engine=mock_engine)
plugin.setup_index_settings(index_name='fake')
mock_engine.indices.put_settings.assert_called_once_with(
index='fake',
body={
'index': {
'gc_deletes': '300s'
}
})
# Test #2: The plugin has no settings.
mock_engine.reset_mock()
with mock.patch.object(plugin, 'get_settings', return_value=None):
plugin.setup_index_settings(index_name='fake')
mock_engine.indices.put_settings.assert_not_called()
@mock.patch('searchlight.elasticsearch.plugins.base.'
'IndexBase.setup_index_mapping')
@mock.patch('searchlight.elasticsearch.plugins.base.'
'IndexBase.setup_index_settings')
def test_prepare_index(self, mock_settings, mock_mapping):
def test_prepare_index(self, mock_mapping):
"""Verify Indexbase.prepare_index(). The method will verify that all
non-analyzed mapping fields that are raw, are truly marked as raw.
This applies to any children plugins. There should not be any
@ -285,12 +263,10 @@ class TestPlugin(test_utils.BaseTestCase):
}}}}
plugin.prepare_index('fake')
mock_settings.assert_called_once_with(index_name='fake')
mock_mapping.assert_called_once_with(index_name='fake')
# Test #2: Plugin with no children, bad "raw" mapping field.
mock_mapping.reset_mock()
mock_settings.reset_mock()
plugin = fake_plugins.FakeSimplePlugin(es_engine=mock_engine)
with mock.patch.object(plugin, 'get_mapping') as mock_map:
mock_map.return_value = {"properties": {
@ -302,19 +278,16 @@ class TestPlugin(test_utils.BaseTestCase):
"sorting.")
self.assertRaisesRegexp(Exception, message,
plugin.prepare_index, index_name='fake')
mock_settings.assert_not_called()
mock_mapping.assert_not_called()
# Test #3: Plugin with two children. No "raw" mapping fields.
mock_mapping.reset_mock()
mock_settings.reset_mock()
parent_plugin = fake_plugins.FakeSimplePlugin(es_engine=mock_engine)
child1_plugin = fake_plugins.FakeChildPlugin(es_engine=mock_engine)
child1_plugin.register_parent(parent_plugin)
child2_plugin = fake_plugins.FakeChildPlugin(es_engine=mock_engine)
child2_plugin.register_parent(parent_plugin)
parent_plugin.prepare_index('fake')
mock_settings.assert_called_once_with(index_name='fake')
mock_mapping.assert_called_once_with(index_name='fake')
@mock.patch('searchlight.elasticsearch.plugins.helper.'

View File

@ -14,10 +14,17 @@
# under the License.
import mock
from oslo_config import cfg
import oslo_utils
from searchlight.elasticsearch.plugins import utils as plugin_utils
from searchlight.tests.unit import utils as unit_test_utils
from searchlight.tests import utils as test_utils
CONF = cfg.CONF
now = oslo_utils.timeutils.utcnow()
now_str = now.strftime(plugin_utils.FORMAT)
class TestPluginUtils(test_utils.BaseTestCase):
def test_facet_value_query(self):
@ -128,3 +135,43 @@ class TestPluginUtils(test_utils.BaseTestCase):
}
mock_engine.indices.put_settings.assert_called_with(expected_body,
'test-index')
@mock.patch('searchlight.elasticsearch.get_api')
def test_index_settings(self, mock_api):
mock_engine = mock.Mock()
mock_api.return_value = mock_engine
with mock.patch.object(CONF, 'elasticsearch') as mock_settings:
mock_settings.index_gc_deletes = '100s'
mock_settings.index_settings = {
'key1': 'value1',
'index.key2': 'value2',
'index.something.key3': 'value3'
}
with mock.patch('oslo_utils.timeutils.utcnow', return_value=now):
plugin_utils.create_new_index('test')
expected = {
'index': {
'key1': 'value1',
'key2': 'value2',
'something.key3': 'value3',
'gc_deletes': '100s'
}
}
mock_engine.indices.create.assert_called_with(index='test-' + now_str,
body=expected)
@mock.patch('searchlight.elasticsearch.get_api')
def test_no_index_settings(self, mock_api):
mock_engine = mock.Mock()
mock_api.return_value = mock_engine
with mock.patch('searchlight.elasticsearch.plugins.'
'utils._get_index_settings_from_config',
return_value={}):
with mock.patch('oslo_utils.timeutils.utcnow', return_value=now):
plugin_utils.create_new_index('test')
mock_engine.indices.create.assert_called_with(index='test-' + now_str)

View File

@ -65,7 +65,9 @@ class TestReindexingUtils(test_utils.BaseTestCase):
target_index=dst,
query=expected_mult)
def test_create_new_index(self):
@mock.patch('searchlight.elasticsearch.plugins.'
'utils._get_index_settings_from_config', return_value={})
def test_create_new_index(self, mock_get_settings):
# Regex for matching the index name. The index name is the group
# name appended with a timestamp. The format for the
# timestamp is defined in elasticsearch.plugins.utils and is