Allow dynamic index settings

If searchlight's deployed in a shared ES cluster it's likely there'll
be server-wide default settings for indices that aren't beneficial to
Searchlight (logstash, for instance, recommends much larger indexing
latency than Searchlight since it's handling extremely high write
throughput). This patch allows index settings to be specified for indices
created by searchlight-manage. These settings can be specified in the
elasticsearch.index_settings config option as a comma-separated key:value
list, and can include any of the settings in
https://www.elastic.co/guide/en/elasticsearch/reference/current/index-modules.html

Change-Id: I11b128786045e7a6a8f7e46b216f059366ccc603
Implements: blueprint configurable-index-settings
This commit is contained in:
Steve McLellan 2016-06-23 17:00:37 -05:00
parent c6a46fd22a
commit bee851b372
11 changed files with 149 additions and 65 deletions

View File

@ -174,6 +174,9 @@ driver = messaging
[[post-config|$SEARCHLIGHT_CONF]] [[post-config|$SEARCHLIGHT_CONF]]
[elasticsearch]
index_settings = number_of_shards:1,number_of_replicas:0
[listener] [listener]
#notifications_pool = searchlight-listener #notifications_pool = searchlight-listener

View File

@ -97,6 +97,22 @@ following options control indexing behavior::
# is an explicit action # is an explicit action
action.auto_create_index: false action.auto_create_index: false
Index settings
**************
In addition to server-wide index settings it's possible to configure
Searchlight to apply settings to indices it creates with
``searchlight-manage``. Index settings can be specified as follows in
``searchlight.conf``::
[elasticsearch]
index_settings = refresh_interval:2s,number_of_replicas:1
The ``index.`` prefix for settings is optional; Searchlight will prepend it if
it's not given (e.g. ``index.refresh_interval`` is also acceptable).
Index settings are applied at creation time and so are not limited to the
'dynamic' index settings. They are applied to all created indices.
See also: See also:
* http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/docs-index\_.html * http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/docs-index\_.html

View File

@ -0,0 +1,11 @@
---
prelude: >
Allow dynamic index settings to allow Searchlight to
be tuned differently from other indices on a shared
Elasticsearch cluster.
features:
- Allows a deployer to override some index-level
Elasticsearch settings for all Searchlight-created
indices. This is useful when running Searchlight on
a shared Elasticsearch cluster. Settings are overridden
using the elasticsearch.index_settings config option.

View File

@ -156,7 +156,7 @@ class IndexCommands(object):
# Step #1: Create new indexes for each Resource Group Type. # Step #1: Create new indexes for each Resource Group Type.
# The index needs to be fully functional before it gets # The index needs to be fully functional before it gets
# added to any aliases. This inclues all settings and # added to any aliases. This includes all settings and
# mappings. Only then can we add it to the aliases. We first # mappings. Only then can we add it to the aliases. We first
# need to create all indexes. This is done by resource group. # need to create all indexes. This is done by resource group.
# We cache and turn off new indexes' refresh intervals, # We cache and turn off new indexes' refresh intervals,
@ -171,6 +171,7 @@ class IndexCommands(object):
for group, search, listen in resource_groups: for group, search, listen in resource_groups:
index_name = es_utils.create_new_index(group) index_name = es_utils.create_new_index(group)
index_names[group] = index_name index_names[group] = index_name
refresh_intervals[index_name] = \ refresh_intervals[index_name] = \
es_utils.get_index_refresh_interval(index_name) es_utils.get_index_refresh_interval(index_name)
# Disable refresh interval by setting its value to -1 # Disable refresh interval by setting its value to -1

View File

@ -37,7 +37,10 @@ search_opts = [
cfg.StrOpt('index_gc_deletes', default='300s', cfg.StrOpt('index_gc_deletes', default='300s',
help='Time for which deleted documents are held in order to' help='Time for which deleted documents are held in order to'
'prevent older, out-of-order updates causing them to be' 'prevent older, out-of-order updates causing them to be'
'created fresh in error.') 'created fresh in error.'),
cfg.DictOpt('index_settings', default={},
help='Dynamic index settings to be applied to new indices. '
'Format: opt1:val1,opt2:val2')
] ]
CONF = cfg.CONF CONF = cfg.CONF

View File

@ -135,7 +135,6 @@ class IndexBase(plugin.Plugin):
child_plugin.check_mapping_sort_fields() child_plugin.check_mapping_sort_fields()
# Prepare the new index for this document type. # Prepare the new index for this document type.
self.setup_index_settings(index_name=index_name)
self.setup_index_mapping(index_name=index_name) self.setup_index_mapping(index_name=index_name)
def initial_indexing(self, index_name=None, setup_data=True): def initial_indexing(self, index_name=None, setup_data=True):
@ -155,13 +154,6 @@ class IndexBase(plugin.Plugin):
if setup_data: if setup_data:
self.setup_data(index_name) self.setup_data(index_name)
def setup_index_settings(self, index_name):
"""Update index settings. """
index_settings = self.get_settings()
if index_settings:
self.engine.indices.put_settings(body=index_settings,
index=index_name)
def setup_index_mapping(self, index_name): def setup_index_mapping(self, index_name):
"""Update index document mapping.""" """Update index document mapping."""
# Using 'reversed' because in e-s 2.x, child mappings must precede # Using 'reversed' because in e-s 2.x, child mappings must precede
@ -447,10 +439,9 @@ class IndexBase(plugin.Plugin):
@abc.abstractmethod @abc.abstractmethod
def _get_rbac_field_filters(self, request_context): def _get_rbac_field_filters(self, request_context):
"""Return any RBAC field filters to be injected into an indices """Return any RBAC field filters in a list to be injected into an
query. Document type will be added to this list. indices query. Document type will be added.
""" """
return []
def get_notification_handler(self): def get_notification_handler(self):
"""Get the notification handler which implements NotificationBase.""" """Get the notification handler which implements NotificationBase."""
@ -465,17 +456,9 @@ class IndexBase(plugin.Plugin):
""" """
pass pass
def get_settings(self): @abc.abstractmethod
"""Get an index settings."""
return {
"index": {
"gc_deletes": CONF.elasticsearch.index_gc_deletes
}
}
def get_mapping(self): def get_mapping(self):
"""Get an index mapping.""" """Get an index mapping."""
return {}
def get_full_mapping(self): def get_full_mapping(self):
"""Gets the full mapping doc for this type, including children. This """Gets the full mapping doc for this type, including children. This

View File

@ -20,6 +20,7 @@ import logging
import oslo_utils import oslo_utils
import six import six
from oslo_config import cfg
from oslo_utils import encodeutils from oslo_utils import encodeutils
import searchlight.elasticsearch import searchlight.elasticsearch
from searchlight import i18n from searchlight import i18n
@ -32,6 +33,8 @@ LOG = logging.getLogger(__name__)
_LW = i18n._LW _LW = i18n._LW
_LE = i18n._LE _LE = i18n._LE
CONF = cfg.CONF
VERSION_CONFLICT_MSG = 'version_conflict_engine_exception' VERSION_CONFLICT_MSG = 'version_conflict_engine_exception'
@ -39,7 +42,7 @@ def get_now_str():
"""Wrapping this to make testing easier (mocking utcnow's troublesome) """Wrapping this to make testing easier (mocking utcnow's troublesome)
and keep it in one place in case oslo changes and keep it in one place in case oslo changes
""" """
return oslo_utils.timeutils.isotime(datetime.datetime.utcnow()) return oslo_utils.timeutils.isotime(oslo_utils.timeutils.utcnow())
def timestamp_to_isotime(timestamp): def timestamp_to_isotime(timestamp):
@ -152,12 +155,18 @@ def create_new_index(group):
""" """
es_engine = searchlight.elasticsearch.get_api() es_engine = searchlight.elasticsearch.get_api()
kwargs = {}
index_settings = _get_index_settings_from_config()
if index_settings:
kwargs = {'body': {'index': index_settings}}
index_name = None index_name = None
while not index_name: while not index_name:
# Use utcnow() to ensure that the name is unique. # Use utcnow() to ensure that the name is unique.
index_name = group + '-' + datetime.datetime.utcnow().strftime(FORMAT) now = oslo_utils.timeutils.utcnow()
index_name = (group + '-' + now.strftime(FORMAT))
try: try:
es_engine.indices.create(index=index_name) es_engine.indices.create(index=index_name, **kwargs)
except es_exc.TransportError as e: except es_exc.TransportError as e:
if (e.error.startswith("IndexAlreadyExistsException") or if (e.error.startswith("IndexAlreadyExistsException") or
e.error.startswith("index_already_exists_exception")): e.error.startswith("index_already_exists_exception")):
@ -169,6 +178,20 @@ def create_new_index(group):
return index_name return index_name
def _get_index_settings_from_config():
    """Collect the index settings configured for newly created indices.

    The result starts with ``gc_deletes`` taken from
    ``CONF.elasticsearch.index_gc_deletes`` (when that option is not None)
    and is then updated with every entry of
    ``CONF.elasticsearch.index_settings``. Because the dict option is
    applied second, a ``gc_deletes`` entry given there takes precedence.

    A leading ``index.`` on a setting name is optional and is stripped, so
    ``index.refresh_interval`` and ``refresh_interval`` are equivalent.

    :returns: dict mapping setting names (without the ``index.`` prefix)
              to their configured values; empty if nothing is configured.
    """
    # Use one constant for both the prefix test and the slice length so the
    # two can never drift apart.
    prefix = 'index.'

    index_settings = {}
    if CONF.elasticsearch.index_gc_deletes is not None:
        index_settings['gc_deletes'] = CONF.elasticsearch.index_gc_deletes

    for setting, value in six.iteritems(CONF.elasticsearch.index_settings):
        if setting.startswith(prefix):
            setting = setting[len(prefix):]
        index_settings[setting] = value

    return index_settings
def add_extra_mappings(index_name, doc_type_info): def add_extra_mappings(index_name, doc_type_info):
"""Add mappings for the specified doc_types if they already do not """Add mappings for the specified doc_types if they already do not
exist in the index. This is to work around a "feature" in Elasticsearch. exist in the index. This is to work around a "feature" in Elasticsearch.

View File

@ -13,11 +13,21 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import mock
from oslo_config import cfg
import oslo_utils
from searchlight.elasticsearch.plugins import utils as es_utils
from searchlight.elasticsearch import ROLE_USER_FIELD from searchlight.elasticsearch import ROLE_USER_FIELD
from searchlight.tests import fake_plugins from searchlight.tests import fake_plugins
from searchlight.tests import functional from searchlight.tests import functional
now = oslo_utils.timeutils.utcnow()
now_str = now.strftime(es_utils.FORMAT)
class TestSearchLoad(functional.FunctionalTest): class TestSearchLoad(functional.FunctionalTest):
def setUp(self): def setUp(self):
super(TestSearchLoad, self).setUp() super(TestSearchLoad, self).setUp()
@ -85,14 +95,26 @@ class TestSearchLoad(functional.FunctionalTest):
self.assertEqual( self.assertEqual(
['admin', 'user'], sorted(es_hits[0][ROLE_USER_FIELD])) ['admin', 'user'], sorted(es_hits[0][ROLE_USER_FIELD]))
def test_gc_verify_setting(self): def test_index_settings(self):
images_plugin = self.initialized_plugins['OS::Glance::Image'] """Test the default gc_delete interval plus some other
alias_name = images_plugin.alias_name_listener dynamic index settings
settings = self.elastic_connection.indices.get_settings(alias_name) """
# We are alias-based, not index-based. We pass in an alias to with mock.patch.object(cfg.CONF, 'elasticsearch') as mock_settings:
# get_setings() but it returns a dict based on the indexes. We mock_settings.index_gc_deletes = '100s'
# do not know the index name(s). We will verify the first index mock_settings.index_settings = {
# in the dict. 'refresh_interval': '2s',
index_name = list(settings)[0] 'index.number_of_replicas': 1
self.assertEqual( }
"300s", settings[index_name]['settings']['index']['gc_deletes'])
index_name = es_utils.create_new_index('test-index-settings')
try:
settings = self.elastic_connection.indices.get_settings(
index_name)
index_settings = settings[index_name]['settings']['index']
self.assertEqual("100s", index_settings['gc_deletes'])
self.assertEqual("2s", index_settings['refresh_interval'])
self.assertEqual("1", index_settings['number_of_replicas'])
finally:
es_utils.delete_index(index_name)

View File

@ -240,31 +240,9 @@ class TestPlugin(test_utils.BaseTestCase):
doc_type, mapping = six.next(plugin.get_full_mapping()) doc_type, mapping = six.next(plugin.get_full_mapping())
self.assertNotIn('doc_values', mapping['properties']['id']) self.assertNotIn('doc_values', mapping['properties']['id'])
def test_setup_index_settings(self):
mock_engine = mock.Mock()
# Test #1: Use the default settings for the plugin.
plugin = fake_plugins.FakeSimplePlugin(es_engine=mock_engine)
plugin.setup_index_settings(index_name='fake')
mock_engine.indices.put_settings.assert_called_once_with(
index='fake',
body={
'index': {
'gc_deletes': '300s'
}
})
# Test #2: The plugin has no settings.
mock_engine.reset_mock()
with mock.patch.object(plugin, 'get_settings', return_value=None):
plugin.setup_index_settings(index_name='fake')
mock_engine.indices.put_settings.assert_not_called()
@mock.patch('searchlight.elasticsearch.plugins.base.' @mock.patch('searchlight.elasticsearch.plugins.base.'
'IndexBase.setup_index_mapping') 'IndexBase.setup_index_mapping')
@mock.patch('searchlight.elasticsearch.plugins.base.' def test_prepare_index(self, mock_mapping):
'IndexBase.setup_index_settings')
def test_prepare_index(self, mock_settings, mock_mapping):
"""Verify Indexbase.prepare_index(). The method will verify that all """Verify Indexbase.prepare_index(). The method will verify that all
non-analyzed mapping fields that are raw, are truly marked as raw. non-analyzed mapping fields that are raw, are truly marked as raw.
This applies to any children plugins. There should not be any This applies to any children plugins. There should not be any
@ -285,12 +263,10 @@ class TestPlugin(test_utils.BaseTestCase):
}}}} }}}}
plugin.prepare_index('fake') plugin.prepare_index('fake')
mock_settings.assert_called_once_with(index_name='fake')
mock_mapping.assert_called_once_with(index_name='fake') mock_mapping.assert_called_once_with(index_name='fake')
# Test #2: Plugin with no children, bad "raw" mapping field. # Test #2: Plugin with no children, bad "raw" mapping field.
mock_mapping.reset_mock() mock_mapping.reset_mock()
mock_settings.reset_mock()
plugin = fake_plugins.FakeSimplePlugin(es_engine=mock_engine) plugin = fake_plugins.FakeSimplePlugin(es_engine=mock_engine)
with mock.patch.object(plugin, 'get_mapping') as mock_map: with mock.patch.object(plugin, 'get_mapping') as mock_map:
mock_map.return_value = {"properties": { mock_map.return_value = {"properties": {
@ -302,19 +278,16 @@ class TestPlugin(test_utils.BaseTestCase):
"sorting.") "sorting.")
self.assertRaisesRegexp(Exception, message, self.assertRaisesRegexp(Exception, message,
plugin.prepare_index, index_name='fake') plugin.prepare_index, index_name='fake')
mock_settings.assert_not_called()
mock_mapping.assert_not_called() mock_mapping.assert_not_called()
# Test #3: Plugin with two children. No "raw" mapping fields. # Test #3: Plugin with two children. No "raw" mapping fields.
mock_mapping.reset_mock() mock_mapping.reset_mock()
mock_settings.reset_mock()
parent_plugin = fake_plugins.FakeSimplePlugin(es_engine=mock_engine) parent_plugin = fake_plugins.FakeSimplePlugin(es_engine=mock_engine)
child1_plugin = fake_plugins.FakeChildPlugin(es_engine=mock_engine) child1_plugin = fake_plugins.FakeChildPlugin(es_engine=mock_engine)
child1_plugin.register_parent(parent_plugin) child1_plugin.register_parent(parent_plugin)
child2_plugin = fake_plugins.FakeChildPlugin(es_engine=mock_engine) child2_plugin = fake_plugins.FakeChildPlugin(es_engine=mock_engine)
child2_plugin.register_parent(parent_plugin) child2_plugin.register_parent(parent_plugin)
parent_plugin.prepare_index('fake') parent_plugin.prepare_index('fake')
mock_settings.assert_called_once_with(index_name='fake')
mock_mapping.assert_called_once_with(index_name='fake') mock_mapping.assert_called_once_with(index_name='fake')
@mock.patch('searchlight.elasticsearch.plugins.helper.' @mock.patch('searchlight.elasticsearch.plugins.helper.'

View File

@ -14,10 +14,17 @@
# under the License. # under the License.
import mock import mock
from oslo_config import cfg
import oslo_utils
from searchlight.elasticsearch.plugins import utils as plugin_utils from searchlight.elasticsearch.plugins import utils as plugin_utils
from searchlight.tests.unit import utils as unit_test_utils from searchlight.tests.unit import utils as unit_test_utils
from searchlight.tests import utils as test_utils from searchlight.tests import utils as test_utils
CONF = cfg.CONF
now = oslo_utils.timeutils.utcnow()
now_str = now.strftime(plugin_utils.FORMAT)
class TestPluginUtils(test_utils.BaseTestCase): class TestPluginUtils(test_utils.BaseTestCase):
def test_facet_value_query(self): def test_facet_value_query(self):
@ -128,3 +135,43 @@ class TestPluginUtils(test_utils.BaseTestCase):
} }
mock_engine.indices.put_settings.assert_called_with(expected_body, mock_engine.indices.put_settings.assert_called_with(expected_body,
'test-index') 'test-index')
@mock.patch('searchlight.elasticsearch.get_api')
def test_index_settings(self, mock_api):
mock_engine = mock.Mock()
mock_api.return_value = mock_engine
with mock.patch.object(CONF, 'elasticsearch') as mock_settings:
mock_settings.index_gc_deletes = '100s'
mock_settings.index_settings = {
'key1': 'value1',
'index.key2': 'value2',
'index.something.key3': 'value3'
}
with mock.patch('oslo_utils.timeutils.utcnow', return_value=now):
plugin_utils.create_new_index('test')
expected = {
'index': {
'key1': 'value1',
'key2': 'value2',
'something.key3': 'value3',
'gc_deletes': '100s'
}
}
mock_engine.indices.create.assert_called_with(index='test-' + now_str,
body=expected)
@mock.patch('searchlight.elasticsearch.get_api')
def test_no_index_settings(self, mock_api):
mock_engine = mock.Mock()
mock_api.return_value = mock_engine
with mock.patch('searchlight.elasticsearch.plugins.'
'utils._get_index_settings_from_config',
return_value={}):
with mock.patch('oslo_utils.timeutils.utcnow', return_value=now):
plugin_utils.create_new_index('test')
mock_engine.indices.create.assert_called_with(index='test-' + now_str)

View File

@ -65,7 +65,9 @@ class TestReindexingUtils(test_utils.BaseTestCase):
target_index=dst, target_index=dst,
query=expected_mult) query=expected_mult)
def test_create_new_index(self): @mock.patch('searchlight.elasticsearch.plugins.'
'utils._get_index_settings_from_config', return_value={})
def test_create_new_index(self, mock_get_settings):
# Regex for matching the index name. The index name is the group # Regex for matching the index name. The index name is the group
# group name appended with a time stamp. The format for the # group name appended with a time stamp. The format for the
# timestamp is defined in elasticsearch.plugins.utils and is # timestamp is defined in elasticsearch.plugins.utils and is