From 967b918bae7a76b6ec37d7f95bd473ebbfedb284 Mon Sep 17 00:00:00 2001 From: Bharat Kunwar Date: Sun, 4 Aug 2019 09:35:40 +0100 Subject: [PATCH] Implement separate db per tenancy At present, all time series are accumulated in the same database in InfluxDB. This makes queries slow for tenants that have less data. This patch enables the option to use separate database per tenancy. This changeset implements the changes on monasca-api which handles read requests to InfluxDB database. It also updates the relevant docs providing link to the migration tool which enables users to migrate their existing data to a database per tenant model. Change-Id: I7de6e0faf069b889d3953583b29a876c3d82c62c Story: 2006331 Task: 36073 --- doc/source/admin/index.rst | 15 ++++ .../influxdb/metrics_repository.py | 46 +++++++---- monasca_api/conf/influxdb.py | 4 + monasca_api/tests/test_repositories.py | 80 ++++++++++--------- ...rt-for-db-per-tenant-6ada0c3979de6df8.yaml | 6 ++ 5 files changed, 99 insertions(+), 52 deletions(-) create mode 100644 releasenotes/notes/influxdb-support-for-db-per-tenant-6ada0c3979de6df8.yaml diff --git a/doc/source/admin/index.rst b/doc/source/admin/index.rst index 5499d9f44..0e041340d 100644 --- a/doc/source/admin/index.rst +++ b/doc/source/admin/index.rst @@ -72,3 +72,18 @@ Enabling InfluxDB Time Series Index in existing deployments If enabling TSI on an existing InfluxDB install please follow the instructions for migrating existing data here: https://docs.influxdata.com/influxdb/v1.7/administration/upgrading/#upgrading-influxdb-1-3-1-4-no-tsi-preview-to-1-7-x-tsi-enabled + +Database Per Tenant +------------------- + +It is envisaged that separate database per tenant will be the default +behaviour in a future release of Monasca. Not only would it make queries +faster for tenants, it would also allow administrators to define +retention policy per tenancy. To enable this, set +`influxdb.db_per_tenant` to `True` in `monasca-{api,persister}` config +(it defaults to `False` at the moment if not set). + +To migrate existing data to database per tenant, refer to README.rst +under the following URL which also contains the Python script to +facilitate migration: +https://opendev.org/openstack/monasca-persister/src/branch/master/monasca_persister/tools/db-per-tenant/ diff --git a/monasca_api/common/repositories/influxdb/metrics_repository.py b/monasca_api/common/repositories/influxdb/metrics_repository.py index 12c55d836..7d699c03b 100644 --- a/monasca_api/common/repositories/influxdb/metrics_repository.py +++ b/monasca_api/common/repositories/influxdb/metrics_repository.py @@ -30,6 +30,7 @@ from monasca_api.common.repositories import exceptions from monasca_api.common.repositories import metrics_repository MEASUREMENT_NOT_FOUND_MSG = "measurement not found" +DATABASE_NOT_FOUND_MSG = "database not found" CONF = cfg.CONF LOG = log.getLogger(__name__) @@ -42,8 +43,7 @@ class MetricsRepository(metrics_repository.AbstractMetricsRepository): self.conf = cfg.CONF self.influxdb_client = client.InfluxDBClient( self.conf.influxdb.ip_address, self.conf.influxdb.port, - self.conf.influxdb.user, self.conf.influxdb.password, - self.conf.influxdb.database_name) + self.conf.influxdb.user, self.conf.influxdb.password) self._version = None self._init_version() except Exception as ex: @@ -259,11 +259,12 @@ class MetricsRepository(metrics_repository.AbstractMetricsRepository): else name.replace("'", "\\'").encode('utf-8') where_clause += ' from "{}" '.format(clean_name) - # tenant id - where_clause += " where _tenant_id = '{}' ".format(tenant_id) - # region - where_clause += " and _region = '{}' ".format(region) + where_clause += " where _region = '{}'".format(region) + + # tenant id + if not self.conf.influxdb.db_per_tenant: + where_clause += " and _tenant_id = '{}'".format(tenant_id) # dimensions - optional if dimensions: @@ -273,14 +274,14 @@ class MetricsRepository(metrics_repository.AbstractMetricsRepository): clean_dimension_name = dimension_name.replace("\'", "\\'") if PY3 \ else dimension_name.replace("\'", "\\'").encode('utf-8') if dimension_value == "": - where_clause += " and \"{}\" =~ /.+/ ".format( + where_clause += " and \"{}\" =~ /.+/".format( clean_dimension_name) elif '|' in dimension_value: # replace ' with \' to make query parsable clean_dimension_value = dimension_value.replace("\'", "\\'") if PY3 else \ dimension_value.replace("\'", "\\'").encode('utf-8') - where_clause += " and \"{}\" =~ /^{}$/ ".format( + where_clause += " and \"{}\" =~ /^{}$/".format( clean_dimension_name, clean_dimension_value) else: @@ -288,7 +289,7 @@ class MetricsRepository(metrics_repository.AbstractMetricsRepository): clean_dimension_value = dimension_value.replace("\'", "\\'") if PY3 else \ dimension_value.replace("\'", "\\'").encode('utf-8') - where_clause += " and \"{}\" = '{}' ".format( + where_clause += " and \"{}\" = '{}'".format( clean_dimension_name, clean_dimension_value) @@ -310,6 +311,19 @@ class MetricsRepository(metrics_repository.AbstractMetricsRepository): end_timestamp) return from_clause + def query_tenant_db(self, query, tenant_id): + database = ('%s_%s' % (self.conf.influxdb.database_name, tenant_id) + if self.conf.influxdb.db_per_tenant + else self.conf.influxdb.database_name) + try: + return self.influxdb_client.query(query, database=database) + except InfluxDBClientError as ex: + if (str(ex).startswith(DATABASE_NOT_FOUND_MSG) and + self.conf.influxdb.db_per_tenant): + return None + else: + raise + def list_metrics(self, tenant_id, region, name, dimensions, offset, limit, start_timestamp=None, end_timestamp=None): @@ -322,7 +336,7 @@ class MetricsRepository(metrics_repository.AbstractMetricsRepository): if offset: query += ' offset {}'.format(int(offset) + 1) - result = self.influxdb_client.query(query) + result = self.query_tenant_db(query, tenant_id) json_metric_list = self._build_serie_metric_list(result, tenant_id, @@ -585,7 +599,7 @@ class MetricsRepository(metrics_repository.AbstractMetricsRepository): dimensions = self._get_dimensions(tenant_id, region, name, dimensions) query += " slimit 1" - result = self.influxdb_client.query(query) + result = self.query_tenant_db(query, tenant_id) if not result: return json_measurement_list @@ -661,7 +675,7 @@ class MetricsRepository(metrics_repository.AbstractMetricsRepository): query = self._build_show_measurements_query(dimensions, None, tenant_id, region) - result = self.influxdb_client.query(query) + result = self.query_tenant_db(query, tenant_id) json_name_list = self._build_measurement_name_list(result) return json_name_list @@ -687,7 +701,7 @@ class MetricsRepository(metrics_repository.AbstractMetricsRepository): dimensions = self._get_dimensions(tenant_id, region, name, dimensions) query += " slimit 1" - result = self.influxdb_client.query(query) + result = self.query_tenant_db(query, tenant_id) if not result: return json_statistics_list @@ -879,7 +893,7 @@ class MetricsRepository(metrics_repository.AbstractMetricsRepository): query += where_clause + time_clause + offset_clause + limit_clause - result = self.influxdb_client.query(query) + result = self.query_tenant_db(query, tenant_id) if not result: return json_alarm_history_list @@ -931,7 +945,7 @@ class MetricsRepository(metrics_repository.AbstractMetricsRepository): tenant_id, region, start_timestamp, end_timestamp) - result = self.influxdb_client.query(query) + result = self.query_tenant_db(query, tenant_id) json_dim_name_list = self._build_serie_dimension_values( result, dimension_name) return json_dim_name_list @@ -946,7 +960,7 @@ class MetricsRepository(metrics_repository.AbstractMetricsRepository): tenant_id, region, start_timestamp, end_timestamp) - result = self.influxdb_client.query(query) + result = self.query_tenant_db(query, tenant_id) json_dim_name_list = self._build_serie_dimension_names(result) return json_dim_name_list except Exception as ex: diff --git a/monasca_api/conf/influxdb.py b/monasca_api/conf/influxdb.py index 540e99468..a55ae1df1 100644 --- a/monasca_api/conf/influxdb.py +++ b/monasca_api/conf/influxdb.py @@ -21,6 +21,10 @@ influxdb_opts = [ cfg.StrOpt('database_name', default='mon', help=''' Database name where metrics are stored +'''), + cfg.BoolOpt('db_per_tenant', default=False, + help=''' +Whether to use a separate database per tenant '''), cfg.HostAddressOpt('ip_address', default='127.0.0.1', help=''' diff --git a/monasca_api/tests/test_repositories.py b/monasca_api/tests/test_repositories.py index 0a459ee99..4a1a6f5b2 100644 --- a/monasca_api/tests/test_repositories.py +++ b/monasca_api/tests/test_repositories.py @@ -192,10 +192,10 @@ class TestRepoMetricsInfluxDB(base.BaseTestCase): @patch("monasca_api.common.repositories.influxdb." "metrics_repository.client.InfluxDBClient") - def test_list_dimension_values(self, influxdb_client_mock, timestamp=True): + def test_list_dimension_values(self, influxdb_client_mock, timestamp=False): mock_client = influxdb_client_mock.return_value - tenant = u'38dc2a2549f94d2e9a4fa1cc45a4970c' + tenant_id = u'38dc2a2549f94d2e9a4fa1cc45a4970c' region = u'useast' metric = u'custom_metric' column = u'hostname' @@ -209,29 +209,32 @@ class TestRepoMetricsInfluxDB(base.BaseTestCase): u'columns': [column] }] } - repo = influxdb_repo.MetricsRepository() mock_client.query.reset_mock() - if timestamp: - result = repo.list_dimension_values(tenant, region, metric, column, - start_timestamp, end_timestamp) - else: - result = repo.list_dimension_values(tenant, region, metric, column) + + db_per_tenant = repo.conf.influxdb.db_per_tenant + database = repo.conf.influxdb.database_name + database += "_%s" % tenant_id if db_per_tenant else "" + + result = (repo.list_dimension_values(tenant_id, region, metric, column, + start_timestamp, end_timestamp) + if timestamp else + repo.list_dimension_values(tenant_id, region, metric, column)) self.assertEqual(result, [{u'dimension_value': hostname}]) - expected_query = ('show tag values from "{metric}"' - ' with key = "{column}"' - ' where _tenant_id = \'{tenant}\'' - ' and _region = \'{region}\' ' - .format(tenant=tenant, region=region, - metric=metric, column=column)) - expected_query += (' and time >= {start_timestamp}000000u' - ' and time < {end_timestamp}000000u' - .format(start_timestamp=start_timestamp, - end_timestamp=end_timestamp) - if timestamp else '') - mock_client.query.assert_called_once_with(expected_query) + query = ('show tag values from "{metric}"' + ' with key = "{column}"' + ' where _region = \'{region}\'' + .format(region=region, metric=metric, column=column)) + query += ('' if db_per_tenant else ' and _tenant_id = \'{tenant_id}\'' + .format(tenant_id=tenant_id)) + query += (' and time >= {start_timestamp}000000u' + ' and time < {end_timestamp}000000u' + .format(start_timestamp=start_timestamp, + end_timestamp=end_timestamp) + if timestamp else '') + mock_client.query.assert_called_once_with(query, database=database) def test_list_dimension_values_with_timestamp(self): self.test_list_dimension_values(timestamp=True) @@ -241,7 +244,7 @@ class TestRepoMetricsInfluxDB(base.BaseTestCase): def test_list_dimension_names(self, influxdb_client_mock, timestamp=False): mock_client = influxdb_client_mock.return_value - tenant = u'38dc2a2549f94d2e9a4fa1cc45a4970c' + tenant_id = u'38dc2a2549f94d2e9a4fa1cc45a4970c' region = u'useast' metric = u'custom_metric' start_timestamp = 1571917171275 @@ -257,11 +260,15 @@ class TestRepoMetricsInfluxDB(base.BaseTestCase): repo = influxdb_repo.MetricsRepository() mock_client.query.reset_mock() - if timestamp: - result = repo.list_dimension_names(tenant, region, metric, - start_timestamp, end_timestamp) - else: - result = repo.list_dimension_names(tenant, region, metric) + + db_per_tenant = repo.conf.influxdb.db_per_tenant + database = repo.conf.influxdb.database_name + database += "_%s" % tenant_id if db_per_tenant else "" + + result = (repo.list_dimension_names(tenant_id, region, metric, + start_timestamp, end_timestamp) + if timestamp else + repo.list_dimension_names(tenant_id, region, metric)) self.assertEqual(result, [ @@ -269,17 +276,18 @@ class TestRepoMetricsInfluxDB(base.BaseTestCase): {u'dimension_name': u'service'} ]) - expected_query = ('show tag keys from "{metric}"' - ' where _tenant_id = \'{tenant}\'' - ' and _region = \'{region}\' ' - .format(tenant=tenant, region=region, metric=metric)) - expected_query += (' and time >= {start_timestamp}000000u' - ' and time < {end_timestamp}000000u' - .format(start_timestamp=start_timestamp, - end_timestamp=end_timestamp) - if timestamp else '') + query = ('show tag keys from "{metric}"' + ' where _region = \'{region}\'' + .format(region=region, metric=metric)) + query += ('' if db_per_tenant else ' and _tenant_id = \'{tenant_id}\'' + .format(tenant_id=tenant_id)) + query += (' and time >= {start_timestamp}000000u' + ' and time < {end_timestamp}000000u' + .format(start_timestamp=start_timestamp, + end_timestamp=end_timestamp) + if timestamp else '') - mock_client.query.assert_called_once_with(expected_query) + mock_client.query.assert_called_once_with(query, database=database) def test_list_dimension_names_with_timestamp(self): self.test_list_dimension_names(timestamp=True) diff --git a/releasenotes/notes/influxdb-support-for-db-per-tenant-6ada0c3979de6df8.yaml b/releasenotes/notes/influxdb-support-for-db-per-tenant-6ada0c3979de6df8.yaml new file mode 100644 index 000000000..62ae51328 --- /dev/null +++ b/releasenotes/notes/influxdb-support-for-db-per-tenant-6ada0c3979de6df8.yaml @@ -0,0 +1,6 @@ +--- +features: + - | + Configuration option `db_per_tenant` added for InfluxDB to allow + data points to be written to dedicated tenant database where the + `database_name` prefixes the tenant ID, e.g. monasca_tenantid.