The compaction_strategy_options values read from the `system.schema_columnfamilies` table are all strings, so comparing them against the typed values (ints, floats) defined on models causes sync_table() to issue redundant ALTER TABLE statements even when the compaction options have not changed. This patch casts the model-defined values to strings so that unchanged options compare equal. It also normalizes handling of the `min_threshold` and `max_threshold` options for SizeTieredCompactionStrategy, which live outside the `compaction_strategy_options` mapping and are stored under different names in `system.schema_columnfamilies` than the CQL options use. A typo in the `BaseModel.__compaction_tombstone_threshold__` option is also fixed.
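For illustration, a minimal sketch of the comparison problem (the option values below are hypothetical, not taken from any particular schema):

    # Values read back from system.schema_columnfamilies are strings, while the
    # model-defined options may be ints/floats, so a naive dict comparison always differs.
    existing = {'tombstone_threshold': '0.2', 'min_threshold': '4'}  # as introspected from Cassandra
    desired = {'tombstone_threshold': 0.2, 'min_threshold': 4}       # as declared on the model
    assert existing != desired    # spurious difference -> redundant ALTER TABLE
    desired = {key: str(value) for key, value in desired.items()}
    assert existing == desired    # casting to str makes unchanged options compare equal
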
import json
import warnings

from cqlengine import SizeTieredCompactionStrategy, LeveledCompactionStrategy
from cqlengine import ONE
from cqlengine.named import NamedTable

from cqlengine.connection import connection_manager, execute
from cqlengine.exceptions import CQLEngineException

import logging
from collections import namedtuple

Field = namedtuple('Field', ['name', 'type'])

logger = logging.getLogger(__name__)


# system keyspaces
schema_columnfamilies = NamedTable('system', 'schema_columnfamilies')


def create_keyspace(name, strategy_class='SimpleStrategy', replication_factor=3, durable_writes=True, **replication_values):
    """
    creates a keyspace

    :param name: name of keyspace to create
    :param strategy_class: keyspace replication strategy class
    :param replication_factor: keyspace replication factor
    :param durable_writes: 1.2 only, write log is bypassed if set to False
    :param **replication_values: 1.2 only, additional values to add to the replication data map
    """
    with connection_manager() as con:
        _, keyspaces = con.execute("""SELECT keyspace_name FROM system.schema_keyspaces""", {}, ONE)
        if name not in [r[0] for r in keyspaces]:
            # try the 1.2 method
            replication_map = {
                'class': strategy_class,
                'replication_factor': replication_factor
            }
            replication_map.update(replication_values)
            if strategy_class.lower() != 'simplestrategy':
                # Although the Cassandra documentation states for `replication_factor`
                # that it is "Required if class is SimpleStrategy; otherwise,
                # not used.", we get an error if it is present.
                replication_map.pop('replication_factor', None)

            query = """
            CREATE KEYSPACE {}
            WITH REPLICATION = {}
            """.format(name, json.dumps(replication_map).replace('"', "'"))

            if strategy_class != 'SimpleStrategy':
                query += " AND DURABLE_WRITES = {}".format('true' if durable_writes else 'false')

            execute(query)


def delete_keyspace(name):
    with connection_manager() as con:
        _, keyspaces = con.execute("""SELECT keyspace_name FROM system.schema_keyspaces""", {}, ONE)
        if name in [r[0] for r in keyspaces]:
            execute("DROP KEYSPACE {}".format(name))


def create_table(model, create_missing_keyspace=True):
    warnings.warn("create_table has been deprecated in favor of sync_table and will be removed in a future release", DeprecationWarning)
    sync_table(model, create_missing_keyspace)


def sync_table(model, create_missing_keyspace=True):

    if model.__abstract__:
        raise CQLEngineException("cannot create table from abstract model")

    # construct query string
    cf_name = model.column_family_name()
    raw_cf_name = model.column_family_name(include_keyspace=False)

    ks_name = model._get_keyspace()
    # create missing keyspace
    if create_missing_keyspace:
        create_keyspace(ks_name)

    with connection_manager() as con:
        tables = con.execute(
            "SELECT columnfamily_name from system.schema_columnfamilies WHERE keyspace_name = :ks_name",
            {'ks_name': ks_name},
            ONE
        )
        tables = [x[0] for x in tables.results]

    # check for an existing column family
    if raw_cf_name not in tables:
        qs = get_create_table(model)

        try:
            execute(qs)
        except CQLEngineException as ex:
            # 1.2 doesn't return cf names, so we have to examine the exception
            # and ignore if it says the column family already exists
            if "Cannot add already existing column family" not in unicode(ex):
                raise
    else:
        # see if we're missing any columns
        fields = get_fields(model)
        field_names = [x.name for x in fields]
        for name, col in model._columns.items():
            if col.primary_key or col.partition_key: continue  # we can't mess with the PK
            if col.db_field_name in field_names: continue  # skip columns already defined

            # add missing column using the column def
            query = "ALTER TABLE {} add {}".format(cf_name, col.get_column_def())
            logger.debug(query)
            execute(query)

        update_compaction(model)

    # get existing index names, skip ones that already exist
    with connection_manager() as con:
        _, idx_names = con.execute(
            "SELECT index_name from system.\"IndexInfo\" WHERE table_name=:table_name",
            {'table_name': raw_cf_name},
            ONE
        )

    idx_names = [i[0] for i in idx_names]
    idx_names = filter(None, idx_names)

    indexes = [c for n, c in model._columns.items() if c.index]
    if indexes:
        for column in indexes:
            if column.db_index_name in idx_names: continue
            qs = ['CREATE INDEX index_{}_{}'.format(raw_cf_name, column.db_field_name)]
            qs += ['ON {}'.format(cf_name)]
            qs += ['("{}")'.format(column.db_field_name)]
            qs = ' '.join(qs)

            try:
                execute(qs)
            except CQLEngineException:
                # index already exists
                pass


def get_create_table(model):
    cf_name = model.column_family_name()
    qs = ['CREATE TABLE {}'.format(cf_name)]

    # add column types
    pkeys = []   # primary keys
    ckeys = []   # clustering keys
    qtypes = []  # field types

    def add_column(col):
        s = col.get_column_def()
        if col.primary_key:
            keys = (pkeys if col.partition_key else ckeys)
            keys.append('"{}"'.format(col.db_field_name))
        qtypes.append(s)

    for name, col in model._columns.items():
        add_column(col)

    qtypes.append('PRIMARY KEY (({}){})'.format(', '.join(pkeys), ckeys and ', ' + ', '.join(ckeys) or ''))

    qs += ['({})'.format(', '.join(qtypes))]

    with_qs = ['read_repair_chance = {}'.format(model.__read_repair_chance__)]

    _order = ['"{}" {}'.format(c.db_field_name, c.clustering_order or 'ASC') for c in model._clustering_keys.values()]
    if _order:
        with_qs.append('clustering order by ({})'.format(', '.join(_order)))

    compaction_options = get_compaction_options(model)
    if compaction_options:
        compaction_options = json.dumps(compaction_options).replace('"', "'")
        with_qs.append("compaction = {}".format(compaction_options))

    # add read_repair_chance
    qs += ['WITH {}'.format(' AND '.join(with_qs))]

    qs = ' '.join(qs)
    return qs


def get_compaction_options(model):
    """
    Generates a dictionary (later converted to a string) for creating and altering
    tables with a compaction strategy

    :param model:
    :return:
    """
    if not model.__compaction__:
        return {}

    result = {'class': model.__compaction__}

    def setter(key, limited_to_strategy=None):
        """
        sets key in result, checking if the key is limited to either SizeTiered or Leveled
        :param key: one of the compaction options, like "bucket_high"
        :param limited_to_strategy: SizeTieredCompactionStrategy, LeveledCompactionStrategy
        :return:
        """
        mkey = "__compaction_{}__".format(key)
        tmp = getattr(model, mkey)
        if tmp and limited_to_strategy and limited_to_strategy != model.__compaction__:
            raise CQLEngineException("{} is limited to {}".format(key, limited_to_strategy))

        if tmp:
            # Explicitly cast the values to strings to be able to compare the
            # values against introspected values from Cassandra.
            result[key] = str(tmp)

    setter('tombstone_compaction_interval')
    setter('tombstone_threshold')

    setter('bucket_high', SizeTieredCompactionStrategy)
    setter('bucket_low', SizeTieredCompactionStrategy)
    setter('max_threshold', SizeTieredCompactionStrategy)
    setter('min_threshold', SizeTieredCompactionStrategy)
    setter('min_sstable_size', SizeTieredCompactionStrategy)

    setter('sstable_size_in_mb', LeveledCompactionStrategy)

    return result


def get_fields(model):
    # returns all fields that aren't part of the PK
    ks_name = model._get_keyspace()
    col_family = model.column_family_name(include_keyspace=False)

    with connection_manager() as con:
        query = "SELECT column_name, validator FROM system.schema_columns \
            WHERE keyspace_name = :ks_name AND columnfamily_name = :col_family"

        logger.debug("get_fields %s %s", ks_name, col_family)

        tmp = con.execute(query, {'ks_name': ks_name, 'col_family': col_family}, ONE)
        # convert to Field named tuples
        return [Field(x[0], x[1]) for x in tmp.results]


def get_table_settings(model):
    return schema_columnfamilies.objects.consistency(ONE).get(
        keyspace_name=model._get_keyspace(),
        columnfamily_name=model.column_family_name(include_keyspace=False))


def update_compaction(model):
    """Updates the compaction options for the given model if necessary.

    :param model: The model to update.

    :return: `True`, if the compaction options were modified in Cassandra,
        `False` otherwise.
    :rtype: bool
    """
    logger.debug("Checking %s for compaction differences", model)
    row = get_table_settings(model)

    # check compaction_strategy_class
    if not model.__compaction__:
        return False

    do_update = not row['compaction_strategy_class'].endswith(model.__compaction__)

    existing_options = json.loads(row['compaction_strategy_options'])
    # The min/max thresholds are stored differently in the system data dictionary
    existing_options.update({
        'min_threshold': str(row['min_compaction_threshold']),
        'max_threshold': str(row['max_compaction_threshold']),
    })

    desired_options = get_compaction_options(model)
    desired_options.pop('class', None)

    for k, v in desired_options.items():
        val = existing_options.pop(k, None)
        if val != v:
            do_update = True

    # check compaction_strategy_options
    if do_update:
        options = get_compaction_options(model)
        # jsonify
        options = json.dumps(options).replace('"', "'")
        cf_name = model.column_family_name()
        query = "ALTER TABLE {} with compaction = {}".format(cf_name, options)
        logger.debug(query)
        execute(query)
        return True

    return False


def delete_table(model):
    warnings.warn("delete_table has been deprecated in favor of drop_table()", DeprecationWarning)
    return drop_table(model)


def drop_table(model):

    # don't try to delete non-existent tables
    ks_name = model._get_keyspace()
    with connection_manager() as con:
        _, tables = con.execute(
            "SELECT columnfamily_name from system.schema_columnfamilies WHERE keyspace_name = :ks_name",
            {'ks_name': ks_name},
            ONE
        )
    raw_cf_name = model.column_family_name(include_keyspace=False)
    if raw_cf_name not in [t[0] for t in tables]:
        return

    cf_name = model.column_family_name()
    execute('drop table {};'.format(cf_name))
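
A usage sketch of the intended effect (the model, its columns, and the option values are illustrative only, and an established cqlengine connection is assumed to have been set up beforehand):

    from cqlengine import columns, SizeTieredCompactionStrategy
    from cqlengine.models import Model
    from cqlengine.management import sync_table

    class ExampleModel(Model):                    # hypothetical model, for illustration only
        __compaction__ = SizeTieredCompactionStrategy
        __compaction_min_threshold__ = 2
        __compaction_max_threshold__ = 16
        __compaction_tombstone_threshold__ = 0.2  # uses the corrected option name
        example_id = columns.Text(primary_key=True)

    sync_table(ExampleModel)   # creates the table, or alters compaction if it drifted
    sync_table(ExampleModel)   # compaction options now compare equal: no redundant ALTER TABLE

With the string casts in get_compaction_options() and the min/max threshold normalization in update_compaction(), the second call finds the introspected options identical to the desired ones and skips the ALTER.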