Files
deb-python-cassandra-driver/cassandra/metadata.py
2013-07-19 13:47:50 -05:00

735 lines
23 KiB
Python

from bisect import bisect_left
from collections import defaultdict
try:
from collections import OrderedDict
except ImportError: # Python <2.7
from cassandra.util import OrderedDict
from hashlib import md5
import json
import logging
import re
from threading import RLock
import weakref
murmur3 = None
try:
from murmur3 import murmur3
except ImportError, exc:
pass
import cassandra.cqltypes as types
from cassandra.marshal import varint_unpack
from cassandra.pool import Host
log = logging.getLogger(__name__)
_keywords = set((
'select', 'from', 'where', 'and', 'key', 'insert', 'update', 'with',
'limit', 'using', 'use', 'count', 'set',
'begin', 'apply', 'batch', 'truncate', 'delete', 'in', 'create',
'keyspace', 'schema', 'columnfamily', 'table', 'index', 'on', 'drop',
'primary', 'into', 'values', 'timestamp', 'ttl', 'alter', 'add', 'type',
'compact', 'storage', 'order', 'by', 'asc', 'desc', 'clustering',
'token', 'writetime', 'map', 'list', 'to'
))
_unreserved_keywords = set((
'key', 'clustering', 'ttl', 'compact', 'storage', 'type', 'values'
))
class Metadata(object):
    """
    Holds a representation of the cluster schema and topology.
    """

    cluster_name = None
    """ The string name of the cluster. """

    keyspaces = None
    """
    A map from keyspace names to matching :class:`~.KeyspaceMetadata` instances.
    """

    token_map = None
    """ A :class:`~.TokenMap` instance describing the ring topology. """

    def __init__(self, cluster):
        # use a weak reference so that the Cluster object can be GC'ed.
        # Normally the cycle detector would handle this, but implementing
        # __del__ disables that.
        self.cluster_ref = weakref.ref(cluster)
        self.keyspaces = {}
        self._hosts = {}
        self._hosts_lock = RLock()

    def export_schema_as_string(self):
        """
        Returns a string that can be executed as a query in order to recreate
        the entire schema.  The string is formatted to be human readable.
        """
        return "\n".join(ks.export_as_string() for ks in self.keyspaces.values())

    def rebuild_schema(self, keyspace, table, ks_results, cf_results, col_results):
        """
        Rebuild the view of the current schema from a fresh set of rows from
        the system schema tables.

        When `table` is falsy, every row of `ks_results` is turned into a
        keyspace (with its tables); if `keyspace` is falsy as well, keyspaces
        that were not in `ks_results` are dropped.  When `table` is set, only
        the tables of the already-known `keyspace` are refreshed.

        For internal use only.
        """
        # Group the raw rows: keyspace -> [table rows] and
        # keyspace -> table -> [column rows].
        cf_def_rows = defaultdict(list)
        col_def_rows = defaultdict(lambda: defaultdict(list))

        for row in cf_results:
            cf_def_rows[row["keyspace_name"]].append(row)

        for row in col_results:
            ksname = row["keyspace_name"]
            cfname = row["columnfamily_name"]
            col_def_rows[ksname][cfname].append(row)

        # either table or ks_results must be None
        if not table:
            # ks_results is not None
            added_keyspaces = set()
            for row in ks_results:
                keyspace_meta = self._build_keyspace_metadata(row)
                for table_row in cf_def_rows.get(keyspace_meta.name, []):
                    table_meta = self._build_table_metadata(
                        keyspace_meta, table_row, col_def_rows[keyspace_meta.name])
                    keyspace_meta.tables[table_meta.name] = table_meta

                added_keyspaces.add(keyspace_meta.name)
                self.keyspaces[keyspace_meta.name] = keyspace_meta

            if not keyspace:
                # remove not-just-added keyspaces
                self.keyspaces = dict((name, meta) for name, meta in self.keyspaces.items()
                                      if name in added_keyspaces)
        else:
            # keyspace is not None, table is not None
            keyspace_meta = self.keyspaces.get(keyspace)
            if keyspace_meta is None:
                # we're trying to update a table in a keyspace we don't know
                # about, something went wrong.  Bail out instead of touching
                # an unbound keyspace_meta below.
                # TODO submit schema refresh
                log.error(
                    "Cannot refresh table %s: keyspace %s is not known",
                    table, keyspace)
                return

            for table_row in cf_def_rows.get(keyspace, []):
                table_meta = self._build_table_metadata(
                    keyspace_meta, table_row, col_def_rows[keyspace])
                keyspace_meta.tables[table_meta.name] = table_meta

    def _build_keyspace_metadata(self, row):
        """ Build a :class:`.KeyspaceMetadata` from one keyspace schema row. """
        name = row["keyspace_name"]
        durable_writes = row["durable_writes"]
        strategy_class = row["strategy_class"]
        strategy_options = json.loads(row["strategy_options"])
        return KeyspaceMetadata(name, durable_writes, strategy_class, strategy_options)

    def _build_table_metadata(self, keyspace_metadata, row, col_rows):
        """
        Build a :class:`.TableMetadata` from one table schema row.

        `col_rows` maps table names to the list of column rows for the
        keyspace that `row` belongs to.
        """
        cfname = row["columnfamily_name"]

        comparator = types.lookup_casstype(row["comparator"])
        if issubclass(comparator, types.CompositeType):
            column_name_types = comparator.subtypes
            is_composite = True
        else:
            column_name_types = (comparator,)
            is_composite = False

        num_column_name_components = len(column_name_types)
        last_col = column_name_types[-1]

        column_aliases = json.loads(row["column_aliases"])
        # Work out, from the comparator layout, whether the table is compact,
        # whether it has a single value column, and how many comparator
        # components are clustering columns.
        if is_composite:
            if issubclass(last_col, types.ColumnToCollectionType):
                # collections
                is_compact = False
                has_value = False
                clustering_size = num_column_name_components - 2
            elif (len(column_aliases) == num_column_name_components - 1
                    and issubclass(last_col, types.UTF8Type)):
                # aliases?
                is_compact = False
                has_value = False
                clustering_size = num_column_name_components - 1
            else:
                # compact table
                is_compact = True
                has_value = True
                clustering_size = num_column_name_components
        else:
            is_compact = True
            if column_aliases or not col_rows.get(cfname):
                has_value = True
                clustering_size = num_column_name_components
            else:
                has_value = False
                clustering_size = 0

        table_meta = TableMetadata(keyspace_metadata, cfname)
        table_meta.comparator = comparator

        # partition key
        key_aliases = row.get("key_aliases")
        key_aliases = json.loads(key_aliases) if key_aliases else []

        key_type = types.lookup_casstype(row["key_validator"])
        key_types = key_type.subtypes if issubclass(key_type, types.CompositeType) else [key_type]
        for i, col_type in enumerate(key_types):
            # fall back to "key", "key1", "key2", ... when no alias is given
            if len(key_aliases) > i:
                column_name = key_aliases[i]
            elif i == 0:
                column_name = "key"
            else:
                column_name = "key%d" % i

            col = ColumnMetadata(table_meta, column_name, col_type)
            table_meta.columns[column_name] = col
            table_meta.partition_key.append(col)

        # clustering key
        for i in range(clustering_size):
            if len(column_aliases) > i:
                column_name = column_aliases[i]
            else:
                column_name = "column%d" % i

            col = ColumnMetadata(table_meta, column_name, column_name_types[i])
            table_meta.columns[column_name] = col
            table_meta.clustering_key.append(col)

        # value alias (if present)
        if has_value:
            validator = types.lookup_casstype(row["default_validator"])
            if not key_aliases:  # TODO are we checking the right thing here?
                value_alias = "value"
            else:
                value_alias = row["value_alias"]

            col = ColumnMetadata(table_meta, value_alias, validator)
            table_meta.columns[value_alias] = col

        # other normal columns
        if col_rows:
            for col_row in col_rows[cfname]:
                column_meta = self._build_column_metadata(table_meta, col_row)
                table_meta.columns[column_meta.name] = column_meta

        table_meta.options = self._build_table_options(row, is_compact)
        return table_meta

    def _build_table_options(self, row, is_compact_storage):
        """ Setup the mostly-non-schema table options, like caching settings """
        options = dict((o, row.get(o)) for o in TableMetadata.recognized_options)
        options["is_compact_storage"] = is_compact_storage
        return options

    def _build_column_metadata(self, table_metadata, row):
        """ Build a :class:`.ColumnMetadata` (and its index, if any) from a column row. """
        name = row["column_name"]
        data_type = types.lookup_casstype(row["validator"])
        column_meta = ColumnMetadata(table_metadata, name, data_type)
        index_meta = self._build_index_metadata(column_meta, row)
        column_meta.index = index_meta
        return column_meta

    def _build_index_metadata(self, column_metadata, row):
        """
        Return an :class:`.IndexMetadata` when the column row carries an
        index name or index type, otherwise :const:`None`.
        """
        index_name = row.get("index_name")
        index_type = row.get("index_type")
        if index_name or index_type:
            return IndexMetadata(column_metadata, index_name, index_type)
        else:
            return None

    def rebuild_token_map(self, partitioner, token_map):
        """
        Rebuild our view of the topology from fresh rows from the
        system topology tables.

        `partitioner` is the partitioner class name; `token_map` maps each
        host to an iterable of its token strings.  For an unrecognized
        partitioner, :attr:`token_map` is reset to :const:`None`.

        For internal use only.
        """
        if partitioner.endswith('RandomPartitioner'):
            token_class = MD5Token
        elif partitioner.endswith('Murmur3Partitioner'):
            token_class = Murmur3Token
            if murmur3 is None:
                log.warning(
                    "The murmur3 C extension is not available, token awareness "
                    "cannot be supported for the Murmur3Partitioner")
        elif partitioner.endswith('ByteOrderedPartitioner'):
            token_class = BytesToken
        else:
            self.token_map = None
            return

        tokens_to_hosts = defaultdict(set)
        ring = []
        # items() behaves the same as iteritems() here and also exists on
        # Python 3 dicts.
        for host, token_strings in token_map.items():
            for token_string in token_strings:
                token = token_class(token_string)
                ring.append(token)
                tokens_to_hosts[token].add(host)

        ring = sorted(ring)
        self.token_map = TokenMap(token_class, tokens_to_hosts, ring)

    def get_replicas(self, key):
        """
        Returns a list of :class:`.Host` instances that are replicas for a given
        partition key.  An empty list is returned when no token map is
        available or the key cannot be hashed (:class:`NoMurmur3`).
        """
        t = self.token_map
        if not t:
            return []
        try:
            return t.get_replicas(t.token_class.from_key(key))
        except NoMurmur3:
            return []

    def add_host(self, address):
        """
        Create a :class:`.Host` for `address` unless one is already known.

        Returns the new host after registering the cluster with its monitor,
        or :const:`None` if the address was already present.
        """
        cluster = self.cluster_ref()
        with self._hosts_lock:
            if address not in self._hosts:
                new_host = Host(address, cluster.conviction_policy_factory)
                self._hosts[address] = new_host
            else:
                return None

        new_host.monitor.register(cluster)
        return new_host

    def remove_host(self, host):
        """ Forget `host`; the result is truthy only if it was known. """
        with self._hosts_lock:
            return bool(self._hosts.pop(host.address, False))

    def get_host(self, address):
        """ Return the host stored for `address`, or :const:`None`. """
        return self._hosts.get(address)

    def all_hosts(self):
        """
        Returns a list of all known :class:`.Host` instances in the cluster.
        """
        with self._hosts_lock:
            # copy into a list so callers never see a live dict view
            return list(self._hosts.values())
class KeyspaceMetadata(object):
    """
    A representation of the schema for a single keyspace.
    """

    name = None
    """ The string name of the keyspace """

    durable_writes = True
    """
    A boolean indicating whether durable writes are enabled for this keyspace
    or not
    """

    replication = None
    """
    A dict holding the replication settings for this keyspace.  Typically,
    there will be a "class" entry with the name of the replication strategy
    class.
    """

    tables = None
    """
    A map from table names to instances of :class:`~.TableMetadata`.
    """

    def __init__(self, name, durable_writes, strategy_class, strategy_options):
        self.name = name
        self.durable_writes = durable_writes
        # Copy so that adding the "class" entry does not mutate the dict
        # owned by the caller.
        self.replication = dict(strategy_options)
        self.replication["class"] = strategy_class
        self.tables = {}

    def export_as_string(self):
        """
        Returns the CREATE KEYSPACE statement followed by the exported
        statements of every table in this keyspace, one per line.
        """
        # Use each table's export_as_string() so that its statements are
        # terminated with ";" and its index definitions are included.
        statements = [self.as_cql_query()]
        statements.extend(t.export_as_string() for t in self.tables.values())
        return "\n".join(statements)

    def as_cql_query(self):
        """
        Returns a CQL query string that can be used to recreate this keyspace
        (tables are not included).
        """
        ret = "CREATE KEYSPACE %s WITH REPLICATION = { 'class' : '%s'" % \
              (self.name, self.replication["class"])
        for k, v in self.replication.items():
            # "class" has already been emitted first
            if k != "class":
                ret += ", '%s': '%s'" % (k, v)
        ret += ' } AND DURABLE_WRITES = %s;' % ("true" if self.durable_writes else "false")
        return ret
class TableMetadata(object):
    """
    A representation of the schema for a single table.
    """

    keyspace = None
    """ An instance of :class:`~.KeyspaceMetadata` """

    name = None
    """ The string name of the table """

    partition_key = None
    """
    A list of :class:`.ColumnMetadata` instances representing the columns in
    the partition key for this table.  This will always hold at least one
    column.
    """

    clustering_key = None
    """
    A list of :class:`.ColumnMetadata` instances representing the columns
    in the clustering key for this table.  These are all of the
    :attr:`.primary_key` columns that are not in the :attr:`.partition_key`.

    Note that a table may have no clustering keys, in which case this will
    be an empty list.
    """

    @property
    def primary_key(self):
        """
        A list of :class:`.ColumnMetadata` representing the components of
        the primary key for this table.
        """
        return self.partition_key + self.clustering_key

    columns = None
    """
    A dict mapping column names to :class:`.ColumnMetadata` instances.
    """

    options = None
    """
    A dict mapping table option names to their specific settings for this
    table.
    """

    # Names looked up in a table schema row and emitted in the WITH clause.
    recognized_options = (
        "comment", "read_repair_chance",  # "local_read_repair_chance",
        "replicate_on_write", "gc_grace_seconds", "bloom_filter_fp_chance",
        "caching", "compaction_strategy_class", "compaction_strategy_options",
        "min_compaction_threshold", "max_compaction_threshold",
        "compression_parameters")

    def __init__(self, keyspace_metadata, name, partition_key=None, clustering_key=None, columns=None, options=None):
        self.keyspace = keyspace_metadata
        self.name = name
        self.partition_key = [] if partition_key is None else partition_key
        self.clustering_key = [] if clustering_key is None else clustering_key
        self.columns = OrderedDict() if columns is None else columns
        self.options = options
        self.comparator = None

    def export_as_string(self):
        """
        Returns a string of CQL queries that can be used to recreate this table
        along with all indexes on it.  The returned string is formatted to
        be human readable.
        """
        ret = self.as_cql_query(formatted=True)
        ret += ";"

        for col_meta in self.columns.values():
            if col_meta.index:
                ret += "\n%s;" % (col_meta.index.as_cql_query(),)

        return ret

    def as_cql_query(self, formatted=False):
        """
        Returns a CQL query that can be used to recreate this table (index
        creations are not included).  If `formatted` is set to :const:`True`,
        extra whitespace will be added to make the query human readable.
        """
        ret = "CREATE TABLE %s.%s (%s" % (self.keyspace.name, self.name, "\n" if formatted else "")

        if formatted:
            column_join = ",\n"
            padding = " "
        else:
            column_join = ", "
            padding = ""

        columns = ["%s %s" % (col.name, col.typestring) for col in self.columns.values()]

        # A single-column primary key is declared inline on the first column.
        if len(self.partition_key) == 1 and not self.clustering_key:
            columns[0] += " PRIMARY KEY"

        ret += column_join.join("%s%s" % (padding, col) for col in columns)

        # primary key
        if len(self.partition_key) > 1 or self.clustering_key:
            ret += "%s%sPRIMARY KEY (" % (column_join, padding)

            if len(self.partition_key) > 1:
                ret += "(%s)" % ", ".join(col.name for col in self.partition_key)
            else:
                ret += self.partition_key[0].name

            if self.clustering_key:
                ret += ", %s" % ", ".join(col.name for col in self.clustering_key)

            ret += ")"

        # options
        ret += "%s) WITH " % ("\n" if formatted else "")

        option_strings = []
        if self.options.get("is_compact_storage"):
            option_strings.append("COMPACT STORAGE")

        if self.clustering_key:
            cluster_str = "CLUSTERING ORDER BY "

            clustering_names = self.protect_names([c.name for c in self.clustering_key])

            if self.options.get("is_compact_storage") and \
                    not issubclass(self.comparator, types.CompositeType):
                subtypes = [self.comparator]
            else:
                subtypes = self.comparator.subtypes

            inner = []
            for colname, coltype in zip(clustering_names, subtypes):
                ordering = "DESC" if issubclass(coltype, types.ReversedType) else "ASC"
                inner.append("%s %s" % (colname, ordering))

            cluster_str += "(%s)" % ", ".join(inner)

            option_strings.append(cluster_str)

        # Options whose value is None yield None and are skipped.
        for option_name in self.recognized_options:
            option_str = self._make_option_str(option_name)
            if option_str is not None:
                option_strings.append(option_str)

        join_str = "\n AND " if formatted else " AND "
        ret += join_str.join(option_strings)

        return ret

    def _make_option_str(self, name):
        """
        Return ``"<name> = <value>"`` for the option `name`, or :const:`None`
        when the option has no value.
        """
        value = self.options.get(name)
        if value is not None:
            if name == "comment":
                value = value or ""
            return "%s = %s" % (name, self.protect_value(value))

    def protect_name(self, name):
        """ Return `name` quoted if it is not a valid bare identifier. """
        if isinstance(name, unicode):
            name = name.encode('utf8')
        return self.maybe_escape_name(name)

    def protect_names(self, names):
        """ Apply :meth:`protect_name` to every entry of `names`. """
        return map(self.protect_name, names)

    def protect_value(self, value):
        """
        Render `value` as a CQL literal: numbers are emitted bare, everything
        else (including lower-cased booleans) as a single-quoted string.
        """
        if value is None:
            return 'NULL'  # this totally won't work
        if isinstance(value, bool):
            value = str(value).lower()
        elif isinstance(value, float):
            return '%f' % value
        elif isinstance(value, int):
            return str(value)
        return "'%s'" % value.replace("'", "''")

    valid_cql3_word_re = re.compile(r'^[a-z][0-9a-z_]*$')

    def is_valid_name(self, name):
        """
        Return whether `name` can be used unquoted: it must match
        ``valid_cql3_word_re`` and must not be a reserved keyword.
        """
        if name is None:
            return False
        if name.lower() in _keywords - _unreserved_keywords:
            return False
        return self.valid_cql3_word_re.match(name) is not None

    def maybe_escape_name(self, name):
        """ Return `name` unchanged if valid, otherwise double-quoted. """
        if self.is_valid_name(name):
            return name
        return self.escape_name(name)

    def escape_name(self, name):
        """ Double-quote `name`, doubling any embedded double quotes. """
        return '"%s"' % (name.replace('"', '""'),)
class ColumnMetadata(object):
    """
    Describes one column of a table.
    """

    table = None
    """ The :class:`.TableMetadata` that owns this column. """

    name = None
    """ The column's name, as a string. """

    data_type = None

    index = None
    """
    The :class:`.IndexMetadata` for the index on this column, or
    :const:`None` when the column is not indexed.
    """

    def __init__(self, table_metadata, column_name, data_type, index_metadata=None):
        self.table, self.name = table_metadata, column_name
        self.data_type, self.index = data_type, index_metadata

    @property
    def typestring(self):
        """
        The CQL spelling of this column's type, such as "varchar"
        or "map<string, int>".
        """
        cql_type = self.data_type
        # a reversed type is reported as the type it wraps
        if issubclass(cql_type, types.ReversedType):
            cql_type = cql_type.subtypes[0]
        return cql_type.cql_parameterized_type()

    def __str__(self):
        description = (self.name, self.data_type)
        return "%s %s" % description
class IndexMetadata(object):
    """
    Describes a secondary index defined on a single column.
    """

    column = None
    """
    The :class:`.ColumnMetadata` of the indexed column.
    """

    name = None
    """ The index name, as a string. """

    index_type = None
    """ The kind of index, as a string. """

    def __init__(self, column_metadata, index_name=None, index_type=None):
        self.column, self.name, self.index_type = column_metadata, index_name, index_type

    def as_cql_query(self):
        """
        Builds the CREATE INDEX statement that would recreate this index.
        """
        owner = self.column.table
        parts = (self.name, owner.keyspace.name, owner.name, self.column.name)
        return "CREATE INDEX %s ON %s.%s (%s)" % parts
class TokenMap(object):
    """
    Information about the layout of the ring.
    """

    token_class = None
    """
    A subclass of :class:`.Token`, depending on what partitioner the cluster uses.
    """

    tokens_to_hosts = None
    """
    A map of :class:`.Token` objects to the :class:`.Host` objects owning them.
    """

    ring = None
    """
    An ordered list of :class:`.Token` instances in the ring.
    """

    def __init__(self, token_class, tokens_to_hosts, ring):
        self.token_class = token_class
        self.tokens_to_hosts = tokens_to_hosts
        self.ring = ring

    def get_replicas(self, token):
        """
        Get :class:`.Host` instances representing all of the replica nodes
        for a given :class:`.Token`.

        The entry of :attr:`tokens_to_hosts` for the first ring token that is
        greater than or equal to `token` is returned, wrapping around to the
        first ring token when `token` is greater than every ring token.  An
        empty list is returned when the ring is empty.
        """
        # TODO depending on keyspace and replication strategy options,
        # return full set of replicas
        if not self.ring:
            return []

        # Each ring token closes the range (previous token, token], so the
        # owner is the first ring token >= `token`; this also covers tokens
        # that sort before the first ring entry.
        point = bisect_left(self.ring, token)
        if point == len(self.ring):
            # past the last token: wrap around to the start of the ring
            point = 0
        return self.tokens_to_hosts[self.ring[point]]
class Token(object):
    """
    Abstract class representing a token.

    Subclasses are expected to set ``self.value``; tokens compare, order and
    hash by that value.
    """

    @classmethod
    def hash_fn(cls, key):
        """ Map a partition key to a token value; the base class returns `key` unchanged. """
        return key

    @classmethod
    def from_key(cls, key):
        """ Build a token of this class from a partition key. """
        return cls(cls.hash_fn(key))

    def __cmp__(self, other):
        # Kept for Python 2 callers that invoke cmp() directly.
        if self.value < other.value:
            return -1
        elif self.value == other.value:
            return 0
        else:
            return 1

    # Rich comparisons and __hash__ keep equality and hashing consistent, so
    # equal tokens share one dict entry and sorting/bisecting works without
    # relying on __cmp__.
    def __eq__(self, other):
        if not isinstance(other, Token):
            return NotImplemented
        return self.value == other.value

    def __ne__(self, other):
        if not isinstance(other, Token):
            return NotImplemented
        return self.value != other.value

    def __lt__(self, other):
        if not isinstance(other, Token):
            return NotImplemented
        return self.value < other.value

    def __le__(self, other):
        if not isinstance(other, Token):
            return NotImplemented
        return self.value <= other.value

    def __gt__(self, other):
        if not isinstance(other, Token):
            return NotImplemented
        return self.value > other.value

    def __ge__(self, other):
        if not isinstance(other, Token):
            return NotImplemented
        return self.value >= other.value

    def __hash__(self):
        return hash(self.value)
# Bounds of a signed 64-bit integer.
MIN_LONG = -(1 << 63)
MAX_LONG = (1 << 63) - 1
class NoMurmur3(Exception):
    """ Raised when a murmur3 hash is requested but ``murmur3`` is unavailable. """
class Murmur3Token(Token):
    """
    Token type used with ``Murmur3Partitioner``.
    """

    @classmethod
    def hash_fn(cls, key):
        """
        Hash `key` with ``murmur3``.  Raises :class:`NoMurmur3` when the
        ``murmur3`` function is not available.
        """
        if murmur3 is None:
            raise NoMurmur3()

        hashed = murmur3(key)
        # a hash equal to MIN_LONG is reported as MAX_LONG instead
        if hashed == MIN_LONG:
            return MAX_LONG
        return hashed

    def __init__(self, token):
        """ `token` is an int, or a string holding one, for the token value """
        self.value = int(token)
class MD5Token(Token):
    """
    A token for ``RandomPartitioner``.
    """

    @classmethod
    def hash_fn(cls, key):
        """
        Return the absolute value of the MD5 digest of `key`, with the digest
        decoded by ``varint_unpack``.
        """
        # Hash the key that was passed in; a constant input would map every
        # key to the same token.
        return abs(varint_unpack(md5(key).digest()))

    def __init__(self, token):
        """ `token` should be an int or string representing the token """
        self.value = int(token)
class BytesToken(Token):
    """
    Token type used with ``ByteOrderedPartitioner``.
    """

    def __init__(self, token_string):
        """ `token_string` is the string form of the token; it is stored unchanged """
        self.value = token_string