From c3cef9207b9477f604bedc5780fae60210a60bc5 Mon Sep 17 00:00:00 2001 From: gholt Date: Sun, 5 Jun 2011 23:22:35 +0000 Subject: [PATCH 01/11] Adding account_autocreate mode and refactoring TRUE_VALUES --- doc/source/deployment_guide.rst | 4 ++++ etc/proxy-server.conf-sample | 3 +++ swift/common/bench.py | 4 ++-- swift/common/daemon.py | 3 ++- swift/common/middleware/staticweb.py | 2 +- swift/common/utils.py | 2 +- swift/proxy/server.py | 23 ++++++++++++++++++----- swift/stats/log_uploader.py | 2 +- 8 files changed, 32 insertions(+), 11 deletions(-) diff --git a/doc/source/deployment_guide.rst b/doc/source/deployment_guide.rst index 52a4f80f..04b99fa1 100644 --- a/doc/source/deployment_guide.rst +++ b/doc/source/deployment_guide.rst @@ -547,6 +547,10 @@ error_suppression_limit 10 Error count to consider a node error limited allow_account_management false Whether account PUTs and DELETEs are even callable +account_autocreate false If set to 'true' authorized + accounts that do not yet exist + within the Swift cluster will + be automatically created. ============================ =============== ============================= [tempauth] diff --git a/etc/proxy-server.conf-sample b/etc/proxy-server.conf-sample index fef0e81f..496eb4ae 100644 --- a/etc/proxy-server.conf-sample +++ b/etc/proxy-server.conf-sample @@ -40,6 +40,9 @@ use = egg:swift#proxy # If set to 'true' any authorized user may create and delete accounts; if # 'false' no one, even authorized, can. # allow_account_management = false +# If set to 'true' authorized accounts that do not yet exist within the Swift +# cluster will be automatically created. +# account_autocreate = false [filter:tempauth] use = egg:swift#tempauth diff --git a/swift/common/bench.py b/swift/common/bench.py index 28d8c7e8..51e39f79 100644 --- a/swift/common/bench.py +++ b/swift/common/bench.py @@ -43,7 +43,7 @@ class Bench(object): self.user = conf.user self.key = conf.key self.auth_url = conf.auth - self.use_proxy = conf.use_proxy in TRUE_VALUES + self.use_proxy = conf.use_proxy.lower() in TRUE_VALUES if self.use_proxy: url, token = client.get_auth(self.auth_url, self.user, self.key) self.token = token @@ -125,7 +125,7 @@ class BenchController(object): self.logger = logger self.conf = conf self.names = [] - self.delete = conf.delete in TRUE_VALUES + self.delete = conf.delete.lower() in TRUE_VALUES self.gets = int(conf.num_gets) def run(self): diff --git a/swift/common/daemon.py b/swift/common/daemon.py index 96914d95..abcc8dea 100644 --- a/swift/common/daemon.py +++ b/swift/common/daemon.py @@ -75,7 +75,8 @@ def run_daemon(klass, conf_file, section_name='', once=False, **kwargs): log_name=kwargs.get('log_name')) # once on command line (i.e. daemonize=false) will over-ride config - once = once or conf.get('daemonize', 'true') not in utils.TRUE_VALUES + once = once or \ + conf.get('daemonize', 'true').lower() not in utils.TRUE_VALUES # pre-configure logger if 'logger' in kwargs: diff --git a/swift/common/middleware/staticweb.py b/swift/common/middleware/staticweb.py index 8e58ad50..81225a90 100644 --- a/swift/common/middleware/staticweb.py +++ b/swift/common/middleware/staticweb.py @@ -270,7 +270,7 @@ class StaticWeb(object): :param start_response: The original WSGI start_response hook. :param prefix: Any prefix desired for the container listing. """ - if self._listings not in TRUE_VALUES: + if self._listings.lower() not in TRUE_VALUES: resp = HTTPNotFound()(env, self._start_response) return self._error_response(resp, env, start_response) tmp_env = self._get_escalated_env(env) diff --git a/swift/common/utils.py b/swift/common/utils.py index 4ee57db8..ac183317 100644 --- a/swift/common/utils.py +++ b/swift/common/utils.py @@ -72,7 +72,7 @@ if hash_conf.read('/etc/swift/swift.conf'): pass # Used when reading config values -TRUE_VALUES = set(('true', '1', 'yes', 'True', 'Yes', 'on', 'On', 't', 'y')) +TRUE_VALUES = set(('true', '1', 'yes', 'on', 't', 'y')) def validate_configuration(): diff --git a/swift/proxy/server.py b/swift/proxy/server.py index 3300e7a3..01e33d84 100644 --- a/swift/proxy/server.py +++ b/swift/proxy/server.py @@ -41,8 +41,8 @@ from webob.exc import HTTPBadRequest, HTTPMethodNotAllowed, \ from webob import Request, Response from swift.common.ring import Ring -from swift.common.utils import get_logger, normalize_timestamp, split_path, \ - cache_from_env, ContextPool +from swift.common.utils import cache_from_env, ContextPool, get_logger, \ + normalize_timestamp, split_path, TRUE_VALUES from swift.common.bufferedhttp import http_connect from swift.common.constraints import check_metadata, check_object_creation, \ check_utf8, CONTAINER_LISTING_LIMIT, MAX_ACCOUNT_NAME_LENGTH, \ @@ -353,7 +353,7 @@ class Controller(object): result_code = self.app.memcache.get(cache_key) if result_code == 200: return partition, nodes - elif result_code == 404: + elif result_code == 404 and not self.app.account_autocreate: return None, None result_code = 0 attempts_left = self.app.account_ring.replica_count @@ -386,6 +386,17 @@ class Controller(object): except (Exception, TimeoutError): self.exception_occurred(node, _('Account'), _('Trying to get account info for %s') % path) + if result_code == 404: + if self.app.account_autocreate: + if len(account) > MAX_ACCOUNT_NAME_LENGTH: + return None, None + headers = {'X-Timestamp': normalize_timestamp(time.time()), + 'x-trans-id': self.trans_id} + resp = self.make_requests(Request.blank('/v1' + path), + self.app.account_ring, partition, 'PUT', + path, [headers] * len(nodes)) + if resp.status_int // 100 == 2: + result_code = 200 if self.app.memcache and result_code in (200, 404): if result_code == 200: cache_timeout = self.app.recheck_account_existence @@ -1391,7 +1402,7 @@ class BaseApplication(object): self.put_queue_depth = int(conf.get('put_queue_depth', 10)) self.object_chunk_size = int(conf.get('object_chunk_size', 65536)) self.client_chunk_size = int(conf.get('client_chunk_size', 65536)) - self.log_headers = conf.get('log_headers') == 'True' + self.log_headers = conf.get('log_headers', 'no').lower() in TRUE_VALUES self.error_suppression_interval = \ int(conf.get('error_suppression_interval', 60)) self.error_suppression_limit = \ @@ -1401,7 +1412,7 @@ class BaseApplication(object): self.recheck_account_existence = \ int(conf.get('recheck_account_existence', 60)) self.allow_account_management = \ - conf.get('allow_account_management', 'false').lower() == 'true' + conf.get('allow_account_management', 'no').lower() in TRUE_VALUES self.resellers_conf = ConfigParser() self.resellers_conf.read(os.path.join(swift_dir, 'resellers.conf')) self.object_ring = object_ring or \ @@ -1413,6 +1424,8 @@ class BaseApplication(object): self.memcache = memcache mimetypes.init(mimetypes.knownfiles + [os.path.join(swift_dir, 'mime.types')]) + self.account_autocreate = \ + conf.get('account_autocreate', 'no').lower() in TRUE_VALUES def get_controller(self, path): """ diff --git a/swift/stats/log_uploader.py b/swift/stats/log_uploader.py index 6051107a..ea51061d 100644 --- a/swift/stats/log_uploader.py +++ b/swift/stats/log_uploader.py @@ -69,7 +69,7 @@ class LogUploader(Daemon): self.internal_proxy = InternalProxy(proxy_server_conf) self.new_log_cutoff = int(cutoff or uploader_conf.get('new_log_cutoff', '7200')) - self.unlink_log = uploader_conf.get('unlink_log', 'True').lower() in \ + self.unlink_log = uploader_conf.get('unlink_log', 'true').lower() in \ utils.TRUE_VALUES self.filename_pattern = regex or \ uploader_conf.get('source_filename_pattern', From e34385f516b6b581695dfad754d2907028e7adeb Mon Sep 17 00:00:00 2001 From: gholt Date: Sun, 5 Jun 2011 23:44:39 +0000 Subject: [PATCH 02/11] Only autocreate accounts for certain operations --- swift/proxy/server.py | 22 +++++++++++++--------- test/unit/proxy/test_server.py | 6 +++--- 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/swift/proxy/server.py b/swift/proxy/server.py index 01e33d84..a863b8a1 100644 --- a/swift/proxy/server.py +++ b/swift/proxy/server.py @@ -338,7 +338,7 @@ class Controller(object): node['errors'] = self.app.error_suppression_limit + 1 node['last_error'] = time.time() - def account_info(self, account): + def account_info(self, account, autocreate=False): """ Get account information, and also verify that the account exists. @@ -353,7 +353,7 @@ class Controller(object): result_code = self.app.memcache.get(cache_key) if result_code == 200: return partition, nodes - elif result_code == 404 and not self.app.account_autocreate: + elif result_code == 404 and not autocreate: return None, None result_code = 0 attempts_left = self.app.account_ring.replica_count @@ -387,7 +387,7 @@ class Controller(object): self.exception_occurred(node, _('Account'), _('Trying to get account info for %s') % path) if result_code == 404: - if self.app.account_autocreate: + if autocreate: if len(account) > MAX_ACCOUNT_NAME_LENGTH: return None, None headers = {'X-Timestamp': normalize_timestamp(time.time()), @@ -408,7 +408,7 @@ class Controller(object): return partition, nodes return None, None - def container_info(self, account, container): + def container_info(self, account, container, account_autocreate=False): """ Get container information and thusly verify container existance. This will also make a call to account_info to verify that the @@ -434,7 +434,7 @@ class Controller(object): return partition, nodes, read_acl, write_acl elif status == 404: return None, None, None, None - if not self.account_info(account)[1]: + if not self.account_info(account, autocreate=account_autocreate)[1]: return None, None, None, None result_code = 0 read_acl = None @@ -865,7 +865,8 @@ class ObjectController(Controller): if error_response: return error_response container_partition, containers, _junk, req.acl = \ - self.container_info(self.account_name, self.container_name) + self.container_info(self.account_name, self.container_name, + account_autocreate=self.app.account_autocreate) if 'swift.authorize' in req.environ: aresp = req.environ['swift.authorize'](req) if aresp: @@ -922,7 +923,8 @@ class ObjectController(Controller): def PUT(self, req): """HTTP PUT request handler.""" container_partition, containers, _junk, req.acl = \ - self.container_info(self.account_name, self.container_name) + self.container_info(self.account_name, self.container_name, + account_autocreate=self.app.account_autocreate) if 'swift.authorize' in req.environ: aresp = req.environ['swift.authorize'](req) if aresp: @@ -1230,7 +1232,8 @@ class ContainerController(Controller): resp.body = 'Container name length of %d longer than %d' % \ (len(self.container_name), MAX_CONTAINER_NAME_LENGTH) return resp - account_partition, accounts = self.account_info(self.account_name) + account_partition, accounts = self.account_info(self.account_name, + autocreate=self.app.account_autocreate) if not accounts: return HTTPNotFound(request=req) container_partition, containers = self.app.container_ring.get_nodes( @@ -1260,7 +1263,8 @@ class ContainerController(Controller): self.clean_acls(req) or check_metadata(req, 'container') if error_response: return error_response - account_partition, accounts = self.account_info(self.account_name) + account_partition, accounts = self.account_info(self.account_name, + autocreate=self.app.account_autocreate) if not accounts: return HTTPNotFound(request=req) container_partition, containers = self.app.container_ring.get_nodes( diff --git a/test/unit/proxy/test_server.py b/test/unit/proxy/test_server.py index fe2d1ca0..1ea65b49 100644 --- a/test/unit/proxy/test_server.py +++ b/test/unit/proxy/test_server.py @@ -406,7 +406,7 @@ class TestController(unittest.TestCase): self.assertEqual(write_acl, ret[3]) def test_container_info_invalid_account(self): - def account_info(self, account): + def account_info(self, account, autocreate=False): return None, None with save_globals(): @@ -417,7 +417,7 @@ class TestController(unittest.TestCase): # tests if 200 is cached and used def test_container_info_200(self): - def account_info(self, account): + def account_info(self, account, autocreate=False): return True, True with save_globals(): @@ -443,7 +443,7 @@ class TestController(unittest.TestCase): # tests if 404 is cached and used def test_container_info_404(self): - def account_info(self, account): + def account_info(self, account, autocreate=False): return True, True with save_globals(): From 34e121200ab253bbe79a72066baf1377b4795996 Mon Sep 17 00:00:00 2001 From: David Goetz Date: Tue, 7 Jun 2011 17:32:59 -0700 Subject: [PATCH 03/11] Adding the collection of specified metadata keys, unit tests working --- swift/common/db.py | 31 +++++++++---- swift/stats/db_stats_collector.py | 34 +++++++++++--- test/unit/stats/test_db_stats_collector.py | 53 +++++++++++++++++++--- 3 files changed, 96 insertions(+), 22 deletions(-) diff --git a/swift/common/db.py b/swift/common/db.py index 67913ca9..219c097c 100644 --- a/swift/common/db.py +++ b/swift/common/db.py @@ -879,14 +879,16 @@ class ContainerBroker(DatabaseBroker): return (row['object_count'] in (None, '', 0, '0')) and \ (float(row['delete_timestamp']) > float(row['put_timestamp'])) - def get_info(self): + def get_info(self, include_metadata=False): """ Get global data for the container. - :returns: sqlite.row of (account, container, created_at, put_timestamp, + :returns: dict with keys: account, container, created_at, put_timestamp, delete_timestamp, object_count, bytes_used, reported_put_timestamp, reported_delete_timestamp, - reported_object_count, reported_bytes_used, hash, id) + reported_object_count, reported_bytes_used, hash, id + If include_metadata is set, metadata is included as a key + pointing to a dict of tuples of the metadata """ try: self._commit_puts() @@ -894,13 +896,24 @@ class ContainerBroker(DatabaseBroker): if not self.stale_reads_ok: raise with self.get() as conn: - return conn.execute(''' + metadata = '' + if include_metadata: + metadata = ', metadata' + data = conn.execute(''' SELECT account, container, created_at, put_timestamp, delete_timestamp, object_count, bytes_used, reported_put_timestamp, reported_delete_timestamp, reported_object_count, reported_bytes_used, hash, id + %s FROM container_stat - ''').fetchone() + ''' % metadata).fetchone() + data = dict(data) + if include_metadata: + try: + data['metadata'] = json.loads(data.get('metadata','')) + except ValueError: + data['metadata'] = {} + return data def reported(self, put_timestamp, delete_timestamp, object_count, bytes_used): @@ -1394,9 +1407,9 @@ class AccountBroker(DatabaseBroker): """ Get global data for the account. - :returns: sqlite.row of (account, created_at, put_timestamp, + :returns: dict with keys: account, created_at, put_timestamp, delete_timestamp, container_count, object_count, - bytes_used, hash, id) + bytes_used, hash, id """ try: self._commit_puts() @@ -1404,11 +1417,11 @@ class AccountBroker(DatabaseBroker): if not self.stale_reads_ok: raise with self.get() as conn: - return conn.execute(''' + return dict(conn.execute(''' SELECT account, created_at, put_timestamp, delete_timestamp, container_count, object_count, bytes_used, hash, id FROM account_stat - ''').fetchone() + ''').fetchone()) def list_containers_iter(self, limit, marker, end_marker, prefix, delimiter): diff --git a/swift/stats/db_stats_collector.py b/swift/stats/db_stats_collector.py index 04968f18..c8faa667 100644 --- a/swift/stats/db_stats_collector.py +++ b/swift/stats/db_stats_collector.py @@ -60,6 +60,9 @@ class DatabaseStatsCollector(Daemon): def get_data(self): raise Exception('Not Implemented') + def get_header(self): + raise Exception('Not Implemented') + def find_and_process(self): src_filename = time.strftime(self.filename_format) working_dir = os.path.join(self.target_dir, @@ -70,6 +73,7 @@ class DatabaseStatsCollector(Daemon): hasher = hashlib.md5() try: with open(tmp_filename, 'wb') as statfile: + statfile.write(self.get_header()) for device in os.listdir(self.devices): if self.mount_check and not check_mount(self.devices, device): @@ -122,6 +126,8 @@ class AccountStatsCollector(DatabaseStatsCollector): info['bytes_used']) return line_data + def get_header(self): + return '' class ContainerStatsCollector(DatabaseStatsCollector): """ @@ -133,20 +139,36 @@ class ContainerStatsCollector(DatabaseStatsCollector): super(ContainerStatsCollector, self).__init__(stats_conf, 'container', container_server_data_dir, 'container-stats-%Y%m%d%H_') + self.metadata_keys = [mkey.strip() for mkey in + stats_conf.get('metadata_keys', '').split(',') if mkey.strip()] + + def get_header(self): + header = 'Account Hash, Container Name, Object Count, Bytes Used' + if self.metadata_keys: + xtra_headers = ','.join(self.metadata_keys) + header += ',%s' % xtra_headers + header += '\n' + return header def get_data(self, db_path): """ Data for generated csv has the following columns: Account Hash, Container Name, Object Count, Bytes Used + This will just collect whether or not the metadata is set + using a 1 or ''. """ line_data = None broker = ContainerBroker(db_path) if not broker.is_deleted(): - info = broker.get_info() + info = broker.get_info(include_metadata=bool(self.metadata_keys)) encoded_container_name = urllib.quote(info['container']) - line_data = '"%s","%s",%d,%d\n' % ( - info['account'], - encoded_container_name, - info['object_count'], - info['bytes_used']) + line_data = '"%s","%s",%d,%d' % ( + info['account'], encoded_container_name, + info['object_count'], info['bytes_used']) + if self.metadata_keys: + metadata_results = ','.join( + [info['metadata'].get(mkey) and '1' or '' + for mkey in self.metadata_keys]) + line_data += ',%s' % metadata_results + line_data += '\n' return line_data diff --git a/test/unit/stats/test_db_stats_collector.py b/test/unit/stats/test_db_stats_collector.py index 2721614e..7836351a 100644 --- a/test/unit/stats/test_db_stats_collector.py +++ b/test/unit/stats/test_db_stats_collector.py @@ -66,6 +66,17 @@ class TestDbStats(unittest.TestCase): info = stat.get_data("%s/con.db" % self.containers) self.assertEquals('''"test_acc","test_con",1,10\n''', info) + def test_container_stat_get_metadata(self): + stat = db_stats_collector.ContainerStatsCollector(self.conf) + container_db = ContainerBroker("%s/con.db" % self.containers, + account='test_acc', container='test_con') + container_db.initialize() + container_db.put_object('test_obj', time.time(), 10, 'text', 'faketag') + info = stat.get_data("%s/con.db" % self.containers) + self.assertEquals('''"test_acc","test_con",1,10\n''', info) + container_db.update_metadata({'test1': ('val',1000)}) + + def _gen_account_stat(self): stat = db_stats_collector.AccountStatsCollector(self.conf) output_data = set() @@ -83,20 +94,30 @@ class TestDbStats(unittest.TestCase): self.assertEqual(len(output_data), 10) return stat, output_data - def _gen_container_stat(self): + def _gen_container_stat(self, set_metadata=False): + if set_metadata: + self.conf['metadata_keys'] = 'test1,test2' stat = db_stats_collector.ContainerStatsCollector(self.conf) output_data = set() for i in range(10): - account_db = ContainerBroker( + cont_db = ContainerBroker( "%s/container-stats-201001010%s-%s.db" % (self.containers, i, uuid.uuid4().hex), account='test_acc_%s' % i, container='test_con') - account_db.initialize() - account_db.put_object('test_obj', time.time(), 10, 'text', - 'faketag') + cont_db.initialize() + cont_db.put_object('test_obj', time.time(), 10, 'text', 'faketag') + metadata_output = '' + if set_metadata: + if i%2: + cont_db.update_metadata({'test1': (55,100)}) + metadata_output = ',1,' + else: + cont_db.update_metadata({'test2': (55,100)}) + metadata_output = ',,1' # this will "commit" the data - account_db.get_info() - output_data.add('''"test_acc_%s","test_con",1,10''' % i), + cont_db.get_info() + output_data.add('''"test_acc_%s","test_con",1,10%s''' % + (i, metadata_output)) self.assertEqual(len(output_data), 10) return stat, output_data @@ -112,6 +133,21 @@ class TestDbStats(unittest.TestCase): self.assertEqual(len(output_data), 0) + def test_account_stat_run_once_container_metadata(self): + + stat, output_data = self._gen_container_stat(set_metadata=True) + stat.run_once() + stat_file = os.listdir(self.log_dir)[0] + with open(os.path.join(self.log_dir, stat_file)) as stat_handle: + headers = stat_handle.readline() + self.assert_(headers.startswith('Account Hash, Container Name,')) + for i in range(10): + data = stat_handle.readline() + output_data.discard(data.strip()) + + self.assertEqual(len(output_data), 0) + + def test_account_stat_run_once_both(self): acc_stat, acc_output_data = self._gen_account_stat() con_stat, con_output_data = self._gen_container_stat() @@ -128,6 +164,8 @@ class TestDbStats(unittest.TestCase): con_stat.run_once() stat_file = [f for f in os.listdir(self.log_dir) if f != stat_file][0] with open(os.path.join(self.log_dir, stat_file)) as stat_handle: + headers = stat_handle.readline() + self.assert_(headers.startswith('Account Hash, Container Name,')) for i in range(10): data = stat_handle.readline() con_output_data.discard(data.strip()) @@ -144,6 +182,7 @@ class TestDbStats(unittest.TestCase): db_stat = db_stats_collector.DatabaseStatsCollector(self.conf, 'account', 'test_dir', 'stats-%Y%m%d%H_') self.assertRaises(Exception, db_stat.get_data) + self.assertRaises(Exception, db_stat.get_header) def test_not_not_mounted(self): self.conf['mount_check'] = 'true' From 68e5719a81374e38ef866330a5b93e32b7efa218 Mon Sep 17 00:00:00 2001 From: David Goetz Date: Wed, 8 Jun 2011 08:55:14 -0700 Subject: [PATCH 04/11] fixing case thing and adding docs --- doc/source/overview_stats.rst | 11 ++++++----- etc/log-processor.conf-sample | 1 + swift/stats/db_stats_collector.py | 6 ++++-- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/doc/source/overview_stats.rst b/doc/source/overview_stats.rst index 04d2299e..3043b57e 100644 --- a/doc/source/overview_stats.rst +++ b/doc/source/overview_stats.rst @@ -19,11 +19,11 @@ the proxy log output to an hourly log file. For example, a proxy request that is made on August 4, 2010 at 12:37 gets logged in a file named 2010080412. This allows easy log rotation and easy per-hour log processing. -****************** -Account stats logs -****************** +********************************* +Account / Container DB stats logs +********************************* -Account stats logs are generated by a stats system process. +DB stats logs are generated by a stats system process. swift-account-stats-logger runs on each account server (via cron) and walks the filesystem looking for account databases. When an account database is found, the logger selects the account hash, bytes_used, container_count, and @@ -34,7 +34,8 @@ runs the account stats logger every hour. Therefore, in a cluster of ten account servers, ten csv files are produced every hour. Also, every account will have one entry for every replica in the system. On average, there will be three copies of each account in the aggregate of all account stat csv files -created in one system-wide run. +created in one system-wide run. The swift-container-stats-logger runs in a +similar fashion, scanning the container dbs. ---------------------- Log Processing plugins diff --git a/etc/log-processor.conf-sample b/etc/log-processor.conf-sample index f014168a..350ae730 100644 --- a/etc/log-processor.conf-sample +++ b/etc/log-processor.conf-sample @@ -54,3 +54,4 @@ processable = false # devices = /srv/node # mount_check = true # user = swift +# metadata_keys = comma separated list of user metadata keys to be collected diff --git a/swift/stats/db_stats_collector.py b/swift/stats/db_stats_collector.py index c8faa667..e6edf470 100644 --- a/swift/stats/db_stats_collector.py +++ b/swift/stats/db_stats_collector.py @@ -139,8 +139,10 @@ class ContainerStatsCollector(DatabaseStatsCollector): super(ContainerStatsCollector, self).__init__(stats_conf, 'container', container_server_data_dir, 'container-stats-%Y%m%d%H_') - self.metadata_keys = [mkey.strip() for mkey in - stats_conf.get('metadata_keys', '').split(',') if mkey.strip()] + # webob calls title on all the header keys + self.metadata_keys = ['X-Container-Meta-%s' % mkey.strip().title() + for mkey in stats_conf.get('metadata_keys', '').split(',') + if mkey.strip()] def get_header(self): header = 'Account Hash, Container Name, Object Count, Bytes Used' From 533946c2c8be9db55c6b5728c016b2c7495ec6c6 Mon Sep 17 00:00:00 2001 From: David Goetz Date: Wed, 8 Jun 2011 09:19:55 -0700 Subject: [PATCH 05/11] fix unittest --- test/unit/stats/test_db_stats_collector.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/test/unit/stats/test_db_stats_collector.py b/test/unit/stats/test_db_stats_collector.py index 7836351a..01ae8bd5 100644 --- a/test/unit/stats/test_db_stats_collector.py +++ b/test/unit/stats/test_db_stats_collector.py @@ -97,6 +97,7 @@ class TestDbStats(unittest.TestCase): def _gen_container_stat(self, set_metadata=False): if set_metadata: self.conf['metadata_keys'] = 'test1,test2' + # webob runs title on all headers stat = db_stats_collector.ContainerStatsCollector(self.conf) output_data = set() for i in range(10): @@ -109,10 +110,10 @@ class TestDbStats(unittest.TestCase): metadata_output = '' if set_metadata: if i%2: - cont_db.update_metadata({'test1': (55,100)}) + cont_db.update_metadata({'X-Container-Meta-Test1': (55,1)}) metadata_output = ',1,' else: - cont_db.update_metadata({'test2': (55,100)}) + cont_db.update_metadata({'X-Container-Meta-Test2': (55,2)}) metadata_output = ',,1' # this will "commit" the data cont_db.get_info() From 625c1452006f1d48f3ce77f50c224ff92df277cd Mon Sep 17 00:00:00 2001 From: David Goetz Date: Wed, 8 Jun 2011 09:24:44 -0700 Subject: [PATCH 06/11] pep8 --- swift/common/db.py | 6 +++--- swift/stats/db_stats_collector.py | 3 ++- test/unit/stats/test_db_stats_collector.py | 10 ++++------ 3 files changed, 9 insertions(+), 10 deletions(-) diff --git a/swift/common/db.py b/swift/common/db.py index 219c097c..783e00dd 100644 --- a/swift/common/db.py +++ b/swift/common/db.py @@ -883,8 +883,8 @@ class ContainerBroker(DatabaseBroker): """ Get global data for the container. - :returns: dict with keys: account, container, created_at, put_timestamp, - delete_timestamp, object_count, bytes_used, + :returns: dict with keys: account, container, created_at, + put_timestamp, delete_timestamp, object_count, bytes_used, reported_put_timestamp, reported_delete_timestamp, reported_object_count, reported_bytes_used, hash, id If include_metadata is set, metadata is included as a key @@ -910,7 +910,7 @@ class ContainerBroker(DatabaseBroker): data = dict(data) if include_metadata: try: - data['metadata'] = json.loads(data.get('metadata','')) + data['metadata'] = json.loads(data.get('metadata', '')) except ValueError: data['metadata'] = {} return data diff --git a/swift/stats/db_stats_collector.py b/swift/stats/db_stats_collector.py index e6edf470..f46e12f2 100644 --- a/swift/stats/db_stats_collector.py +++ b/swift/stats/db_stats_collector.py @@ -129,6 +129,7 @@ class AccountStatsCollector(DatabaseStatsCollector): def get_header(self): return '' + class ContainerStatsCollector(DatabaseStatsCollector): """ Extract storage stats from container databases on the container @@ -168,7 +169,7 @@ class ContainerStatsCollector(DatabaseStatsCollector): info['account'], encoded_container_name, info['object_count'], info['bytes_used']) if self.metadata_keys: - metadata_results = ','.join( + metadata_results = ','.join( [info['metadata'].get(mkey) and '1' or '' for mkey in self.metadata_keys]) line_data += ',%s' % metadata_results diff --git a/test/unit/stats/test_db_stats_collector.py b/test/unit/stats/test_db_stats_collector.py index 01ae8bd5..3e3cc522 100644 --- a/test/unit/stats/test_db_stats_collector.py +++ b/test/unit/stats/test_db_stats_collector.py @@ -74,8 +74,7 @@ class TestDbStats(unittest.TestCase): container_db.put_object('test_obj', time.time(), 10, 'text', 'faketag') info = stat.get_data("%s/con.db" % self.containers) self.assertEquals('''"test_acc","test_con",1,10\n''', info) - container_db.update_metadata({'test1': ('val',1000)}) - + container_db.update_metadata({'test1': ('val', 1000)}) def _gen_account_stat(self): stat = db_stats_collector.AccountStatsCollector(self.conf) @@ -109,11 +108,11 @@ class TestDbStats(unittest.TestCase): cont_db.put_object('test_obj', time.time(), 10, 'text', 'faketag') metadata_output = '' if set_metadata: - if i%2: - cont_db.update_metadata({'X-Container-Meta-Test1': (55,1)}) + if i % 2: + cont_db.update_metadata({'X-Container-Meta-Test1': (5, 1)}) metadata_output = ',1,' else: - cont_db.update_metadata({'X-Container-Meta-Test2': (55,2)}) + cont_db.update_metadata({'X-Container-Meta-Test2': (7, 2)}) metadata_output = ',,1' # this will "commit" the data cont_db.get_info() @@ -148,7 +147,6 @@ class TestDbStats(unittest.TestCase): self.assertEqual(len(output_data), 0) - def test_account_stat_run_once_both(self): acc_stat, acc_output_data = self._gen_account_stat() con_stat, con_output_data = self._gen_container_stat() From c5aeca412c81ea3c618ce945277b119254c01415 Mon Sep 17 00:00:00 2001 From: David Goetz Date: Wed, 8 Jun 2011 09:42:18 -0700 Subject: [PATCH 07/11] clean headers --- swift/stats/db_stats_collector.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/swift/stats/db_stats_collector.py b/swift/stats/db_stats_collector.py index f46e12f2..f65c20c3 100644 --- a/swift/stats/db_stats_collector.py +++ b/swift/stats/db_stats_collector.py @@ -146,7 +146,7 @@ class ContainerStatsCollector(DatabaseStatsCollector): if mkey.strip()] def get_header(self): - header = 'Account Hash, Container Name, Object Count, Bytes Used' + header = 'Account Hash,Container Name,Object Count,Bytes Used' if self.metadata_keys: xtra_headers = ','.join(self.metadata_keys) header += ',%s' % xtra_headers From aa91a5a6dab71c7d642645ce278b9abf364471d8 Mon Sep 17 00:00:00 2001 From: David Goetz Date: Wed, 8 Jun 2011 09:44:03 -0700 Subject: [PATCH 08/11] clean headers in unit tests... --- test/unit/stats/test_db_stats_collector.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/unit/stats/test_db_stats_collector.py b/test/unit/stats/test_db_stats_collector.py index 3e3cc522..d336016e 100644 --- a/test/unit/stats/test_db_stats_collector.py +++ b/test/unit/stats/test_db_stats_collector.py @@ -140,7 +140,7 @@ class TestDbStats(unittest.TestCase): stat_file = os.listdir(self.log_dir)[0] with open(os.path.join(self.log_dir, stat_file)) as stat_handle: headers = stat_handle.readline() - self.assert_(headers.startswith('Account Hash, Container Name,')) + self.assert_(headers.startswith('Account Hash,Container Name,')) for i in range(10): data = stat_handle.readline() output_data.discard(data.strip()) @@ -164,7 +164,7 @@ class TestDbStats(unittest.TestCase): stat_file = [f for f in os.listdir(self.log_dir) if f != stat_file][0] with open(os.path.join(self.log_dir, stat_file)) as stat_handle: headers = stat_handle.readline() - self.assert_(headers.startswith('Account Hash, Container Name,')) + self.assert_(headers.startswith('Account Hash,Container Name,')) for i in range(10): data = stat_handle.readline() con_output_data.discard(data.strip()) From bb48838404b887063d86c4f480c67b274c57f788 Mon Sep 17 00:00:00 2001 From: gholt Date: Fri, 10 Jun 2011 15:55:25 +0000 Subject: [PATCH 09/11] Adding some tests --- swift/proxy/server.py | 21 ++++++++++----------- test/unit/common/test_utils.py | 4 ++++ test/unit/proxy/test_server.py | 20 ++++++++++++++++++++ 3 files changed, 34 insertions(+), 11 deletions(-) diff --git a/swift/proxy/server.py b/swift/proxy/server.py index a863b8a1..2943c6a9 100644 --- a/swift/proxy/server.py +++ b/swift/proxy/server.py @@ -386,17 +386,16 @@ class Controller(object): except (Exception, TimeoutError): self.exception_occurred(node, _('Account'), _('Trying to get account info for %s') % path) - if result_code == 404: - if autocreate: - if len(account) > MAX_ACCOUNT_NAME_LENGTH: - return None, None - headers = {'X-Timestamp': normalize_timestamp(time.time()), - 'x-trans-id': self.trans_id} - resp = self.make_requests(Request.blank('/v1' + path), - self.app.account_ring, partition, 'PUT', - path, [headers] * len(nodes)) - if resp.status_int // 100 == 2: - result_code = 200 + if result_code == 404 and autocreate: + if len(account) > MAX_ACCOUNT_NAME_LENGTH: + return None, None + headers = {'X-Timestamp': normalize_timestamp(time.time()), + 'X-Trans-Id': self.trans_id} + resp = self.make_requests(Request.blank('/v1' + path), + self.app.account_ring, partition, 'PUT', + path, [headers] * len(nodes)) + if resp.status_int // 100 == 2: + result_code = 200 if self.app.memcache and result_code in (200, 404): if result_code == 200: cache_timeout = self.app.recheck_account_existence diff --git a/test/unit/common/test_utils.py b/test/unit/common/test_utils.py index 323f6067..67a6a1fb 100644 --- a/test/unit/common/test_utils.py +++ b/test/unit/common/test_utils.py @@ -768,6 +768,10 @@ log_name = yarr''' self.assertEquals(utils.human_readable(1237940039285380274899124224), '1024Yi') + def test_TRUE_VALUES(self): + for v in utils.TRUE_VALUES: + self.assertEquals(v, v.lower()) + if __name__ == '__main__': unittest.main() diff --git a/test/unit/proxy/test_server.py b/test/unit/proxy/test_server.py index 1ea65b49..afdea73f 100644 --- a/test/unit/proxy/test_server.py +++ b/test/unit/proxy/test_server.py @@ -393,6 +393,26 @@ class TestController(unittest.TestCase): test(404, 507, 503) test(503, 503, 503) + def test_account_info_account_autocreate(self): + with save_globals(): + proxy_server.http_connect = \ + fake_http_connect(404, 404, 404, 201, 201, 201) + partition, nodes = \ + self.controller.account_info(self.account, autocreate=False) + self.check_account_info_return(partition, nodes, is_none=True) + + proxy_server.http_connect = \ + fake_http_connect(404, 404, 404, 201, 201, 201) + partition, nodes = \ + self.controller.account_info(self.account) + self.check_account_info_return(partition, nodes, is_none=True) + + proxy_server.http_connect = \ + fake_http_connect(404, 404, 404, 201, 201, 201) + partition, nodes = \ + self.controller.account_info(self.account, autocreate=True) + self.check_account_info_return(partition, nodes) + def check_container_info_return(self, ret, is_none=False): if is_none: partition, nodes, read_acl, write_acl = None, None, None, None From c5aafe4992efea8c95cc09e05fdb89288eafb6bb Mon Sep 17 00:00:00 2001 From: David Goetz Date: Fri, 10 Jun 2011 08:59:34 -0700 Subject: [PATCH 10/11] handling no metadata column --- swift/common/db.py | 26 ++++++---- swift/stats/db_stats_collector.py | 4 +- test/unit/stats/test_db_stats_collector.py | 55 ++++++++++++++++++++-- 3 files changed, 70 insertions(+), 15 deletions(-) diff --git a/swift/common/db.py b/swift/common/db.py index 783e00dd..8683d4e0 100644 --- a/swift/common/db.py +++ b/swift/common/db.py @@ -899,14 +899,24 @@ class ContainerBroker(DatabaseBroker): metadata = '' if include_metadata: metadata = ', metadata' - data = conn.execute(''' - SELECT account, container, created_at, put_timestamp, - delete_timestamp, object_count, bytes_used, - reported_put_timestamp, reported_delete_timestamp, - reported_object_count, reported_bytes_used, hash, id - %s - FROM container_stat - ''' % metadata).fetchone() + try: + data = conn.execute(''' + SELECT account, container, created_at, put_timestamp, + delete_timestamp, object_count, bytes_used, + reported_put_timestamp, reported_delete_timestamp, + reported_object_count, reported_bytes_used, hash, id + %s + FROM container_stat + ''' % metadata).fetchone() + except sqlite3.OperationalError, err: + if 'no such column: metadata' not in str(err): + raise + data = conn.execute(''' + SELECT account, container, created_at, put_timestamp, + delete_timestamp, object_count, bytes_used, + reported_put_timestamp, reported_delete_timestamp, + reported_object_count, reported_bytes_used, hash, id + FROM container_stat''').fetchone() data = dict(data) if include_metadata: try: diff --git a/swift/stats/db_stats_collector.py b/swift/stats/db_stats_collector.py index f65c20c3..95efaa85 100644 --- a/swift/stats/db_stats_collector.py +++ b/swift/stats/db_stats_collector.py @@ -58,10 +58,10 @@ class DatabaseStatsCollector(Daemon): (self.stats_type, (time.time() - start) / 60)) def get_data(self): - raise Exception('Not Implemented') + raise NotImplementedError('Subclasses must override') def get_header(self): - raise Exception('Not Implemented') + raise NotImplementedError('Subclasses must override') def find_and_process(self): src_filename = time.strftime(self.filename_format) diff --git a/test/unit/stats/test_db_stats_collector.py b/test/unit/stats/test_db_stats_collector.py index d336016e..3c4949af 100644 --- a/test/unit/stats/test_db_stats_collector.py +++ b/test/unit/stats/test_db_stats_collector.py @@ -93,7 +93,32 @@ class TestDbStats(unittest.TestCase): self.assertEqual(len(output_data), 10) return stat, output_data - def _gen_container_stat(self, set_metadata=False): + def _drop_metadata_col(self, broker, acc_name): + broker.conn.execute('''drop table container_stat''') + broker.conn.executescript(""" + CREATE TABLE container_stat ( + account TEXT DEFAULT '%s', + container TEXT DEFAULT 'test_con', + created_at TEXT, + put_timestamp TEXT DEFAULT '0', + delete_timestamp TEXT DEFAULT '0', + object_count INTEGER, + bytes_used INTEGER, + reported_put_timestamp TEXT DEFAULT '0', + reported_delete_timestamp TEXT DEFAULT '0', + reported_object_count INTEGER DEFAULT 0, + reported_bytes_used INTEGER DEFAULT 0, + hash TEXT default '00000000000000000000000000000000', + id TEXT, + status TEXT DEFAULT '', + status_changed_at TEXT DEFAULT '0' + ); + + INSERT INTO container_stat (object_count, bytes_used) + VALUES (1, 10); + """ % acc_name) + + def _gen_container_stat(self, set_metadata=False, drop_metadata=False): if set_metadata: self.conf['metadata_keys'] = 'test1,test2' # webob runs title on all headers @@ -116,8 +141,13 @@ class TestDbStats(unittest.TestCase): metadata_output = ',,1' # this will "commit" the data cont_db.get_info() - output_data.add('''"test_acc_%s","test_con",1,10%s''' % - (i, metadata_output)) + if drop_metadata: + output_data.add('''"test_acc_%s","test_con",1,10,,''' % i) + else: + output_data.add('''"test_acc_%s","test_con",1,10%s''' % + (i, metadata_output)) + if drop_metadata: + self._drop_metadata_col(cont_db, 'test_acc_%s' % i) self.assertEqual(len(output_data), 10) return stat, output_data @@ -147,6 +177,21 @@ class TestDbStats(unittest.TestCase): self.assertEqual(len(output_data), 0) + def test_account_stat_run_once_container_no_metadata(self): + + stat, output_data = self._gen_container_stat(set_metadata=True, + drop_metadata=True) + stat.run_once() + stat_file = os.listdir(self.log_dir)[0] + with open(os.path.join(self.log_dir, stat_file)) as stat_handle: + headers = stat_handle.readline() + self.assert_(headers.startswith('Account Hash,Container Name,')) + for i in range(10): + data = stat_handle.readline() + output_data.discard(data.strip()) + + self.assertEqual(len(output_data), 0) + def test_account_stat_run_once_both(self): acc_stat, acc_output_data = self._gen_account_stat() con_stat, con_output_data = self._gen_container_stat() @@ -180,8 +225,8 @@ class TestDbStats(unittest.TestCase): def test_not_implemented(self): db_stat = db_stats_collector.DatabaseStatsCollector(self.conf, 'account', 'test_dir', 'stats-%Y%m%d%H_') - self.assertRaises(Exception, db_stat.get_data) - self.assertRaises(Exception, db_stat.get_header) + self.assertRaises(NotImplementedError, db_stat.get_data) + self.assertRaises(NotImplementedError, db_stat.get_header) def test_not_not_mounted(self): self.conf['mount_check'] = 'true' From 818c4faa7368d0d25250e855173fa4016544bf1f Mon Sep 17 00:00:00 2001 From: gholt Date: Fri, 10 Jun 2011 16:56:53 +0000 Subject: [PATCH 11/11] Made failed account autocreate raise exception to cause 5xx. --- swift/proxy/server.py | 5 +++-- test/unit/proxy/test_server.py | 22 ++++++++++++++++++++++ 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/swift/proxy/server.py b/swift/proxy/server.py index 2943c6a9..8a451200 100644 --- a/swift/proxy/server.py +++ b/swift/proxy/server.py @@ -394,8 +394,9 @@ class Controller(object): resp = self.make_requests(Request.blank('/v1' + path), self.app.account_ring, partition, 'PUT', path, [headers] * len(nodes)) - if resp.status_int // 100 == 2: - result_code = 200 + if resp.status_int // 100 != 2: + raise Exception('Could not autocreate account %r' % path) + result_code = 200 if self.app.memcache and result_code in (200, 404): if result_code == 200: cache_timeout = self.app.recheck_account_existence diff --git a/test/unit/proxy/test_server.py b/test/unit/proxy/test_server.py index afdea73f..eb09ad34 100644 --- a/test/unit/proxy/test_server.py +++ b/test/unit/proxy/test_server.py @@ -395,24 +395,46 @@ class TestController(unittest.TestCase): def test_account_info_account_autocreate(self): with save_globals(): + self.memcache.store = {} proxy_server.http_connect = \ fake_http_connect(404, 404, 404, 201, 201, 201) partition, nodes = \ self.controller.account_info(self.account, autocreate=False) self.check_account_info_return(partition, nodes, is_none=True) + self.memcache.store = {} proxy_server.http_connect = \ fake_http_connect(404, 404, 404, 201, 201, 201) partition, nodes = \ self.controller.account_info(self.account) self.check_account_info_return(partition, nodes, is_none=True) + self.memcache.store = {} proxy_server.http_connect = \ fake_http_connect(404, 404, 404, 201, 201, 201) partition, nodes = \ self.controller.account_info(self.account, autocreate=True) self.check_account_info_return(partition, nodes) + self.memcache.store = {} + proxy_server.http_connect = \ + fake_http_connect(404, 404, 404, 503, 201, 201) + partition, nodes = \ + self.controller.account_info(self.account, autocreate=True) + self.check_account_info_return(partition, nodes) + + self.memcache.store = {} + proxy_server.http_connect = \ + fake_http_connect(404, 404, 404, 503, 201, 503) + exc = None + try: + partition, nodes = \ + self.controller.account_info(self.account, autocreate=True) + except Exception, err: + exc = err + self.assertEquals(str(exc), + "Could not autocreate account '/some_account'") + def check_container_info_return(self, ret, is_none=False): if is_none: partition, nodes, read_acl, write_acl = None, None, None, None