# Copyright (c) 2010-2012 OpenStack, LLC. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or # implied. # See the License for the specific language governing permissions and # limitations under the License. import os import time from eventlet import Timeout from swift.obj import server as object_server from swift.common.utils import get_logger, audit_location_generator, \ ratelimit_sleep, TRUE_VALUES from swift.common.exceptions import AuditException, DiskFileError, \ DiskFileNotExist from swift.common.daemon import Daemon SLEEP_BETWEEN_AUDITS = 30 class AuditorWorker(object): """Walk through file system to audit object""" def __init__(self, conf, zero_byte_only_at_fps=0): self.conf = conf self.logger = get_logger(conf, log_route='object-auditor') self.devices = conf.get('devices', '/srv/node') self.mount_check = conf.get('mount_check', 'true').lower() in \ TRUE_VALUES self.max_files_per_second = float(conf.get('files_per_second', 20)) self.max_bytes_per_second = float(conf.get('bytes_per_second', 10000000)) self.auditor_type = 'ALL' self.zero_byte_only_at_fps = zero_byte_only_at_fps if self.zero_byte_only_at_fps: self.max_files_per_second = float(self.zero_byte_only_at_fps) self.auditor_type = 'ZBF' self.log_time = int(conf.get('log_time', 3600)) self.files_running_time = 0 self.bytes_running_time = 0 self.bytes_processed = 0 self.total_bytes_processed = 0 self.total_files_processed = 0 self.passes = 0 self.quarantines = 0 self.errors = 0 def audit_all_objects(self, mode='once'): self.logger.info(_('Begin object audit "%s" mode (%s)' % (mode, self.auditor_type))) begin = reported = time.time() self.total_bytes_processed = 0 self.total_files_processed = 0 total_quarantines = 0 total_errors = 0 files_running_time = 0 time_auditing = 0 all_locs = audit_location_generator(self.devices, object_server.DATADIR, mount_check=self.mount_check, logger=self.logger) for path, device, partition in all_locs: loop_time = time.time() self.object_audit(path, device, partition) self.logger.timing_since('timing', loop_time) self.files_running_time = ratelimit_sleep( self.files_running_time, self.max_files_per_second) self.total_files_processed += 1 now = time.time() if now - reported >= self.log_time: self.logger.info(_( 'Object audit (%(type)s). ' 'Since %(start_time)s: Locally: %(passes)d passed, ' '%(quars)d quarantined, %(errors)d errors ' 'files/sec: %(frate).2f , bytes/sec: %(brate).2f, ' 'Total time: %(total).2f, Auditing time: %(audit).2f, ' 'Rate: %(audit_rate).2f') % { 'type': self.auditor_type, 'start_time': time.ctime(reported), 'passes': self.passes, 'quars': self.quarantines, 'errors': self.errors, 'frate': self.passes / (now - reported), 'brate': self.bytes_processed / (now - reported), 'total': (now - begin), 'audit': time_auditing, 'audit_rate': time_auditing / (now - begin)}) reported = now total_quarantines += self.quarantines total_errors += self.errors self.passes = 0 self.quarantines = 0 self.errors = 0 self.bytes_processed = 0 time_auditing += (now - loop_time) # Avoid divide by zero during very short runs elapsed = (time.time() - begin) or 0.000001 self.logger.info(_( 'Object audit (%(type)s) "%(mode)s" mode ' 'completed: %(elapsed).02fs. Total quarantined: %(quars)d, ' 'Total errors: %(errors)d, Total files/sec: %(frate).2f , ' 'Total bytes/sec: %(brate).2f, Auditing time: %(audit).2f, ' 'Rate: %(audit_rate).2f') % { 'type': self.auditor_type, 'mode': mode, 'elapsed': elapsed, 'quars': total_quarantines, 'errors': total_errors, 'frate': self.total_files_processed / elapsed, 'brate': self.total_bytes_processed / elapsed, 'audit': time_auditing, 'audit_rate': time_auditing / elapsed}) def object_audit(self, path, device, partition): """ Audits the given object path. :param path: a path to an object :param device: the device the path is on :param partition: the partition the path is on """ try: if not path.endswith('.data'): return try: name = object_server.read_metadata(path)['name'] except (Exception, Timeout), exc: raise AuditException('Error when reading metadata: %s' % exc) _junk, account, container, obj = name.split('/', 3) df = object_server.DiskFile(self.devices, device, partition, account, container, obj, self.logger, keep_data_fp=True) try: if df.data_file is None: # file is deleted, we found the tombstone return try: obj_size = df.get_data_file_size() except DiskFileError, e: raise AuditException(str(e)) except DiskFileNotExist: return if self.zero_byte_only_at_fps and obj_size: self.passes += 1 return for chunk in df: self.bytes_running_time = ratelimit_sleep( self.bytes_running_time, self.max_bytes_per_second, incr_by=len(chunk)) self.bytes_processed += len(chunk) self.total_bytes_processed += len(chunk) df.close() if df.quarantined_dir: self.quarantines += 1 self.logger.error( _("ERROR Object %(path)s failed audit and will be " "quarantined: ETag and file's md5 do not match"), {'path': path}) finally: df.close(verify_file=False) except AuditException, err: self.logger.increment('quarantines') self.quarantines += 1 self.logger.error(_('ERROR Object %(obj)s failed audit and will ' 'be quarantined: %(err)s'), {'obj': path, 'err': err}) object_server.quarantine_renamer( os.path.join(self.devices, device), path) return except (Exception, Timeout): self.logger.increment('errors') self.errors += 1 self.logger.exception(_('ERROR Trying to audit %s'), path) return self.passes += 1 class ObjectAuditor(Daemon): """Audit objects.""" def __init__(self, conf, **options): self.conf = conf self.logger = get_logger(conf, log_route='object-auditor') self.conf_zero_byte_fps = int(conf.get( 'zero_byte_files_per_second', 50)) def _sleep(self): time.sleep(SLEEP_BETWEEN_AUDITS) def run_forever(self, *args, **kwargs): """Run the object audit until stopped.""" # zero byte only command line option zbo_fps = kwargs.get('zero_byte_fps', 0) if zbo_fps: # only start parent parent = True else: parent = os.fork() # child gets parent = 0 kwargs = {'mode': 'forever'} if parent: kwargs['zero_byte_fps'] = zbo_fps or self.conf_zero_byte_fps while True: try: self.run_once(**kwargs) except (Exception, Timeout): self.logger.exception(_('ERROR auditing')) self._sleep() def run_once(self, *args, **kwargs): """Run the object audit once.""" mode = kwargs.get('mode', 'once') zero_byte_only_at_fps = kwargs.get('zero_byte_fps', 0) worker = AuditorWorker(self.conf, zero_byte_only_at_fps=zero_byte_only_at_fps) worker.audit_all_objects(mode=mode)