From a6fa3bfd2d431772742811fca8b4fe4f98a7d7fa Mon Sep 17 00:00:00 2001 From: Clark Boylan Date: Wed, 7 Oct 2015 11:00:56 -0700 Subject: [PATCH] Dedup index entries When we generate indexes we query disk and swift. If any files have overlapping paths between them, we were generating duplicate index entries for the same paths. This happened because we used lists to store the entries, which allow for duplicates. Fix this by using sets until we need to sort (sets are unordered, so we have to have a list at that point). This will remove any duplicates and make the index pages more correct. Change-Id: I6dfa3b30819d6633c3e483d3a386bdce3e26b572 --- os_loganalyze/generator.py | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/os_loganalyze/generator.py b/os_loganalyze/generator.py index 09e059a..b9a9af8 100644 --- a/os_loganalyze/generator.py +++ b/os_loganalyze/generator.py @@ -210,24 +210,26 @@ class IndexIterableBuffer(collections.Iterable): self.file_headers = {} self.file_headers['Content-type'] = 'text/html' + # Use sets here to dedup. We can have duplicates + # if disk and swift based paths have overlap. 
+ file_set = self.disk_list() | self.swift_list() # file_list is a list of tuples (relpath, name) - self.file_list = self.disk_list() + self.swift_list() - self.file_list = sorted(self.file_list, key=lambda tup: tup[0]) + self.file_list = sorted(file_set, key=lambda tup: tup[0]) def disk_list(self): - file_list = [] + file_set = set() if os.path.isdir(self.logpath): for f in os.listdir(self.logpath): if os.path.isdir(os.path.join(self.logpath, f)): f = f + '/' if f[-1] != '/' else f - file_list.append(( + file_set.add(( os.path.join('/', self.logname, f), f )) - return file_list + return file_set def swift_list(self): - file_list = [] + file_set = set() if self.config.has_section('swift'): try: swift_config = dict(self.config.items('swift')) @@ -246,7 +248,7 @@ class IndexIterableBuffer(collections.Iterable): fname else: fname = os.path.relpath(f['name'], self.logname) - file_list.append(( + file_set.add(( os.path.join('/', self.logname, fname), fname )) @@ -256,7 +258,7 @@ class IndexIterableBuffer(collections.Iterable): sys.stderr.write('logname: %s\n' % self.logname) traceback.print_exc() - return file_list + return file_set def __iter__(self): env = jinja2.Environment( @@ -299,13 +301,13 @@ def get_file_generator(environ, root_path, config=None): os.path.join(logname, 'index.html'), config) if not file_generator or not file_generator.obj: - if config.has_section('general'): - if config.has_option('general', 'generate_folder_index'): - if config.getboolean('general', 'generate_folder_index'): - index_generator = IndexIterableBuffer(logname, logpath, - config) - if len(index_generator.file_list) > 0: - return index_generator + if (config.has_section('general') and + config.has_option('general', 'generate_folder_index') and + config.getboolean('general', 'generate_folder_index')): + index_generator = IndexIterableBuffer(logname, logpath, + config) + if len(index_generator.file_list) > 0: + return index_generator raise NoSuchFile() return file_generator