From 74b5c7c90cad07258fbe22cee4905813ab0ae433 Mon Sep 17 00:00:00 2001
From: Clark Boylan
Date: Mon, 16 Mar 2015 14:19:57 -0700
Subject: [PATCH] Update regex for better HTML detection.

We need to do two things to support better HTML detection. First we
must ignore case since HTML is not case sensitive. Second we allow for
the first line to be a +

diff --git a/os_loganalyze/tests/samples/sample_doctype.html b/os_loganalyze/tests/samples/sample_doctype.html
new file mode 100644
index 0000000..77e7f90
--- /dev/null
+++ b/os_loganalyze/tests/samples/sample_doctype.html
@@ -0,0 +1,3 @@
+
+
+
diff --git a/os_loganalyze/tests/test_views.py b/os_loganalyze/tests/test_views.py
new file mode 100644
index 0000000..284f3f9
--- /dev/null
+++ b/os_loganalyze/tests/test_views.py
@@ -0,0 +1,55 @@
+#!/usr/bin/env python
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+"""
+Test the view generators
+"""
+
+import os_loganalyze.filter as osfilter
+import os_loganalyze.generator as osgen
+from os_loganalyze.tests import base
+import os_loganalyze.view as osview
+
+
+class TestViews(base.TestCase):
+    """Check that HTMLView sets is_html once its first item is produced."""
+
+    def get_generator(self, fname):
+        """Return a filtered line generator for the sample file fname.
+
+        Overrides base's get_generator because we don't want the full
+        wsgi application. We just need the generator to give to Views.
+        """
+        root_path = base.samples_path(self.samples_directory)
+        kwargs = {'PATH_INFO': '/htmlify/%s' % fname}
+        logname, gen = osgen.get(self.fake_env(**kwargs), root_path)
+        return osfilter.Filter(logname, gen)
+
+    def _check_html_detection(self, fname):
+        """Assert is_html is unset before, and set after, the first item."""
+        html_view = osview.HTMLView(self.get_generator(fname))
+        i = iter(html_view)
+        self.assertFalse(html_view.is_html)
+        # Move the generator so that the is_html flag is set. The builtin
+        # next() is used because iterator.next() only exists on Python 2.
+        next(i)
+        self.assertTrue(html_view.is_html)
+
+    def test_html_detection(self):
+        """sample.html is flagged as HTML after the first item."""
+        self._check_html_detection('sample.html')
+
+    def test_doctype_html_detection(self):
+        """sample_doctype.html is flagged as HTML after the first item."""
+        self._check_html_detection('sample_doctype.html')
diff --git a/os_loganalyze/view.py b/os_loganalyze/view.py
index 022c3b6..9fba8ad 100644
--- a/os_loganalyze/view.py
+++ b/os_loganalyze/view.py
@@ -106,7 +106,7 @@ highlight_by_hash();
 DATE_LINE = ("" "%s%s\n")
 NONDATE_LINE = "%s\n"
-HTML_RE = re.compile("")
 # pre tags mean we're partial html and shouldn't escape