diff --git a/compressor/parser.py b/compressor/parser.py deleted file mode 100644 index 8ac09a6..0000000 --- a/compressor/parser.py +++ /dev/null @@ -1,119 +0,0 @@ -from django.utils.encoding import smart_unicode - -from compressor.exceptions import ParserError - - -class ParserBase(object): - """ - Base parser to be subclassed when creating an own parser. - """ - def __init__(self, content): - self.content = content - - def css_elems(self): - """ - Return an iterable containing the css elements to handle - """ - raise NotImplementedError - - def js_elems(self): - """ - Return an iterable containing the js elements to handle - """ - raise NotImplementedError - - def elem_attribs(self, elem): - """ - Return the dictionary like attribute store of the given element - """ - raise NotImplementedError - - def elem_content(self, elem): - """ - Return the content of the given element - """ - raise NotImplementedError - - def elem_name(self, elem): - """ - Return the name of the given element - """ - raise NotImplementedError - - def elem_str(self, elem): - """ - Return the string representation of the given elem - """ - raise NotImplementedError - - -class BeautifulSoupParser(ParserBase): - _soup = None - - @property - def soup(self): - try: - from BeautifulSoup import BeautifulSoup - except ImportError, e: - raise ParserError("Error while initializing Parser: %s" % e) - if self._soup is None: - self._soup = BeautifulSoup(self.content) - return self._soup - - def css_elems(self): - return self.soup.findAll({'link': True, 'style': True}) - - def js_elems(self): - return self.soup.findAll('script') - - def elem_attribs(self, elem): - return dict(elem.attrs) - - def elem_content(self, elem): - return elem.string - - def elem_name(self, elem): - return elem.name - - def elem_str(self, elem): - return smart_unicode(elem) - - -class LxmlParser(ParserBase): - _tree = None - - @property - def tree(self): - try: - from lxml import html - from lxml.etree import tostring - except 
ImportError, e: - raise ParserError("Error while initializing Parser: %s" % e) - if self._tree is None: - content = '%s' % self.content - self._tree = html.fromstring(content) - try: - ignore = tostring(self._tree, encoding=unicode) - except UnicodeDecodeError: - self._tree = html.soupparser.fromstring(content) - return self._tree - - def css_elems(self): - return self.tree.xpath('link[@rel="stylesheet"]|style') - - def js_elems(self): - return self.tree.findall('script') - - def elem_attribs(self, elem): - return elem.attrib - - def elem_content(self, elem): - return smart_unicode(elem.text) - - def elem_name(self, elem): - return elem.tag - - def elem_str(self, elem): - from lxml import etree - return smart_unicode( - etree.tostring(elem, method='html', encoding=unicode)) diff --git a/compressor/parser/__init__.py b/compressor/parser/__init__.py new file mode 100644 index 0000000..81eba01 --- /dev/null +++ b/compressor/parser/__init__.py @@ -0,0 +1,5 @@ +# support legacy parser module usage +from compressor.parser.base import ParserBase +from compressor.parser.beautifulsoup_parser import BeautifulSoupParser +from compressor.parser.lxml_parser import LxmlParser +from compressor.parser.html5lib_parser import Html5LibParser diff --git a/compressor/parser/base.py b/compressor/parser/base.py new file mode 100644 index 0000000..8bf4dd2 --- /dev/null +++ b/compressor/parser/base.py @@ -0,0 +1,42 @@ +class ParserBase(object): + """ + Base parser to be subclassed when creating an own parser. 
+ """ + def __init__(self, content): + self.content = content + + def css_elems(self): + """ + Return an iterable containing the css elements to handle + """ + raise NotImplementedError + + def js_elems(self): + """ + Return an iterable containing the js elements to handle + """ + raise NotImplementedError + + def elem_attribs(self, elem): + """ + Return the dictionary like attribute store of the given element + """ + raise NotImplementedError + + def elem_content(self, elem): + """ + Return the content of the given element + """ + raise NotImplementedError + + def elem_name(self, elem): + """ + Return the name of the given element + """ + raise NotImplementedError + + def elem_str(self, elem): + """ + Return the string representation of the given elem + """ + raise NotImplementedError diff --git a/compressor/parser/beautifulsoup_parser.py b/compressor/parser/beautifulsoup_parser.py new file mode 100644 index 0000000..1e3003e --- /dev/null +++ b/compressor/parser/beautifulsoup_parser.py @@ -0,0 +1,34 @@ +from compressor.exceptions import ParserError +from compressor.parser import ParserBase +from django.utils.encoding import smart_unicode + +class BeautifulSoupParser(ParserBase): + _soup = None + + @property + def soup(self): + if self._soup is None: + try: + from BeautifulSoup import BeautifulSoup + except ImportError, e: + raise ParserError("Error while initializing Parser: %s" % e) + self._soup = BeautifulSoup(self.content) + return self._soup + + def css_elems(self): + return self.soup.findAll({'link': True, 'style': True}) + + def js_elems(self): + return self.soup.findAll('script') + + def elem_attribs(self, elem): + return dict(elem.attrs) + + def elem_content(self, elem): + return elem.string + + def elem_name(self, elem): + return elem.name + + def elem_str(self, elem): + return smart_unicode(elem) diff --git a/compressor/parser/html5lib_parser.py b/compressor/parser/html5lib_parser.py new file mode 100644 index 0000000..22c7ee6 --- /dev/null +++ 
b/compressor/parser/html5lib_parser.py @@ -0,0 +1,51 @@ +from compressor.exceptions import ParserError +from compressor.parser import ParserBase +from django.utils.encoding import smart_unicode + +try: + import html5lib +except ImportError: + html5lib = None + +def _serialize(el): + fragment = html5lib.treebuilders.simpletree.DocumentFragment() + fragment.appendChild(el) + return html5lib.serialize(fragment, quote_attr_values=True, + omit_optional_tags=False) + +def _find(tree, *names): + for node in tree.childNodes: + if node.type == 5 and node.name in names: + yield node + +class Html5LibParser(ParserBase): + _html = None + + @property + def html(self): + if self._html is None: + try: + import html5lib + self._html = html5lib.parseFragment(self.content) + except Exception, e: + raise ParserError("Error while initializing Parser: %s" % e) + return self._html + + + def css_elems(self): + return _find(self.html, 'style', 'link') + + def js_elems(self): + return _find(self.html, 'script') + + def elem_attribs(self, elem): + return elem.attributes + + def elem_content(self, elem): + return elem.childNodes[0].value + + def elem_name(self, elem): + return elem.name + + def elem_str(self, elem): + return smart_unicode(_serialize(elem)) diff --git a/compressor/parser/lxml_parser.py b/compressor/parser/lxml_parser.py new file mode 100644 index 0000000..ff3c875 --- /dev/null +++ b/compressor/parser/lxml_parser.py @@ -0,0 +1,43 @@ +from compressor.exceptions import ParserError +from compressor.parser import ParserBase + +from django.utils.encoding import smart_unicode + +class LxmlParser(ParserBase): + _tree = None + + @property + def tree(self): + try: + from lxml import html + from lxml.etree import tostring + except ImportError, e: + raise ParserError("Error while initializing Parser: %s" % e) + if self._tree is None: + content = '%s' % self.content + self._tree = html.fromstring(content) + try: + ignore = tostring(self._tree, encoding=unicode) + except UnicodeDecodeError: 
+ self._tree = html.soupparser.fromstring(content) + return self._tree + + def css_elems(self): + return self.tree.xpath('link[@rel="stylesheet"]|style') + + def js_elems(self): + return self.tree.findall('script') + + def elem_attribs(self, elem): + return elem.attrib + + def elem_content(self, elem): + return smart_unicode(elem.text) + + def elem_name(self, elem): + return elem.tag + + def elem_str(self, elem): + from lxml import etree + return smart_unicode( + etree.tostring(elem, method='html', encoding=unicode)) diff --git a/compressor/tests/tests.py b/compressor/tests/tests.py index 5fdcbd3..2d1538c 100644 --- a/compressor/tests/tests.py +++ b/compressor/tests/tests.py @@ -8,6 +8,11 @@ try: except ImportError: lxml = None +try: + import html5lib +except ImportError: + html5lib = None + from django.core.cache.backends import dummy from django.core.files.storage import get_storage_class from django.template import Template, Context, TemplateSyntaxError @@ -150,6 +155,34 @@ if lxml: def tearDown(self): settings.COMPRESS_PARSER = self.old_parser +if html5lib: + class Html5LibCompressorTestCase(CompressorTestCase): + + def test_css_split(self): + out = [ + ('file', os.path.join(settings.COMPRESS_ROOT, u'css/one.css'), u''), + ('hunk', u'p { border:5px solid green;}', u''), + ('file', os.path.join(settings.COMPRESS_ROOT, u'css/two.css'), u''), + ] + split = self.css_node.split_contents() + split = [(x[0], x[1], self.css_node.parser.elem_str(x[2])) for x in split] + self.assertEqual(out, split) + + def test_js_split(self): + out = [('file', os.path.join(settings.COMPRESS_ROOT, u'js/one.js'), u''), + ('hunk', u'obj.value = "value";', u'') + ] + split = self.js_node.split_contents() + split = [(x[0], x[1], self.js_node.parser.elem_str(x[2])) for x in split] + self.assertEqual(out, split) + + def setUp(self): + self.old_parser = settings.COMPRESS_PARSER + settings.COMPRESS_PARSER = 'compressor.parser.Html5LibParser' + super(Html5LibCompressorTestCase, self).setUp() + + 
def tearDown(self): settings.COMPRESS_PARSER = self.old_parser class CssAbsolutizingTestCase(TestCase): def setUp(self): diff --git a/docs/installation.txt b/docs/installation.txt index c563551..c2f9c72 100644 --- a/docs/installation.txt +++ b/docs/installation.txt @@ -51,9 +51,18 @@ for the optional :ref:`parser ` STATIC_DEPS=true pip install lxml +html5lib_ (optional) +^^^^^^^^^^^^^^^^^^^^ + +for the optional :ref:`parser ` +``compressor.parser.Html5LibParser``:: + + pip install html5lib + .. _BeautifulSoup: http://www.crummy.com/software/BeautifulSoup/ .. _lxml: http://codespeak.net/lxml/ .. _libxml2: http://xmlsoft.org/ +.. _html5lib: https://github.com/html5lib/html5lib-python Deprecation ----------- diff --git a/docs/settings.txt b/docs/settings.txt index 2cd15f7..7f77c77 100644 --- a/docs/settings.txt +++ b/docs/settings.txt @@ -171,6 +171,7 @@ The backends included in Django Compressor: - ``compressor.parser.BeautifulSoupParser`` - ``compressor.parser.LxmlParser`` +- ``compressor.parser.Html5LibParser`` See :ref:`dependencies` for more info about the packages you need for each parser.