Files
deb-python-django-compressor/compressor/parser/lxml.py
Jaap Roes b6d5131611 Created new parser, HtmlParser, based on the stdlib HTMLParser module.
Added AutoSelectParser, picks LxmlParser if lxml is available, falls back to HtmlParser if not, also the new default.
Created a special BeautifulSoupTest in order to still test this parser.
Updated README, installation and settings docs to reflect these changes.
2011-04-20 12:01:33 +02:00

53 lines
1.6 KiB
Python

from __future__ import absolute_import
from django.core.exceptions import ImproperlyConfigured
from django.utils.encoding import smart_unicode
from compressor.exceptions import ParserError
from compressor.parser import ParserBase
from compressor.utils.cache import cached_property
class LxmlParser(ParserBase):
@cached_property
def tree(self):
content = '<root>%s</root>' % self.content
try:
from lxml.html import fromstring, soupparser
from lxml.etree import tostring
self.tostring = tostring
tree = fromstring(content)
try:
ignore = tostring(tree, encoding=unicode)
except UnicodeDecodeError:
tree = soupparser.fromstring(content)
except ImportError, err:
raise ImproperlyConfigured("Error while importing lxml: %s" % err)
except Exception, err:
raise ParserError("Error while initializing Parser: %s" % err)
else:
return tree
def css_elems(self):
return self.tree.xpath('link[@rel="stylesheet"]|style')
def js_elems(self):
return self.tree.findall('script')
def elem_attribs(self, elem):
return elem.attrib
def elem_content(self, elem):
return smart_unicode(elem.text)
def elem_name(self, elem):
return elem.tag
def elem_str(self, elem):
elem_as_string = smart_unicode(
self.tostring(elem, method='html', encoding=unicode))
if elem.tag == 'link':
# This makes testcases happy
return elem_as_string.replace('>', ' />')
return elem_as_string