82 lines
2.7 KiB
Python
82 lines
2.7 KiB
Python
from __future__ import absolute_import, unicode_literals
|
|
|
|
from django.core.exceptions import ImproperlyConfigured
|
|
from django.utils import six
|
|
from django.utils.encoding import smart_text
|
|
|
|
from compressor.exceptions import ParserError
|
|
from compressor.parser import ParserBase
|
|
from compressor.utils.decorators import cached_property
|
|
|
|
|
|
class LxmlParser(ParserBase):
|
|
"""
|
|
LxmlParser will use `lxml.html` parser to parse rendered contents of
|
|
{% compress %} tag. Under python 2 it will also try to use beautiful
|
|
soup parser in case of any problems with encoding.
|
|
"""
|
|
def __init__(self, content):
|
|
try:
|
|
from lxml.html import fromstring
|
|
from lxml.etree import tostring
|
|
except ImportError as err:
|
|
raise ImproperlyConfigured("Error while importing lxml: %s" % err)
|
|
except Exception as err:
|
|
raise ParserError("Error while initializing parser: %s" % err)
|
|
|
|
if not six.PY3:
|
|
# soupparser uses Beautiful Soup 3 which does not run on python 3.x
|
|
try:
|
|
from lxml.html import soupparser
|
|
except ImportError as err:
|
|
soupparser = None
|
|
except Exception as err:
|
|
raise ParserError("Error while initializing parser: %s" % err)
|
|
else:
|
|
soupparser = None
|
|
|
|
self.soupparser = soupparser
|
|
self.fromstring = fromstring
|
|
self.tostring = tostring
|
|
super(LxmlParser, self).__init__(content)
|
|
|
|
@cached_property
|
|
def tree(self):
|
|
"""
|
|
Document tree.
|
|
"""
|
|
content = '<root>%s</root>' % self.content
|
|
tree = self.fromstring(content)
|
|
try:
|
|
self.tostring(tree, encoding=six.text_type)
|
|
except UnicodeDecodeError:
|
|
if self.soupparser: # use soup parser on python 2
|
|
tree = self.soupparser.fromstring(content)
|
|
else: # raise an error on python 3
|
|
raise
|
|
return tree
|
|
|
|
def css_elems(self):
|
|
return self.tree.xpath('//link[re:test(@rel, "^stylesheet$", "i")]|style',
|
|
namespaces={"re": "http://exslt.org/regular-expressions"})
|
|
|
|
def js_elems(self):
|
|
return self.tree.findall('script')
|
|
|
|
def elem_attribs(self, elem):
|
|
return elem.attrib
|
|
|
|
def elem_content(self, elem):
|
|
return smart_text(elem.text)
|
|
|
|
def elem_name(self, elem):
|
|
return elem.tag
|
|
|
|
def elem_str(self, elem):
|
|
elem_as_string = smart_text(
|
|
self.tostring(elem, method='html', encoding=six.text_type))
|
|
if elem.tag == 'link':
|
|
# This makes testcases happy
|
|
return elem_as_string.replace('>', ' />')
|
|
return elem_as_string
|