Use cached_property with the parsers and split the exception handling between ImportErrors and every other exception.

This commit is contained in:
Jannis Leidel
2011-04-18 12:37:31 +02:00
parent e57e405c4e
commit e363a30cc3
3 changed files with 63 additions and 60 deletions

View File

@@ -1,20 +1,23 @@
from __future__ import absolute_import from __future__ import absolute_import
from compressor.exceptions import ParserError from django.core.exceptions import ImproperlyConfigured
from compressor.parser import ParserBase
from django.utils.encoding import smart_unicode from django.utils.encoding import smart_unicode
class BeautifulSoupParser(ParserBase): from compressor.exceptions import ParserError
_soup = None from compressor.parser import ParserBase
from compressor.utils.cache import cached_property
@property
class BeautifulSoupParser(ParserBase):
@cached_property
def soup(self): def soup(self):
if self._soup is None: try:
try: from BeautifulSoup import BeautifulSoup
from BeautifulSoup import BeautifulSoup return BeautifulSoup(self.content)
except ImportError, e: except ImportError, err:
raise ParserError("Error while initializing Parser: %s" % e) raise ImproperlyConfigured("Error while importing BeautifulSoup: %s" % err)
self._soup = BeautifulSoup(self.content) except Exception, err:
return self._soup raise ParserError("Error while initializing Parser: %s" % err)
def css_elems(self): def css_elems(self):
return self.soup.findAll({'link': True, 'style': True}) return self.soup.findAll({'link': True, 'style': True})

View File

@@ -1,43 +1,40 @@
from __future__ import absolute_import from __future__ import absolute_import
from django.utils.encoding import smart_unicode
from django.core.exceptions import ImproperlyConfigured
from compressor.exceptions import ParserError from compressor.exceptions import ParserError
from compressor.parser import ParserBase from compressor.parser import ParserBase
from django.utils.encoding import smart_unicode from compressor.utils.cache import cached_property
try:
import html5lib
except ImportError:
html5lib = None
def _serialize(el):
fragment = html5lib.treebuilders.simpletree.DocumentFragment()
fragment.appendChild(el)
return html5lib.serialize(fragment, quote_attr_values=True,
omit_optional_tags=False)
def _find(tree, *names):
for node in tree.childNodes:
if node.type == 5 and node.name in names:
yield node
class Html5LibParser(ParserBase): class Html5LibParser(ParserBase):
_html = None
@property def _serialize(self, elem):
fragment = html5lib.treebuilders.simpletree.DocumentFragment()
fragment.appendChild(elem)
return html5lib.serialize(fragment,
quote_attr_values=True, omit_optional_tags=False)
def _find(self, *names):
for node in self.html.childNodes:
if node.type == 5 and node.name in names:
yield node
@cached_property
def html(self): def html(self):
if self._html is None: try:
try: import html5lib
import html5lib return html5lib.parseFragment(self.content)
self._html = html5lib.parseFragment(self.content) except ImportError, err:
except Exception, e: raise ImproperlyConfigured("Error while importing html5lib: %s" % err)
raise ParserError("Error while initializing Parser: %s" % e) except Exception, err:
return self._html raise ParserError("Error while initializing Parser: %s" % err)
def css_elems(self): def css_elems(self):
return _find(self.html, 'style', 'link') return self._find('style', 'link')
def js_elems(self): def js_elems(self):
return _find(self.html, 'script') return self._find('script')
def elem_attribs(self, elem): def elem_attribs(self, elem):
return elem.attributes return elem.attributes
@@ -49,4 +46,4 @@ class Html5LibParser(ParserBase):
return elem.name return elem.name
def elem_str(self, elem): def elem_str(self, elem):
return smart_unicode(_serialize(elem)) return smart_unicode(self._serialize(elem))

View File

@@ -1,28 +1,31 @@
from __future__ import absolute_import from __future__ import absolute_import
from compressor.exceptions import ParserError from django.core.exceptions import ImproperlyConfigured
from compressor.parser import ParserBase
from django.utils.encoding import smart_unicode from django.utils.encoding import smart_unicode
class LxmlParser(ParserBase): from compressor.exceptions import ParserError
_tree = None from compressor.parser import ParserBase
from compressor.utils.cache import cached_property
@property
class LxmlParser(ParserBase):
@cached_property
def tree(self): def tree(self):
if self._tree is None: content = '<root>%s</root>' % self.content
try:
from lxml.html import fromstring, soupparser
from lxml.etree import tostring
tree = fromstring(content)
try: try:
from lxml import html ignore = tostring(tree, encoding=unicode)
from lxml.etree import tostring except UnicodeDecodeError:
except ImportError, e: tree = soupparser.fromstring(content)
raise ParserError("Error while initializing Parser: %s" % e) except ImportError, err:
else: raise ImproperlyConfigured("Error while importing lxml: %s" % err)
content = '<root>%s</root>' % self.content except Exception, err:
self._tree = html.fromstring(content) raise ParserError("Error while initializing Parser: %s" % err)
try: else:
ignore = tostring(self._tree, encoding=unicode) return tree
except UnicodeDecodeError:
self._tree = html.soupparser.fromstring(content)
return self._tree
def css_elems(self): def css_elems(self):
return self.tree.xpath('link[@rel="stylesheet"]|style') return self.tree.xpath('link[@rel="stylesheet"]|style')