Use cached_property with the parsers and split the exception handling between ImportErrors and every other exception.

This commit is contained in:
Jannis Leidel
2011-04-18 12:37:31 +02:00
parent e57e405c4e
commit e363a30cc3
3 changed files with 63 additions and 60 deletions

View File

@@ -1,20 +1,23 @@
from __future__ import absolute_import
from compressor.exceptions import ParserError
from compressor.parser import ParserBase
from django.core.exceptions import ImproperlyConfigured
from django.utils.encoding import smart_unicode
class BeautifulSoupParser(ParserBase):
_soup = None
from compressor.exceptions import ParserError
from compressor.parser import ParserBase
from compressor.utils.cache import cached_property
@property
class BeautifulSoupParser(ParserBase):
@cached_property
def soup(self):
if self._soup is None:
try:
from BeautifulSoup import BeautifulSoup
except ImportError, e:
raise ParserError("Error while initializing Parser: %s" % e)
self._soup = BeautifulSoup(self.content)
return self._soup
try:
from BeautifulSoup import BeautifulSoup
return BeautifulSoup(self.content)
except ImportError, err:
raise ImproperlyConfigured("Error while importing BeautifulSoup: %s" % err)
except Exception, err:
raise ParserError("Error while initializing Parser: %s" % err)
def css_elems(self):
return self.soup.findAll({'link': True, 'style': True})

View File

@@ -1,43 +1,40 @@
from __future__ import absolute_import
from django.utils.encoding import smart_unicode
from django.core.exceptions import ImproperlyConfigured
from compressor.exceptions import ParserError
from compressor.parser import ParserBase
from django.utils.encoding import smart_unicode
from compressor.utils.cache import cached_property
try:
import html5lib
except ImportError:
html5lib = None
def _serialize(el):
fragment = html5lib.treebuilders.simpletree.DocumentFragment()
fragment.appendChild(el)
return html5lib.serialize(fragment, quote_attr_values=True,
omit_optional_tags=False)
def _find(tree, *names):
for node in tree.childNodes:
if node.type == 5 and node.name in names:
yield node
class Html5LibParser(ParserBase):
_html = None
@property
def _serialize(self, elem):
fragment = html5lib.treebuilders.simpletree.DocumentFragment()
fragment.appendChild(elem)
return html5lib.serialize(fragment,
quote_attr_values=True, omit_optional_tags=False)
def _find(self, *names):
for node in self.html.childNodes:
if node.type == 5 and node.name in names:
yield node
@cached_property
def html(self):
if self._html is None:
try:
import html5lib
self._html = html5lib.parseFragment(self.content)
except Exception, e:
raise ParserError("Error while initializing Parser: %s" % e)
return self._html
try:
import html5lib
return html5lib.parseFragment(self.content)
except ImportError, err:
raise ImproperlyConfigured("Error while importing html5lib: %s" % err)
except Exception, err:
raise ParserError("Error while initializing Parser: %s" % err)
def css_elems(self):
return _find(self.html, 'style', 'link')
return self._find('style', 'link')
def js_elems(self):
return _find(self.html, 'script')
return self._find('script')
def elem_attribs(self, elem):
return elem.attributes
@@ -49,4 +46,4 @@ class Html5LibParser(ParserBase):
return elem.name
def elem_str(self, elem):
return smart_unicode(_serialize(elem))
return smart_unicode(self._serialize(elem))

View File

@@ -1,28 +1,31 @@
from __future__ import absolute_import
from compressor.exceptions import ParserError
from compressor.parser import ParserBase
from django.core.exceptions import ImproperlyConfigured
from django.utils.encoding import smart_unicode
class LxmlParser(ParserBase):
_tree = None
from compressor.exceptions import ParserError
from compressor.parser import ParserBase
from compressor.utils.cache import cached_property
@property
class LxmlParser(ParserBase):
@cached_property
def tree(self):
if self._tree is None:
content = '<root>%s</root>' % self.content
try:
from lxml.html import fromstring, soupparser
from lxml.etree import tostring
tree = fromstring(content)
try:
from lxml import html
from lxml.etree import tostring
except ImportError, e:
raise ParserError("Error while initializing Parser: %s" % e)
else:
content = '<root>%s</root>' % self.content
self._tree = html.fromstring(content)
try:
ignore = tostring(self._tree, encoding=unicode)
except UnicodeDecodeError:
self._tree = html.soupparser.fromstring(content)
return self._tree
ignore = tostring(tree, encoding=unicode)
except UnicodeDecodeError:
tree = soupparser.fromstring(content)
except ImportError, err:
raise ImproperlyConfigured("Error while importing lxml: %s" % err)
except Exception, err:
raise ParserError("Error while initializing Parser: %s" % err)
else:
return tree
def css_elems(self):
return self.tree.xpath('link[@rel="stylesheet"]|style')