Use cached_property with the parsers and split the exception handling between ImportErrors and every other exception.
This commit is contained in:
@@ -1,20 +1,23 @@
|
|||||||
from __future__ import absolute_import
|
from __future__ import absolute_import
|
||||||
from compressor.exceptions import ParserError
|
from django.core.exceptions import ImproperlyConfigured
|
||||||
from compressor.parser import ParserBase
|
|
||||||
from django.utils.encoding import smart_unicode
|
from django.utils.encoding import smart_unicode
|
||||||
|
|
||||||
class BeautifulSoupParser(ParserBase):
|
from compressor.exceptions import ParserError
|
||||||
_soup = None
|
from compressor.parser import ParserBase
|
||||||
|
from compressor.utils.cache import cached_property
|
||||||
|
|
||||||
@property
|
|
||||||
|
class BeautifulSoupParser(ParserBase):
|
||||||
|
|
||||||
|
@cached_property
|
||||||
def soup(self):
|
def soup(self):
|
||||||
if self._soup is None:
|
try:
|
||||||
try:
|
from BeautifulSoup import BeautifulSoup
|
||||||
from BeautifulSoup import BeautifulSoup
|
return BeautifulSoup(self.content)
|
||||||
except ImportError, e:
|
except ImportError, err:
|
||||||
raise ParserError("Error while initializing Parser: %s" % e)
|
raise ImproperlyConfigured("Error while importing BeautifulSoup: %s" % err)
|
||||||
self._soup = BeautifulSoup(self.content)
|
except Exception, err:
|
||||||
return self._soup
|
raise ParserError("Error while initializing Parser: %s" % err)
|
||||||
|
|
||||||
def css_elems(self):
|
def css_elems(self):
|
||||||
return self.soup.findAll({'link': True, 'style': True})
|
return self.soup.findAll({'link': True, 'style': True})
|
||||||
|
|||||||
@@ -1,43 +1,40 @@
|
|||||||
from __future__ import absolute_import
|
from __future__ import absolute_import
|
||||||
|
from django.utils.encoding import smart_unicode
|
||||||
|
from django.core.exceptions import ImproperlyConfigured
|
||||||
|
|
||||||
from compressor.exceptions import ParserError
|
from compressor.exceptions import ParserError
|
||||||
from compressor.parser import ParserBase
|
from compressor.parser import ParserBase
|
||||||
from django.utils.encoding import smart_unicode
|
from compressor.utils.cache import cached_property
|
||||||
|
|
||||||
try:
|
|
||||||
import html5lib
|
|
||||||
except ImortError:
|
|
||||||
html5lib = None
|
|
||||||
|
|
||||||
def _serialize(el):
|
|
||||||
fragment = html5lib.treebuilders.simpletree.DocumentFragment()
|
|
||||||
fragment.appendChild(el)
|
|
||||||
return html5lib.serialize(fragment, quote_attr_values=True,
|
|
||||||
omit_optional_tags=False)
|
|
||||||
|
|
||||||
def _find(tree, *names):
|
|
||||||
for node in tree.childNodes:
|
|
||||||
if node.type == 5 and node.name in names:
|
|
||||||
yield node
|
|
||||||
|
|
||||||
class Html5LibParser(ParserBase):
|
class Html5LibParser(ParserBase):
|
||||||
_html = None
|
|
||||||
|
|
||||||
@property
|
def _serialize(self, elem):
|
||||||
|
fragment = html5lib.treebuilders.simpletree.DocumentFragment()
|
||||||
|
fragment.appendChild(elem)
|
||||||
|
return html5lib.serialize(fragment,
|
||||||
|
quote_attr_values=True, omit_optional_tags=False)
|
||||||
|
|
||||||
|
def _find(self, *names):
|
||||||
|
for node in self.html.childNodes:
|
||||||
|
if node.type == 5 and node.name in names:
|
||||||
|
yield node
|
||||||
|
|
||||||
|
@cached_property
|
||||||
def html(self):
|
def html(self):
|
||||||
if self._html is None:
|
try:
|
||||||
try:
|
import html5lib
|
||||||
import html5lib
|
return html5lib.parseFragment(self.content)
|
||||||
self._html = html5lib.parseFragment(self.content)
|
except ImortError, err:
|
||||||
except Exception, e:
|
raise ImproperlyConfigured("Error while importing html5lib: %s" % err)
|
||||||
raise ParserError("Error while initializing Parser: %s" % e)
|
except Exception, err:
|
||||||
return self._html
|
raise ParserError("Error while initializing Parser: %s" % err)
|
||||||
|
|
||||||
|
|
||||||
def css_elems(self):
|
def css_elems(self):
|
||||||
return _find(self.html, 'style', 'link')
|
return self._find('style', 'link')
|
||||||
|
|
||||||
def js_elems(self):
|
def js_elems(self):
|
||||||
return _find(self.html, 'script')
|
return self._find('script')
|
||||||
|
|
||||||
def elem_attribs(self, elem):
|
def elem_attribs(self, elem):
|
||||||
return elem.attributes
|
return elem.attributes
|
||||||
@@ -49,4 +46,4 @@ class Html5LibParser(ParserBase):
|
|||||||
return elem.name
|
return elem.name
|
||||||
|
|
||||||
def elem_str(self, elem):
|
def elem_str(self, elem):
|
||||||
return smart_unicode(_serialize(elem))
|
return smart_unicode(self._serialize(elem))
|
||||||
|
|||||||
@@ -1,28 +1,31 @@
|
|||||||
from __future__ import absolute_import
|
from __future__ import absolute_import
|
||||||
from compressor.exceptions import ParserError
|
from django.core.exceptions import ImproperlyConfigured
|
||||||
from compressor.parser import ParserBase
|
|
||||||
|
|
||||||
from django.utils.encoding import smart_unicode
|
from django.utils.encoding import smart_unicode
|
||||||
|
|
||||||
class LxmlParser(ParserBase):
|
from compressor.exceptions import ParserError
|
||||||
_tree = None
|
from compressor.parser import ParserBase
|
||||||
|
from compressor.utils.cache import cached_property
|
||||||
|
|
||||||
@property
|
|
||||||
|
class LxmlParser(ParserBase):
|
||||||
|
|
||||||
|
@cached_property
|
||||||
def tree(self):
|
def tree(self):
|
||||||
if self._tree is None:
|
content = '<root>%s</root>' % self.content
|
||||||
|
try:
|
||||||
|
from lxml.html import fromstring, soupparser
|
||||||
|
from lxml.etree import tostring
|
||||||
|
tree = fromstring(content)
|
||||||
try:
|
try:
|
||||||
from lxml import html
|
ignore = tostring(tree, encoding=unicode)
|
||||||
from lxml.etree import tostring
|
except UnicodeDecodeError:
|
||||||
except ImportError, e:
|
tree = soupparser.fromstring(content)
|
||||||
raise ParserError("Error while initializing Parser: %s" % e)
|
except ImportError, err:
|
||||||
else:
|
raise ImproperlyConfigured("Error while importing lxml: %s" % err)
|
||||||
content = '<root>%s</root>' % self.content
|
except Exception, err:
|
||||||
self._tree = html.fromstring(content)
|
raise ParserError("Error while initializing Parser: %s" % err)
|
||||||
try:
|
else:
|
||||||
ignore = tostring(self._tree, encoding=unicode)
|
return tree
|
||||||
except UnicodeDecodeError:
|
|
||||||
self._tree = html.soupparser.fromstring(content)
|
|
||||||
return self._tree
|
|
||||||
|
|
||||||
def css_elems(self):
|
def css_elems(self):
|
||||||
return self.tree.xpath('link[@rel="stylesheet"]|style')
|
return self.tree.xpath('link[@rel="stylesheet"]|style')
|
||||||
|
|||||||
Reference in New Issue
Block a user