41 lines
1.3 KiB
Python
41 lines
1.3 KiB
Python
from __future__ import absolute_import
|
|
from django.core.exceptions import ImproperlyConfigured
|
|
from django.utils.encoding import smart_text
|
|
|
|
from compressor.parser import ParserBase
|
|
|
|
|
|
class BeautifulSoupParser(ParserBase):
|
|
|
|
def __init__(self, content):
|
|
super(BeautifulSoupParser, self).__init__(content)
|
|
try:
|
|
from bs4 import BeautifulSoup
|
|
self.soup = BeautifulSoup(self.content, "html.parser")
|
|
except ImportError:
|
|
raise ImproperlyConfigured("Error while importing BeautifulSoup: %s" % err)
|
|
|
|
def css_elems(self):
|
|
return self.soup.find_all({'link': True, 'style': True})
|
|
|
|
def js_elems(self):
|
|
return self.soup.find_all('script')
|
|
|
|
def elem_attribs(self, elem):
|
|
attrs = dict(elem.attrs)
|
|
# hack around changed behaviour in bs4, it returns lists now instead of one string, see
|
|
# http://www.crummy.com/software/BeautifulSoup/bs4/doc/#multi-valued-attributes
|
|
for key, value in attrs.items():
|
|
if type(value) is list:
|
|
attrs[key] = " ".join(value)
|
|
return attrs
|
|
|
|
def elem_content(self, elem):
|
|
return elem.string
|
|
|
|
def elem_name(self, elem):
|
|
return elem.name
|
|
|
|
def elem_str(self, elem):
|
|
return smart_text(elem)
|