Files
deb-python-django-compressor/compressor/parser/htmlparser.py
Jaap Roes b6d5131611 Created new parser, HtmlParser, based on the stdlib HTMLParser module.
Added AutoSelectParser, which picks LxmlParser if lxml is available and falls back to HtmlParser if not; it is also the new default.
Created a special BeautifulSoupTest in order to still test this parser.
Updated README, installation and settings docs to reflect these changes.
2011-04-20 12:01:33 +02:00

78 lines
2.3 KiB
Python

from HTMLParser import HTMLParser
from django.utils.encoding import smart_unicode
from django.utils.datastructures import SortedDict
from compressor.exceptions import ParserError
from compressor.parser import ParserBase
class HtmlParser(ParserBase, HTMLParser):
    """Collect ``<style>``, ``<link>`` and ``<script>`` elements from HTML.

    The markup is parsed eagerly in ``__init__`` with the standard library
    ``HTMLParser``.  Each element found is stored as a plain dict with the
    keys ``tag`` (lower-cased tag name), ``attrs`` (the list of
    ``(name, value)`` pairs passed to ``handle_starttag``), ``attrs_dict``
    (the same pairs as a dict) and ``text`` (the inline content collected so
    far, or ``None`` for ``<link>`` elements).
    """

    def __init__(self, content):
        """Parse ``content`` and record its CSS and JS elements.

        Raises:
            ParserError: if feeding or closing the underlying ``HTMLParser``
                raises any exception.
        """
        HTMLParser.__init__(self)
        self.content = content
        self._css_elems = []
        self._js_elems = []
        # Name of the <style>/<script> element whose text is currently being
        # collected; None while outside such an element.
        self._current_tag = None
        try:
            self.feed(self.content)
            self.close()
        except Exception as err:
            # Deliberately broad: any parser failure is reported to callers
            # as a ParserError.
            raise ParserError("Error while initializing HtmlParser: %s" % err)

    def handle_starttag(self, tag, attrs):
        """Record ``<style>``, ``<script>`` and ``<link>`` start tags.

        Any other tag is ignored.
        """
        tag = tag.lower()
        if tag == 'style':
            elems, text = self._css_elems, ''
        elif tag == 'script':
            elems, text = self._js_elems, ''
        elif tag == 'link':
            # <link> has no inline content, which is marked with None.
            elems, text = self._css_elems, None
        else:
            return
        elems.append({
            'tag': tag,
            'attrs': attrs,
            'attrs_dict': dict(attrs),
            'text': text,
        })
        if tag != 'link':
            # Route the following handle_data() calls to this element.
            self._current_tag = tag

    def handle_endtag(self, tag):
        """Stop collecting text once the currently open element is closed."""
        if self._current_tag and self._current_tag == tag.lower():
            self._current_tag = None

    def handle_data(self, data):
        """Append ``data`` to the text of the open ``<style>``/``<script>``.

        HTMLParser may deliver the content of a single element in several
        chunks, so the text is accumulated rather than overwritten.
        """
        if self._current_tag == 'style':
            self._css_elems[-1]['text'] += data
        elif self._current_tag == 'script':
            self._js_elems[-1]['text'] += data

    def css_elems(self):
        """Return the recorded ``<style>`` and ``<link>`` elements."""
        return self._css_elems

    def js_elems(self):
        """Return the recorded ``<script>`` elements."""
        return self._js_elems

    def elem_name(self, elem):
        """Return the tag name of ``elem``."""
        return elem['tag']

    def elem_attribs(self, elem):
        """Return the attributes of ``elem`` as a dict."""
        return elem['attrs_dict']

    def elem_content(self, elem):
        """Return the text of ``elem`` passed through ``smart_unicode``."""
        return smart_unicode(elem['text'])

    def elem_str(self, elem):
        """Serialize ``elem`` back to an HTML string.

        ``<link>`` elements are rendered self-closing; every other element is
        rendered with its text between an opening and a closing tag.
        """
        rendered = []
        for name, value in elem['attrs']:
            if value is None:
                # Valueless attribute such as <script async>: emit the bare
                # name instead of the literal string "None".
                rendered.append(name)
            else:
                # NOTE(review): values are written back without re-escaping
                # quotes or ampersands -- confirm whether callers need that.
                rendered.append('%s="%s"' % (name, value))
        tag = dict(elem)
        if rendered:
            tag['attrs'] = ' %s' % ' '.join(rendered)
        else:
            tag['attrs'] = ''
        if elem['tag'] == 'link':
            return '<%(tag)s%(attrs)s />' % tag
        return '<%(tag)s%(attrs)s>%(text)s</%(tag)s>' % tag