60 lines
		
	
	
		
			1.9 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			60 lines
		
	
	
		
			1.9 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| from __future__ import absolute_import
 | |
| from django.core.exceptions import ImproperlyConfigured
 | |
| from django.utils.encoding import smart_text
 | |
| 
 | |
| from compressor.exceptions import ParserError
 | |
| from compressor.parser import ParserBase
 | |
| from compressor.utils.decorators import cached_property
 | |
| 
 | |
| 
 | |
| class Html5LibParser(ParserBase):
 | |
| 
 | |
|     def __init__(self, content):
 | |
|         super(Html5LibParser, self).__init__(content)
 | |
|         import html5lib
 | |
|         self.html5lib = html5lib
 | |
| 
 | |
|     def _serialize(self, elem):
 | |
|         return self.html5lib.serialize(
 | |
|             elem, tree="etree", quote_attr_values=True,
 | |
|             omit_optional_tags=False, use_trailing_solidus=True,
 | |
|         )
 | |
| 
 | |
|     def _find(self, *names):
 | |
|         for elem in self.html:
 | |
|             if elem.tag in names:
 | |
|                 yield elem
 | |
| 
 | |
|     @cached_property
 | |
|     def html(self):
 | |
|         try:
 | |
|             return self.html5lib.parseFragment(self.content, treebuilder="etree")
 | |
|         except ImportError as err:
 | |
|             raise ImproperlyConfigured("Error while importing html5lib: %s" % err)
 | |
|         except Exception as err:
 | |
|             raise ParserError("Error while initializing Parser: %s" % err)
 | |
| 
 | |
|     def css_elems(self):
 | |
|         return self._find('{http://www.w3.org/1999/xhtml}link',
 | |
|                           '{http://www.w3.org/1999/xhtml}style')
 | |
| 
 | |
|     def js_elems(self):
 | |
|         return self._find('{http://www.w3.org/1999/xhtml}script')
 | |
| 
 | |
|     def elem_attribs(self, elem):
 | |
|         return elem.attrib
 | |
| 
 | |
|     def elem_content(self, elem):
 | |
|         return smart_text(elem.text)
 | |
| 
 | |
|     def elem_name(self, elem):
 | |
|         if '}' in elem.tag:
 | |
|             return elem.tag.split('}')[1]
 | |
|         return elem.tag
 | |
| 
 | |
|     def elem_str(self, elem):
 | |
|         # This method serializes HTML in a way that does not pass all tests.
 | |
|         # However, this method is only called in tests anyway, so it doesn't
 | |
|         # really matter.
 | |
|         return smart_text(self._serialize(elem))
 | 
