Fix #405 - compatibility with html5lib 1.0
This commit is contained in:
		
							
								
								
									
										5
									
								
								AUTHORS
									
									
									
									
									
								
							
							
						
						
									
										5
									
								
								AUTHORS
									
									
									
									
									
								
							| @@ -57,7 +57,8 @@ Julien Phalip | |||||||
| Justin Lilly | Justin Lilly | ||||||
| Luis Nell | Luis Nell | ||||||
| Lukas Lehner | Lukas Lehner | ||||||
| Lukasz Balcerzak | Łukasz Balcerzak | ||||||
|  | Łukasz Langa | ||||||
| Maciek Szczesniak | Maciek Szczesniak | ||||||
| Maor Ben-Dayan | Maor Ben-Dayan | ||||||
| Mark Lavin | Mark Lavin | ||||||
| @@ -89,4 +90,4 @@ Ulrich Petri | |||||||
| Ulysses V | Ulysses V | ||||||
| Vladislav Poluhin | Vladislav Poluhin | ||||||
| wesleyb | wesleyb | ||||||
| Wilson Júnior | Wilson Júnior | ||||||
|   | |||||||
							
								
								
									
										2
									
								
								Makefile
									
									
									
									
									
								
							
							
						
						
									
										2
									
								
								Makefile
									
									
									
									
									
								
							| @@ -2,3 +2,5 @@ test: | |||||||
| 	flake8 compressor --ignore=E501,E128 | 	flake8 compressor --ignore=E501,E128 | ||||||
| 	coverage run --branch --source=compressor `which django-admin.py` test --settings=compressor.test_settings compressor | 	coverage run --branch --source=compressor `which django-admin.py` test --settings=compressor.test_settings compressor | ||||||
| 	coverage report --omit=compressor/test*,compressor/filters/jsmin/rjsmin*,compressor/filters/cssmin/cssmin*,compressor/utils/stringformat* | 	coverage report --omit=compressor/test*,compressor/filters/jsmin/rjsmin*,compressor/filters/cssmin/cssmin*,compressor/utils/stringformat* | ||||||
|  |  | ||||||
|  | .PHONY: test | ||||||
|   | |||||||
| @@ -15,39 +15,45 @@ class Html5LibParser(ParserBase): | |||||||
|         self.html5lib = html5lib |         self.html5lib = html5lib | ||||||
|  |  | ||||||
|     def _serialize(self, elem): |     def _serialize(self, elem): | ||||||
|         fragment = self.html5lib.treebuilders.simpletree.DocumentFragment() |         return self.html5lib.serialize( | ||||||
|         fragment.appendChild(elem) |             elem, tree="etree", quote_attr_values=True, | ||||||
|         return self.html5lib.serialize(fragment, |             omit_optional_tags=False, use_trailing_solidus=True, | ||||||
|             quote_attr_values=True, omit_optional_tags=False) |         ) | ||||||
|  |  | ||||||
|     def _find(self, *names): |     def _find(self, *names): | ||||||
|         for node in self.html.childNodes: |         for elem in self.html: | ||||||
|             if node.type == 5 and node.name in names: |             if elem.tag in names: | ||||||
|                 yield node |                 yield elem | ||||||
|  |  | ||||||
|     @cached_property |     @cached_property | ||||||
|     def html(self): |     def html(self): | ||||||
|         try: |         try: | ||||||
|             return self.html5lib.parseFragment(self.content) |             return self.html5lib.parseFragment( | ||||||
|         except ImportError, err: |                 self.content, | ||||||
|  |                 treebuilder="etree", | ||||||
|  |             ) | ||||||
|  |         except ImportError as err: | ||||||
|             raise ImproperlyConfigured("Error while importing html5lib: %s" % err) |             raise ImproperlyConfigured("Error while importing html5lib: %s" % err) | ||||||
|         except Exception, err: |         except Exception as err: | ||||||
|             raise ParserError("Error while initializing Parser: %s" % err) |             raise ParserError("Error while initializing Parser: %s" % err) | ||||||
|  |  | ||||||
|     def css_elems(self): |     def css_elems(self): | ||||||
|         return self._find('style', 'link') |         return self._find('{http://www.w3.org/1999/xhtml}link', | ||||||
|  |                           '{http://www.w3.org/1999/xhtml}style') | ||||||
|  |  | ||||||
|     def js_elems(self): |     def js_elems(self): | ||||||
|         return self._find('script') |         return self._find('{http://www.w3.org/1999/xhtml}script') | ||||||
|  |  | ||||||
|     def elem_attribs(self, elem): |     def elem_attribs(self, elem): | ||||||
|         return elem.attributes |         return elem.attrib | ||||||
|  |  | ||||||
|     def elem_content(self, elem): |     def elem_content(self, elem): | ||||||
|         return elem.childNodes[0].value |         return smart_unicode(elem.text) | ||||||
|  |  | ||||||
|     def elem_name(self, elem): |     def elem_name(self, elem): | ||||||
|         return elem.name |         if '}' in elem.tag: | ||||||
|  |             return elem.tag.split('}')[1] | ||||||
|  |         return elem.tag | ||||||
|  |  | ||||||
|     def elem_str(self, elem): |     def elem_str(self, elem): | ||||||
|         # This method serializes HTML in a way that does not pass all tests. |         # This method serializes HTML in a way that does not pass all tests. | ||||||
|   | |||||||
| @@ -20,12 +20,10 @@ except ImportError: | |||||||
|  |  | ||||||
| from compressor.base import SOURCE_HUNK, SOURCE_FILE | from compressor.base import SOURCE_HUNK, SOURCE_FILE | ||||||
| from compressor.conf import settings | from compressor.conf import settings | ||||||
| from compressor.css import CssCompressor |  | ||||||
| from compressor.tests.test_base import CompressorTestCase | from compressor.tests.test_base import CompressorTestCase | ||||||
|  |  | ||||||
|  |  | ||||||
| class ParserTestCase(object): | class ParserTestCase(object): | ||||||
|  |  | ||||||
|     def setUp(self): |     def setUp(self): | ||||||
|         self.old_parser = settings.COMPRESS_PARSER |         self.old_parser = settings.COMPRESS_PARSER | ||||||
|         settings.COMPRESS_PARSER = self.parser_cls |         settings.COMPRESS_PARSER = self.parser_cls | ||||||
| @@ -42,34 +40,86 @@ LxmlParserTests = skipIf(lxml is None, 'lxml not found')(LxmlParserTests) | |||||||
|  |  | ||||||
| class Html5LibParserTests(ParserTestCase, CompressorTestCase): | class Html5LibParserTests(ParserTestCase, CompressorTestCase): | ||||||
|     parser_cls = 'compressor.parser.Html5LibParser' |     parser_cls = 'compressor.parser.Html5LibParser' | ||||||
|  |     # Special test variants required since xml.etree holds attributes | ||||||
|     def setUp(self): |     # as a plain dictionary, i.e. key order is unpredictable. | ||||||
|         super(Html5LibParserTests, self).setUp() |  | ||||||
|         # special version of the css since the parser sucks |  | ||||||
|         self.css = """\ |  | ||||||
| <link href="/static/css/one.css" rel="stylesheet" type="text/css"> |  | ||||||
| <style type="text/css">p { border:5px solid green;}</style> |  | ||||||
| <link href="/static/css/two.css" rel="stylesheet" type="text/css">""" |  | ||||||
|         self.css_node = CssCompressor(self.css) |  | ||||||
|  |  | ||||||
|     def test_css_split(self): |     def test_css_split(self): | ||||||
|         out = [ |  | ||||||
|             (SOURCE_FILE, os.path.join(settings.COMPRESS_ROOT, u'css', u'one.css'), u'css/one.css', u'<link href="/static/css/one.css" rel="stylesheet" type="text/css">'), |  | ||||||
|             (SOURCE_HUNK, u'p { border:5px solid green;}', None, u'<style type="text/css">p { border:5px solid green;}</style>'), |  | ||||||
|             (SOURCE_FILE, os.path.join(settings.COMPRESS_ROOT, u'css', u'two.css'), u'css/two.css', u'<link href="/static/css/two.css" rel="stylesheet" type="text/css">'), |  | ||||||
|         ] |  | ||||||
|         split = self.css_node.split_contents() |         split = self.css_node.split_contents() | ||||||
|         split = [(x[0], x[1], x[2], self.css_node.parser.elem_str(x[3])) for x in split] |         out0 = ( | ||||||
|         self.assertEqual(out, split) |             SOURCE_FILE, | ||||||
|  |             os.path.join(settings.COMPRESS_ROOT, u'css', u'one.css'), | ||||||
|  |             u'css/one.css', | ||||||
|  |             u'{http://www.w3.org/1999/xhtml}link', | ||||||
|  |             {u'rel': u'stylesheet', u'href': u'/static/css/one.css', | ||||||
|  |              u'type': u'text/css'}, | ||||||
|  |         ) | ||||||
|  |         self.assertEqual(out0, split[0][:3] + (split[0][3].tag, | ||||||
|  |                                                split[0][3].attrib)) | ||||||
|  |         out1 = ( | ||||||
|  |             SOURCE_HUNK, | ||||||
|  |             u'p { border:5px solid green;}', | ||||||
|  |             None, | ||||||
|  |             u'<style type="text/css">p { border:5px solid green;}</style>', | ||||||
|  |         ) | ||||||
|  |         self.assertEqual(out1, split[1][:3] + | ||||||
|  |                          (self.css_node.parser.elem_str(split[1][3]),)) | ||||||
|  |         out2 = ( | ||||||
|  |             SOURCE_FILE, | ||||||
|  |             os.path.join(settings.COMPRESS_ROOT, u'css', u'two.css'), | ||||||
|  |             u'css/two.css', | ||||||
|  |             u'{http://www.w3.org/1999/xhtml}link', | ||||||
|  |             {u'rel': u'stylesheet', u'href': u'/static/css/two.css', | ||||||
|  |              u'type': u'text/css'}, | ||||||
|  |         ) | ||||||
|  |         self.assertEqual(out2, split[2][:3] + (split[2][3].tag, | ||||||
|  |                                                split[2][3].attrib)) | ||||||
|  |  | ||||||
|     def test_js_split(self): |     def test_js_split(self): | ||||||
|         out = [ |  | ||||||
|             (SOURCE_FILE, os.path.join(settings.COMPRESS_ROOT, u'js', u'one.js'), u'js/one.js', u'<script src="/static/js/one.js" type="text/javascript"></script>'), |  | ||||||
|             (SOURCE_HUNK, u'obj.value = "value";', None, u'<script type="text/javascript">obj.value = "value";</script>'), |  | ||||||
|         ] |  | ||||||
|         split = self.js_node.split_contents() |         split = self.js_node.split_contents() | ||||||
|         split = [(x[0], x[1], x[2], self.js_node.parser.elem_str(x[3])) for x in split] |         out0 = ( | ||||||
|         self.assertEqual(out, split) |             SOURCE_FILE, | ||||||
|  |             os.path.join(settings.COMPRESS_ROOT, u'js', u'one.js'), | ||||||
|  |             u'js/one.js', | ||||||
|  |             u'{http://www.w3.org/1999/xhtml}script', | ||||||
|  |             {u'src': u'/static/js/one.js', u'type': u'text/javascript'}, | ||||||
|  |             None, | ||||||
|  |         ) | ||||||
|  |         self.assertEqual(out0, split[0][:3] + (split[0][3].tag, | ||||||
|  |                                                split[0][3].attrib, | ||||||
|  |                                                split[0][3].text)) | ||||||
|  |         out1 = ( | ||||||
|  |             SOURCE_HUNK, | ||||||
|  |             u'obj.value = "value";', | ||||||
|  |             None, | ||||||
|  |             u'{http://www.w3.org/1999/xhtml}script', | ||||||
|  |             {u'type': u'text/javascript'}, | ||||||
|  |             u'obj.value = "value";', | ||||||
|  |         ) | ||||||
|  |         self.assertEqual(out1, split[1][:3] + (split[1][3].tag, | ||||||
|  |                                                split[1][3].attrib, | ||||||
|  |                                                split[1][3].text)) | ||||||
|  |  | ||||||
|  |     def test_css_return_if_off(self): | ||||||
|  |         settings.COMPRESS_ENABLED = False | ||||||
|  |         # Yes, they are semantically equal but attributes might be | ||||||
|  |         # scrambled in unpredictable order. A more elaborate check | ||||||
|  |         # would require parsing both arguments with a different parser | ||||||
|  |         # and then evaluating the result, which would no longer be | ||||||
|  |         # a meaningful unit test. | ||||||
|  |         self.assertEqual(len(self.css), len(self.css_node.output())) | ||||||
|  |  | ||||||
|  |     def test_js_return_if_off(self): | ||||||
|  |         try: | ||||||
|  |             enabled = settings.COMPRESS_ENABLED | ||||||
|  |             precompilers = settings.COMPRESS_PRECOMPILERS | ||||||
|  |             settings.COMPRESS_ENABLED = False | ||||||
|  |             settings.COMPRESS_PRECOMPILERS = {} | ||||||
|  |             # As above. | ||||||
|  |             self.assertEqual(len(self.js), len(self.js_node.output())) | ||||||
|  |         finally: | ||||||
|  |             settings.COMPRESS_ENABLED = enabled | ||||||
|  |             settings.COMPRESS_PRECOMPILERS = precompilers | ||||||
|  |  | ||||||
|  |  | ||||||
| Html5LibParserTests = skipIf( | Html5LibParserTests = skipIf( | ||||||
|     html5lib is None, 'html5lib not found')(Html5LibParserTests) |     html5lib is None, 'html5lib not found')(Html5LibParserTests) | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Łukasz Langa
					Łukasz Langa