Fixed deprecation warnings for calling HTMLParser on Python 3.X

The logic here comes from http://bazaar.launchpad.net/~leonardr/beautifulsoup/bs4/view/head:/bs4/builder/_htmlparser.py#L20,
inspecting the stdlib code in different versions, and simplifying.

I haven't included the monkey patch from beautifulsoup for Python 3.2.2
because I don't think the bug affects django-compressor's parsing of script
tags.
This commit is contained in:
Luke Plant 2015-10-23 10:17:23 +01:00
parent 0c8f33aa64
commit ff6fa993b4

View File

@ -1,3 +1,5 @@
import sys
from django.utils import six
from django.utils.encoding import smart_text
@ -5,9 +7,26 @@ from compressor.exceptions import ParserError
from compressor.parser import ParserBase
# Starting in Python 3.2, the HTMLParser constructor takes a 'strict'
# argument which defaults to True (which we don't want).
# In Python 3.3, it defaults to False.
# In Python 3.4, passing it at all raises a deprecation warning.
# So we only pass it for 3.2.
# In Python 3.4, it also takes a 'convert_charrefs' argument
# which raises a warning if we don't pass it.
major, minor, release = sys.version_info[:3]

# Only Python 3.2 gets an explicit strict=False.
CONSTRUCTOR_TAKES_STRICT = (major, minor) == (3, 2)
# Tuple comparison (instead of `major == 3 and minor >= 4`) keeps this check
# true for every interpreter from 3.4 onwards, whatever its major version.
CONSTRUCTOR_TAKES_CONVERT_CHARREFS = (major, minor) >= (3, 4)

# Keyword arguments passed to HTMLParser.__init__; stays empty on
# interpreters that accept neither argument.
HTML_PARSER_ARGS = {}
if CONSTRUCTOR_TAKES_STRICT:
    HTML_PARSER_ARGS['strict'] = False
if CONSTRUCTOR_TAKES_CONVERT_CHARREFS:
    HTML_PARSER_ARGS['convert_charrefs'] = False
class DefaultHtmlParser(ParserBase, six.moves.html_parser.HTMLParser):
def __init__(self, content):
six.moves.html_parser.HTMLParser.__init__(self)
six.moves.html_parser.HTMLParser.__init__(self, **HTML_PARSER_ARGS)
self.content = content
self._css_elems = []
self._js_elems = []