Merge pull request #663 from karyon/beatifulsoup

fix BeautifulSoup v4
This commit is contained in:
Mathieu Pillard
2015-09-25 22:47:34 +02:00
6 changed files with 77 additions and 44 deletions

View File

@@ -1,42 +1,46 @@
from __future__ import absolute_import
from django.core.exceptions import ImproperlyConfigured
from django.utils import six
from django.utils.encoding import smart_text
from compressor.exceptions import ParserError
from compressor.parser import ParserBase
from compressor.utils.decorators import cached_property
class BeautifulSoupParser(ParserBase):
@cached_property
def soup(self):
def __init__(self, content):
super(BeautifulSoupParser, self).__init__(content)
try:
if six.PY3:
from bs4 import BeautifulSoup
else:
from bs4 import BeautifulSoup
self.use_bs4 = True
self.soup = BeautifulSoup(self.content, "html.parser")
except ImportError:
try:
from BeautifulSoup import BeautifulSoup
return BeautifulSoup(self.content)
except ImportError as err:
raise ImproperlyConfigured("Error while importing BeautifulSoup: %s" % err)
except Exception as err:
raise ParserError("Error while initializing Parser: %s" % err)
self.use_bs4 = False
self.soup = BeautifulSoup(self.content)
except ImportError as err:
raise ImproperlyConfigured("Error while importing BeautifulSoup: %s" % err)
def css_elems(self):
if six.PY3:
if self.use_bs4:
return self.soup.find_all({'link': True, 'style': True})
else:
return self.soup.findAll({'link': True, 'style': True})
def js_elems(self):
if six.PY3:
if self.use_bs4:
return self.soup.find_all('script')
else:
return self.soup.findAll('script')
def elem_attribs(self, elem):
return dict(elem.attrs)
attrs = dict(elem.attrs)
# hack around changed behaviour in bs4: multi-valued attributes are now returned as lists instead of one string, see
# http://www.crummy.com/software/BeautifulSoup/bs4/doc/#multi-valued-attributes
for key, value in attrs.items():
if type(value) is list:
attrs[key] = " ".join(value)
return attrs
def elem_content(self, elem):
    """Return the ``string`` attribute of the given parsed element."""
    content = elem.string
    return content

View File

@@ -6,10 +6,11 @@ from shutil import rmtree, copytree
try:
from bs4 import BeautifulSoup
use_bs4 = True
except ImportError:
from BeautifulSoup import BeautifulSoup
use_bs4 = False
from django.utils import six
from django.core.cache.backends import locmem
from django.test import SimpleTestCase
from django.test.utils import override_settings
@@ -25,13 +26,19 @@ from compressor.storage import DefaultStorage
def make_soup(markup):
# we use html.parser instead of lxml because lxml doesn't work on python 3.3
if six.PY3:
return BeautifulSoup(markup, 'html.parser')
if use_bs4:
return BeautifulSoup(markup, "html.parser")
else:
return BeautifulSoup(markup)
def soup_find_all(markup, name):
    """Parse ``markup`` with ``make_soup`` and return all elements matching ``name``.

    The lookup method is ``find_all`` when ``use_bs4`` is true and the
    ``findAll`` spelling otherwise.
    """
    finder_name = "find_all" if use_bs4 else "findAll"
    return getattr(make_soup(markup), finder_name)(name)
def css_tag(href, **kwargs):
rendered_attrs = ''.join(['%s="%s" ' % (k, v) for k, v in kwargs.items()])
template = '<link rel="stylesheet" href="%s" type="text/css" %s/>'
@@ -286,10 +293,7 @@ class CssMediaTestCase(SimpleTestCase):
def test_css_output(self):
css_node = CssCompressor(self.css)
if six.PY3:
links = make_soup(css_node.output()).find_all('link')
else:
links = make_soup(css_node.output()).findAll('link')
links = soup_find_all(css_node.output(), 'link')
media = ['screen', 'print', 'all', None]
self.assertEqual(len(links), 4)
self.assertEqual(media, [l.get('media', None) for l in links])
@@ -298,10 +302,7 @@ class CssMediaTestCase(SimpleTestCase):
css = self.css + '<style type="text/css" media="print">p { border:10px solid red;}</style>'
css_node = CssCompressor(css)
media = ['screen', 'print', 'all', None, 'print']
if six.PY3:
links = make_soup(css_node.output()).find_all('link')
else:
links = make_soup(css_node.output()).findAll('link')
links = soup_find_all(css_node.output(), 'link')
self.assertEqual(media, [l.get('media', None) for l in links])
@override_settings(COMPRESS_PRECOMPILERS=(
@@ -313,10 +314,7 @@ class CssMediaTestCase(SimpleTestCase):
<link rel="stylesheet" href="/static/css/two.css" type="text/css" media="screen">
<style type="text/foobar" media="screen">h1 { border:5px solid green;}</style>"""
css_node = CssCompressor(css)
if six.PY3:
output = make_soup(css_node.output()).find_all(['link', 'style'])
else:
output = make_soup(css_node.output()).findAll(['link', 'style'])
output = soup_find_all(css_node.output(), ['link', 'style'])
self.assertEqual(['/static/css/one.css', '/static/css/two.css', None],
[l.get('href', None) for l in output])
self.assertEqual(['screen', 'screen', 'screen'],
@@ -356,11 +354,10 @@ class JsAsyncDeferTestCase(SimpleTestCase):
return 'defer'
js_node = JsCompressor(self.js)
output = [None, 'async', 'defer', None, 'async', None]
if six.PY3:
scripts = make_soup(js_node.output()).find_all('script')
scripts = soup_find_all(js_node.output(), 'script')
if use_bs4:
attrs = [extract_attr(i) for i in scripts]
else:
scripts = make_soup(js_node.output()).findAll('script')
attrs = [s.get('async') or s.get('defer') for s in scripts]
self.assertEqual(output, attrs)

View File

@@ -11,11 +11,6 @@ try:
except ImportError:
html5lib = None
try:
from BeautifulSoup import BeautifulSoup
except ImportError:
BeautifulSoup = None
from django.utils import unittest
from django.test.utils import override_settings
@@ -116,9 +111,46 @@ class Html5LibParserTests(ParserTestCase, CompressorTestCase):
self.assertEqual(len(self.js), len(self.js_node.output()))
@unittest.skipIf(BeautifulSoup is None, 'BeautifulSoup not found')
class BeautifulSoupParserTests(ParserTestCase, CompressorTestCase):
parser_cls = 'compressor.parser.BeautifulSoupParser'
# just like in the Html5LibParserTests, provide special tests because
# in bs4 attributes are held in dictionaries
def test_css_split(self):
# Checks the first three entries returned by split_contents() on the CSS node.
split = self.css_node.split_contents()
# First entry: a file source for css/one.css; the element's .tag and
# .attrib are both expected to be None.
out0 = (
SOURCE_FILE,
os.path.join(settings.COMPRESS_ROOT, 'css', 'one.css'),
'css/one.css',
None,
None,
)
self.assertEqual(out0, split[0][:3] + (split[0][3].tag,
split[0][3].attrib))
# Second entry: an inline hunk; its element is compared through the
# parser's elem_str() rendering rather than directly.
out1 = (
SOURCE_HUNK,
'p { border:5px solid green;}',
None,
'<style type="text/css">p { border:5px solid green;}</style>',
)
self.assertEqual(out1, split[1][:3] +
(self.css_node.parser.elem_str(split[1][3]),))
# Third entry: a file source for css/two.css, checked the same way as the first.
out2 = (
SOURCE_FILE,
os.path.join(settings.COMPRESS_ROOT, 'css', 'two.css'),
'css/two.css',
None,
None,
)
self.assertEqual(out2, split[2][:3] + (split[2][3].tag,
split[2][3].attrib))
@override_settings(COMPRESS_ENABLED=False)
def test_css_return_if_off(self):
    """With compression disabled, the output has the same length as the input CSS."""
    # Attribute order is unspecified, so only lengths are compared.
    rendered = self.css_node.output()
    # bs4 output lacks the extra space before the closing "/>", so it is
    # re-inserted before measuring.
    normalised = rendered.replace('"/>', '" />')
    self.assertEqual(len(self.css), len(normalised))
class HtmlParserTests(ParserTestCase, CompressorTestCase):

View File

@@ -66,7 +66,7 @@ Optional
``compressor.parser.BeautifulSoupParser`` and
``compressor.parser.LxmlParser``::
pip install "BeautifulSoup<4.0"
pip install beautifulsoup4
- lxml_

View File

@@ -4,7 +4,7 @@ html5lib==0.999
mock==1.0.1
Jinja2==2.7.3
lxml==3.4.2
BeautifulSoup==3.2.1
beautifulsoup4==4.4.0
unittest2==1.0.0
coffin==0.4.0
jingo==0.7

View File

@@ -6,7 +6,7 @@ two =
mock==1.0.1
Jinja2==2.7.3
lxml==3.4.2
BeautifulSoup==3.2.1
beautifulsoup4==4.4.0
unittest2==1.0.0
jingo==0.7
coffin==0.4.0