Adapt Beautifulsoup parser to changed behaviour in bs4

This commit is contained in:
Johannes Linke
2015-09-22 18:10:48 +02:00
parent 93fee3de35
commit 8b12e303ab

View File

@@ -34,7 +34,13 @@ class BeautifulSoupParser(ParserBase):
return self.soup.findAll('script')
def elem_attribs(self, elem):
    """Return the attributes of *elem* as a plain ``dict`` of strings.

    bs4 returns multi-valued attributes (e.g. ``class``) as lists instead
    of a single string, see
    http://www.crummy.com/software/BeautifulSoup/bs4/doc/#multi-valued-attributes
    Such list values are joined with single spaces; every other value is
    passed through unchanged.

    ``elem.attrs`` is run through ``dict()`` first, so either a mapping or
    a sequence of ``(key, value)`` pairs is accepted. The element's own
    attribute container is never modified.
    """
    # isinstance (rather than an exact type comparison) also covers list
    # subclasses used to hold multi-valued attributes.
    return {
        key: " ".join(value) if isinstance(value, list) else value
        for key, value in dict(elem.attrs).items()
    }
def elem_content(self, elem):
    """Return the ``string`` attribute of *elem*."""
    content = elem.string
    return content