Adapt BeautifulSoup parser to the changed behaviour in bs4
This commit is contained in:
@@ -34,7 +34,13 @@ class BeautifulSoupParser(ParserBase):
|
||||
return self.soup.findAll('script')
|
||||
|
||||
def elem_attribs(self, elem):
    """Return the attributes of *elem* as a new dict with list values flattened.

    bs4 returns multi-valued attributes as lists instead of one string, see
    http://www.crummy.com/software/BeautifulSoup/bs4/doc/#multi-valued-attributes
    Any attribute value that is a list is joined into a single
    space-separated string; every other value is passed through unchanged.

    Args:
        elem: object exposing an ``attrs`` attribute that ``dict()`` accepts
            (a mapping or an iterable of key/value pairs).

    Returns:
        A fresh dict; ``elem.attrs`` itself is not modified.
    """
    # dict() takes a copy first, so the element's own attrs are never touched.
    # isinstance (rather than an exact type check) also covers list subclasses.
    return {
        key: " ".join(value) if isinstance(value, list) else value
        for key, value in dict(elem.attrs).items()
    }
|
||||
|
||||
def elem_content(self, elem):
    """Return the value of *elem*'s ``string`` attribute, unchanged."""
    content = elem.string
    return content
|
||||
|
||||
Reference in New Issue
Block a user