Adapt BeautifulSoup parser to changed behaviour in bs4
This commit is contained in:
@@ -34,7 +34,13 @@ class BeautifulSoupParser(ParserBase):
|
|||||||
return self.soup.findAll('script')
|
return self.soup.findAll('script')
|
||||||
|
|
||||||
def elem_attribs(self, elem):
|
def elem_attribs(self, elem):
|
||||||
return dict(elem.attrs)
|
attrs = dict(elem.attrs)
|
||||||
|
# hack around changed behaviour in bs4: multi-valued attributes are now returned as lists instead of one string, see
|
||||||
|
# http://www.crummy.com/software/BeautifulSoup/bs4/doc/#multi-valued-attributes
|
||||||
|
for key, value in attrs.items():
|
||||||
|
if type(value) is list:
|
||||||
|
attrs[key] = " ".join(value)
|
||||||
|
return attrs
|
||||||
|
|
||||||
def elem_content(self, elem):
|
def elem_content(self, elem):
|
||||||
return elem.string
|
return elem.string
|
||||||
|
|||||||
Reference in New Issue
Block a user