Add a safe_minidom_parse_string function.

Adds a new utils.safe_minidom_parse_string function and
updates external API facing Nova modules to use it.
This ensures we have safe defaults on our incoming API XML parsing.

Internally safe_minidom_parse_string uses a ProtectedExpatParser
class to disable DTDs and entities from being parsed when using
minidom.

Fixes LP Bug #1100282.

Change-Id: Ib90d6379320ff1d007f8a661f7ddaa286ba6918e
This commit is contained in:
Dan Prince 2013-02-01 17:04:27 -05:00
parent 1b9b66ba21
commit 5993324905
9 changed files with 99 additions and 24 deletions

View File

@ -21,7 +21,6 @@ import re
import urlparse
import webob
from xml.dom import minidom
from nova.api.openstack import wsgi
from nova.api.openstack import xmlutil
@ -32,6 +31,7 @@ from nova import exception
from nova.openstack.common import cfg
from nova.openstack.common import log as logging
from nova import quota
from nova import utils
osapi_opts = [
cfg.IntOpt('osapi_max_limit',
@ -356,7 +356,7 @@ def raise_http_conflict_for_instance_invalid_state(exc, action):
class MetadataDeserializer(wsgi.MetadataXMLDeserializer):
def deserialize(self, text):
dom = minidom.parseString(text)
dom = utils.safe_minidom_parse_string(text)
metadata_node = self.find_first_child_named(dom, "metadata")
metadata = self.extract_metadata(metadata_node)
return {'body': {'metadata': metadata}}
@ -364,7 +364,7 @@ class MetadataDeserializer(wsgi.MetadataXMLDeserializer):
class MetaItemDeserializer(wsgi.MetadataXMLDeserializer):
def deserialize(self, text):
dom = minidom.parseString(text)
dom = utils.safe_minidom_parse_string(text)
metadata_item = self.extract_metadata(dom)
return {'body': {'meta': metadata_item}}
@ -382,7 +382,7 @@ class MetadataXMLDeserializer(wsgi.XMLDeserializer):
return metadata
def _extract_metadata_container(self, datastring):
dom = minidom.parseString(datastring)
dom = utils.safe_minidom_parse_string(datastring)
metadata_node = self.find_first_child_named(dom, "metadata")
metadata = self.extract_metadata(metadata_node)
return {'body': {'metadata': metadata}}
@ -394,7 +394,7 @@ class MetadataXMLDeserializer(wsgi.XMLDeserializer):
return self._extract_metadata_container(datastring)
def update(self, datastring):
dom = minidom.parseString(datastring)
dom = utils.safe_minidom_parse_string(datastring)
metadata_item = self.extract_metadata(dom)
return {'body': {'meta': metadata_item}}

View File

@ -16,7 +16,6 @@
# under the License.
"""The cells extension."""
from xml.dom import minidom
from xml.parsers import expat
from webob import exc
@ -32,6 +31,7 @@ from nova import exception
from nova.openstack.common import cfg
from nova.openstack.common import log as logging
from nova.openstack.common import timeutils
from nova import utils
LOG = logging.getLogger(__name__)
@ -99,7 +99,7 @@ class CellDeserializer(wsgi.XMLDeserializer):
def default(self, string):
"""Deserialize an xml-formatted cell create request."""
try:
node = minidom.parseString(string)
node = utils.safe_minidom_parse_string(string)
except expat.ExpatError:
msg = _("cannot understand XML")
raise exception.MalformedRequestBody(reason=msg)

View File

@ -16,7 +16,6 @@
"""The hosts admin extension."""
import webob.exc
from xml.dom import minidom
from xml.parsers import expat
from nova.api.openstack import extensions
@ -25,6 +24,7 @@ from nova.api.openstack import xmlutil
from nova import compute
from nova import exception
from nova.openstack.common import log as logging
from nova import utils
LOG = logging.getLogger(__name__)
authorize = extensions.extension_authorizer('compute', 'hosts')
@ -72,7 +72,7 @@ class HostShowTemplate(xmlutil.TemplateBuilder):
class HostUpdateDeserializer(wsgi.XMLDeserializer):
def default(self, string):
try:
node = minidom.parseString(string)
node = utils.safe_minidom_parse_string(string)
except expat.ExpatError:
msg = _("cannot understand XML")
raise exception.MalformedRequestBody(reason=msg)

View File

@ -16,8 +16,6 @@
"""The security groups extension."""
from xml.dom import minidom
import webob
from webob import exc
@ -30,6 +28,7 @@ from nova.compute import api as compute_api
from nova import db
from nova import exception
from nova.openstack.common import log as logging
from nova import utils
from nova.virt import netutils
LOG = logging.getLogger(__name__)
@ -109,7 +108,7 @@ class SecurityGroupXMLDeserializer(wsgi.MetadataXMLDeserializer):
"""
def default(self, string):
"""Deserialize an xml-formatted security group create request."""
dom = minidom.parseString(string)
dom = utils.safe_minidom_parse_string(string)
security_group = {}
sg_node = self.find_first_child_named(dom,
'security_group')
@ -130,7 +129,7 @@ class SecurityGroupRulesXMLDeserializer(wsgi.MetadataXMLDeserializer):
def default(self, string):
"""Deserialize an xml-formatted security group create request."""
dom = minidom.parseString(string)
dom = utils.safe_minidom_parse_string(string)
security_group_rule = self._extract_security_group_rule(dom)
return {'body': {'security_group_rule': security_group_rule}}

View File

@ -17,7 +17,6 @@
import webob
from webob import exc
from xml.dom import minidom
from nova.api.openstack import common
from nova.api.openstack import extensions
@ -155,7 +154,7 @@ class CreateDeserializer(CommonDeserializer):
def default(self, string):
"""Deserialize an xml-formatted volume create request."""
dom = minidom.parseString(string)
dom = utils.safe_minidom_parse_string(string)
vol = self._extract_volume(dom)
return {'body': {'volume': vol}}

View File

@ -21,7 +21,6 @@ import socket
import webob
from webob import exc
from xml.dom import minidom
from nova.api.openstack import common
from nova.api.openstack.compute import ips
@ -312,7 +311,7 @@ class ActionDeserializer(CommonDeserializer):
"""
def default(self, string):
dom = minidom.parseString(string)
dom = utils.safe_minidom_parse_string(string)
action_node = dom.childNodes[0]
action_name = action_node.tagName
@ -419,7 +418,7 @@ class CreateDeserializer(CommonDeserializer):
def default(self, string):
"""Deserialize an xml-formatted server create request."""
dom = minidom.parseString(string)
dom = utils.safe_minidom_parse_string(string)
server = self._extract_server(dom)
return {'body': {'server': server}}

View File

@ -27,6 +27,7 @@ import webob
from nova import exception
from nova.openstack.common import jsonutils
from nova.openstack.common import log as logging
from nova import utils
from nova import wsgi
@ -217,7 +218,7 @@ class XMLDeserializer(TextDeserializer):
plurals = set(self.metadata.get('plurals', {}))
try:
node = minidom.parseString(datastring).childNodes[0]
node = utils.safe_minidom_parse_string(datastring).childNodes[0]
return {node.nodeName: self._from_xml_node(node, plurals)}
except expat.ExpatError:
msg = _("cannot understand XML")
@ -268,11 +269,11 @@ class XMLDeserializer(TextDeserializer):
def extract_text(self, node):
"""Get the text field contained by the given node."""
if len(node.childNodes) == 1:
child = node.childNodes[0]
ret_val = ""
for child in node.childNodes:
if child.nodeType == child.TEXT_NODE:
return child.nodeValue
return ""
ret_val += child.nodeValue
return ret_val
def extract_elements(self, node):
"""Get only Element type childs from node."""
@ -633,7 +634,7 @@ def action_peek_json(body):
def action_peek_xml(body):
"""Determine action to invoke."""
dom = minidom.parseString(body)
dom = utils.safe_minidom_parse_string(body)
action_node = dom.childNodes[0]
return action_node.tagName

View File

@ -449,6 +449,39 @@ class GenericUtilsTestCase(test.TestCase):
self.assertEqual(fake_execute.uid, 2)
self.assertEqual(fake_execute.uid, os.getuid())
def test_safe_parse_xml(self):
normal_body = ("""
<?xml version="1.0" ?><foo>
<bar>
<v1>hey</v1>
<v2>there</v2>
</bar>
</foo>""").strip()
def killer_body():
return (("""<!DOCTYPE x [
<!ENTITY a "%(a)s">
<!ENTITY b "%(b)s">
<!ENTITY c "%(c)s">]>
<foo>
<bar>
<v1>%(d)s</v1>
</bar>
</foo>""") % {
'a': 'A' * 10,
'b': '&a;' * 10,
'c': '&b;' * 10,
'd': '&c;' * 9999,
}).strip()
dom = utils.safe_minidom_parse_string(normal_body)
self.assertEqual(normal_body, str(dom.toxml()))
self.assertRaises(ValueError,
utils.safe_minidom_parse_string,
killer_body())
def test_xhtml_escape(self):
self.assertEqual('&quot;foo&quot;', utils.xhtml_escape('"foo"'))
self.assertEqual('&apos;foo&apos;', utils.xhtml_escape("'foo'"))

View File

@ -36,6 +36,10 @@ import struct
import sys
import tempfile
import time
from xml.dom import minidom
from xml.parsers import expat
from xml import sax
from xml.sax import expatreader
from xml.sax import saxutils
from eventlet import event
@ -652,6 +656,46 @@ class DynamicLoopingCall(LoopingCallBase):
return self.done
class ProtectedExpatParser(expatreader.ExpatParser):
"""An expat parser which disables DTD's and entities by default."""
def __init__(self, forbid_dtd=True, forbid_entities=True,
*args, **kwargs):
# Python 2.x old style class
expatreader.ExpatParser.__init__(self, *args, **kwargs)
self.forbid_dtd = forbid_dtd
self.forbid_entities = forbid_entities
def start_doctype_decl(self, name, sysid, pubid, has_internal_subset):
raise ValueError("Inline DTD forbidden")
def entity_decl(self, entityName, is_parameter_entity, value, base,
systemId, publicId, notationName):
raise ValueError("<!ENTITY> forbidden")
def unparsed_entity_decl(self, name, base, sysid, pubid, notation_name):
# expat 1.2
raise ValueError("<!ENTITY> forbidden")
def reset(self):
expatreader.ExpatParser.reset(self)
if self.forbid_dtd:
self._parser.StartDoctypeDeclHandler = self.start_doctype_decl
if self.forbid_entities:
self._parser.EntityDeclHandler = self.entity_decl
self._parser.UnparsedEntityDeclHandler = self.unparsed_entity_decl
def safe_minidom_parse_string(xml_string):
"""Parse an XML string using minidom safely.
"""
try:
return minidom.parseString(xml_string, parser=ProtectedExpatParser())
except sax.SAXParseException as se:
raise expat.ExpatError()
def xhtml_escape(value):
"""Escapes a string so it is valid within XML or XHTML.