300 lines
8.1 KiB
Python
300 lines
8.1 KiB
Python
"""
|
|
dsml - generate and parse DSMLv1 data
(see http://www.oasis-open.org/committees/dsml/)

See http://www.python-ldap.org/ for details.

$Id: dsml.py,v 1.42 2016/01/18 15:16:29 stroeder Exp $

Python compatibility note:
Tested with Python 2.0+.
|
|
"""
|
|
|
|
__version__ = '2.4.25.1'
|
|
|
|
import string,base64
|
|
|
|
|
|
# Lower-casing helper: string.lower() where the string module provides it,
# otherwise a fallback that calls the lower() method of its argument.
lower = getattr(string, 'lower', lambda s: s.lower())
|
|
|
|
# Characters that must be escaped in XML character data and attribute
# values, paired with their predefined entity references.
# '&' has to stay first: otherwise the ampersands introduced by the
# later replacements would themselves be escaped again.
special_entities = (
    ('&', '&amp;'),
    ('<', '&lt;'),
    ('"', '&quot;'),
    ("'", '&apos;'),
)


def replace_char(s):
    """
    Return s with XML special characters replaced by entity references.

    s
        Text string to escape.

    Every character listed in special_entities is replaced, in table
    order, by its entity reference. The str method is used instead of
    string.replace() so the function works whether or not the string
    module still provides that function.
    """
    for char, entity in special_entities:
        s = s.replace(char, entity)
    return s
|
|
|
|
|
|
class DSMLWriter:
    """
    Class for writing LDAP entry records to a DSMLv1 file.

    Arguments:

    f
        File object for output; only its write() method is used.
    base64_attrs
        Attribute types to be base64-encoded (matched case-insensitively).
        When empty or omitted, a value is base64-encoded exactly when it
        is a byte string that is not valid UTF-8.
    dsml_comment
        Text placed in an XML comment right after the opening
        <dsml:directory-entries> tag written by writeHeader().
    indent
        String used for indentation of next nested level.
    """

    def __init__(self, f, base64_attrs=None, dsml_comment='', indent=' '):
        self._output_file = f
        # Kept as a dict keyed by lower-cased attribute type so that
        # membership tests are case-insensitive.
        self._base64_attrs = dict.fromkeys(
            [attr_type.lower() for attr_type in (base64_attrs or ())]
        )
        self._dsml_comment = dsml_comment
        self._indent = indent

    @staticmethod
    def _to_text(value):
        """
        Return value in a form that can be %-formatted into the output.

        Only byte strings that are a distinct type from str are decoded
        (as UTF-8); where bytes and str are the same type the value is
        passed through untouched. Raises UnicodeDecodeError for byte
        strings that are not valid UTF-8.
        """
        if isinstance(value, bytes) and not isinstance(value, str):
            return value.decode('utf-8')
        return value

    @staticmethod
    def _base64_encode(value):
        """
        Return the base64 encoding of value as produced by the base64
        module (line-wrapped, with a trailing newline).
        """
        # base64.encodestring() is absent from newer interpreters and
        # base64.encodebytes() from older ones; use whichever exists.
        encode = getattr(base64, 'encodebytes', None) or base64.encodestring
        if not isinstance(value, bytes):
            value = value.encode('utf-8')
        return encode(value)

    def _needs_base64_encoding(self, attr_type, attr_value):
        """
        Return a true value if attr_value has to be written base64-encoded.

        If base64_attrs were given, only the attribute type decides.
        Otherwise byte strings that cannot be decoded as UTF-8 are
        encoded; values that are not byte strings never are.
        """
        if self._base64_attrs:
            return attr_type.lower() in self._base64_attrs
        if not isinstance(attr_value, bytes):
            return False
        try:
            attr_value.decode('utf-8')
        except UnicodeError:
            return True
        return False

    def writeHeader(self):
        """
        Write the header
        """
        self._output_file.write('\n'.join([
            '<?xml version="1.0" encoding="UTF-8"?>',
            '<!DOCTYPE root PUBLIC "dsml.dtd" "http://www.dsml.org/1.0/dsml.dtd">',
            '<dsml:dsml xmlns:dsml="http://www.dsml.org/DSML">',
            '%s<dsml:directory-entries>\n' % (self._indent),
        ]))
        if self._dsml_comment:
            self._output_file.write('%s<!--\n' % (self._indent))
            self._output_file.write('%s%s\n' % (self._indent, self._dsml_comment))
            self._output_file.write('%s-->\n' % (self._indent))

    def writeFooter(self):
        """
        Write the footer
        """
        self._output_file.write('%s</dsml:directory-entries>\n' % (self._indent))
        self._output_file.write('</dsml:dsml>\n')

    def unparse(self, dn, entry):
        """
        Alias for writeRecord().
        """
        return self.writeRecord(dn, entry)

    def writeRecord(self, dn, entry):
        """
        Write a single entry as a <dsml:entry> element.

        dn
            string-representation of distinguished name
        entry
            dictionary holding the LDAP entry {attr:data}

        The object classes (looked up under the key 'objectclass' or
        'objectClass') are written as <dsml:objectclass>; all remaining
        attributes follow as <dsml:attr> elements sorted by attribute
        type. Returns None.
        """
        write = self._output_file.write
        indent = self._indent

        write('%s<dsml:entry dn="%s">\n' % (
            indent*2, replace_char(self._to_text(dn))
        ))

        objectclasses = entry.get('objectclass', entry.get('objectClass', []))
        write('%s<dsml:objectclass>\n' % (indent*3))
        for oc in objectclasses:
            write('%s<dsml:oc-value>%s</dsml:oc-value>\n' % (
                indent*4, replace_char(self._to_text(oc))
            ))
        write('%s</dsml:objectclass>\n' % (indent*3))

        # Filter both spellings independently. Removing them one after the
        # other inside a single try block stopped at the first missing key,
        # so an entry keyed 'objectClass' was emitted a second time as a
        # regular attribute.
        attr_types = sorted(
            [t for t in entry if t not in ('objectclass', 'objectClass')]
        )
        for attr_type in attr_types:
            write('%s<dsml:attr name="%s">\n' % (
                indent*3, replace_char(self._to_text(attr_type))
            ))
            for attr_value_item in entry[attr_type]:
                if self._needs_base64_encoding(attr_type, attr_value_item):
                    encoding_attr = ' encoding="base64"'
                    # Base64 output is plain ASCII and needs no escaping.
                    text = self._to_text(self._base64_encode(attr_value_item))
                else:
                    encoding_attr = ''
                    text = replace_char(self._to_text(attr_value_item))
                write('%s<dsml:value%s>\n' % (indent*4, encoding_attr))
                write('%s%s\n' % (indent*5, text))
                write('%s</dsml:value>\n' % (indent*4))
            write('%s</dsml:attr>\n' % (indent*3))
        write('%s</dsml:entry>\n' % (indent*2))
        return
|
|
|
|
|
|
try:
    import xml.sax
    import xml.sax.handler
except ImportError:
    # Without xml.sax the content handler class is simply not defined.
    pass
else:

    class DSMLv1Handler(xml.sax.handler.ContentHandler):
        """
        Content handler class for DSMLv1

        Collects the contents of each <dsml:entry> element and passes
        them to parser_instance.handle(dn, entry) when the element ends.
        entry is a dictionary mapping attribute types to lists of values;
        the object classes are stored under the key 'objectClass'.

        State is kept in instance attributes that exist only while the
        corresponding element is open. characters() relies on this to
        decide where text belongs.
        """

        # Every element name is expected to carry this prefix.
        _DSML_PREFIX = 'dsml:'

        def __init__(self, parser_instance):
            self._parser_instance = parser_instance
            xml.sax.handler.ContentHandler.__init__(self)

        def startDocument(self):
            pass

        def endDocument(self):
            pass

        def _local_name(self, raw_name):
            """
            Return raw_name without the 'dsml:' prefix.

            Raises ValueError if the prefix is missing. An explicit
            raise is used because an assert would be skipped when
            Python runs with optimization enabled.
            """
            if not raw_name.startswith(self._DSML_PREFIX):
                raise ValueError('Illegal name %s' % (raw_name))
            return raw_name[len(self._DSML_PREFIX):]

        def startElement(self, raw_name, attrs):
            """
            Open the state belonging to the element raw_name.

            Raises ValueError for names without the 'dsml:' prefix and
            for unknown tags.
            """
            name = self._local_name(raw_name)
            if name == 'dsml':
                pass
            elif name == 'directory-entries':
                self._parsing_entries = 1
            elif name == 'entry':
                self._dn = attrs['dn']
                self._entry = {}
            elif name == 'attr':
                self._attr_type = attrs['name'].encode('utf-8')
                self._attr_values = []
            elif name == 'value':
                self._attr_value = ''
                self._base64_encoding = attrs.get('encoding', '').lower() == 'base64'
            # Handle object class tags
            elif name == 'objectclass':
                self._object_classes = []
            elif name == 'oc-value':
                self._oc_value = ''
            # Unhandled tags
            else:
                raise ValueError('Unknown tag %s' % (raw_name))

        def endElement(self, raw_name):
            """
            Store what was collected for the element raw_name and drop
            the state that startElement() created for it.

            Raises ValueError for names without the 'dsml:' prefix and
            for unknown tags.
            """
            name = self._local_name(raw_name)
            if name == 'dsml':
                pass
            elif name == 'directory-entries':
                self._parsing_entries = 0
            elif name == 'entry':
                self._parser_instance.handle(self._dn, self._entry)
                del self._dn
                del self._entry
            elif name == 'attr':
                self._entry[self._attr_type] = self._attr_values
                del self._attr_type
                del self._attr_values
            elif name == 'value':
                text = self._attr_value.strip()
                if self._base64_encoding:
                    # base64.decodestring() is absent from newer
                    # interpreters and base64.decodebytes() from older
                    # ones; use whichever exists. Base64 text is pure
                    # ASCII, so encode it to a byte string first.
                    decode = getattr(base64, 'decodebytes', None) or base64.decodestring
                    attr_value = decode(text.encode('ascii'))
                else:
                    attr_value = text.encode('utf-8')
                self._attr_values.append(attr_value)
                # Must be deleted: characters() tests for the presence
                # of these attributes.
                del self._attr_value
                del self._base64_encoding
            # Handle object class tags
            elif name == 'objectclass':
                self._entry['objectClass'] = self._object_classes
                del self._object_classes
            elif name == 'oc-value':
                self._object_classes.append(self._oc_value.strip().encode('utf-8'))
                del self._oc_value
            # Unhandled tags
            else:
                raise ValueError('Unknown tag %s' % (raw_name))

        def characters(self, ch):
            """
            Append ch to the value currently being collected, if any.

            Text outside of <dsml:oc-value> and <dsml:value> elements is
            ignored.
            """
            if '_oc_value' in self.__dict__:
                self._oc_value = self._oc_value + ch
            elif '_attr_value' in self.__dict__:
                self._attr_value = self._attr_value + ch
|
|
|
|
class DSMLParser:
    """
    Base class for a DSMLv1 parser. Applications should sub-class this
    class and override method handle() to implement something meaningful.

    Requires the xml.sax package.

    Public class attributes:

    records_read
        Counter for records processed so far. It is initialised to 0;
        this class itself never changes it.

    Arguments:

    input_file
        File-object to read the DSMLv1 input from
    ContentHandlerClass
        SAX content handler class. It is instantiated with this parser
        object as its only argument and installed on the SAX parser.
    ignored_attr_types
        Attributes with these attribute type names will be ignored.
        The names are stored lower-cased in _ignored_attr_types; this
        class does not consult them itself.
    max_entries
        If non-zero specifies the maximum number of entries to be
        read from input_file. The value is stored in _max_entries; this
        class does not consult it itself.
    """

    def __init__(
        self,
        input_file,
        ContentHandlerClass,
        ignored_attr_types=None,
        max_entries=0,
    ):
        self._input_file = input_file
        self._max_entries = max_entries
        self._ignored_attr_types = dict.fromkeys(
            [attr_type.lower() for attr_type in (ignored_attr_types or [])]
        )
        self._current_record = None, None
        self.records_read = 0
        self._parser = xml.sax.make_parser()
        self._parser.setFeature(xml.sax.handler.feature_namespaces, 0)
        # DSML input may carry a DOCTYPE pointing at an external DTD.
        # Do not let the parser fetch external entities: it is a network
        # access at best and an XXE vector at worst. Best effort only,
        # a parser driver may not know this feature.
        try:
            self._parser.setFeature(xml.sax.handler.feature_external_ges, 0)
        except (xml.sax.SAXNotRecognizedException, xml.sax.SAXNotSupportedException):
            pass
        content_handler = ContentHandlerClass(self)
        self._parser.setContentHandler(content_handler)

    def handle(self, *args, **kwargs):
        """
        Process a single DSMLv1 entry record. This method should be
        implemented by applications using DSMLParser.

        This default implementation just pretty-prints its arguments.
        """
        import pprint
        pprint.pprint(args)
        pprint.pprint(kwargs)

    def parse(self):
        """
        Continuously read and parse DSML records
        """
        self._parser.parse(self._input_file)
|