deb-python-pyldap/Lib/dsml.py

300 lines
8.1 KiB
Python

"""
dsml - generate and parse DSMLv1 data
(see http://www.oasis-open.org/committees/dsml/)
See http://www.python-ldap.org/ for details.
$Id: dsml.py,v 1.42 2016/01/18 15:16:29 stroeder Exp $
Python compatibility note:
Tested with Python 2.0+.
"""
# Version string of this module.
__version__ = '2.4.25.1'
import string,base64
# Lower-casing helper: string.lower() where the string module provides it,
# otherwise a fallback that calls the lower() method of its argument.
lower = getattr(string, 'lower', lambda s: s.lower())
# (character, entity) pairs applied in this order by replace_char().
# The ampersand has to come first: the entities produced by the later pairs
# contain '&' themselves and must not be escaped a second time.
special_entities = (
    ('&', '&amp;'),
    ('<', '&lt;'),
    ('"', '&quot;'),
    ("'", '&apos;'),
)


def replace_char(s):
    """
    Return s with XML special characters replaced by entity references.

    s
        String to be escaped so that it can be embedded as XML character
        data or inside a quoted XML attribute value.

    The characters handled are those listed in special_entities.
    """
    for char, entity in special_entities:
        # The str method is used instead of string.replace(), which only
        # exists in the Python 2 string module.
        s = s.replace(char, entity)
    return s
class DSMLWriter:
    """
    Class for writing LDAP entry records to a DSMLv1 file.

    Arguments:

    f
        File object for output.
    base64_attrs
        Attribute types to be base64-encoded (compared case-insensitively).
        If empty or None, a value is base64-encoded only when it is a byte
        string that is not valid UTF-8.
    dsml_comment
        Text placed in comment lines right after the opening
        <dsml:directory-entries> tag. It is written verbatim.
    indent
        String used for indentation of next nested level.
    """

    def __init__(self, f, base64_attrs=None, dsml_comment='', indent=' '):
        self._output_file = f
        # Dictionary used as a set of lower-cased attribute type names.
        self._base64_attrs = {}.fromkeys(
            [attr_type.lower() for attr_type in (base64_attrs or [])]
        )
        self._dsml_comment = dsml_comment
        self._indent = indent

    def _needs_base64_encoding(self, attr_type, attr_value):
        """
        Return a true value if attr_value has to be written base64-encoded.

        With a non-empty base64_attrs setting only the attribute type
        decides. Otherwise byte strings which cannot be decoded as UTF-8
        are encoded; text strings never are.
        """
        if self._base64_attrs:
            return attr_type.lower() in self._base64_attrs
        if not isinstance(attr_value, bytes):
            return False
        try:
            attr_value.decode('utf-8')
        except UnicodeError:
            return True
        return False

    def _as_text(self, value):
        """
        Return value in a form suitable for '%s' string formatting.

        Byte strings which are not the native str type (Python 3) are
        decoded as UTF-8, everything else is returned unchanged.
        """
        if isinstance(value, bytes) and not isinstance(value, str):
            return value.decode('utf-8')
        return value

    def _base64_text(self, value):
        """
        Return the line-wrapped base64 representation of value as string.
        """
        if not isinstance(value, bytes):
            value = value.encode('utf-8')
        # base64.encodestring() was removed in Python 3.9 and
        # base64.encodebytes() does not exist in Python 2.
        encode = getattr(base64, 'encodebytes', None) or base64.encodestring
        return self._as_text(encode(value))

    def _attr_types_to_write(self, entry):
        """
        Return the sorted attribute types of entry without the object
        class attribute, which is written as <dsml:objectclass> instead.
        """
        # Both spellings are filtered independently: the absence of one
        # must not keep the other one in the list.
        return sorted([
            attr_type for attr_type in entry.keys()
            if attr_type not in ('objectclass', 'objectClass')
        ])

    def writeHeader(self):
        """
        Write the header
        """
        write = self._output_file.write
        indent = self._indent
        write('\n'.join([
            '<?xml version="1.0" encoding="UTF-8"?>',
            '<!DOCTYPE root PUBLIC "dsml.dtd" "http://www.dsml.org/1.0/dsml.dtd">',
            '<dsml:dsml xmlns:dsml="http://www.dsml.org/DSML">',
            '%s<dsml:directory-entries>\n' % (indent),
        ]))
        if self._dsml_comment:
            write('%s<!--\n' % (indent))
            write('%s%s\n' % (indent, self._dsml_comment))
            write('%s-->\n' % (indent))

    def writeFooter(self):
        """
        Write the footer
        """
        self._output_file.write('%s</dsml:directory-entries>\n' % (self._indent))
        self._output_file.write('</dsml:dsml>\n')

    def unparse(self, dn, entry):
        """
        Alias for writeRecord().
        """
        return self.writeRecord(dn, entry)

    def writeRecord(self, dn, entry):
        """
        Write a single entry as <dsml:entry> element.

        dn
            string-representation of distinguished name
        entry
            dictionary holding the LDAP entry {attr:data}

        The DN, attribute names, object class values and all values not
        written base64-encoded are escaped with replace_char().
        """
        write = self._output_file.write
        indent = self._indent
        write('%s<dsml:entry dn="%s">\n' % (indent*2, replace_char(dn)))
        # The object classes get their own element, which is written even
        # if the entry has no object class attribute at all.
        objectclasses = entry.get('objectclass', entry.get('objectClass', []))
        write('%s<dsml:objectclass>\n' % (indent*3))
        for oc in objectclasses:
            write('%s<dsml:oc-value>%s</dsml:oc-value>\n' % (
                indent*4, replace_char(self._as_text(oc))
            ))
        write('%s</dsml:objectclass>\n' % (indent*3))
        for attr_type in self._attr_types_to_write(entry):
            write('%s<dsml:attr name="%s">\n' % (indent*3, replace_char(attr_type)))
            for attr_value_item in entry[attr_type]:
                if self._needs_base64_encoding(attr_type, attr_value_item):
                    encoding_attr = ' encoding="base64"'
                    attr_value_item = self._base64_text(attr_value_item)
                else:
                    encoding_attr = ''
                    attr_value_item = replace_char(self._as_text(attr_value_item))
                write('%s<dsml:value%s>\n' % (indent*4, encoding_attr))
                write('%s%s\n' % (indent*5, attr_value_item))
                write('%s</dsml:value>\n' % (indent*4))
            write('%s</dsml:attr>\n' % (indent*3))
        write('%s</dsml:entry>\n' % (indent*2))
        return
try:
    import xml.sax,xml.sax.handler
except ImportError:
    pass
else:

    class DSMLv1Handler(xml.sax.handler.ContentHandler):
        """
        Content handler class for DSMLv1

        parser_instance
            Object receiving the parsed records: its method
            handle(dn,entry) is called for each completed <dsml:entry>.
            If it has the attributes _ignored_attr_types, _max_entries and
            records_read (like DSMLParser) they are honoured and updated.
        """

        def __init__(self, parser_instance):
            self._parser_instance = parser_instance
            xml.sax.handler.ContentHandler.__init__(self)

        def startDocument(self):
            pass

        def endDocument(self):
            pass

        def _local_name(self, raw_name):
            """
            Return raw_name without the prefix 'dsml:'.

            Raises ValueError if raw_name does not have that prefix. An
            exception is used instead of assert because assert statements
            are removed when running with -O.
            """
            if not raw_name.startswith('dsml:'):
                raise ValueError('Illegal name %s' % (raw_name))
            return raw_name[5:]

        def _is_ignored(self, attr_type):
            """
            Return a true value if attr_type is listed in the parser's
            ignored attribute types (compared case-insensitively).
            """
            ignored = getattr(self._parser_instance, '_ignored_attr_types', None)
            return bool(ignored) and attr_type.lower() in ignored

        def _deliver_entry(self):
            """
            Pass the current entry to handle() and count it, unless the
            parser's max_entries limit has already been reached.
            """
            parser = self._parser_instance
            max_entries = getattr(parser, '_max_entries', 0)
            if max_entries and getattr(parser, 'records_read', 0) >= max_entries:
                return
            parser.handle(self._dn, self._entry)
            if hasattr(parser, 'records_read'):
                parser.records_read = parser.records_read + 1

        def startElement(self, raw_name, attrs):
            name = self._local_name(raw_name)
            if name == 'dsml':
                pass
            elif name == 'directory-entries':
                self._parsing_entries = 1
            elif name == 'entry':
                self._dn = attrs['dn']
                self._entry = {}
            elif name == 'attr':
                attr_name = attrs['name']
                # Decided on the text form of the name, before it is
                # turned into the UTF-8 encoded dictionary key.
                self._attr_ignored = self._is_ignored(attr_name)
                self._attr_type = attr_name.encode('utf-8')
                self._attr_values = []
            elif name == 'value':
                self._attr_value = ''
                self._base64_encoding = attrs.get('encoding', '').lower() == 'base64'
            # Handle object class tags
            elif name == 'objectclass':
                self._object_classes = []
            elif name == 'oc-value':
                self._oc_value = ''
            # Unhandled tags
            else:
                raise ValueError('Unknown tag %s' % (raw_name))

        def endElement(self, raw_name):
            name = self._local_name(raw_name)
            if name == 'dsml':
                pass
            elif name == 'directory-entries':
                self._parsing_entries = 0
            elif name == 'entry':
                self._deliver_entry()
                del self._dn
                del self._entry
            elif name == 'attr':
                if not self._attr_ignored:
                    self._entry[self._attr_type] = self._attr_values
                del self._attr_ignored
                del self._attr_type
                del self._attr_values
            elif name == 'value':
                raw_value = self._attr_value.strip()
                if self._base64_encoding:
                    # base64.decodestring() was removed in Python 3.9 and
                    # base64.decodebytes() does not exist in Python 2.
                    decode = getattr(base64, 'decodebytes', None) or base64.decodestring
                    attr_value = decode(raw_value.encode('ascii'))
                else:
                    attr_value = raw_value.encode('utf-8')
                self._attr_values.append(attr_value)
                del self._attr_value
                del self._base64_encoding
            # Handle object class tags
            elif name == 'objectclass':
                if not self._is_ignored('objectClass'):
                    self._entry['objectClass'] = self._object_classes
                del self._object_classes
            elif name == 'oc-value':
                self._object_classes.append(self._oc_value.strip().encode('utf-8'))
                del self._oc_value
            # Unhandled tags
            else:
                raise ValueError('Unknown tag %s' % (raw_name))

        def characters(self, ch):
            # The attributes _oc_value and _attr_value only exist while the
            # respective element is open, so their presence tells where
            # the character data belongs. Everything else is dropped.
            state = vars(self)
            if '_oc_value' in state:
                self._oc_value = self._oc_value + ch
            elif '_attr_value' in state:
                self._attr_value = self._attr_value + ch

    class DSMLParser:
        """
        Base class for a DSMLv1 parser. Applications should sub-class this
        class and override method handle() to implement something meaningful.

        Public class attributes:

        records_read
            Counter for records processed so far

        Arguments:

        input_file
            File-object to read the DSMLv1 input from
        ContentHandlerClass
            SAX content handler class, e.g. DSMLv1Handler. It is
            instantiated with this parser object as sole argument and is
            responsible for calling handle().
        ignored_attr_types
            Attributes with these attribute type names will be ignored.
        max_entries
            If non-zero specifies the maximum number of entries to be
            passed to handle().

        ignored_attr_types, max_entries and records_read are evaluated by
        DSMLv1Handler. Other content handler classes have to implement
        them on their own.
        """

        def __init__(
            self,
            input_file,
            ContentHandlerClass,
            ignored_attr_types=None,
            max_entries=0,
        ):
            self._input_file = input_file
            self._max_entries = max_entries
            # Dictionary used as a set of lower-cased attribute type names.
            self._ignored_attr_types = {}.fromkeys(
                [attr_type.lower() for attr_type in (ignored_attr_types or [])]
            )
            self._current_record = None,None
            self.records_read = 0
            self._parser = xml.sax.make_parser()
            self._parser.setFeature(xml.sax.handler.feature_namespaces,0)
            # The input may come from an untrusted source: do not let the
            # XML parser load external entities. Best effort only, since
            # not every SAX driver knows this feature.
            try:
                self._parser.setFeature(xml.sax.handler.feature_external_ges,0)
            except xml.sax.SAXException:
                pass
            content_handler = ContentHandlerClass(self)
            self._parser.setContentHandler(content_handler)

        def handle(self,*args,**kwargs):
            """
            Process a single DSMLv1 entry record. This method should be
            implemented by applications using DSMLParser.

            This default implementation just pretty-prints its arguments.
            """
            import pprint
            pprint.pprint(args)
            pprint.pprint(kwargs)

        def parse(self):
            """
            Continuously read and parse DSML records
            """
            self._parser.parse(self._input_file)