Fixed ldap.schema.tokenizer.split_tokens() to accept a single DOLLAR as separator
This commit is contained in:
parent e1aa8183a7
commit 434b5d44b0
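In practice the fix means a DOLLAR now splits tokens whether or not it has surrounding whitespace. A minimal sketch of the new behaviour (assuming the ldap.schema.tokenizer from this commit is on the import path):

from ldap.schema.tokenizer import split_tokens

# A bare dollar with no surrounding whitespace is now accepted
# as a separator; all four spellings tokenize identically.
for s in ("DA $ BLAH", "DA$ BLAH", "DA $BLAH", "DA$BLAH"):
  print split_tokens(s, {})  # -> ['DA', 'BLAH']

# A dollar inside a quoted value is still kept verbatim:
print split_tokens("BLUBB 'DA$BLAH'", {})  # -> ['BLUBB', 'DA$BLAH']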
@@ -0,0 +1,85 @@
"""
ldap.schema.tokenizer - Low-level parsing functions for schema element strings

See http://www.python-ldap.org/ for details.

$Id: tokenizer.py,v 1.13 2009/04/29 18:13:55 stroeder Exp $
"""


def split_tokens(s,keywordDict):
  """
  Returns list of syntax elements with quotes and spaces
  stripped.
  """
  result = []
  result_append = result.append
  s_len = len(s)
  i = 0
  while i<s_len:
    start = i
    while i<s_len and s[i]!="'":
      if s[i]=="(" or s[i]==")":
        if i>start:
          result_append(s[start:i])
        result_append(s[i])
        i +=1 # Consume parenthesis
        start = i
      elif s[i]==" " or s[i]=="$":
        if i>start:
          result_append(s[start:i])
        i +=1
        # Consume more space chars
        while i<s_len and s[i]==" ":
          i +=1
        start = i
      else:
        i +=1
    if i>start:
      result_append(s[start:i])
    i +=1
    if i>=s_len:
      break
    start = i
    while i<s_len and s[i]!="'":
      i +=1
    if i>=start:
      result_append(s[start:i])
    i +=1
  return result # split_tokens()


def extract_tokens(l,known_tokens):
  """
  Returns dictionary of known tokens with all values
  """
  assert l[0].strip()=="(" and l[-1].strip()==")",ValueError(l)
  result = {}
  result_has_key = result.has_key
  result.update(known_tokens)
  i = 0
  l_len = len(l)
  while i<l_len:
    if result_has_key(l[i]):
      token = l[i]
      i += 1 # Consume token
      if i<l_len:
        if result_has_key(l[i]):
          # non-valued
          result[token] = (())
        elif l[i]=="(":
          # multi-valued
          i += 1 # Consume left parenthesis
          start = i
          while i<l_len and l[i]!=")":
            i += 1
          result[token] = tuple(filter(lambda v:v!='$',l[start:i]))
          i += 1 # Consume right parenthesis
        else:
          # single-valued
          result[token] = l[i],
          i += 1 # Consume single value
    else:
      i += 1 # Consume unrecognized item
  return result
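For context, the token list produced by split_tokens() is what extract_tokens() walks when a schema element is parsed. A rough sketch of the two functions together, on a made-up objectClass description (the known-token dicts here are illustrative only, not the full set ldap.schema uses):

from ldap.schema.tokenizer import split_tokens, extract_tokens

s = "( 2.5.6.6 NAME 'person' SUP top MUST ( sn$cn ) )"
tokens = split_tokens(s, {'MUST': None})
# -> ['(', '2.5.6.6', 'NAME', 'person', 'SUP', 'top',
#     'MUST', '(', 'sn', 'cn', ')', ')']
d = extract_tokens(tokens, {'NAME': (), 'SUP': (), 'MUST': ()})
print d['NAME']  # -> ('person',)
print d['MUST']  # -> ('sn', 'cn')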
@@ -0,0 +1,30 @@
import ldap.schema
from ldap.schema.tokenizer import split_tokens,extract_tokens

testcases_split_tokens = (
  (" BLUBBER DI BLUBB ", ["BLUBBER", "DI", "BLUBB"]),
  ("BLUBBER DI BLUBB",["BLUBBER","DI","BLUBB"]),
  ("BLUBBER DI BLUBB ",["BLUBBER","DI","BLUBB"]),
  ("BLUBBER DI 'BLUBB' ",["BLUBBER","DI","BLUBB"]),
  ("BLUBBER ( DI ) 'BLUBB' ",["BLUBBER","(","DI",")","BLUBB"]),
  ("BLUBBER(DI)",["BLUBBER","(","DI",")"]),
  ("BLUBBER ( DI)",["BLUBBER","(","DI",")"]),
  ("BLUBBER ''",["BLUBBER",""]),
  ("( BLUBBER (DI 'BLUBB'))",["(","BLUBBER","(","DI","BLUBB",")",")"]),
  ("BLUBB (DA$BLAH)",['BLUBB',"(","DA","BLAH",")"]),
  ("BLUBB ( DA $ BLAH )",['BLUBB',"(","DA","BLAH",")"]),
  ("BLUBB (DA$ BLAH)",['BLUBB',"(","DA","BLAH",")"]),
  ("BLUBB (DA $BLAH)",['BLUBB',"(","DA","BLAH",")"]),
  ("BLUBB 'DA$BLAH'",['BLUBB',"DA$BLAH"]),
  ("BLUBB DI 'BLU B B ER' DA 'BLAH' ",['BLUBB','DI','BLU B B ER','DA','BLAH']),
  ("BLUBB DI 'BLU B B ER' DA 'BLAH' LABER",['BLUBB','DI','BLU B B ER','DA','BLAH','LABER']),
  ("BLUBBER DI 'BLU'BB ER' DA 'BLAH' ", ["BLUBBER", "DI", "BLU'BB ER", "DA", "BLAH"]), # for Oracle
  ("BLUBB DI 'BLU B B ER'MUST 'BLAH' ",['BLUBB','DI','BLU B B ER','MUST','BLAH']) # for Oracle
)

for t,r in testcases_split_tokens:
  l = ldap.schema.tokenizer.split_tokens(t,{'MUST':None})
  if l!=r:
    print 'String:',repr(t)
    print '=>',l
    print 'differs from',r
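Run as a script, the loop above stays silent when every case passes and prints the input string, the actual token list, and the expected list for any mismatch; the DA/BLAH dollar variants are the regression cases for this commit.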