Fixed ldap.schema.tokenizer.split_tokens() to accept a single DOLLAR as separator
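With this change a DOLLAR between two values is consumed as a separator whether or not it is surrounded by whitespace, while a DOLLAR inside a quoted value is preserved. A minimal sketch of the now-expected behaviour, lifted from the test cases added below (the keyword dict argument is just the one those tests pass):

from ldap.schema.tokenizer import split_tokens

# A bare "$" acts as a separator, with or without surrounding spaces ...
assert split_tokens("BLUBB (DA$BLAH)", {'MUST': None}) == ['BLUBB', '(', 'DA', 'BLAH', ')']
assert split_tokens("BLUBB ( DA $  BLAH )", {'MUST': None}) == ['BLUBB', '(', 'DA', 'BLAH', ')']
# ... but a "$" inside a quoted value is kept verbatim.
assert split_tokens("BLUBB 'DA$BLAH'", {'MUST': None}) == ['BLUBB', 'DA$BLAH']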
Lib/ldap/schema/tokenizer.py | 85 (new file)

@@ -0,0 +1,85 @@
"""
ldap.schema.tokenizer - Low-level parsing functions for schema element strings

See http://www.python-ldap.org/ for details.

$Id: tokenizer.py,v 1.13 2009/04/29 18:13:55 stroeder Exp $
"""


def split_tokens(s,keywordDict):
  """
  Returns list of syntax elements with quotes and spaces
  stripped.
  """
  result = []
  result_append = result.append
  s_len = len(s)
  i = 0
  while i<s_len:
    start = i
    while i<s_len and s[i]!="'":
      if s[i]=="(" or s[i]==")":
        if i>start:
          result_append(s[start:i])
        result_append(s[i])
        i +=1 # Consume parentheses
        start = i
      elif s[i]==" " or s[i]=="$":
        if i>start:
          result_append(s[start:i])
        i +=1
        # Consume more space chars
        while i<s_len and s[i]==" ":
          i +=1
        start = i
      else:
        i +=1
    if i>start:
      result_append(s[start:i])
    i +=1
    if i>=s_len:
      break
    start = i
    while i<s_len and s[i]!="'":
      i +=1
    if i>=start:
      result_append(s[start:i])
    i +=1
  return result # split_tokens()


def extract_tokens(l,known_tokens):
  """
  Returns dictionary of known tokens with all values
  """
  assert l[0].strip()=="(" and l[-1].strip()==")",ValueError(l)
  result = {}
  result_has_key = result.has_key
  result.update(known_tokens)
  i = 0
  l_len = len(l)
  while i<l_len:
    if result_has_key(l[i]):
      token = l[i]
      i += 1 # Consume token
      if i<l_len:
        if result_has_key(l[i]):
          # non-valued
          result[token] = (())
        elif l[i]=="(":
          # multi-valued
          i += 1 # Consume left parentheses
          start = i
          while i<l_len and l[i]!=")":
            i += 1
          result[token] = tuple(filter(lambda v:v!='$',l[start:i]))
          i += 1 # Consume right parentheses
        else:
          # single-valued
          result[token] = l[i],
          i += 1 # Consume single value
    else:
      i += 1 # Consume unrecognized item
  return result
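A short usage sketch (not part of the commit) of how the two new functions combine on a schema element string; the attribute-type string and the keyword dict below are illustrative assumptions, and the code targets the Python 2 interpreter this module was written for:

from ldap.schema.tokenizer import split_tokens, extract_tokens

s = "( 2.5.6.6 NAME 'person' SUP top MUST ( cn $ sn ) )"
l = split_tokens(s, {'MUST': None})
# The bare "$" between cn and sn is dropped as a separator.
assert l == ['(', '2.5.6.6', 'NAME', 'person', 'SUP', 'top',
             'MUST', '(', 'cn', 'sn', ')', ')']

d = extract_tokens(l, {'NAME': None, 'SUP': None, 'MUST': None})
assert d['NAME'] == ('person',)   # single-valued
assert d['MUST'] == ('cn', 'sn')  # multi-valued, "$" already filtered out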
Tests/Lib/ldap/schema/test_tokenizer.py | 30 (new file)

@@ -0,0 +1,30 @@
import ldap.schema
from ldap.schema.tokenizer import split_tokens,extract_tokens

testcases_split_tokens = (
  (" BLUBBER DI BLUBB ", ["BLUBBER", "DI", "BLUBB"]),
  ("BLUBBER DI BLUBB",["BLUBBER","DI","BLUBB"]),
  ("BLUBBER  DI   BLUBB  ",["BLUBBER","DI","BLUBB"]),
  ("BLUBBER  DI  'BLUBB'   ",["BLUBBER","DI","BLUBB"]),
  ("BLUBBER ( DI ) 'BLUBB'   ",["BLUBBER","(","DI",")","BLUBB"]),
  ("BLUBBER(DI)",["BLUBBER","(","DI",")"]),
  ("BLUBBER ( DI)",["BLUBBER","(","DI",")"]),
  ("BLUBBER ''",["BLUBBER",""]),
  ("( BLUBBER (DI 'BLUBB'))",["(","BLUBBER","(","DI","BLUBB",")",")"]),
  ("BLUBB (DA$BLAH)",['BLUBB',"(","DA","BLAH",")"]),
  ("BLUBB ( DA $  BLAH )",['BLUBB',"(","DA","BLAH",")"]),
  ("BLUBB (DA$ BLAH)",['BLUBB',"(","DA","BLAH",")"]),
  ("BLUBB (DA $BLAH)",['BLUBB',"(","DA","BLAH",")"]),
  ("BLUBB 'DA$BLAH'",['BLUBB',"DA$BLAH"]),
  ("BLUBB DI 'BLU B B ER' DA 'BLAH' ",['BLUBB','DI','BLU B B ER','DA','BLAH']),
  ("BLUBB DI 'BLU B B ER' DA 'BLAH' LABER",['BLUBB','DI','BLU B B ER','DA','BLAH','LABER']),
  ("BLUBBER DI 'BLU'BB ER' DA 'BLAH' ", ["BLUBBER", "DI", "BLU'BB ER", "DA", "BLAH"]), # for Oracle
  ("BLUBB DI 'BLU B B ER'MUST 'BLAH' ",['BLUBB','DI','BLU B B ER','MUST','BLAH']) # for Oracle
)

for t,r in testcases_split_tokens:
  l = ldap.schema.tokenizer.split_tokens(t,{'MUST':None})
  if l!=r:
    print 'String:',repr(t)
    print '=>',l
    print 'differs from',r