Commit 434b5d4

Fixed ldap.schema.tokenizer.split_tokens() to accept a single DOLLAR as separator
stroeder committed Apr 29, 2009
1 parent e1aa818 commit 434b5d4
Showing 2 changed files with 115 additions and 0 deletions.
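
In short: an unquoted DOLLAR now separates tokens just like a space does, while a DOLLAR inside a quoted value is preserved. A minimal illustration (Python 2, matching the code and test cases added in this commit; the second argument mirrors the test harness below):

from ldap.schema.tokenizer import split_tokens

# Unquoted '$' acts as a separator, with or without surrounding spaces:
split_tokens("BLUBB (DA$BLAH)", {'MUST': None})   # -> ['BLUBB', '(', 'DA', 'BLAH', ')']
# A '$' inside a quoted value is kept verbatim:
split_tokens("BLUBB 'DA$BLAH'", {'MUST': None})   # -> ['BLUBB', 'DA$BLAH']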
85 changes: 85 additions & 0 deletions Lib/ldap/schema/tokenizer.py
@@ -0,0 +1,85 @@
"""
ldap.schema.tokenizer - Low-level parsing functions for schema element strings
See http://www.python-ldap.org/ for details.
$Id: tokenizer.py,v 1.13 2009/04/29 18:13:55 stroeder Exp $
"""


def split_tokens(s,keywordDict):
  """
  Returns list of syntax elements with quotes and spaces
  stripped.
  """
  result = []
  result_append = result.append
  s_len = len(s)
  i = 0
  while i<s_len:
    start = i
    while i<s_len and s[i]!="'":
      if s[i]=="(" or s[i]==")":
        if i>start:
          result_append(s[start:i])
        result_append(s[i])
        i += 1 # Consume parenthesis
        start = i
      elif s[i]==" " or s[i]=="$":
        if i>start:
          result_append(s[start:i])
        i += 1
        # Consume more space chars
        while i<s_len and s[i]==" ":
          i += 1
        start = i
      else:
        i += 1
    if i>start:
      result_append(s[start:i])
    i += 1 # Consume opening quote (if any)
    if i>=s_len:
      break
    # Collect a quoted value up to the closing quote
    start = i
    while i<s_len and s[i]!="'":
      i += 1
    if i>=start:
      result_append(s[start:i])
    i += 1 # Consume closing quote
  return result # split_tokens()


def extract_tokens(l,known_tokens):
  """
  Returns dictionary of known tokens with all values
  """
  assert l[0].strip()=="(" and l[-1].strip()==")",ValueError(l)
  result = {}
  result_has_key = result.has_key
  result.update(known_tokens)
  i = 0
  l_len = len(l)
  while i<l_len:
    if result_has_key(l[i]):
      token = l[i]
      i += 1 # Consume token
      if i<l_len:
        if result_has_key(l[i]):
          # non-valued
          result[token] = (())
        elif l[i]=="(":
          # multi-valued
          i += 1 # Consume left parenthesis
          start = i
          while i<l_len and l[i]!=")":
            i += 1
          result[token] = tuple(filter(lambda v:v!='$',l[start:i]))
          i += 1 # Consume right parenthesis
        else:
          # single-valued
          result[token] = l[i],
          i += 1 # Consume single value
    else:
      i += 1 # Consume unrecognized item
  return result
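
For context, a sketch of how the two functions combine on a schema element string (Python 2; the attribute-type-style input below is illustrative and not part of this commit):

from ldap.schema.tokenizer import split_tokens, extract_tokens

# Hypothetical schema element string, for illustration only.
s = "( 2.5.4.3 NAME 'cn' MAY ( sn $ ou ) )"

tokens = split_tokens(s, {'NAME': None})
# -> ['(', '2.5.4.3', 'NAME', 'cn', 'MAY', '(', 'sn', 'ou', ')', ')']
# (the unquoted '$' separators inside the parenthesized value list are consumed)

d = extract_tokens(tokens, {'NAME': (), 'MAY': ()})
# -> {'NAME': ('cn',), 'MAY': ('sn', 'ou')}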

30 changes: 30 additions & 0 deletions Tests/Lib/ldap/schema/test_tokenizer.py
@@ -0,0 +1,30 @@
import ldap.schema
from ldap.schema.tokenizer import split_tokens,extract_tokens

testcases_split_tokens = (
  (" BLUBBER DI BLUBB ", ["BLUBBER", "DI", "BLUBB"]),
  ("BLUBBER DI BLUBB",["BLUBBER","DI","BLUBB"]),
  ("BLUBBER DI BLUBB ",["BLUBBER","DI","BLUBB"]),
  ("BLUBBER DI 'BLUBB' ",["BLUBBER","DI","BLUBB"]),
  ("BLUBBER ( DI ) 'BLUBB' ",["BLUBBER","(","DI",")","BLUBB"]),
  ("BLUBBER(DI)",["BLUBBER","(","DI",")"]),
  ("BLUBBER ( DI)",["BLUBBER","(","DI",")"]),
  ("BLUBBER ''",["BLUBBER",""]),
  ("( BLUBBER (DI 'BLUBB'))",["(","BLUBBER","(","DI","BLUBB",")",")"]),
  ("BLUBB (DA$BLAH)",['BLUBB',"(","DA","BLAH",")"]),
  ("BLUBB ( DA $ BLAH )",['BLUBB',"(","DA","BLAH",")"]),
  ("BLUBB (DA$ BLAH)",['BLUBB',"(","DA","BLAH",")"]),
  ("BLUBB (DA $BLAH)",['BLUBB',"(","DA","BLAH",")"]),
  ("BLUBB 'DA$BLAH'",['BLUBB',"DA$BLAH"]),
  ("BLUBB DI 'BLU B B ER' DA 'BLAH' ",['BLUBB','DI','BLU B B ER','DA','BLAH']),
  ("BLUBB DI 'BLU B B ER' DA 'BLAH' LABER",['BLUBB','DI','BLU B B ER','DA','BLAH','LABER']),
  ("BLUBBER DI 'BLU'BB ER' DA 'BLAH' ", ["BLUBBER", "DI", "BLU'BB ER", "DA", "BLAH"]), # for Oracle
  ("BLUBB DI 'BLU B B ER'MUST 'BLAH' ",['BLUBB','DI','BLU B B ER','MUST','BLAH']) # for Oracle
)

for t,r in testcases_split_tokens:
  l = ldap.schema.tokenizer.split_tokens(t,{'MUST':None})
  if l!=r:
    print 'String:',repr(t)
    print '=>',l
    print 'differs from',r
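
Note that the harness prints any mismatch between the actual and the expected token list instead of raising, so a failing case shows up as a "String:" / "differs from" diff on stdout; the print statements also mean the script is Python 2 only.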
