Fixed ldap.schema.tokenizer.split_tokens() to accept a single DOLLAR as separator
stroeder committed Apr 29, 2009 (1 parent e1aa818, commit 434b5d4)
Showing 2 changed files with 115 additions and 0 deletions.
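In short: a DOLLAR character now separates tokens even without surrounding whitespace, as it does in schema OID lists. A minimal doctest-style sketch of the new behavior (the input string is invented; compare the testcases in the second file):

>>> from ldap.schema.tokenizer import split_tokens
>>> split_tokens("( DA$BLAH )", {})
['(', 'DA', 'BLAH', ')']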
@@ -0,0 +1,85 @@
""" | ||
ldap.schema.tokenizer - Low-level parsing functions for schema element strings | ||
See http://www.python-ldap.org/ for details. | ||
\$Id: tokenizer.py,v 1.13 2009/04/29 18:13:55 stroeder Exp $ | ||
""" | ||
|
||
|
||
def split_tokens(s,keywordDict): | ||
""" | ||
Returns list of syntax elements with quotes and spaces | ||
stripped. | ||
""" | ||
result = [] | ||
result_append = result.append | ||
s_len = len(s) | ||
i = 0 | ||
while i<s_len: | ||
start = i | ||
while i<s_len and s[i]!="'": | ||
if s[i]=="(" or s[i]==")": | ||
if i>start: | ||
result_append(s[start:i]) | ||
result_append(s[i]) | ||
i +=1 # Consume parentheses | ||
start = i | ||
elif s[i]==" " or s[i]=="$": | ||
if i>start: | ||
result_append(s[start:i]) | ||
i +=1 | ||
# Consume more space chars | ||
while i<s_len and s[i]==" ": | ||
i +=1 | ||
start = i | ||
else: | ||
i +=1 | ||
if i>start: | ||
result_append(s[start:i]) | ||
i +=1 | ||
if i>=s_len: | ||
break | ||
start = i | ||
while i<s_len and s[i]!="'": | ||
i +=1 | ||
if i>=start: | ||
result_append(s[start:i]) | ||
i +=1 | ||
return result # split_tokens() | ||
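As a usage sketch (not part of the diff; the example uses the standard cn attribute type description), split_tokens() flattens a schema element string into a token list: parentheses become their own tokens, quotes are stripped, and spaces and DOLLAR act as separators. Note that the keywordDict argument is unused in this version of the function.

>>> split_tokens("( 2.5.4.3 NAME 'cn' SUP name )", {})
['(', '2.5.4.3', 'NAME', 'cn', 'SUP', 'name', ')']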
def extract_tokens(l,known_tokens):
  """
  Returns dictionary of known tokens with all values
  """
  assert l[0].strip()=="(" and l[-1].strip()==")",ValueError(l)
  result = {}
  result_has_key = result.has_key
  result.update(known_tokens)
  i = 0
  l_len = len(l)
  while i<l_len:
    if result_has_key(l[i]):
      token = l[i]
      i += 1 # Consume token
      if i<l_len:
        if result_has_key(l[i]):
          # non-valued
          result[token] = ()
        elif l[i]=="(":
          # multi-valued
          i += 1 # Consume left parentheses
          start = i
          while i<l_len and l[i]!=")":
            i += 1
          # Drop any remaining DOLLAR separators between the values
          result[token] = tuple(filter(lambda v:v!='$',l[start:i]))
          i += 1 # Consume right parentheses
        else:
          # single-valued
          result[token] = l[i],
          i += 1 # Consume single value
    else:
      i += 1 # Consume unrecognized item
  return result
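And a sketch of extract_tokens() on top of that (again not part of the commit; the OID, NAME and SUP values are made up): known_tokens maps each keyword of interest to a default value, and the result maps each keyword found to a tuple of its values.

>>> from ldap.schema.tokenizer import split_tokens, extract_tokens
>>> l = split_tokens("( 1.2.3 NAME 'myAttr' SUP ( cn $ sn ) )", {})
>>> d = extract_tokens(l, {'NAME': (), 'SUP': ()})
>>> d['NAME']
('myAttr',)
>>> d['SUP']
('cn', 'sn')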
@@ -0,0 +1,30 @@
import ldap.schema
from ldap.schema.tokenizer import split_tokens,extract_tokens

testcases_split_tokens = (
  (" BLUBBER DI BLUBB ", ["BLUBBER", "DI", "BLUBB"]),
  ("BLUBBER DI BLUBB",["BLUBBER","DI","BLUBB"]),
  ("BLUBBER DI BLUBB ",["BLUBBER","DI","BLUBB"]),
  ("BLUBBER DI 'BLUBB' ",["BLUBBER","DI","BLUBB"]),
  ("BLUBBER ( DI ) 'BLUBB' ",["BLUBBER","(","DI",")","BLUBB"]),
  ("BLUBBER(DI)",["BLUBBER","(","DI",")"]),
  ("BLUBBER ( DI)",["BLUBBER","(","DI",")"]),
  ("BLUBBER ''",["BLUBBER",""]),
  ("( BLUBBER (DI 'BLUBB'))",["(","BLUBBER","(","DI","BLUBB",")",")"]),
  ("BLUBB (DA$BLAH)",['BLUBB',"(","DA","BLAH",")"]),
  ("BLUBB ( DA $ BLAH )",['BLUBB',"(","DA","BLAH",")"]),
  ("BLUBB (DA$ BLAH)",['BLUBB',"(","DA","BLAH",")"]),
  ("BLUBB (DA $BLAH)",['BLUBB',"(","DA","BLAH",")"]),
  ("BLUBB 'DA$BLAH'",['BLUBB',"DA$BLAH"]),
  ("BLUBB DI 'BLU B B ER' DA 'BLAH' ",['BLUBB','DI','BLU B B ER','DA','BLAH']),
  ("BLUBB DI 'BLU B B ER' DA 'BLAH' LABER",['BLUBB','DI','BLU B B ER','DA','BLAH','LABER']),
  ("BLUBBER DI 'BLU'BB ER' DA 'BLAH' ", ["BLUBBER", "DI", "BLU'BB ER", "DA", "BLAH"]), # for Oracle
  ("BLUBB DI 'BLU B B ER'MUST 'BLAH' ",['BLUBB','DI','BLU B B ER','MUST','BLAH']) # for Oracle
)

for t,r in testcases_split_tokens:
  l = ldap.schema.tokenizer.split_tokens(t,{'MUST':None})
  if l!=r:
    print 'String:',repr(t)
    print '=>',l
    print 'differs from',r
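When every testcase passes, the script prints nothing; for each mismatch it prints the input string, the token list actually produced, and the expected one.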