Skip to content

Commit

Permalink
some refactoring, tested with t_ldif.py
Browse files Browse the repository at this point in the history
  • Loading branch information
stroeder committed Feb 29, 2016
1 parent e8a55d2 commit 30e4b31
Showing 1 changed file with 90 additions and 73 deletions.
163 changes: 90 additions & 73 deletions Lib/ldif.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
See http://www.python-ldap.org/ for details.
$Id: ldif.py,v 1.91 2016/01/26 10:43:24 stroeder Exp $
$Id: ldif.py,v 1.92 2016/02/29 22:51:36 stroeder Exp $
Python compatibility note:
Tested with Python 2.0+, but should work with Python 1.5.2+.
Expand Down Expand Up @@ -97,7 +97,7 @@ def __init__(self,output_file,base64_attrs=None,cols=76,line_sep='\n'):
self._output_file = output_file
self._base64_attrs = list_dict([a.lower() for a in (base64_attrs or [])])
self._cols = cols
self._line_sep = line_sep
self._last_line_sep = line_sep
self.records_written = 0

def _unfold_lines(self,line):
Expand All @@ -108,16 +108,16 @@ def _unfold_lines(self,line):
line_len = len(line)
if line_len<=self._cols:
self._output_file.write(line)
self._output_file.write(self._line_sep)
self._output_file.write(self._last_line_sep)
else:
# Fold line
pos = self._cols
self._output_file.write(line[0:min(line_len,self._cols)])
self._output_file.write(self._line_sep)
self._output_file.write(self._last_line_sep)
while pos<line_len:
self._output_file.write(' ')
self._output_file.write(line[pos:min(line_len,pos+self._cols-1)])
self._output_file.write(self._line_sep)
self._output_file.write(self._last_line_sep)
pos = pos+self._cols-1
return # _unfold_lines()

Expand Down Expand Up @@ -181,7 +181,7 @@ def _unparseChangeRecord(self,modlist):
for mod_val in mod_vals:
self._unparseAttrTypeandValue(mod_type,mod_val)
if mod_len==3:
self._output_file.write('-'+self._line_sep)
self._output_file.write('-'+self._last_line_sep)

def unparse(self,dn,record):
"""
Expand All @@ -201,7 +201,7 @@ def unparse(self,dn,record):
else:
raise ValueError('Argument record must be dictionary or list instead of %s' % (repr(record)))
# Write empty line separating the records
self._output_file.write(self._line_sep)
self._output_file.write(self._last_line_sep)
# Count records written
self.records_written = self.records_written+1
return # unparse()
Expand Down Expand Up @@ -270,11 +270,16 @@ def __init__(
self._max_entries = max_entries
self._process_url_schemes = list_dict([s.lower() for s in (process_url_schemes or [])])
self._ignored_attr_types = list_dict([a.lower() for a in (ignored_attr_types or [])])
self._line_sep = line_sep
self._last_line_sep = line_sep
self.version = None
# Initialize counters
self.line_counter = 0
self.byte_counter = 0
self.records_read = 0
self._line = self._readline()
# Store some symbols for better performance
self._base64_decodestring = base64.decodestring
# Read very first line
self._last_line = self._readline()

def handle(self,dn,entry):
"""
Expand All @@ -287,7 +292,12 @@ def _readline(self):
s = self._input_file.readline()
self.line_counter = self.line_counter + 1
self.byte_counter = self.byte_counter + len(s)
if s[-2:]=='\r\n':
if not s:
raise EOFError('EOF reached after %d lines (%d bytes)' % (
self.line_counter,
self.byte_counter,
))
elif s[-2:]=='\r\n':
return s[:-2]
elif s[-1:]=='\n':
return s[:-1]
Expand All @@ -298,11 +308,12 @@ def _unfold_lines(self):
"""
Unfold several folded lines with leading space into one line
"""
unfolded_lines = [ self._line ]
self._line = self._readline()
while self._line and self._line[0]==' ':
unfolded_lines.append(self._line[1:])
self._line = self._readline()
unfolded_lines = [ self._last_line ]
next_line = self._readline()
while next_line and next_line[0]==' ':
unfolded_lines.append(next_line[1:])
next_line = self._readline()
self._last_line = next_line
return ''.join(unfolded_lines)

def _next_key_and_value(self):
Expand All @@ -319,15 +330,18 @@ def _next_key_and_value(self):
return None,None
if unfolded_line=='-':
return '-',None
colon_pos = unfolded_line.index(':')
try:
colon_pos = unfolded_line.index(':')
except ValueError,e:
raise ValueError('no value-spec in %s' % (repr(unfolded_line)))
attr_type = unfolded_line[0:colon_pos]
# if needed attribute value is BASE64 decoded
value_spec = unfolded_line[colon_pos:colon_pos+2]
if value_spec==': ':
attr_value = unfolded_line[colon_pos+2:].lstrip()
elif value_spec=='::':
# attribute value needs base64-decoding
attr_value = base64.decodestring(unfolded_line[colon_pos+2:])
attr_value = self._base64_decodestring(unfolded_line[colon_pos+2:])
elif value_spec==':<':
# fetch attribute value from URL
url = unfolded_line[colon_pos+2:].strip()
Expand All @@ -340,18 +354,36 @@ def _next_key_and_value(self):
attr_value = unfolded_line[colon_pos+1:]
return attr_type,attr_value

def _consume_empty_lines(self):
"""
Consume empty lines until first non-empty line.
Must only be used between full records!
Returns the first non-empty key-value-tuple, or (None,None) if EOF is reached first.
"""
# Local symbol for better performance
next_key_and_value = self._next_key_and_value
# Consume empty lines
try:
k,v = next_key_and_value()
while k==v==None:
k,v = next_key_and_value()
except EOFError:
k,v = None,None
return k,v

def parse_entry_records(self):
"""
Continuously read and parse LDIF entry records
"""
k,v = self._next_key_and_value()
# Local symbol for better performance
next_key_and_value = self._next_key_and_value
# Consume empty lines
k,v = self._consume_empty_lines()
# Consume 'version' line
if k=='version':
self.version = v
k,v = self._next_key_and_value()
if k==v==None:
k,v = self._next_key_and_value()
else:
self.version = None
self.version = int(v)
k,v = self._consume_empty_lines()

# Loop for processing whole records
while k!=None and \
Expand All @@ -364,25 +396,27 @@ def parse_entry_records(self):
dn = v
entry = {}
# Consume second line of record
k,v = self._next_key_and_value()
k,v = next_key_and_value()

# Loop for reading the attributes
while k!=None and \
not k.lower() in self._ignored_attr_types:
# Add the attribute to the entry if not ignored attribute
try:
entry[k].append(v)
except KeyError:
entry[k]=[v]
# Read the next line within the record
k,v = self._next_key_and_value()
# Consume empty separator line
k,v = self._next_key_and_value()
if entry:
# append entry to result list
self.handle(dn,entry)
try:
# Loop for reading the attributes
while k!=None:
# Add the attribute to the entry if not ignored attribute
if not k.lower() in self._ignored_attr_types:
try:
entry[k].append(v)
except KeyError:
entry[k]=[v]
# Read the next line within the record
k,v = next_key_and_value()
except EOFError:
pass

# handle record
self.handle(dn,entry)
self.records_read = self.records_read + 1

# Consume empty separator line(s)
k,v = self._consume_empty_lines()
return # parse_entry_records()

def parse(self):
Expand All @@ -400,13 +434,14 @@ def handle_modify(self,dn,modops,controls=None):
pass

def parse_change_records(self):
next_key_and_value = self._next_key_and_value
self.changetype_counter = {}
k,v = self._next_key_and_value()
k,v = next_key_and_value()
if k=='version':
self.version = v
k,v = self._next_key_and_value()
k,v = next_key_and_value()
if k==v==None:
k,v = self._next_key_and_value()
k,v = next_key_and_value()
else:
self.version = None

Expand All @@ -422,23 +457,23 @@ def parse_change_records(self):
dn = v
# Read "control:" lines
controls = []
k,v = self._next_key_and_value()
k,v = next_key_and_value()
while k=='control':
try:
control_type,criticality,control_value = v.split(' ',2)
except ValueError:
control_value = None
control_type,criticality = v.split(' ',1)
controls.append((control_type,criticality,control_value))
k,v = self._next_key_and_value()
k,v = next_key_and_value()
# Determine changetype first, assuming changetype: modify as default
changetype = 'modify'
# Consume second line of record
if k=='changetype':
if not v in valid_changetype_dict:
raise ValueError('Invalid changetype: %s' % repr(v))
changetype = v
k,v = self._next_key_and_value()
k,v = next_key_and_value()

if changetype=='modify':

Expand All @@ -455,15 +490,15 @@ def parse_change_records(self):
# we now have the attribute name to be modified
modattr = v
modvalues = []
k,v = self._next_key_and_value()
k,v = next_key_and_value()
while k==modattr:
modvalues.append(v)
k,v = self._next_key_and_value()
k,v = next_key_and_value()
modops.append((modop,modattr,modvalues or None))
k,v = self._next_key_and_value()
k,v = next_key_and_value()
if k=='-':
# Consume next line
k,v = self._next_key_and_value()
k,v = next_key_and_value()

if modops:
# append entry to result list
Expand All @@ -473,10 +508,12 @@ def parse_change_records(self):

# Consume the unhandled change record
while k!=None:
k,v = self._next_key_and_value()
k,v = next_key_and_value()

# Consume empty separation line
k,v = self._next_key_and_value()
k,v = next_key_and_value()
while k is None and v is None:
k,v = next_key_and_value()

# Increment record counters
try:
Expand Down Expand Up @@ -560,23 +597,3 @@ def ParseLDIF(f,ignore_attrs=None,maxentries=0):
)
ldif_parser.parse()
return ldif_parser.all_records


if __name__ == '__main__':
import sys,os,time,pprint
parser_class_name = sys.argv[1]
parser_class = vars()[parser_class_name]
parser_method_name = sys.argv[2]
for input_file_name in sys.argv[3:]:
input_file_size = os.stat(input_file_name).st_size
input_file = open(input_file_name,'rb')
ldif_parser = parser_class(input_file)
parser_method = getattr(ldif_parser,parser_method_name)
start_time = time.time()
parser_method()
end_time = time.time()
input_file.close()
print '***Time needed:',end_time-start_time,'seconds'
print '***Records read:',ldif_parser.records_read
print '***Lines read:',ldif_parser.line_counter
print '***Bytes read:',ldif_parser.byte_counter,'of',input_file_size

0 comments on commit 30e4b31

Please sign in to comment.