some refactoring, tested with t_ldif.py

ECN · Feb 29, 2016 · 30e4b31 · 30e4b31
1 parent e8a55d2
commit 30e4b31
Showing 1 changed file with 90 additions and 73 deletions.
diff --git a/Lib/ldif.py b/Lib/ldif.py
@@ -3,7 +3,7 @@
 
 See http://www.python-ldap.org/ for details.
 
-$Id: ldif.py,v 1.91 2016/01/26 10:43:24 stroeder Exp $
+$Id: ldif.py,v 1.92 2016/02/29 22:51:36 stroeder Exp $
 
 Python compability note:
 Tested with Python 2.0+, but should work with Python 1.5.2+.
@@ -97,7 +97,7 @@ def __init__(self,output_file,base64_attrs=None,cols=76,line_sep='\n'):
     self._output_file = output_file
     self._base64_attrs = list_dict([a.lower() for a in (base64_attrs or [])])
     self._cols = cols
-    self._line_sep = line_sep
+    self._last_line_sep = line_sep
     self.records_written = 0
 
   def _unfold_lines(self,line):
@@ -108,16 +108,16 @@ def _unfold_lines(self,line):
     line_len = len(line)
     if line_len<=self._cols:
       self._output_file.write(line)
-      self._output_file.write(self._line_sep)
+      self._output_file.write(self._last_line_sep)
     else:
       # Fold line
       pos = self._cols
       self._output_file.write(line[0:min(line_len,self._cols)])
-      self._output_file.write(self._line_sep)
+      self._output_file.write(self._last_line_sep)
       while pos<line_len:
         self._output_file.write(' ')
         self._output_file.write(line[pos:min(line_len,pos+self._cols-1)])
-        self._output_file.write(self._line_sep)
+        self._output_file.write(self._last_line_sep)
         pos = pos+self._cols-1
     return # _unfold_lines()
 
@@ -181,7 +181,7 @@ def _unparseChangeRecord(self,modlist):
         for mod_val in mod_vals:
           self._unparseAttrTypeandValue(mod_type,mod_val)
       if mod_len==3:
-        self._output_file.write('-'+self._line_sep)
+        self._output_file.write('-'+self._last_line_sep)
 
   def unparse(self,dn,record):
     """
@@ -201,7 +201,7 @@ def unparse(self,dn,record):
     else:
       raise ValueError('Argument record must be dictionary or list instead of %s' % (repr(record)))
     # Write empty line separating the records
-    self._output_file.write(self._line_sep)
+    self._output_file.write(self._last_line_sep)
     # Count records written
     self.records_written = self.records_written+1
     return # unparse()
@@ -270,11 +270,16 @@ def __init__(
     self._max_entries = max_entries
     self._process_url_schemes = list_dict([s.lower() for s in (process_url_schemes or [])])
     self._ignored_attr_types = list_dict([a.lower() for a in (ignored_attr_types or [])])
-    self._line_sep = line_sep
+    self._last_line_sep = line_sep
+    self.version = None
+    # Initialize counters
     self.line_counter = 0
     self.byte_counter = 0
     self.records_read = 0
-    self._line = self._readline()
+    # Store some symbols for better performance
+    self._base64_decodestring = base64.decodestring
+    # Read very first line
+    self._last_line = self._readline()
 
   def handle(self,dn,entry):
     """
@@ -287,7 +292,12 @@ def _readline(self):
     s = self._input_file.readline()
     self.line_counter = self.line_counter + 1
     self.byte_counter = self.byte_counter + len(s)
-    if s[-2:]=='\r\n':
+    if not s:
+      raise EOFError('EOF reached after %d lines (%d bytes)' % (
+        self.line_counter,
+        self.byte_counter,
+      ))
+    elif s[-2:]=='\r\n':
       return s[:-2]
     elif s[-1:]=='\n':
       return s[:-1]
@@ -298,11 +308,12 @@ def _unfold_lines(self):
     """
     Unfold several folded lines with trailing space into one line
     """
-    unfolded_lines = [ self._line ]
-    self._line = self._readline()
-    while self._line and self._line[0]==' ':
-      unfolded_lines.append(self._line[1:])
-      self._line = self._readline()
+    unfolded_lines = [ self._last_line ]
+    next_line = self._readline()
+    while next_line and next_line[0]==' ':
+      unfolded_lines.append(next_line[1:])
+      next_line = self._readline()
+    self._last_line = next_line
     return ''.join(unfolded_lines)
 
   def _next_key_and_value(self):
@@ -319,15 +330,18 @@ def _next_key_and_value(self):
       return None,None
     if unfolded_line=='-':
       return '-',None
-    colon_pos = unfolded_line.index(':')
+    try:
+      colon_pos = unfolded_line.index(':')
+    except ValueError,e:
+      raise ValueError('no value-spec in %s' % (repr(unfolded_line)))
     attr_type = unfolded_line[0:colon_pos]
     # if needed attribute value is BASE64 decoded
     value_spec = unfolded_line[colon_pos:colon_pos+2]
     if value_spec==': ':
       attr_value = unfolded_line[colon_pos+2:].lstrip()
     elif value_spec=='::':
       # attribute value needs base64-decoding
-      attr_value = base64.decodestring(unfolded_line[colon_pos+2:])
+      attr_value = self._base64_decodestring(unfolded_line[colon_pos+2:])
     elif value_spec==':<':
       # fetch attribute value from URL
       url = unfolded_line[colon_pos+2:].strip()
@@ -340,18 +354,36 @@ def _next_key_and_value(self):
       attr_value = unfolded_line[colon_pos+1:]
     return attr_type,attr_value
 
+  def _consume_empty_lines(self):
+    """
+    Consume empty lines until first non-empty line.
+    Must only be used between full records!
+
+    Returns non-empty key-value-tuple.
+    """
+    # Local symbol for better performance
+    next_key_and_value = self._next_key_and_value
+    # Consume empty lines
+    try:
+      k,v = next_key_and_value()
+      while k==v==None:
+        k,v = next_key_and_value()
+    except EOFError:
+      k,v = None,None
+    return k,v
+
   def parse_entry_records(self):
     """
     Continously read and parse LDIF entry records
     """
-    k,v = self._next_key_and_value()
+    # Local symbol for better performance
+    next_key_and_value = self._next_key_and_value
+    # Consume empty lines
+    k,v = self._consume_empty_lines()
+    # Consume 'version' line
     if k=='version':
-      self.version = v
-      k,v = self._next_key_and_value()
-      if k==v==None:
-        k,v = self._next_key_and_value()
-    else:
-      self.version = None
+      self.version = int(v)
+      k,v = self._consume_empty_lines()
 
     # Loop for processing whole records
     while k!=None and \
@@ -364,25 +396,27 @@ def parse_entry_records(self):
       dn = v
       entry = {}
       # Consume second line of record
-      k,v = self._next_key_and_value()
+      k,v = next_key_and_value()
 
-      # Loop for reading the attributes
-      while k!=None and \
-         not k.lower() in self._ignored_attr_types:
-        # Add the attribute to the entry if not ignored attribute
-        try:
-          entry[k].append(v)
-        except KeyError:
-          entry[k]=[v]
-        # Read the next line within the record
-        k,v = self._next_key_and_value()
-      # Consume empty separator line
-      k,v = self._next_key_and_value()
-      if entry:
-        # append entry to result list
-        self.handle(dn,entry)
+      try:
+        # Loop for reading the attributes
+        while k!=None:
+          # Add the attribute to the entry if not ignored attribute
+          if not k.lower() in self._ignored_attr_types:
+            try:
+              entry[k].append(v)
+            except KeyError:
+              entry[k]=[v]
+          # Read the next line within the record
+          k,v = next_key_and_value()
+      except EOFError:
+        pass
+
+      # handle record
+      self.handle(dn,entry)
       self.records_read = self.records_read + 1
-
+      # Consume empty separator line(s)
+      k,v = self._consume_empty_lines()
     return # parse_entry_records()
 
   def parse(self):
@@ -400,13 +434,14 @@ def handle_modify(self,dn,modops,controls=None):
     pass
 
   def parse_change_records(self):
+    next_key_and_value = self._next_key_and_value
     self.changetype_counter = {}
-    k,v = self._next_key_and_value()
+    k,v = next_key_and_value()
     if k=='version':
       self.version = v
-      k,v = self._next_key_and_value()
+      k,v = next_key_and_value()
       if k==v==None:
-        k,v = self._next_key_and_value()
+        k,v = next_key_and_value()
     else:
       self.version = None
 
@@ -422,23 +457,23 @@ def parse_change_records(self):
       dn = v
       # Read "control:" lines
       controls = []
-      k,v = self._next_key_and_value()
+      k,v = next_key_and_value()
       while k=='control':
         try:
           control_type,criticality,control_value = v.split(' ',2)
         except ValueError:
           control_value = None
           control_type,criticality = v.split(' ',1)
         controls.append((control_type,criticality,control_value))
-        k,v = self._next_key_and_value()
+        k,v = next_key_and_value()
       # Determine changetype first, assuming changetype: as default
       changetype = 'modify'
       # Consume second line of record
       if k=='changetype':
         if not v in valid_changetype_dict:
           raise ValueError('Invalid changetype: %s' % repr(v))
         changetype = v
-        k,v = self._next_key_and_value()
+        k,v = next_key_and_value()
 
       if changetype=='modify':
 
@@ -455,15 +490,15 @@ def parse_change_records(self):
           # we now have the attribute name to be modified
           modattr = v
           modvalues = []
-          k,v = self._next_key_and_value()
+          k,v = next_key_and_value()
           while k==modattr:
             modvalues.append(v)
-            k,v = self._next_key_and_value()
+            k,v = next_key_and_value()
           modops.append((modop,modattr,modvalues or None))
-          k,v = self._next_key_and_value()
+          k,v = next_key_and_value()
           if k=='-':
             # Consume next line
-            k,v = self._next_key_and_value()
+            k,v = next_key_and_value()
 
         if modops:
           # append entry to result list
@@ -473,10 +508,12 @@ def parse_change_records(self):
 
         # Consume the unhandled change record
         while k!=None:
-          k,v = self._next_key_and_value()
+          k,v = next_key_and_value()
 
       # Consume empty separation line
-      k,v = self._next_key_and_value()
+      k,v = next_key_and_value()
+      while k is None and v is None:
+        k,v = next_key_and_value()
 
       # Increment record counters
       try:
@@ -560,23 +597,3 @@ def ParseLDIF(f,ignore_attrs=None,maxentries=0):
   )
   ldif_parser.parse()
   return ldif_parser.all_records
-
-
-if __name__ == '__main__':
-  import sys,os,time,pprint
-  parser_class_name = sys.argv[1]
-  parser_class = vars()[parser_class_name]
-  parser_method_name = sys.argv[2]
-  for input_file_name in sys.argv[3:]:
-    input_file_size = os.stat(input_file_name).st_size
-    input_file = open(input_file_name,'rb')
-    ldif_parser = parser_class(input_file)
-    parser_method = getattr(ldif_parser,parser_method_name)
-    start_time = time.time()
-    parser_method()
-    end_time = time.time()
-    input_file.close()
-    print '***Time needed:',end_time-start_time,'seconds'
-    print '***Records read:',ldif_parser.records_read
-    print '***Lines read:',ldif_parser.line_counter
-    print '***Bytes read:',ldif_parser.byte_counter,'of',input_file_size