From 65cc6fdb4d8caa83e70ae2b393615236697542ed Mon Sep 17 00:00:00 2001 From: stroeder Date: Sat, 20 Jun 2015 14:09:45 +0000 Subject: [PATCH] Refactoring LDIFParser, especially added parsing of change records --- CHANGES | 12 ++- Lib/ldif.py | 274 +++++++++++++++++++++++++++++++++++++--------------- 2 files changed, 208 insertions(+), 78 deletions(-) diff --git a/CHANGES b/CHANGES index 86614c8..d1eddd5 100644 --- a/CHANGES +++ b/CHANGES @@ -11,6 +11,16 @@ Lib/ ldapurl, more information in some exceptions. * ldap.ldapobject.LDAPObject: New convenience methods for SASL GSSAPI or EXTERNAL binds +* Refactored parts in ldif.LDIFParser: + - New class attributes line_counter and byte_counter contain + amount of LDIF data read so far + - Renamed some internally used methods + - Added support for parsing change records currently limited to + changetype: modify + - New separate methods parse_entry_records() (also called by parse()) + and parse_change_records() + - Stricter order checking of dn:, changetype:, etc. + - Removed non-existent 'AttrTypeandValueLDIF' from ldif.__all__ ---------------------------------------------------------------- Released 2.4.19 2015-01-10 @@ -1158,4 +1168,4 @@ Released 2.0.0pre02 2002-02-01 ---------------------------------------------------------------- Released 1.10alpha3 2000-09-19 -$Id: CHANGES,v 1.343 2015/06/11 15:13:44 stroeder Exp $ +$Id: CHANGES,v 1.344 2015/06/20 14:09:45 stroeder Exp $ diff --git a/Lib/ldif.py b/Lib/ldif.py index bc47040..cbe741e 100644 --- a/Lib/ldif.py +++ b/Lib/ldif.py @@ -3,7 +3,7 @@ See http://www.python-ldap.org/ for details. -$Id: ldif.py,v 1.80 2015/06/05 21:04:58 stroeder Exp $ +$Id: ldif.py,v 1.81 2015/06/20 14:09:45 stroeder Exp $ Python compability note: Tested with Python 2.0+, but should work with Python 1.5.2+. 
@@ -15,7 +15,7 @@ # constants 'ldif_pattern', # functions - 'AttrTypeandValueLDIF','CreateLDIF','ParseLDIF', + 'CreateLDIF','ParseLDIF', # classes 'LDIFWriter', 'LDIFParser', @@ -40,7 +40,9 @@ ldif_pattern = '^((dn(:|::) %(dn_pattern)s)|(%(attrtype_pattern)s(:|::) .*)$)+' % vars() MOD_OP_INTEGER = { - 'add':0,'delete':1,'replace':2 + 'add' :0, # ldap.MOD_ADD + 'delete' :1, # ldap.MOD_DELETE + 'replace':2, # ldap.MOD_REPLACE } MOD_OP_STR = { @@ -98,7 +100,7 @@ def __init__(self,output_file,base64_attrs=None,cols=76,line_sep='\n'): self._line_sep = line_sep self.records_written = 0 - def _unfoldLDIFLine(self,line): + def _unfold_lines(self,line): """ Write string line as one or more folded lines """ @@ -117,7 +119,7 @@ def _unfoldLDIFLine(self,line): self._output_file.write(line[pos:min(line_len,pos+self._cols-1)]) self._output_file.write(self._line_sep) pos = pos+self._cols-1 - return # _unfoldLDIFLine() + return # _unfold_lines() def _needs_base64_encoding(self,attr_type,attr_value): """ @@ -138,9 +140,9 @@ def _unparseAttrTypeandValue(self,attr_type,attr_value): """ if self._needs_base64_encoding(attr_type,attr_value): # Encode with base64 - self._unfoldLDIFLine(':: '.join([attr_type,base64.encodestring(attr_value).replace('\n','')])) + self._unfold_lines(':: '.join([attr_type,base64.encodestring(attr_value).replace('\n','')])) else: - self._unfoldLDIFLine(': '.join([attr_type,attr_value])) + self._unfold_lines(': '.join([attr_type,attr_value])) return # _unparseAttrTypeandValue() def _unparseEntryRecord(self,entry): @@ -240,17 +242,6 @@ class and override method handle() to implement something meaningful. 
Counter for records processed so far """ - def _stripLineSep(self,s): - """ - Strip trailing line separators from s, but no other whitespaces - """ - if s[-2:]=='\r\n': - return s[:-2] - elif s[-1:]=='\n': - return s[:-1] - else: - return s - def __init__( self, input_file, @@ -280,46 +271,61 @@ def __init__( self._process_url_schemes = list_dict([s.lower() for s in (process_url_schemes or [])]) self._ignored_attr_types = list_dict([a.lower() for a in (ignored_attr_types or [])]) self._line_sep = line_sep + self.line_counter = 0 + self.byte_counter = 0 self.records_read = 0 + self._line = self._readline() def handle(self,dn,entry): """ Process a single content LDIF record. This method should be implemented by applications using LDIFParser. """ + pass + + def _readline(self): + s = self._input_file.readline() + self.line_counter = self.line_counter + 1 + self.byte_counter = self.byte_counter + len(s) + if s[-2:]=='\r\n': + return s[:-2] + elif s[-1:]=='\n': + return s[:-1] + else: + return s - def _unfoldLDIFLine(self): + def _unfold_lines(self): """ Unfold several folded lines with trailing space into one line """ - unfolded_lines = [ self._stripLineSep(self._line) ] - self._line = self._input_file.readline() + unfolded_lines = [ self._line ] + self._line = self._readline() while self._line and self._line[0]==' ': - unfolded_lines.append(self._stripLineSep(self._line[1:])) - self._line = self._input_file.readline() + unfolded_lines.append(self._line[1:]) + self._line = self._readline() return ''.join(unfolded_lines) - def _parseAttrTypeandValue(self): + def _next_key_and_value(self): """ Parse a single attribute type and value pair from one or more lines of LDIF data """ # Reading new attribute line - unfolded_line = self._unfoldLDIFLine() + unfolded_line = self._unfold_lines() # Ignore comments which can also be folded while unfolded_line and unfolded_line[0]=='#': - unfolded_line = self._unfoldLDIFLine() - if not unfolded_line or unfolded_line=='\n' or 
unfolded_line=='\r\n': - return None,None - try: - colon_pos = unfolded_line.index(':') - except ValueError: - # Treat malformed lines without colon as non-existent + unfolded_line = self._unfold_lines() + if not unfolded_line: return None,None + if unfolded_line=='-': + return '-',None + colon_pos = unfolded_line.index(':') attr_type = unfolded_line[0:colon_pos] # if needed attribute value is BASE64 decoded value_spec = unfolded_line[colon_pos:colon_pos+2] - if value_spec=='::': + if value_spec==': ': + attr_value = unfolded_line[colon_pos+2:] + elif value_spec=='::': # attribute value needs base64-decoding attr_value = base64.decodestring(unfolded_line[colon_pos+2:]) elif value_spec==':<': @@ -332,60 +338,154 @@ def _parseAttrTypeandValue(self): attr_value = urllib.urlopen(url).read() elif value_spec==':\r\n' or value_spec=='\n': attr_value = '' - else: - attr_value = unfolded_line[colon_pos+2:].lstrip() return attr_type,attr_value - def parse(self): + def parse_entry_records(self): """ - Continously read and parse LDIF records + Continuously read and parse LDIF entry records """ - self._line = self._input_file.readline() + k,v = self._next_key_and_value() + if k=='version': + self.version = v + k,v = self._next_key_and_value() + if k==v==None: + k,v = self._next_key_and_value() + else: + self.version = None - while self._line and \ + # Loop for processing whole records + while k!=None and \ (not self._max_entries or self.records_read