diff --git a/Lib/ldap/dn.py b/Lib/ldap/dn.py index 3298551..00c7b06 100644 --- a/Lib/ldap/dn.py +++ b/Lib/ldap/dn.py @@ -4,6 +4,7 @@ See https://www.python-ldap.org/ for details. """ +import sys from ldap.pkginfo import __version__ import _ldap @@ -46,6 +47,8 @@ def str2dn(dn,flags=0): """ if not dn: return [] + if sys.version_info[0] < 3 and isinstance(dn, unicode): + dn = dn.encode('utf-8') return ldap.functions._ldap_function_call(None,_ldap.str2dn,dn,flags) diff --git a/Lib/ldap/functions.py b/Lib/ldap/functions.py index c60d42b..b887037 100644 --- a/Lib/ldap/functions.py +++ b/Lib/ldap/functions.py @@ -62,7 +62,7 @@ def _ldap_function_call(lock,func,*args,**kwargs): return result -def initialize(uri,trace_level=0,trace_file=sys.stdout,trace_stack_limit=None): +def initialize(uri,trace_level=0,trace_file=sys.stdout,trace_stack_limit=None, bytes_mode=None): """ Return LDAPObject instance by opening LDAP connection to LDAP host specified by LDAP URL @@ -76,11 +76,13 @@ def initialize(uri,trace_level=0,trace_file=sys.stdout,trace_stack_limit=None): trace_file File object where to write the trace output to. Default is to use stdout. + bytes_mode + Whether to enable "bytes_mode" for backwards compatibility under Py2. """ - return LDAPObject(uri,trace_level,trace_file,trace_stack_limit) + return LDAPObject(uri,trace_level,trace_file,trace_stack_limit,bytes_mode) -def open(host,port=389,trace_level=0,trace_file=sys.stdout,trace_stack_limit=None): +def open(host,port=389,trace_level=0,trace_file=sys.stdout,trace_stack_limit=None,bytes_mode=None): """ Return LDAPObject instance by opening LDAP connection to specified LDAP host @@ -95,10 +97,12 @@ def open(host,port=389,trace_level=0,trace_file=sys.stdout,trace_stack_limit=Non trace_file File object where to write the trace output to. Default is to use stdout. + bytes_mode + Whether to enable "bytes_mode" for backwards compatibility under Py2. """ import warnings warnings.warn('ldap.open() is deprecated! 
Use ldap.initialize() instead.', DeprecationWarning,2) - return initialize('ldap://%s:%d' % (host,port),trace_level,trace_file,trace_stack_limit) + return initialize('ldap://%s:%d' % (host,port),trace_level,trace_file,trace_stack_limit,bytes_mode) init = open diff --git a/Lib/ldap/ldapobject.py b/Lib/ldap/ldapobject.py index e83287d..01f0044 100644 --- a/Lib/ldap/ldapobject.py +++ b/Lib/ldap/ldapobject.py @@ -4,6 +4,8 @@ See https://www.python-ldap.org/ for details. """ +from __future__ import unicode_literals + from os import strerror from ldap.pkginfo import __version__, __author__, __license__ @@ -20,6 +22,7 @@ import traceback import sys,time,pprint,_ldap,ldap,ldap.sasl,ldap.functions +import warnings from ldap.schema import SCHEMA_ATTRS from ldap.controls import LDAPControl,DecodeControlTuples,RequestControlTuples @@ -28,6 +31,11 @@ from ldap import LDAPError +PY2 = bool(sys.version_info[0] <= 2) +if PY2: + text_type = unicode +else: + text_type = str class NO_UNIQUE_ENTRY(ldap.NO_SUCH_OBJECT): """ @@ -55,7 +63,7 @@ class SimpleLDAPObject: def __init__( self,uri, - trace_level=0,trace_file=None,trace_stack_limit=5 + trace_level=0,trace_file=None,trace_stack_limit=5,bytes_mode=None ): self._trace_level = trace_level self._trace_file = trace_file or sys.stdout @@ -66,6 +74,186 @@ def __init__( self.timeout = -1 self.protocol_version = ldap.VERSION3 + # Bytes mode + # ---------- + + # By default, raise a TypeError when receiving invalid args + self.bytes_mode_hardfail = True + if bytes_mode is None and PY2: + warnings.warn( + "Under Python 2, python-ldap uses bytes by default. " + "This will be removed in Python 3 (no bytes for DN/RDN/field names). " + "Please call initialize(..., bytes_mode=False) explicitly.", + BytesWarning, + stacklevel=2, + ) + bytes_mode = True + # Disable hard failure when running in backwards compatibility mode. 
+ self.bytes_mode_hardfail = False + elif bytes_mode and not PY2: + raise ValueError("bytes_mode is *not* supported under Python 3.") + # On by default on Py2, off on Py3. + self.bytes_mode = bytes_mode + + def _bytesify_input(self, value): + """Adapt a value following bytes_mode in Python 2. + + In Python 3, returns the original value unmodified. + + With bytes_mode ON, takes bytes or None and returns bytes or None. + With bytes_mode OFF, takes unicode or None and returns bytes or None. + + This function should be applied on all text inputs (distinguished names + and attribute names in modlists) to convert them to the bytes expected + by the C bindings. + """ + if not PY2: + return value + + if value is None: + return value + elif self.bytes_mode: + if isinstance(value, bytes): + return value + else: + if self.bytes_mode_hardfail: + raise TypeError("All provided fields *must* be bytes when bytes mode is on; got %r" % (value,)) + else: + warnings.warn( + "Received non-bytes value %r with default (disabled) bytes mode; please choose an explicit " + "option for bytes_mode on your LDAP connection" % (value,), + BytesWarning, + stacklevel=6, + ) + return value.encode('utf-8') + else: + if not isinstance(value, text_type): + raise TypeError("All provided fields *must* be text when bytes mode is off; got %r" % (value,)) + assert not isinstance(value, bytes) + return value.encode('utf-8') + + def _bytesify_inputs(self, *values): + """Adapt values following bytes_mode. + + Applies _bytesify_input on each arg. + + Usage: + >>> a, b, c = self._bytesify_inputs(a, b, c) + """ + if not PY2: + return values + return ( + self._bytesify_input(value) + for value in values + ) + + def _bytesify_modlist(self, modlist, with_opcode): + """Adapt a modlist according to bytes_mode. 
+ + A modlist is a tuple of (op, attr, value), where: + - With bytes_mode ON, attr is checked to be bytes + - With bytes_mode OFF, attr is converted from unicode to bytes + - value is *always* bytes + """ + if not PY2: + return modlist + if with_opcode: + return tuple( + (op, self._bytesify_input(attr), val) + for op, attr, val in modlist + ) + else: + return tuple( + (self._bytesify_input(attr), val) + for attr, val in modlist + ) + + def _unbytesify_text_value(self, value): + """Adapt a 'known text, UTF-8 encoded' returned value following bytes_mode. + + With bytes_mode ON, takes bytes or None and returns bytes or None. + With bytes_mode OFF, takes bytes or None and returns unicode or None. + + This function should only be applied on field *values*; distinguished names + or field *names* are already natively handled in result4. + """ + if value is None: + return value + + # Preserve logic of assertions only under Python 2 + if PY2: + assert isinstance(value, bytes), "Expected bytes value, got text instead (%r)" % (value,) + + if self.bytes_mode: + return value + else: + return value.decode('utf-8') + + def _maybe_rebytesify_text(self, value): + """Re-encodes text to bytes if needed by bytes_mode. + + Takes unicode (and checks for it), and returns: + - bytes under bytes_mode + - unicode otherwise. + """ + if not PY2: + return value + + if value is None: + return value + + assert isinstance(value, text_type), "Should return text, got bytes instead (%r)" % (value,) + if not self.bytes_mode: + return value + else: + return value.encode('utf-8') + + def _bytesify_result_value(self, result_value): + """Applies bytes_mode to a result value. 
+ + Such a value can either be: + - a dict mapping an attribute name to its list of values + (where attribute names are unicode and values bytes) + - a list of referrals (which are unicode) + """ + if not PY2: + return result_value + if hasattr(result_value, 'items'): + # It's an attribute_name: [values] dict + return dict( + (self._maybe_rebytesify_text(key), value) + for (key, value) in result_value.items() + ) + elif isinstance(result_value, bytes): + return result_value + else: + # It's a list of referrals + # Example value: + # [u'ldap://DomainDnsZones.xxxx.root.local/DC=DomainDnsZones,DC=xxxx,DC=root,DC=local'] + return [self._maybe_rebytesify_text(referal) for referal in result_value] + + def _bytesify_results(self, results, with_ctrls=False): + """Converts a "results" object according to bytes_mode. + + Takes: + - a list of (dn, {field: [values]}) if with_ctrls is False + - a list of (dn, {field: [values]}, ctrls) if with_ctrls is True + + And, if bytes_mode is on, converts dn and fields to bytes. + """ + if not PY2: + return results + if with_ctrls: + return [ + (self._maybe_rebytesify_text(dn), self._bytesify_result_value(fields), ctrls) + for (dn, fields, ctrls) in results + ] + else: + return [ + (self._maybe_rebytesify_text(dn), self._bytesify_result_value(fields)) + for (dn, fields) in results + ] + def _ldap_lock(self,desc=''): if ldap.LIBLDAP_R: return ldap.LDAPLock(desc='%s within %s' %(desc,repr(self))) @@ -185,6 +373,8 @@ def add_ext(self,dn,modlist,serverctrls=None,clientctrls=None): The parameter modlist is similar to the one passed to modify(), except that no operation integer need be included in the tuples.
""" + dn = self._bytesify_input(dn) + modlist = self._bytesify_modlist(modlist, with_opcode=False) return self._ldap_call(self._l.add_ext,dn,modlist,RequestControlTuples(serverctrls),RequestControlTuples(clientctrls)) def add_ext_s(self,dn,modlist,serverctrls=None,clientctrls=None): @@ -209,6 +399,7 @@ def simple_bind(self,who='',cred='',serverctrls=None,clientctrls=None): """ simple_bind([who='' [,cred='']]) -> int """ + who, cred = self._bytesify_inputs(who, cred) return self._ldap_call(self._l.simple_bind,who,cred,RequestControlTuples(serverctrls),RequestControlTuples(clientctrls)) def simple_bind_s(self,who='',cred='',serverctrls=None,clientctrls=None): @@ -285,6 +476,7 @@ def compare_ext(self,dn,attr,value,serverctrls=None,clientctrls=None): A design bug in the library prevents value from containing nul characters. """ + dn, attr = self._bytesify_inputs(dn, attr) return self._ldap_call(self._l.compare_ext,dn,attr,value,RequestControlTuples(serverctrls),RequestControlTuples(clientctrls)) def compare_ext_s(self,dn,attr,value,serverctrls=None,clientctrls=None): @@ -315,6 +507,7 @@ def delete_ext(self,dn,serverctrls=None,clientctrls=None): form returns the message id of the initiated request, and the result can be obtained from a subsequent call to result(). 
""" + dn = self._bytesify_input(dn) return self._ldap_call(self._l.delete_ext,dn,RequestControlTuples(serverctrls),RequestControlTuples(clientctrls)) def delete_ext_s(self,dn,serverctrls=None,clientctrls=None): @@ -363,6 +556,8 @@ def modify_ext(self,dn,modlist,serverctrls=None,clientctrls=None): """ modify_ext(dn, modlist[,serverctrls=None[,clientctrls=None]]) -> int """ + dn = self._bytesify_input(dn) + modlist = self._bytesify_modlist(modlist, with_opcode=True) return self._ldap_call(self._l.modify_ext,dn,modlist,RequestControlTuples(serverctrls),RequestControlTuples(clientctrls)) def modify_ext_s(self,dn,modlist,serverctrls=None,clientctrls=None): @@ -416,6 +611,7 @@ def modrdn_s(self,dn,newrdn,delold=1): return self.rename_s(dn,newrdn,None,delold) def passwd(self,user,oldpw,newpw,serverctrls=None,clientctrls=None): + user, oldpw, newpw = self._bytesify_inputs(user, oldpw, newpw) return self._ldap_call(self._l.passwd,user,oldpw,newpw,RequestControlTuples(serverctrls),RequestControlTuples(clientctrls)) def passwd_s(self,user,oldpw,newpw,serverctrls=None,clientctrls=None): @@ -437,6 +633,7 @@ def rename(self,dn,newrdn,newsuperior=None,delold=1,serverctrls=None,clientctrls This actually corresponds to the rename* routines in the LDAP-EXT C API library. 
""" + dn, newrdn, newsuperior = self._bytesify_inputs(dn, newrdn, newsuperior) return self._ldap_call(self._l.rename,dn,newrdn,newsuperior,delold,RequestControlTuples(serverctrls),RequestControlTuples(clientctrls)) def rename_s(self,dn,newrdn,newsuperior=None,delold=1,serverctrls=None,clientctrls=None): @@ -525,6 +722,8 @@ def result4(self,msgid=ldap.RES_ANY,all=1,timeout=None,add_ctrls=0,add_intermedi if add_ctrls: resp_data = [ (t,r,DecodeControlTuples(c,resp_ctrl_classes)) for t,r,c in resp_data ] decoded_resp_ctrls = DecodeControlTuples(resp_ctrls,resp_ctrl_classes) + if resp_data is not None: + resp_data = self._bytesify_results(resp_data, with_ctrls=add_ctrls) return resp_type, resp_data, resp_msgid, decoded_resp_ctrls, resp_name, resp_value def search_ext(self,base,scope,filterstr='(objectClass=*)',attrlist=None,attrsonly=0,serverctrls=None,clientctrls=None,timeout=-1,sizelimit=0): @@ -572,6 +771,9 @@ def search_ext(self,base,scope,filterstr='(objectClass=*)',attrlist=None,attrson The amount of search results retrieved can be limited with the sizelimit parameter if non-zero. """ + base, filterstr = self._bytesify_inputs(base, filterstr) + if attrlist is not None: + attrlist = tuple(self._bytesify_inputs(*attrlist)) return self._ldap_call( self._l.search_ext, base,scope,filterstr, @@ -665,6 +867,8 @@ def search_subschemasubentry_s(self,dn=''): None as result indicates that the DN of the sub schema sub entry could not be determined. + + Returns: None or text/bytes depending on bytes_mode. """ try: r = self.search_s( @@ -686,7 +890,9 @@ def search_subschemasubentry_s(self,dn=''): # If dn was already root DSE we can return here return None else: - return search_subschemasubentry_dn + # With legacy bytes mode, return bytes; otherwise, since this is a DN, + # RFCs impose that the field value *can* be decoded to UTF-8. 
+ return self._unbytesify_text_value(search_subschemasubentry_dn) except IndexError: return None @@ -788,7 +994,7 @@ class ReconnectLDAPObject(SimpleLDAPObject): def __init__( self,uri, - trace_level=0,trace_file=None,trace_stack_limit=5, + trace_level=0,trace_file=None,trace_stack_limit=5,bytes_mode=None, retry_max=1,retry_delay=60.0 ): """ @@ -803,7 +1009,7 @@ def __init__( self._uri = uri self._options = [] self._last_bind = None - SimpleLDAPObject.__init__(self,uri,trace_level,trace_file,trace_stack_limit) + SimpleLDAPObject.__init__(self,uri,trace_level,trace_file,trace_stack_limit,bytes_mode) self._reconnect_lock = ldap.LDAPLock(desc='reconnect lock within %s' % (repr(self))) self._retry_max = retry_max self._retry_delay = retry_delay diff --git a/Lib/ldap/sasl.py b/Lib/ldap/sasl.py index 34d4cb0..fa6f4f5 100644 --- a/Lib/ldap/sasl.py +++ b/Lib/ldap/sasl.py @@ -46,6 +46,8 @@ def __init__(self, cb_value_dict, mech): the SASL mechaninsm to be uesd. """ self.cb_value_dict = cb_value_dict or {} + if not isinstance(mech, bytes): + mech = mech.encode('utf-8') self.mech = mech def callback(self, cb_id, challenge, prompt, defresult): @@ -64,6 +66,8 @@ def callback(self, cb_id, challenge, prompt, defresult): useful for writing generic sasl GUIs, which would need to know all the questions to ask, before the answers are returned to the sasl lib (in contrast to one question at a time). + + Unicode strings are always converted to bytes. 
""" # The following print command might be useful for debugging @@ -78,6 +82,8 @@ def callback(self, cb_id, challenge, prompt, defresult): repr(defresult), repr(self.cb_value_dict.get(cb_result)) )) + if not isinstance(cb_result, bytes): + cb_result = cb_result.encode('utf-8') return cb_result diff --git a/Lib/ldap/schema/models.py b/Lib/ldap/schema/models.py index 300981e..c0391b4 100644 --- a/Lib/ldap/schema/models.py +++ b/Lib/ldap/schema/models.py @@ -32,6 +32,7 @@ class SchemaElement: schema_element_str String which contains the schema element description to be parsed. + (Bytestrings are decoded using UTF-8) Class attributes: @@ -46,6 +47,8 @@ class SchemaElement: } def __init__(self,schema_element_str=None): + if sys.version_info >= (3, 0) and isinstance(schema_element_str, bytes): + schema_element_str = schema_element_str.decode('utf-8') if schema_element_str: l = split_tokens(schema_element_str) self.set_id(l[1]) diff --git a/Lib/ldap/schema/subentry.py b/Lib/ldap/schema/subentry.py index 4d42b19..2a42b4c 100644 --- a/Lib/ldap/schema/subentry.py +++ b/Lib/ldap/schema/subentry.py @@ -456,7 +456,9 @@ def urlfetch(uri,trace_level=0): if uri.startswith('ldap:') or uri.startswith('ldaps:') or uri.startswith('ldapi:'): import ldapurl ldap_url = ldapurl.LDAPUrl(uri) - l=ldap.initialize(ldap_url.initializeUrl(),trace_level) + + # This is an internal function; don't enable bytes_mode. + l=ldap.initialize(ldap_url.initializeUrl(),trace_level,bytes_mode=False) l.protocol_version = ldap.VERSION3 l.simple_bind_s(ldap_url.who or '', ldap_url.cred or '') subschemasubentry_dn = l.search_subschemasubentry_s(ldap_url.dn) diff --git a/Lib/ldif.py b/Lib/ldif.py index 94ffd91..82c4e3f 100644 --- a/Lib/ldif.py +++ b/Lib/ldif.py @@ -4,6 +4,8 @@ See https://www.python-ldap.org/ for details. 
""" +from __future__ import unicode_literals + __version__ = '2.5.2' __all__ = [ @@ -60,7 +62,7 @@ def is_dn(s): return rm!=None and rm.group(0)==s -SAFE_STRING_PATTERN = '(^(\000|\n|\r| |:|<)|[\000\n\r\200-\377]+|[ ]+$)' +SAFE_STRING_PATTERN = b'(^(\000|\n|\r| |:|<)|[\000\n\r\200-\377]+|[ ]+$)' safe_string_re = re.compile(SAFE_STRING_PATTERN) def list_dict(l): @@ -80,7 +82,7 @@ class LDIFWriter: def __init__(self,output_file,base64_attrs=None,cols=76,line_sep='\n'): """ output_file - file object for output + file object for output; should be opened in *text* mode base64_attrs list of attribute types to be base64-encoded in any case cols @@ -129,15 +131,17 @@ def _unparseAttrTypeandValue(self,attr_type,attr_value): Write a single attribute type/value pair attr_type - attribute type + attribute type (text) attr_value - attribute value + attribute value (bytes) """ if self._needs_base64_encoding(attr_type,attr_value): # Encode with base64 - self._unfold_lines(':: '.join([attr_type, b64encode(attr_value).replace('\n','')])) + encoded = b64encode(attr_value).decode('ascii') + encoded = encoded.replace('\n','') + self._unfold_lines(':: '.join([attr_type, encoded])) else: - self._unfold_lines(': '.join([attr_type,attr_value])) + self._unfold_lines(': '.join([attr_type, attr_value.decode('ascii')])) return # _unparseAttrTypeandValue() def _unparseEntryRecord(self,entry): @@ -161,13 +165,14 @@ def _unparseChangeRecord(self,modlist): changetype = 'modify' else: raise ValueError("modlist item of wrong length: %d" % (mod_len)) - self._unparseAttrTypeandValue('changetype',changetype) + self._unparseAttrTypeandValue('changetype',changetype.encode('ascii')) for mod in modlist: if mod_len==2: mod_type,mod_vals = mod elif mod_len==3: mod_op,mod_type,mod_vals = mod - self._unparseAttrTypeandValue(MOD_OP_STR[mod_op],mod_type) + self._unparseAttrTypeandValue(MOD_OP_STR[mod_op], + mod_type.encode('ascii')) else: raise ValueError("Subsequent modlist item of wrong length") if mod_vals: 
@@ -185,7 +190,8 @@ def unparse(self,dn,record): or a list with a modify list like for LDAPObject.modify(). """ # Start with line containing the distinguished name - self._unparseAttrTypeandValue('dn',dn) + dn = dn.encode('utf-8') + self._unparseAttrTypeandValue('dn', dn) # Dispatch to record type specific writers if isinstance(record,dict): self._unparseEntryRecord(record) @@ -260,6 +266,8 @@ def __init__( String used as line separator """ self._input_file = input_file + # Detect whether the file is open in text or bytes mode. + self._file_sends_bytes = isinstance(self._input_file.read(0), bytes) self._max_entries = max_entries self._process_url_schemes = list_dict([s.lower() for s in (process_url_schemes or [])]) self._ignored_attr_types = list_dict([a.lower() for a in (ignored_attr_types or [])]) @@ -287,6 +295,10 @@ def handle(self,dn,entry): def _readline(self): s = self._input_file.readline() + if self._file_sends_bytes: + # The RFC does not allow UTF-8 values; we support it as a + # non-official, backwards compatibility layer + s = s.decode('utf-8') self.line_counter = self.line_counter + 1 self.byte_counter = self.byte_counter + len(s) if not s: @@ -319,6 +331,8 @@ def _next_key_and_value(self): """ Parse a single attribute type and value pair from one or more lines of LDIF data + + Returns attr_type (text) and attr_value (bytes) """ # Reading new attribute line unfolded_line = self._unfold_lines() @@ -338,9 +352,15 @@ def _next_key_and_value(self): value_spec = unfolded_line[colon_pos:colon_pos+2] if value_spec==': ': attr_value = unfolded_line[colon_pos+2:].lstrip() + # All values should be valid ascii; we support UTF-8 as a + # non-official, backwards compatibility layer. 
+ attr_value = attr_value.encode('utf-8') elif value_spec=='::': # attribute value needs base64-decoding - attr_value = self._b64decode(unfolded_line[colon_pos+2:]) + # base64 data is pure ASCII; encode the text to bytes before decoding it + attr_value = unfolded_line[colon_pos+2:] + attr_value = attr_value.encode('ascii') + attr_value = self._b64decode(attr_value) elif value_spec==':<': # fetch attribute value from URL url = unfolded_line[colon_pos+2:].strip() @@ -350,7 +370,9 @@ def _next_key_and_value(self): if u[0] in self._process_url_schemes: attr_value = urlopen(url).read() else: - attr_value = unfolded_line[colon_pos+1:] + # All values should be valid ascii; we support UTF-8 as a + # non-official, backwards compatibility layer. + attr_value = unfolded_line[colon_pos+1:].encode('utf-8') return attr_type,attr_value def _consume_empty_lines(self): @@ -383,7 +405,7 @@ def parse_entry_records(self): k,v = self._consume_empty_lines() # Consume 'version' line if k=='version': - self.version = int(v) + self.version = int(v.decode('ascii')) k,v = self._consume_empty_lines() except EOFError: return @@ -394,6 +416,9 @@ def parse_entry_records(self): # Consume first line which must start with "dn: " if k!='dn': raise ValueError('Line %d: First line of record does not start with "dn:": %s' % (self.line_counter,repr(k))) + # Value of a 'dn' field *has* to be valid UTF-8 + # k is text, v is bytes. + v = v.decode('utf-8') if not is_dn(v): raise ValueError('Line %d: Not a valid string-representation for dn: %s.' % (self.line_counter,repr(v))) dn = v @@ -452,6 +477,9 @@ def parse_change_records(self): # Consume first line which must start with "dn: " if k!='dn': raise ValueError('Line %d: First line of record does not start with "dn:": %s' % (self.line_counter,repr(k))) + # Value of a 'dn' field *has* to be valid UTF-8 + # k is text, v is bytes. + v = v.decode('utf-8') if not is_dn(v): raise ValueError('Line %d: Not a valid string-representation for dn: %s.'
% (self.line_counter,repr(v))) dn = v @@ -460,6 +488,8 @@ def parse_change_records(self): # Read "control:" lines controls = [] while k!=None and k=='control': + # v is still bytes, spec says it should be valid utf-8; decode it. + v = v.decode('utf-8') try: control_type,criticality,control_value = v.split(' ',2) except ValueError: @@ -472,6 +502,8 @@ def parse_change_records(self): changetype = None # Consume changetype line of record if k=='changetype': + # v is still bytes, spec says it should be valid utf-8; decode it. + v = v.decode('utf-8') if not v in valid_changetype_dict: raise ValueError('Invalid changetype: %s' % repr(v)) changetype = v @@ -491,6 +523,8 @@ def parse_change_records(self): except KeyError: raise ValueError('Line %d: Invalid mod-op string: %s' % (self.line_counter,repr(k))) # we now have the attribute name to be modified + # v is still bytes, spec says it should be valid utf-8; decode it. + v = v.decode('utf-8') modattr = v modvalues = [] try: diff --git a/Lib/slapdtest.py b/Lib/slapdtest.py index 1aba887..fcd36e1 100644 --- a/Lib/slapdtest.py +++ b/Lib/slapdtest.py @@ -5,6 +5,8 @@ See https://www.python-ldap.org/ for details. 
""" +from __future__ import unicode_literals + __version__ = '2.5.2' import os @@ -365,13 +367,15 @@ def ldapadd(self, ldif, extra_args=None): """ Runs ldapadd on this slapd instance, passing it the ldif content """ - self._cli_popen(self.PATH_LDAPADD, extra_args=extra_args, stdin_data=ldif) + self._cli_popen(self.PATH_LDAPADD, extra_args=extra_args, + stdin_data=ldif.encode('utf-8')) def ldapmodify(self, ldif, extra_args=None): """ Runs ldapadd on this slapd instance, passing it the ldif content """ - self._cli_popen(self.PATH_LDAPMODIFY, extra_args=extra_args, stdin_data=ldif) + self._cli_popen(self.PATH_LDAPMODIFY, extra_args=extra_args, + stdin_data=ldif.encode('utf-8')) class SlapdTestCase(unittest.TestCase): @@ -383,11 +387,11 @@ class SlapdTestCase(unittest.TestCase): server = None ldap_object_class = None - def _open_ldap_conn(self, who=None, cred=None): + def _open_ldap_conn(self, who=None, cred=None, **kwargs): """ return a LDAPObject instance after simple bind """ - ldap_conn = self.ldap_object_class(self.server.ldap_uri) + ldap_conn = self.ldap_object_class(self.server.ldap_uri, **kwargs) ldap_conn.protocol_version = 3 #ldap_conn.set_option(ldap.OPT_REFERRALS, 0) ldap_conn.simple_bind_s(who or self.server.root_dn, cred or self.server.root_pw) diff --git a/Modules/LDAPObject.c b/Modules/LDAPObject.c index 2eff88e..ce0ff52 100644 --- a/Modules/LDAPObject.c +++ b/Modules/LDAPObject.c @@ -142,7 +142,7 @@ Tuple_to_LDAPMod( PyObject* tup, int no_op ) if (list == Py_None) { /* None indicates a NULL mod_bvals */ - } else if (PyString_Check(list)) { + } else if (PyBytes_Check(list)) { /* Single string is a singleton list */ lm->mod_bvalues = PyMem_NEW(struct berval *, 2); if (lm->mod_bvalues == NULL) @@ -151,8 +151,8 @@ Tuple_to_LDAPMod( PyObject* tup, int no_op ) if (lm->mod_bvalues[0] == NULL) goto nomem; lm->mod_bvalues[1] = NULL; - lm->mod_bvalues[0]->bv_len = PyString_Size(list); - lm->mod_bvalues[0]->bv_val = PyString_AsString(list); + 
lm->mod_bvalues[0]->bv_len = PyBytes_Size(list); + lm->mod_bvalues[0]->bv_val = PyBytes_AsString(list); } else if (PySequence_Check(list)) { nstrs = PySequence_Length(list); lm->mod_bvalues = PyMem_NEW(struct berval *, nstrs + 1); @@ -166,14 +166,14 @@ Tuple_to_LDAPMod( PyObject* tup, int no_op ) item = PySequence_GetItem(list, i); if (item == NULL) goto error; - if (!PyString_Check(item)) { + if (!PyBytes_Check(item)) { PyErr_SetObject( PyExc_TypeError, Py_BuildValue( "sO", - "expected a string in the list", item)); + "expected a byte string in the list", item)); Py_DECREF(item); goto error; } - lm->mod_bvalues[i]->bv_len = PyString_Size(item); - lm->mod_bvalues[i]->bv_val = PyString_AsString(item); + lm->mod_bvalues[i]->bv_len = PyBytes_Size(item); + lm->mod_bvalues[i]->bv_val = PyBytes_AsString(item); Py_DECREF(item); } if (nstrs == 0) @@ -268,7 +268,11 @@ attrs_from_List( PyObject *attrlist, char***attrsp, PyObject** seq) { if (attrlist == Py_None) { /* None means a NULL attrlist */ - } else if (PyString_Check(attrlist)) { +#if PY_MAJOR_VERSION == 2 + } else if (PyBytes_Check(attrlist)) { +#else + } else if (PyUnicode_Check(attrlist)) { +#endif /* caught by John Benninghoff */ PyErr_SetObject( PyExc_TypeError, Py_BuildValue("sO", "expected *list* of strings, not a string", attrlist )); @@ -289,12 +293,21 @@ attrs_from_List( PyObject *attrlist, char***attrsp, PyObject** seq) { item = PySequence_Fast_GET_ITEM(*seq, i); if (item == NULL) goto error; - if (!PyString_Check(item)) { +#if PY_MAJOR_VERSION == 2 + /* Encoded by Python to UTF-8 */ + if (!PyBytes_Check(item)) { +#else + if (!PyUnicode_Check(item)) { +#endif PyErr_SetObject(PyExc_TypeError, Py_BuildValue("sO", "expected string in list", item)); goto error; } - attrs[i] = PyString_AsString(item); +#if PY_MAJOR_VERSION == 2 + attrs[i] = PyBytes_AsString(item); +#else + attrs[i] = PyUnicode_AsUTF8(item); +#endif } attrs[len] = NULL; } @@ -551,7 +564,7 @@ static int interaction ( unsigned flags, if (result == 
NULL) /*searching for a better error code */ return LDAP_OPERATIONS_ERROR; - c_result = PyString_AsString(result); /*xxx Error checking?? */ + c_result = PyBytes_AsString(result); /*xxx Error checking?? */ /* according to the sasl docs, we should malloc() the returned string only for calls where interact->id == SASL_CB_PASS, so we @@ -647,7 +660,7 @@ l_ldap_sasl_bind_s( LDAPObject* self, PyObject* args ) if (ldaperror == LDAP_SASL_BIND_IN_PROGRESS) { if (servercred && servercred->bv_val && *servercred->bv_val) - return PyString_FromStringAndSize( servercred->bv_val, servercred->bv_len ); + return PyBytes_FromStringAndSize( servercred->bv_val, servercred->bv_len ); } else if (ldaperror != LDAP_SUCCESS) return LDAPerror( self->ldap, "l_ldap_sasl_bind_s" ); return PyInt_FromLong( ldaperror ); @@ -699,7 +712,7 @@ l_ldap_sasl_interactive_bind_s( LDAPObject* self, PyObject* args ) /* now we extract the sasl mechanism from the SASL Object */ mechanism = PyObject_GetAttrString(SASLObject, "mech"); if (mechanism == NULL) return NULL; - c_mechanism = PyString_AsString(mechanism); + c_mechanism = PyBytes_AsString(mechanism); Py_DECREF(mechanism); mechanism = NULL; @@ -1188,7 +1201,7 @@ l_ldap_whoami_s( LDAPObject* self, PyObject* args ) if ( ldaperror!=LDAP_SUCCESS ) return LDAPerror( self->ldap, "ldap_whoami_s" ); - result = LDAPberval_to_object(bvalue); + result = LDAPberval_to_unicode_object(bvalue); return result; } diff --git a/Modules/berval.c b/Modules/berval.c index b118669..73d7f9b 100644 --- a/Modules/berval.c +++ b/Modules/berval.c @@ -89,7 +89,29 @@ LDAPberval_to_object(const struct berval *bv) Py_INCREF(ret); } else { - ret = PyString_FromStringAndSize(bv->bv_val, bv->bv_len); + ret = PyBytes_FromStringAndSize(bv->bv_val, bv->bv_len); + } + + return ret; +} + +/* + * Same as LDAPberval_to_object, but returns a Unicode PyObject. + * Use when the value is known to be text (for instance a distinguishedName). 
+ * + * Returns a new Python object on success, or NULL on failure. + */ +PyObject * +LDAPberval_to_unicode_object(const struct berval *bv) +{ + PyObject *ret = NULL; + + if (!bv) { + ret = Py_None; + Py_INCREF(ret); + } + else { + ret = PyUnicode_FromStringAndSize(bv->bv_val, bv->bv_len); } return ret; diff --git a/Modules/berval.h b/Modules/berval.h index 514e9f9..2489e45 100644 --- a/Modules/berval.h +++ b/Modules/berval.h @@ -10,5 +10,6 @@ int LDAPberval_from_object(PyObject *obj, struct berval *bv); int LDAPberval_from_object_check(PyObject *obj); void LDAPberval_release(struct berval *bv); PyObject *LDAPberval_to_object(const struct berval *bv); +PyObject *LDAPberval_to_unicode_object(const struct berval *bv); #endif /* __h_berval_ */ diff --git a/Modules/constants.c b/Modules/constants.c index 06c249a..7ed9e41 100644 --- a/Modules/constants.c +++ b/Modules/constants.c @@ -307,71 +307,71 @@ LDAPinit_constants( PyObject* d ) PyDict_SetItemString( d, "TLS_AVAIL", obj ); Py_DECREF(obj); - obj = PyString_FromString(LDAP_CONTROL_MANAGEDSAIT); + obj = PyUnicode_FromString(LDAP_CONTROL_MANAGEDSAIT); PyDict_SetItemString( d, "CONTROL_MANAGEDSAIT", obj ); Py_DECREF(obj); - obj = PyString_FromString(LDAP_CONTROL_PROXY_AUTHZ); + obj = PyUnicode_FromString(LDAP_CONTROL_PROXY_AUTHZ); PyDict_SetItemString( d, "CONTROL_PROXY_AUTHZ", obj ); Py_DECREF(obj); - obj = PyString_FromString(LDAP_CONTROL_SUBENTRIES); + obj = PyUnicode_FromString(LDAP_CONTROL_SUBENTRIES); PyDict_SetItemString( d, "CONTROL_SUBENTRIES", obj ); Py_DECREF(obj); - obj = PyString_FromString(LDAP_CONTROL_VALUESRETURNFILTER); + obj = PyUnicode_FromString(LDAP_CONTROL_VALUESRETURNFILTER); PyDict_SetItemString( d, "CONTROL_VALUESRETURNFILTER", obj ); Py_DECREF(obj); - obj = PyString_FromString(LDAP_CONTROL_ASSERT); + obj = PyUnicode_FromString(LDAP_CONTROL_ASSERT); PyDict_SetItemString( d, "CONTROL_ASSERT", obj ); Py_DECREF(obj); - obj = PyString_FromString(LDAP_CONTROL_PRE_READ); + obj = 
PyUnicode_FromString(LDAP_CONTROL_PRE_READ); PyDict_SetItemString( d, "CONTROL_PRE_READ", obj ); Py_DECREF(obj); - obj = PyString_FromString(LDAP_CONTROL_POST_READ); + obj = PyUnicode_FromString(LDAP_CONTROL_POST_READ); PyDict_SetItemString( d, "CONTROL_POST_READ", obj ); Py_DECREF(obj); - obj = PyString_FromString(LDAP_CONTROL_SORTREQUEST); + obj = PyUnicode_FromString(LDAP_CONTROL_SORTREQUEST); PyDict_SetItemString( d, "CONTROL_SORTREQUEST", obj ); Py_DECREF(obj); - obj = PyString_FromString(LDAP_CONTROL_SORTRESPONSE); + obj = PyUnicode_FromString(LDAP_CONTROL_SORTRESPONSE); PyDict_SetItemString( d, "CONTROL_SORTRESPONSE", obj ); Py_DECREF(obj); - obj = PyString_FromString(LDAP_CONTROL_PAGEDRESULTS); + obj = PyUnicode_FromString(LDAP_CONTROL_PAGEDRESULTS); PyDict_SetItemString( d, "CONTROL_PAGEDRESULTS", obj ); Py_DECREF(obj); - obj = PyString_FromString(LDAP_CONTROL_SYNC); + obj = PyUnicode_FromString(LDAP_CONTROL_SYNC); PyDict_SetItemString( d, "CONTROL_SYNC", obj ); Py_DECREF(obj); - obj = PyString_FromString(LDAP_CONTROL_SYNC_STATE); + obj = PyUnicode_FromString(LDAP_CONTROL_SYNC_STATE); PyDict_SetItemString( d, "CONTROL_SYNC_STATE", obj ); Py_DECREF(obj); - obj = PyString_FromString(LDAP_CONTROL_SYNC_DONE); + obj = PyUnicode_FromString(LDAP_CONTROL_SYNC_DONE); PyDict_SetItemString( d, "CONTROL_SYNC_DONE", obj ); Py_DECREF(obj); - obj = PyString_FromString(LDAP_SYNC_INFO); + obj = PyUnicode_FromString(LDAP_SYNC_INFO); PyDict_SetItemString( d, "SYNC_INFO", obj ); Py_DECREF(obj); - obj = PyString_FromString(LDAP_CONTROL_PASSWORDPOLICYREQUEST); + obj = PyUnicode_FromString(LDAP_CONTROL_PASSWORDPOLICYREQUEST); PyDict_SetItemString( d, "CONTROL_PASSWORDPOLICYREQUEST", obj ); Py_DECREF(obj); - obj = PyString_FromString(LDAP_CONTROL_PASSWORDPOLICYRESPONSE); + obj = PyUnicode_FromString(LDAP_CONTROL_PASSWORDPOLICYRESPONSE); PyDict_SetItemString( d, "CONTROL_PASSWORDPOLICYRESPONSE", obj ); Py_DECREF(obj); - obj = PyString_FromString(LDAP_CONTROL_RELAX); + obj = 
PyUnicode_FromString(LDAP_CONTROL_RELAX); PyDict_SetItemString( d, "CONTROL_RELAX", obj ); Py_DECREF(obj); diff --git a/Modules/errors.c b/Modules/errors.c index 3731965..e5cb0ee 100644 --- a/Modules/errors.c +++ b/Modules/errors.c @@ -80,7 +80,7 @@ LDAPerror( LDAP *l, char *msg ) if (info == NULL) return NULL; - str = PyString_FromString(ldap_err2string(errnum)); + str = PyUnicode_FromString(ldap_err2string(errnum)); if (str) PyDict_SetItemString( info, "desc", str ); Py_XDECREF(str); @@ -95,7 +95,7 @@ LDAPerror( LDAP *l, char *msg ) if (ldap_get_option(l, LDAP_OPT_MATCHED_DN, &matched) >= 0 && matched != NULL) { if (*matched != '\0') { - str = PyString_FromString(matched); + str = PyUnicode_FromString(matched); if (str) PyDict_SetItemString( info, "matched", str ); Py_XDECREF(str); @@ -104,13 +104,13 @@ LDAPerror( LDAP *l, char *msg ) } if (errnum == LDAP_REFERRAL) { - str = PyString_FromString(msg); + str = PyUnicode_FromString(msg); if (str) PyDict_SetItemString( info, "info", str ); Py_XDECREF(str); } else if (ldap_get_option(l, LDAP_OPT_ERROR_STRING, &error) >= 0) { if (error != NULL && *error != '\0') { - str = PyString_FromString(error); + str = PyUnicode_FromString(error); if (str) PyDict_SetItemString( info, "info", str ); Py_XDECREF(str); diff --git a/Modules/functions.c b/Modules/functions.c index b2dbf83..ffb6765 100644 --- a/Modules/functions.c +++ b/Modules/functions.c @@ -76,9 +76,9 @@ l_ldap_str2dn( PyObject* unused, PyObject *args ) LDAPAVA *ava = rdn[j]; PyObject *tuple; - tuple = Py_BuildValue("(O&O&i)", - LDAPberval_to_object, &ava->la_attr, - LDAPberval_to_object, &ava->la_value, + tuple = Py_BuildValue("(O&O&i)", + LDAPberval_to_unicode_object, &ava->la_attr, + LDAPberval_to_unicode_object, &ava->la_value, ava->la_flags & ~(LDAP_AVA_FREE_ATTR|LDAP_AVA_FREE_VALUE)); if (!tuple) { Py_DECREF(rdnlist); diff --git a/Modules/ldapcontrol.c b/Modules/ldapcontrol.c index 0bf86a1..f3dc92a 100644 --- a/Modules/ldapcontrol.c +++ b/Modules/ldapcontrol.c 
@@ -102,13 +102,13 @@ Tuple_to_LDAPControl( PyObject* tup ) berbytes.bv_len = 0; berbytes.bv_val = NULL; } - else if (PyString_Check(bytes)) { - berbytes.bv_len = PyString_Size(bytes); - berbytes.bv_val = PyString_AsString(bytes); + else if (PyBytes_Check(bytes)) { + berbytes.bv_len = PyBytes_Size(bytes); + berbytes.bv_val = PyBytes_AsString(bytes); } else { PyErr_SetObject(PyExc_TypeError, Py_BuildValue("sO", - "expected a string", bytes)); + "expected bytes", bytes)); LDAPControl_DEL(lc); return NULL; } diff --git a/Modules/message.c b/Modules/message.c index 33127b6..1a289db 100644 --- a/Modules/message.c +++ b/Modules/message.c @@ -49,6 +49,7 @@ LDAPmessage_to_python(LDAP *ld, LDAPMessage *m, int add_ctrls, int add_intermedi BerElement *ber = NULL; PyObject* entrytuple; PyObject* attrdict; + PyObject* pydn; dn = ldap_get_dn( ld, entry ); if (dn == NULL) { @@ -91,22 +92,26 @@ LDAPmessage_to_python(LDAP *ld, LDAPMessage *m, int add_ctrls, int add_intermedi attr = ldap_next_attribute( ld, entry, ber ) ) { PyObject* valuelist; + PyObject* pyattr; + pyattr = PyUnicode_FromString(attr); + struct berval ** bvals = ldap_get_values_len( ld, entry, attr ); /* Find which list to append to */ - if ( PyMapping_HasKeyString( attrdict, attr ) ) { - valuelist = PyMapping_GetItemString( attrdict, attr ); + if ( PyDict_Contains( attrdict, pyattr ) ) { + valuelist = PyDict_GetItem( attrdict, pyattr ); } else { valuelist = PyList_New(0); - if (valuelist != NULL && PyMapping_SetItemString(attrdict, - attr, valuelist) == -1) { + if (valuelist != NULL && PyDict_SetItem(attrdict, + pyattr, valuelist) == -1) { Py_DECREF(valuelist); valuelist = NULL; /* catch error later */ } } if (valuelist == NULL) { + Py_DECREF(pyattr); Py_DECREF(attrdict); Py_DECREF(result); if (ber != NULL) @@ -125,6 +130,7 @@ LDAPmessage_to_python(LDAP *ld, LDAPMessage *m, int add_ctrls, int add_intermedi valuestr = LDAPberval_to_object(bvals[i]); if (PyList_Append( valuelist, valuestr ) == -1) { + 
Py_DECREF(pyattr); Py_DECREF(attrdict); Py_DECREF(result); Py_DECREF(valuestr); @@ -141,15 +147,25 @@ LDAPmessage_to_python(LDAP *ld, LDAPMessage *m, int add_ctrls, int add_intermedi } ldap_value_free_len(bvals); } + Py_DECREF(pyattr); Py_DECREF( valuelist ); ldap_memfree(attr); } + pydn = PyUnicode_FromString(dn); + if (pydn == NULL) { + Py_DECREF(result); + ldap_msgfree( m ); + ldap_memfree(dn); + return NULL; + } + if (add_ctrls) { - entrytuple = Py_BuildValue("(sOO)", dn, attrdict, pyctrls); + entrytuple = Py_BuildValue("(OOO)", pydn, attrdict, pyctrls); } else { - entrytuple = Py_BuildValue("(sO)", dn, attrdict); + entrytuple = Py_BuildValue("(OO)", pydn, attrdict); } + Py_DECREF(pydn); ldap_memfree(dn); Py_DECREF(attrdict); Py_XDECREF(pyctrls); @@ -191,7 +207,8 @@ LDAPmessage_to_python(LDAP *ld, LDAPMessage *m, int add_ctrls, int add_intermedi if (refs) { Py_ssize_t i; for (i=0; refs[i] != NULL; i++) { - PyObject *refstr = PyString_FromString(refs[i]); + /* A referal is a distinguishedName => unicode */ + PyObject *refstr = PyUnicode_FromString(refs[i]); PyList_Append(reflist, refstr); Py_DECREF(refstr); } @@ -218,6 +235,7 @@ LDAPmessage_to_python(LDAP *ld, LDAPMessage *m, int add_ctrls, int add_intermedi PyObject* valtuple; PyObject *valuestr; char *retoid = 0; + PyObject *pyoid; struct berval *retdata = 0; if (ldap_parse_intermediate( ld, entry, &retoid, &retdata, &serverctrls, 0 ) != LDAP_SUCCESS) { @@ -240,10 +258,17 @@ LDAPmessage_to_python(LDAP *ld, LDAPMessage *m, int add_ctrls, int add_intermedi valuestr = LDAPberval_to_object(retdata); ber_bvfree( retdata ); - valtuple = Py_BuildValue("(sOO)", retoid, + pyoid = PyUnicode_FromString(retoid); + ldap_memfree( retoid ); + if (pyoid == NULL) { + Py_DECREF(result); + ldap_msgfree( m ); + return NULL; + } + valtuple = Py_BuildValue("(OOO)", pyoid, valuestr ? 
valuestr : Py_None, pyctrls); - ldap_memfree( retoid ); + Py_DECREF(pyoid); Py_DECREF(valuestr); Py_XDECREF(pyctrls); PyList_Append(result, valtuple); diff --git a/Modules/options.c b/Modules/options.c index a437ca4..7cf996b 100644 --- a/Modules/options.c +++ b/Modules/options.c @@ -204,7 +204,7 @@ LDAP_get_option(LDAPObject *self, int option) extensions = PyTuple_New(num_extensions); for (i = 0; i < num_extensions; i++) PyTuple_SET_ITEM(extensions, i, - PyString_FromString(apiinfo.ldapai_extensions[i])); + PyUnicode_FromString(apiinfo.ldapai_extensions[i])); /* return api info as a dictionary */ v = Py_BuildValue("{s:i, s:i, s:i, s:s, s:i, s:O}", @@ -321,7 +321,7 @@ LDAP_get_option(LDAPObject *self, int option) Py_INCREF(Py_None); return Py_None; } - v = PyString_FromString(strval); + v = PyUnicode_FromString(strval); ldap_memfree(strval); return v; diff --git a/Tests/t_cext.py b/Tests/t_cext.py index cd171b6..d4740d0 100644 --- a/Tests/t_cext.py +++ b/Tests/t_cext.py @@ -5,6 +5,8 @@ See https://www.python-ldap.org/ for details. 
""" +from __future__ import unicode_literals + import os import unittest from slapdtest import SlapdTestCase @@ -195,7 +197,7 @@ def test_anon_rootdse_search(self): '', _ldap.SCOPE_BASE, '(objectClass=*)', - ['objectClass', 'namingContexts'], + [str('objectClass'), str('namingContexts')], ) self.assertEqual(type(m), type(0)) result, pmsg, msgid, ctrls = l.result4(m, _ldap.MSG_ALL, self.timeout) @@ -205,9 +207,9 @@ def test_anon_rootdse_search(self): self.assertEqual(ctrls, []) root_dse = pmsg[0][1] self.assertTrue('objectClass' in root_dse) - self.assertTrue('OpenLDAProotDSE' in root_dse['objectClass']) + self.assertTrue(b'OpenLDAProotDSE' in root_dse['objectClass']) self.assertTrue('namingContexts' in root_dse) - self.assertEqual(root_dse['namingContexts'], [self.server.suffix]) + self.assertEqual(root_dse['namingContexts'], [self.server.suffix.encode('ascii')]) def test_unbind(self): l = self._open_conn() @@ -235,8 +237,8 @@ def test_search_ext_individual(self): self.assertEqual(len(pmsg[0]), 2) self.assertEqual(pmsg[0][0], self.server.suffix) self.assertEqual(pmsg[0][0], self.server.suffix) - self.assertTrue('dcObject' in pmsg[0][1]['objectClass']) - self.assertTrue('organization' in pmsg[0][1]['objectClass']) + self.assertTrue(b'dcObject' in pmsg[0][1]['objectClass']) + self.assertTrue(b'organization' in pmsg[0][1]['objectClass']) self.assertEqual(msgid, m) self.assertEqual(ctrls, []) @@ -278,9 +280,9 @@ def test_add(self): m = l.add_ext( "cn=Foo," + self.server.suffix, [ - ('objectClass', 'organizationalRole'), - ('cn', 'Foo'), - ('description', 'testing'), + ('objectClass', b'organizationalRole'), + ('cn', b'Foo'), + ('description', b'testing'), ] ) self.assertEqual(type(m), type(0)) @@ -302,9 +304,9 @@ def test_add(self): ( 'cn=Foo,'+self.server.suffix, { - 'objectClass': ['organizationalRole'], - 'cn': ['Foo'], - 'description': ['testing'], + 'objectClass': [b'organizationalRole'], + 'cn': [b'Foo'], + 'description': [b'testing'], } ) ) @@ -319,10 +321,10 @@ 
def test_compare(self): m = l.add_ext( dn, [ - ('objectClass', 'person'), - ('sn', 'CompareTest'), - ('cn', 'CompareTest'), - ('userPassword', 'the_password'), + ('objectClass', b'person'), + ('sn', b'CompareTest'), + ('cn', b'CompareTest'), + ('userPassword', b'the_password'), ], ) self.assertEqual(type(m), type(0)) @@ -373,8 +375,8 @@ def test_delete(self): m = l.add_ext( dn, [ - ('objectClass', 'organizationalRole'), - ('cn', 'Deleteme'), + ('objectClass', b'organizationalRole'), + ('cn', b'Deleteme'), ] ) self.assertEqual(type(m), type(0)) @@ -395,7 +397,7 @@ def test_modify_no_such_object(self): m = l.modify_ext( "cn=DoesNotExist,"+self.server.suffix, [ - (_ldap.MOD_ADD, 'description', ['blah']), + (_ldap.MOD_ADD, 'description', [b'blah']), ] ) try: @@ -413,7 +415,7 @@ def test_modify_no_such_object_empty_attrs(self): m = l.modify_ext( "cn=DoesNotExist,"+self.server.suffix, [ - (_ldap.MOD_ADD, 'description', ['dummy']), + (_ldap.MOD_ADD, 'description', [b'dummy']), ] ) self.assertTrue(isinstance(m, int)) @@ -434,10 +436,10 @@ def test_modify(self): m = l.add_ext( dn, [ - ('objectClass', 'person'), - ('cn', 'AddToMe'), - ('sn', 'Modify'), - ('description', 'a description'), + ('objectClass', b'person'), + ('cn', b'AddToMe'), + ('sn', b'Modify'), + ('description', b'a description'), ] ) self.assertEqual(type(m), type(0)) @@ -447,7 +449,7 @@ def test_modify(self): m = l.modify_ext( dn, [ - (_ldap.MOD_ADD, 'description', ['b desc', 'c desc']), + (_ldap.MOD_ADD, 'description', [b'b desc', b'c desc']), ] ) result, pmsg, msgid, ctrls = l.result4(m, _ldap.MSG_ALL, self.timeout) @@ -466,7 +468,7 @@ def test_modify(self): self.assertEqual(pmsg[0][0], dn) d = list(pmsg[0][1]['description']) d.sort() - self.assertEqual(d, ['a description', 'b desc', 'c desc']) + self.assertEqual(d, [b'a description', b'b desc', b'c desc']) def test_rename(self): l = self._open_conn() @@ -474,8 +476,8 @@ def test_rename(self): m = l.add_ext( dn, [ - ('objectClass', 'organizationalRole'), - 
('cn', 'RenameMe'), + ('objectClass', b'organizationalRole'), + ('cn', b'RenameMe'), ] ) self.assertEqual(type(m), type(0)) @@ -507,15 +509,15 @@ def test_rename(self): self.assertEqual(ctrls, []) self.assertEqual(len(pmsg), 1) self.assertEqual(pmsg[0][0], dn2) - self.assertEqual(pmsg[0][1]['cn'], ['IAmRenamed']) + self.assertEqual(pmsg[0][1]['cn'], [b'IAmRenamed']) # create the container containerDn = "ou=RenameContainer,"+self.server.suffix m = l.add_ext( containerDn, [ - ('objectClass', 'organizationalUnit'), - ('ou', 'RenameContainer'), + ('objectClass', b'organizationalUnit'), + ('ou', b'RenameContainer'), ] ) result, pmsg, msgid, ctrls = l.result4(m, _ldap.MSG_ALL, self.timeout) @@ -551,7 +553,7 @@ def test_rename(self): self.assertEqual(ctrls, []) self.assertEqual(len(pmsg), 1) self.assertEqual(pmsg[0][0], dn3) - self.assertEqual(pmsg[0][1]['cn'], ['IAmRenamedAgain']) + self.assertEqual(pmsg[0][1]['cn'], [b'IAmRenamedAgain']) def test_whoami(self): @@ -590,10 +592,10 @@ def test_passwd(self): m = l.add_ext( dn, [ - ('objectClass', 'person'), - ('sn', 'PasswordTest'), - ('cn', 'PasswordTest'), - ('userPassword', 'initial'), + ('objectClass', b'person'), + ('sn', b'PasswordTest'), + ('cn', b'PasswordTest'), + ('userPassword', b'initial'), ] ) self.assertEqual(type(m), type(0)) diff --git a/Tests/t_ldap_dn.py b/Tests/t_ldap_dn.py index 97274f3..459c1dc 100644 --- a/Tests/t_ldap_dn.py +++ b/Tests/t_ldap_dn.py @@ -5,6 +5,8 @@ See https://www.python-ldap.org/ for details. 
""" +from __future__ import unicode_literals + # from Python's standard lib import unittest @@ -27,9 +29,10 @@ def test_is_dn(self): self.assertEqual(ldap.dn.is_dn(',cn=foobar,ou=ae-dir'), False) self.assertEqual(ldap.dn.is_dn('cn=foobar,ou=ae-dir,'), False) self.assertEqual(ldap.dn.is_dn('uid=xkcd,cn=foobar,ou=ae-dir'), True) + self.assertEqual(ldap.dn.is_dn('cn=äöüÄÖÜß,o=äöüÄÖÜß'), True) self.assertEqual( ldap.dn.is_dn( - 'cn=\xc3\xa4\xc3\xb6\xc3\xbc\xc3\x84\xc3\x96\xc3\x9c.o=\xc3\xa4\xc3\xb6\xc3\xbc\xc3\x84\xc3\x96\xc3\x9c\xc3\x9f' + r'cn=\c3\a4\c3\b6\c3\bc\c3\84\c3\96\c3\9c\c3\9f,o=\c3\a4\c3\b6\c3\bc\c3\84\c3\96\c3\9c\c3\9f' ), True ) @@ -97,9 +100,9 @@ def test_str2dn(self): ] ) self.assertEqual( - ldap.dn.str2dn('cn=\xc3\xa4\xc3\xb6\xc3\xbc\xc3\x84\xc3\x96\xc3\x9c\xc3\x9f,dc=example,dc=com', flags=0), + ldap.dn.str2dn('cn=äöüÄÖÜß,dc=example,dc=com', flags=0), [ - [('cn', '\xc3\xa4\xc3\xb6\xc3\xbc\xc3\x84\xc3\x96\xc3\x9c\xc3\x9f', 4)], + [('cn', 'äöüÄÖÜß', 4)], [('dc', 'example', 1)], [('dc', 'com', 1)] ] @@ -107,7 +110,7 @@ def test_str2dn(self): self.assertEqual( ldap.dn.str2dn('cn=\\c3\\a4\\c3\\b6\\c3\\bc\\c3\\84\\c3\\96\\c3\\9c\\c3\\9f,dc=example,dc=com', flags=0), [ - [('cn', '\xc3\xa4\xc3\xb6\xc3\xbc\xc3\x84\xc3\x96\xc3\x9c\xc3\x9f', 4)], + [('cn', 'äöüÄÖÜß', 4)], [('dc', 'example', 1)], [('dc', 'com', 1)] ] @@ -156,19 +159,11 @@ def test_dn2str(self): ) self.assertEqual( ldap.dn.dn2str([ - [('cn', '\xc3\xa4\xc3\xb6\xc3\xbc\xc3\x84\xc3\x96\xc3\x9c\xc3\x9f', 4)], - [('dc', 'example', 1)], - [('dc', 'com', 1)] - ]), - 'cn=\xc3\xa4\xc3\xb6\xc3\xbc\xc3\x84\xc3\x96\xc3\x9c\xc3\x9f,dc=example,dc=com' - ) - self.assertEqual( - ldap.dn.dn2str([ - [('cn', '\xc3\xa4\xc3\xb6\xc3\xbc\xc3\x84\xc3\x96\xc3\x9c\xc3\x9f', 4)], + [('cn', 'äöüÄÖÜß', 4)], [('dc', 'example', 1)], [('dc', 'com', 1)] ]), - 'cn=\xc3\xa4\xc3\xb6\xc3\xbc\xc3\x84\xc3\x96\xc3\x9c\xc3\x9f,dc=example,dc=com' + 'cn=äöüÄÖÜß,dc=example,dc=com' ) def test_explode_dn(self): @@ -197,12 +192,12 @@ def 
test_explode_dn(self): ['uid=test\\, 42', 'ou=Testing', 'dc=example', 'dc=com'] ) self.assertEqual( - ldap.dn.explode_dn('cn=\xc3\xa4\xc3\xb6\xc3\xbc\xc3\x84\xc3\x96\xc3\x9c\xc3\x9f,dc=example,dc=com', flags=0), - ['cn=\xc3\xa4\xc3\xb6\xc3\xbc\xc3\x84\xc3\x96\xc3\x9c\xc3\x9f', 'dc=example', 'dc=com'] + ldap.dn.explode_dn('cn=äöüÄÖÜß,dc=example,dc=com', flags=0), + ['cn=äöüÄÖÜß', 'dc=example', 'dc=com'] ) self.assertEqual( ldap.dn.explode_dn('cn=\\c3\\a4\\c3\\b6\\c3\\bc\\c3\\84\\c3\\96\\c3\\9c\\c3\\9f,dc=example,dc=com', flags=0), - ['cn=\xc3\xa4\xc3\xb6\xc3\xbc\xc3\x84\xc3\x96\xc3\x9c\xc3\x9f', 'dc=example', 'dc=com'] + ['cn=äöüÄÖÜß', 'dc=example', 'dc=com'] ) def test_explode_rdn(self): @@ -239,12 +234,12 @@ def test_explode_rdn(self): ['uid=test\\+ 42'] ) self.assertEqual( - ldap.dn.explode_rdn('cn=\xc3\xa4\xc3\xb6\xc3\xbc\xc3\x84\xc3\x96\xc3\x9c\xc3\x9f', flags=0), - ['cn=\xc3\xa4\xc3\xb6\xc3\xbc\xc3\x84\xc3\x96\xc3\x9c\xc3\x9f'] + ldap.dn.explode_rdn('cn=äöüÄÖÜß', flags=0), + ['cn=äöüÄÖÜß'] ) self.assertEqual( ldap.dn.explode_rdn('cn=\\c3\\a4\\c3\\b6\\c3\\bc\\c3\\84\\c3\\96\\c3\\9c\\c3\\9f', flags=0), - ['cn=\xc3\xa4\xc3\xb6\xc3\xbc\xc3\x84\xc3\x96\xc3\x9c\xc3\x9f'] + ['cn=äöüÄÖÜß'] ) diff --git a/Tests/t_ldap_syncrepl.py b/Tests/t_ldap_syncrepl.py index 8f39c67..831c063 100644 --- a/Tests/t_ldap_syncrepl.py +++ b/Tests/t_ldap_syncrepl.py @@ -81,34 +81,34 @@ # NOTE: For the dict, it needs to be kept up-to-date as we make changes! 
LDAP_ENTRIES = { 'ou=Container,dc=slapd-test,dc=python-ldap,dc=org': { - 'objectClass': ['organizationalUnit'], - 'ou': ['Container'] + 'objectClass': [b'organizationalUnit'], + 'ou': [b'Container'] }, 'cn=Foo2,dc=slapd-test,dc=python-ldap,dc=org': { - 'objectClass': ['organizationalRole'], - 'cn': ['Foo2'] + 'objectClass': [b'organizationalRole'], + 'cn': [b'Foo2'] }, 'cn=Foo4,ou=Container,dc=slapd-test,dc=python-ldap,dc=org': { - 'objectClass': ['organizationalRole'], - 'cn': ['Foo4'] + 'objectClass': [b'organizationalRole'], + 'cn': [b'Foo4'] }, 'cn=Manager,dc=slapd-test,dc=python-ldap,dc=org': { - 'objectClass': ['applicationProcess', 'simpleSecurityObject'], - 'userPassword': ['password'], - 'cn': ['Manager'] + 'objectClass': [b'applicationProcess', b'simpleSecurityObject'], + 'userPassword': [b'password'], + 'cn': [b'Manager'] }, 'cn=Foo3,dc=slapd-test,dc=python-ldap,dc=org': { - 'objectClass': ['organizationalRole'], - 'cn': ['Foo3'] + 'objectClass': [b'organizationalRole'], + 'cn': [b'Foo3'] }, 'cn=Foo1,dc=slapd-test,dc=python-ldap,dc=org': { - 'objectClass': ['organizationalRole'], - 'cn': ['Foo1'] + 'objectClass': [b'organizationalRole'], + 'cn': [b'Foo1'] }, 'dc=slapd-test,dc=python-ldap,dc=org': { - 'objectClass': ['dcObject', 'organization'], - 'dc': ['slapd-test'], - 'o': ['slapd-test'] + 'objectClass': [b'dcObject', b'organization'], + 'dc': [b'slapd-test'], + 'o': [b'slapd-test'] } } diff --git a/Tests/t_ldapobject.py b/Tests/t_ldapobject.py index 5773ff2..d55d018 100644 --- a/Tests/t_ldapobject.py +++ b/Tests/t_ldapobject.py @@ -5,6 +5,17 @@ See https://www.python-ldap.org/ for details. 
""" +from __future__ import unicode_literals + +import sys + +if sys.version_info[0] <= 2: + PY2 = True + text_type = unicode +else: + PY2 = False + text_type = str + import os import unittest import pickle @@ -83,7 +94,101 @@ def setUp(self): self._ldap_conn except AttributeError: # open local LDAP connection - self._ldap_conn = self._open_ldap_conn() + self._ldap_conn = self._open_ldap_conn(bytes_mode=False) + + def test_reject_bytes_base(self): + base = self.server.suffix + l = self._ldap_conn + + with self.assertRaises(TypeError): + l.search_s(base.encode('utf-8'), ldap.SCOPE_SUBTREE, '(cn=Foo*)', ['*']) + with self.assertRaises(TypeError): + l.search_s(base, ldap.SCOPE_SUBTREE, b'(cn=Foo*)', ['*']) + with self.assertRaises(TypeError): + l.search_s(base, ldap.SCOPE_SUBTREE, '(cn=Foo*)', [b'*']) + + def test_search_keys_are_text(self): + base = self.server.suffix + l = self._ldap_conn + result = l.search_s(base, ldap.SCOPE_SUBTREE, '(cn=Foo*)', ['*']) + result.sort() + dn, fields = result[0] + self.assertEqual(dn, 'cn=Foo1,%s' % base) + self.assertEqual(type(dn), text_type) + for key, values in fields.items(): + self.assertEqual(type(key), text_type) + for value in values: + self.assertEqual(type(value), bytes) + + def _get_bytes_ldapobject(self, explicit=True): + if explicit: + kwargs = {'bytes_mode': True} + else: + kwargs = {} + return self._open_ldap_conn( + who=self.server.root_dn.encode('utf-8'), + cred=self.server.root_pw.encode('utf-8'), + **kwargs + ) + + @unittest.skipUnless(PY2, "no bytes_mode under Py3") + def test_bytesmode_search_requires_bytes(self): + l = self._get_bytes_ldapobject() + base = self.server.suffix + + with self.assertRaises(TypeError): + l.search_s(base.encode('utf-8'), ldap.SCOPE_SUBTREE, '(cn=Foo*)', [b'*']) + with self.assertRaises(TypeError): + l.search_s(base.encode('utf-8'), ldap.SCOPE_SUBTREE, b'(cn=Foo*)', ['*']) + with self.assertRaises(TypeError): + l.search_s(base, ldap.SCOPE_SUBTREE, b'(cn=Foo*)', [b'*']) + + 
@unittest.skipUnless(PY2, "no bytes_mode under Py3") + def test_bytesmode_search_results_have_bytes(self): + l = self._get_bytes_ldapobject() + base = self.server.suffix + result = l.search_s(base.encode('utf-8'), ldap.SCOPE_SUBTREE, b'(cn=Foo*)', [b'*']) + result.sort() + dn, fields = result[0] + self.assertEqual(dn, b'cn=Foo1,%s' % base) + self.assertEqual(type(dn), bytes) + for key, values in fields.items(): + self.assertEqual(type(key), bytes) + for value in values: + self.assertEqual(type(value), bytes) + + @unittest.skipUnless(PY2, "no bytes_mode under Py3") + def test_unset_bytesmode_search_warns_bytes(self): + l = self._get_bytes_ldapobject(explicit=False) + base = self.server.suffix + + l.search_s(base.encode('utf-8'), ldap.SCOPE_SUBTREE, '(cn=Foo*)', [b'*']) + l.search_s(base.encode('utf-8'), ldap.SCOPE_SUBTREE, b'(cn=Foo*)', ['*']) + l.search_s(base, ldap.SCOPE_SUBTREE, b'(cn=Foo*)', [b'*']) + + def test_search_accepts_unicode_dn(self): + base = self.server.suffix + l = self._ldap_conn + + with self.assertRaises(ldap.NO_SUCH_OBJECT): + result = l.search_s("CN=abc\U0001f498def", ldap.SCOPE_SUBTREE) + + def test_filterstr_accepts_unicode(self): + l = self._ldap_conn + base = self.server.suffix + result = l.search_s(base, ldap.SCOPE_SUBTREE, '(cn=abc\U0001f498def)', ['*']) + self.assertEqual(result, []) + + def test_attrlist_accepts_unicode(self): + base = self.server.suffix + result = self._ldap_conn.search_s( + base, ldap.SCOPE_SUBTREE, + '(cn=Foo*)', ['abc', 'abc\U0001f498def']) + result.sort() + + for dn, attrs in result: + self.assertIsInstance(dn, text_type) + self.assertEqual(attrs, {}) def test001_search_subtree(self): result = self._ldap_conn.search_s( @@ -98,19 +203,19 @@ def test001_search_subtree(self): [ ( 'cn=Foo1,'+self.server.suffix, - {'cn': ['Foo1'], 'objectClass': ['organizationalRole']} + {'cn': [b'Foo1'], 'objectClass': [b'organizationalRole']} ), ( 'cn=Foo2,'+self.server.suffix, - {'cn': ['Foo2'], 'objectClass': ['organizationalRole']} 
+ {'cn': [b'Foo2'], 'objectClass': [b'organizationalRole']} ), ( 'cn=Foo3,'+self.server.suffix, - {'cn': ['Foo3'], 'objectClass': ['organizationalRole']} + {'cn': [b'Foo3'], 'objectClass': [b'organizationalRole']} ), ( 'cn=Foo4,ou=Container,'+self.server.suffix, - {'cn': ['Foo4'], 'objectClass': ['organizationalRole']} + {'cn': [b'Foo4'], 'objectClass': [b'organizationalRole']} ), ] ) @@ -128,15 +233,15 @@ def test002_search_onelevel(self): [ ( 'cn=Foo1,'+self.server.suffix, - {'cn': ['Foo1'], 'objectClass': ['organizationalRole']} + {'cn': [b'Foo1'], 'objectClass': [b'organizationalRole']} ), ( 'cn=Foo2,'+self.server.suffix, - {'cn': ['Foo2'], 'objectClass': ['organizationalRole']} + {'cn': [b'Foo2'], 'objectClass': [b'organizationalRole']} ), ( 'cn=Foo3,'+self.server.suffix, - {'cn': ['Foo3'], 'objectClass': ['organizationalRole']} + {'cn': [b'Foo3'], 'objectClass': [b'organizationalRole']} ), ] ) @@ -151,9 +256,22 @@ def test003_search_oneattr(self): result.sort() self.assertEqual( result, - [('cn=Foo4,ou=Container,'+self.server.suffix, {'cn': ['Foo4']})] + [('cn=Foo4,ou=Container,'+self.server.suffix, {'cn': [b'Foo4']})] ) + def test_search_subschema(self): + l = self._ldap_conn + dn = l.search_subschemasubentry_s() + self.assertIsInstance(dn, text_type) + self.assertEqual(dn, "cn=Subschema") + + @unittest.skipUnless(PY2, "no bytes_mode under Py3") + def test_search_subschema_have_bytes(self): + l = self._get_bytes_ldapobject(explicit=False) + dn = l.search_subschemasubentry_s() + self.assertIsInstance(dn, bytes) + self.assertEqual(dn, b"cn=Subschema") + def test004_errno107(self): l = self.ldap_object_class('ldap://127.0.0.1:42') try: @@ -228,20 +346,22 @@ def test103_reconnect_get_state(self): self.assertEqual( l1.__getstate__(), { - '_last_bind': ( + str('_last_bind'): ( 'simple_bind_s', (bind_dn, 'user1_pw'), {} ), - '_options': [(17, 3)], - '_reconnects_done': 0L, - '_retry_delay': 60.0, - '_retry_max': 1, - '_start_tls': 0, - '_trace_level': 0, - 
'_trace_stack_limit': 5, - '_uri': self.server.ldapi_uri, - 'timeout': -1, + str('_options'): [(17, 3)], + str('_reconnects_done'): 0, + str('_retry_delay'): 60.0, + str('_retry_max'): 1, + str('_start_tls'): 0, + str('_trace_level'): 0, + str('_trace_stack_limit'): 5, + str('_uri'): self.server.ldapi_uri, + str('bytes_mode'): l1.bytes_mode, + str('bytes_mode_hardfail'): l1.bytes_mode_hardfail, + str('timeout'): -1, }, ) diff --git a/Tests/t_ldapurl.py b/Tests/t_ldapurl.py index a68b032..2be03f6 100644 --- a/Tests/t_ldapurl.py +++ b/Tests/t_ldapurl.py @@ -5,6 +5,8 @@ See https://www.python-ldap.org/ for details. """ +from __future__ import unicode_literals + import unittest from ldap.compat import quote @@ -274,7 +276,7 @@ def test_parse_dn(self): u = LDAPUrl("ldap:///dn=foo%3f") self.assertEqual(u.dn, "dn=foo?") u = LDAPUrl("ldap:///dn=str%c3%b6der.com") - self.assertEqual(u.dn, "dn=str\xc3\xb6der.com") + self.assertEqual(u.dn, "dn=str\xf6der.com") def test_parse_attrs(self): u = LDAPUrl("ldap:///?") @@ -338,7 +340,7 @@ def test_parse_filter(self): u = LDAPUrl("ldap:///???(cn=Q%3f)") self.assertEqual(u.filterstr, "(cn=Q?)") u = LDAPUrl("ldap:///???(sn=Str%c3%b6der)") # (possibly bad?) - self.assertEqual(u.filterstr, "(sn=Str\xc3\xb6der)") + self.assertEqual(u.filterstr, "(sn=Str\xf6der)") u = LDAPUrl("ldap:///???(sn=Str\\c3\\b6der)") self.assertEqual(u.filterstr, "(sn=Str\\c3\\b6der)") # (recommended) u = LDAPUrl("ldap:///???(cn=*\\2a*)") diff --git a/Tests/t_ldif.py b/Tests/t_ldif.py index 76701cc..adf0d26 100644 --- a/Tests/t_ldif.py +++ b/Tests/t_ldif.py @@ -5,6 +5,8 @@ See https://www.python-ldap.org/ for details. 
""" +from __future__ import unicode_literals + # from Python's standard lib import unittest import textwrap @@ -184,7 +186,7 @@ def test_folded(self): value attrib2: %s - """ % (b'asdf.'*20), [ + """ % ('asdf.'*20), [ ( 'cn=x,cn=y,cn=z', { @@ -273,6 +275,26 @@ def test_big_binary(self): ) def test_unicode(self): + # Encode "Ströder" as UTF-8+Base64 + # Putting "Ströder" in a single line would be an invalid LDIF file + # per https://tools.ietf.org/html/rfc2849 (only safe ascii is allowed in a file) + self.check_records( + """ + dn: cn=Michael Stroeder,dc=stroeder,dc=com + lastname:: U3Ryw7ZkZXI= + + """, + [ + ( + 'cn=Michael Stroeder,dc=stroeder,dc=com', + {'lastname': [b'Str\303\266der']}, + ), + ] + ) + + def test_unencoded_unicode(self): + # Encode "Ströder" as UTF-8, without base64 + # This is an invalid LDIF file, but such files are often found in the wild. self.check_records( """ dn: cn=Michael Stroeder,dc=stroeder,dc=com