From e716349e810aeb38195b0a22a630489cc19f7672 Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Thu, 30 Nov 2017 14:04:28 +0100 Subject: [PATCH] Doc: Add dedicated page for bytes_mode --- CHANGES | 13 +++-- Doc/bytes_mode.rst | 105 +++++++++++++++++++++++++++++++++++++++++ Doc/index.rst | 36 +------------- Doc/reference/ldap.rst | 7 ++- Lib/ldap/functions.py | 2 +- 5 files changed, 121 insertions(+), 42 deletions(-) create mode 100644 Doc/bytes_mode.rst diff --git a/CHANGES b/CHANGES index 113eb27..28f6f32 100644 --- a/CHANGES +++ b/CHANGES @@ -3,11 +3,16 @@ Released 3.0.0 xxxx-xx-xx Changes since 2.4.45: -Mandatory prerequisites: -- Python 2.7.x or 3.3+ -- pyasn1 0.3.7+ and pyasn1_modules 0.1.5+ +New dependencies (automatically installed when using pip): +- pyasn1 0.3.7+ +- pyasn1_modules 0.1.5+ -Python 3 support is merged from the pyldap fork (https://github.com/pyldap) +Removed support for Python 2.6. + +Python 3 support and bytes_mode: +- merged from the pyldap fork (https://github.com/pyldap) +- please see documentation on bytes_mode and text/bytes handling: + https://python-ldap.readthedocs.io/en/latest/bytes_mode.html Infrastructure: - Add .gitignore diff --git a/Doc/bytes_mode.rst b/Doc/bytes_mode.rst new file mode 100644 index 0000000..ba31f0c --- /dev/null +++ b/Doc/bytes_mode.rst @@ -0,0 +1,105 @@ +.. _text-bytes: + +Bytes/text management +===================== + +Python 3 introduces a hard distinction between *text* (``str``) – sequences of +characters (formally, *Unicode codepoints*) – and ``bytes`` – sequences of +8-bit values used to encode *any* kind of data for storage or transmission. + +Python 2 has the same distinction between ``str`` (bytes) and +``unicode`` (text). +However, values can be implicitly converted between these types as needed, +e.g. when comparing or writing to disk or the network. +The implicit encoding and decoding can be a source of subtle bugs when not +designed and tested adequately. 
+ +In python-ldap 2.x (for Python 2), bytes were used for all fields, +including those guaranteed to be text. + +From version 3.0, python-ldap uses text where appropriate. +On Python 2, the :ref:`bytes mode <bytes_mode>` setting influences how text is +handled. + + +What's text, and what's bytes +----------------------------- + +The LDAP protocol states that some fields (distinguished names, relative +distinguished names, attribute names, queries) be encoded in UTF-8. +In python-ldap, these are represented as text (``str`` on Python 3, +``unicode`` on Python 2). + +Attribute *values*, on the other hand, **MAY** +contain any type of data, including text. +To know what type of data is represented, python-ldap would need access to the +schema, which is not always available (nor always correct). +Thus, attribute values are *always* treated as ``bytes``. +Encoding/decoding to other formats – text, images, etc. – is left to the caller. + + +.. _bytes_mode: + +The bytes mode +-------------- + +The behavior of python-ldap 3.0 in Python 2 is influenced by a ``bytes_mode`` +argument to :func:`ldap.initialize`. +The argument can take these values: + +``bytes_mode=True``: backwards-compatible + + Text values returned from python-ldap are always bytes (``str``). + Text values supplied to python-ldap may be either bytes or Unicode. + The encoding for bytes is always assumed to be UTF-8. + + Not available in Python 3. + +``bytes_mode=False``: strictly future-compatible + + Text values must be represented as ``unicode``. + An error is raised if python-ldap receives a text value as bytes (``str``). + +Unspecified: relaxed mode with warnings + + Causes a warning on Python 2. + + Text values returned from python-ldap are always ``unicode``. + Text values supplied to python-ldap should be ``unicode``; + warnings are emitted when they are not. + +Backwards-compatible behavior is not scheduled for removal until Python 2 +itself reaches end of life. 
+ + +Porting recommendations +----------------------- + +Since the end of life of Python 2 is coming in a few years, +projects are strongly urged to make their code compatible with Python 3. +General instructions for this are provided `in Python documentation`_ and in +the `Conservative porting guide`_. + +.. _in Python documentation: https://docs.python.org/3/howto/pyporting.html +.. _Conservative porting guide: http://portingguide.readthedocs.io/en/latest/ + + +When porting from python-ldap 2.x, users are advised to update their code +to set ``bytes_mode=False``, and fix any resulting failures. + +The typical usage is as follows. +Note that only the result's *values* are of the ``bytes`` type: + +.. code-block:: pycon + + >>> import ldap + >>> con = ldap.initialize('ldap://localhost:389', bytes_mode=False) + >>> con.simple_bind_s(u'login', u'secret_password') + >>> results = con.search_s(u'ou=people,dc=example,dc=org', ldap.SCOPE_SUBTREE, u"(cn=Raphaël)") + >>> results + [ + ("cn=Raphaël,ou=people,dc=example,dc=org", { + 'cn': [b'Rapha\xc3\xabl'], + 'sn': [b'Barrois'], + }), + ] diff --git a/Doc/index.rst b/Doc/index.rst index 2c5b24e..ef652d9 100644 --- a/Doc/index.rst +++ b/Doc/index.rst @@ -68,47 +68,13 @@ Contents :maxdepth: 2 installing.rst + bytes_mode.rst reference/index.rst resources.rst contributing.rst faq.rst -Bytes/text management ---------------------- - -The LDAP protocol states that some fields (distinguished names, relative distinguished names, -attribute names, queries) be encoded in UTF-8; some other (mostly attribute *values*) **MAY** -contain any type of data, and thus be treated as bytes. - -In Python 2, ``python-ldap`` used bytes for all fields, including those guaranteed to be text. -In order to support Python 3, this distinction is made explicit. This is done -through the ``bytes_mode`` flag to ``ldap.initialize()``. 
- -When porting from ``python-ldap`` 2.x, users are advised to update their code to set ``bytes_mode=False`` -on calls to these methods. -Under Python 2, ``python-pyldap`` aggressively checks the type of provided arguments, and will raise a ``TypeError`` -for any invalid parameter. -However, if the ``bytes_mode`` kwarg isn't provided, ``pyldap`` will only -raise warnings. - -The typical usage is as follows; note that only the result's *values* are of the bytes type: - -.. code-block:: pycon - - >>> import ldap - >>> con = ldap.initialize('ldap://localhost:389', bytes_mode=False) - >>> con.simple_bind_s('login', 'secret_password') - >>> results = con.search_s('ou=people,dc=example,dc=org', ldap.SCOPE_SUBTREE, "(cn=Raphaël)") - >>> results - [ - ("cn=Raphaël,ou=people,dc=example,dc=org", { - 'cn': [b'Rapha\xc3\xabl'], - 'sn': [b'Barrois'], - }), - ] - - Indices and tables ------------------ diff --git a/Doc/reference/ldap.rst b/Doc/reference/ldap.rst index 8844e59..c28cdec 100644 --- a/Doc/reference/ldap.rst +++ b/Doc/reference/ldap.rst @@ -29,7 +29,7 @@ Functions This module defines the following functions: -.. py:function:: initialize(uri [, trace_level=0 [, trace_file=sys.stdout [, trace_stack_limit=None]]]) -> LDAPObject object +.. py:function:: initialize(uri [, trace_level=0 [, trace_file=sys.stdout [, trace_stack_limit=None [, bytes_mode=None]]]]) -> LDAPObject object Initializes a new connection object for accessing the given LDAP server, and return an LDAP object (see :ref:`ldap-objects`) used to perform operations @@ -48,11 +48,14 @@ This module defines the following functions: that nothing is sent on the wire. The error handling in the calling application has to correctly handle this behaviour. 
- The optional arguments are for generating debug log information: + Three optional arguments are for generating debug log information: *trace_level* specifies the amount of information being logged, *trace_file* specifies a file-like object as target of the debug log and *trace_stack_limit* specifies the stack limit of tracebacks in debug log. + The *bytes_mode* argument specifies text/bytes behavior under Python 2. + See :ref:`text-bytes` for complete documentation. + Possible values for *trace_level* are :py:const:`0` for no logging, :py:const:`1` for only logging the method calls with arguments, diff --git a/Lib/ldap/functions.py b/Lib/ldap/functions.py index b887037..1588221 100644 --- a/Lib/ldap/functions.py +++ b/Lib/ldap/functions.py @@ -77,7 +77,7 @@ def initialize(uri,trace_level=0,trace_file=sys.stdout,trace_stack_limit=None, b File object where to write the trace output to. Default is to use stdout. bytes_mode - Whether to enable "bytes_mode" for backwards compatibility under Py2. + Whether to enable :ref:`bytes_mode` for backwards compatibility under Py2. """ return LDAPObject(uri,trace_level,trace_file,trace_stack_limit,bytes_mode)