Source code for xorbits._mars.tensor.indexing.choose

# Copyright 2022-2023 XProbe Inc.
# derived from copyright 1999-2021 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np

from ... import opcodes as OperandDef
from ...serialization.serializables import FieldTypes, KeyField, ListField, StringField
from ..array_utils import as_same_device, device
from ..core import Tensor, TensorOrder
from ..datasource import tensor as astensor
from ..operands import TensorOperand, TensorOperandMixin
from ..utils import broadcast_shape, check_out_param


class TensorChoose(TensorOperand, TensorOperandMixin):
    _op_type_ = OperandDef.CHOOSE

    _a = KeyField("a")
    _choices = ListField("choices", FieldTypes.key)
    _mode = StringField("mode")

    def __init__(self, mode=None, **kw):
        super().__init__(_mode=mode, **kw)

    def __setattr__(self, key, value):
        if key == "_mode" and value not in ("raise", "wrap", "clip"):
            raise ValueError(f"mode should be raise, wrap or clip, not {value}")

        super().__setattr__(key, value)

    @property
    def a(self):
        return self._a

    @property
    def choices(self):
        return self._choices

    @property
    def mode(self):
        return self._mode

    def _set_inputs(self, inputs):
        super()._set_inputs(inputs)
        self._a = self._inputs[0]
        self._choices = self._inputs[1:]

    def __call__(self, a, choices, out=None):
        if out is not None and not isinstance(out, Tensor):
            raise TypeError(f"out should be Tensor object, got {type(out)} instead")

        inputs = [a] + choices
        shape = broadcast_shape(a.shape, *[c.shape for c in choices])
        order = TensorOrder.C_ORDER if out is None else out.order
        t = self.new_tensor(inputs, shape, order=order)

        if out is None:
            return t

        check_out_param(out, t, "unsafe")
        out_shape, out_dtype = out.shape, out.dtype
        # if `out` is specified, use out's dtype and shape
        if out_shape != t.shape:
            raise ValueError(f"output shape should be {t.shape}, got {out_shape}")
        setattr(self, "dtype", out_dtype)
        out.data = t.data
        return out

    @classmethod
    def tile(cls, op):
        from ..arithmetic.core import TensorElementWise

        return (yield from TensorElementWise.tile(op))

    @classmethod
    def execute(cls, ctx, op):
        inputs, device_id, xp = as_same_device(
            [ctx[c.key] for c in op.inputs], device=op.device, ret_extra=True
        )
        a, choices = inputs[0], inputs[1:]

        out = op.outputs[0]
        with device(device_id):
            ctx[out.key] = xp.choose(a, choices, mode=op.mode).astype(
                op.dtype, order=out.order.value, copy=False
            )


[docs]def choose(a, choices, out=None, mode="raise"):
    """
    Construct a tensor from an index tensor and a set of tensors to choose from.

    First of all, if confused or uncertain, definitely look at the Examples -
    in its full generality, this function is less simple than it might
    seem from the following code description (below ndi =
    `mt.lib.index_tricks`):

    ``mt.choose(a,c) == mt.array([c[a[I]][I] for I in ndi.ndindex(a.shape)])``.

    But this omits some subtleties.  Here is a fully general summary:

    Given an "index" tensor (`a`) of integers and a sequence of `n` tensors
    (`choices`), `a` and each choice tensor are first broadcast, as necessary,
    to tensors of a common shape; calling these *Ba* and *Bchoices[i], i =
    0,...,n-1* we have that, necessarily, ``Ba.shape == Bchoices[i].shape``
    for each `i`.  Then, a new array with shape ``Ba.shape`` is created as
    follows:

    * if ``mode=raise`` (the default), then, first of all, each element of
      `a` (and thus `Ba`) must be in the range `[0, n-1]`; now, suppose that
      `i` (in that range) is the value at the `(j0, j1, ..., jm)` position
      in `Ba` - then the value at the same position in the new array is the
      value in `Bchoices[i]` at that same position;

    * if ``mode=wrap``, values in `a` (and thus `Ba`) may be any (signed)
      integer; modular arithmetic is used to map integers outside the range
      `[0, n-1]` back into that range; and then the new array is constructed
      as above;

    * if ``mode=clip``, values in `a` (and thus `Ba`) may be any (signed)
      integer; negative integers are mapped to 0; values greater than `n-1`
      are mapped to `n-1`; and then the new tensor is constructed as above.

    Parameters
    ----------
    a : int tensor
        This tensor must contain integers in `[0, n-1]`, where `n` is the number
        of choices, unless ``mode=wrap`` or ``mode=clip``, in which cases any
        integers are permissible.
    choices : sequence of tensors
        Choice tensors. `a` and all of the choices must be broadcastable to the
        same shape.  If `choices` is itself a tensor (not recommended), then
        its outermost dimension (i.e., the one corresponding to
        ``choices.shape[0]``) is taken as defining the "sequence".
    out : tensor, optional
        If provided, the result will be inserted into this tensor. It should
        be of the appropriate shape and dtype.
    mode : {'raise' (default), 'wrap', 'clip'}, optional
        Specifies how indices outside `[0, n-1]` will be treated:

          * 'raise' : an exception is raised
          * 'wrap' : value becomes value mod `n`
          * 'clip' : values < 0 are mapped to 0, values > n-1 are mapped to n-1

    Returns
    -------
    merged_array : Tensor
        The merged result.

    Raises
    ------
    ValueError: shape mismatch
        If `a` and each choice tensor are not all broadcastable to the same
        shape.

    See Also
    --------
    Tensor.choose : equivalent method

    Notes
    -----
    To reduce the chance of misinterpretation, even though the following
    "abuse" is nominally supported, `choices` should neither be, nor be
    thought of as, a single tensor, i.e., the outermost sequence-like container
    should be either a list or a tuple.

    Examples
    --------

    >>> import mars.tensor as mt

    >>> choices = [[0, 1, 2, 3], [10, 11, 12, 13],
    ...   [20, 21, 22, 23], [30, 31, 32, 33]]
    >>> mt.choose([2, 3, 1, 0], choices
    ... # the first element of the result will be the first element of the
    ... # third (2+1) "array" in choices, namely, 20; the second element
    ... # will be the second element of the fourth (3+1) choice array, i.e.,
    ... # 31, etc.
    ... ).execute()
    array([20, 31, 12,  3])
    >>> mt.choose([2, 4, 1, 0], choices, mode='clip').execute() # 4 goes to 3 (4-1)
    array([20, 31, 12,  3])
    >>> # because there are 4 choice arrays
    >>> mt.choose([2, 4, 1, 0], choices, mode='wrap').execute() # 4 goes to (4 mod 4)
    array([20,  1, 12,  3])
    >>> # i.e., 0

    A couple examples illustrating how choose broadcasts:

    >>> a = [[1, 0, 1], [0, 1, 0], [1, 0, 1]]
    >>> choices = [-10, 10]
    >>> mt.choose(a, choices).execute()
    array([[ 10, -10,  10],
           [-10,  10, -10],
           [ 10, -10,  10]])

    >>> # With thanks to Anne Archibald
    >>> a = mt.array([0, 1]).reshape((2,1,1))
    >>> c1 = mt.array([1, 2, 3]).reshape((1,3,1))
    >>> c2 = mt.array([-1, -2, -3, -4, -5]).reshape((1,1,5))
    >>> mt.choose(a, (c1, c2)).execute() # result is 2x3x5, res[0,:,:]=c1, res[1,:,:]=c2
    array([[[ 1,  1,  1,  1,  1],
            [ 2,  2,  2,  2,  2],
            [ 3,  3,  3,  3,  3]],
           [[-1, -2, -3, -4, -5],
            [-1, -2, -3, -4, -5],
            [-1, -2, -3, -4, -5]]])

    """
    a = astensor(a, dtype="i8")
    choices = [astensor(c) for c in choices]

    dtype = np.result_type(*[c.dtype for c in choices])
    op = TensorChoose(mode=mode, dtype=dtype)
    return op(a, choices, out=out)