Source code for xorbits._mars.tensor.base.where

# Copyright 2022-2023 XProbe Inc.
# derived from copyright 1999-2021 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import itertools

import numpy as np

from ... import opcodes as OperandDef
from ...serialization.serializables import KeyField
from ...utils import has_unknown_shape
from ..array_utils import as_same_device, device
from ..core import TENSOR_TYPE
from ..datasource import tensor as astensor
from ..operands import TensorOperand, TensorOperandMixin
from ..utils import broadcast_shape, unify_chunks
from .broadcast_to import broadcast_to


class TensorWhere(TensorOperand, TensorOperandMixin):
    _op_type_ = OperandDef.WHERE

    _condition = KeyField("condition")
    _x = KeyField("x")
    _y = KeyField("y")

    @property
    def condition(self):
        return self._condition

    @property
    def x(self):
        return self._x

    @property
    def y(self):
        return self._y

    def _set_inputs(self, inputs):
        super()._set_inputs(inputs)
        self._condition = self._inputs[0]
        self._x = self._inputs[1]
        self._y = self._inputs[2]

    def __call__(self, condition, x, y, shape=None):
        shape = shape or broadcast_shape(condition.shape, x.shape, y.shape)
        return self.new_tensor([condition, x, y], shape)

    @classmethod
    def tile(cls, op):
        if has_unknown_shape(*op.inputs):
            yield
        inputs = yield from unify_chunks(
            *[(input, list(range(input.ndim))[::-1]) for input in op.inputs]
        )
        chunk_shapes = [
            t.chunk_shape if isinstance(t, TENSOR_TYPE) else t for t in inputs
        ]
        out_chunk_shape = broadcast_shape(*chunk_shapes)
        output = op.outputs[0]

        out_chunks = []
        nsplits = [[np.nan] * shape for shape in out_chunk_shape]
        get_index = lambda idx, t: tuple(
            0 if t.nsplits[i] == (1,) else ix for i, ix in enumerate(idx)
        )
        for out_index in itertools.product(*(map(range, out_chunk_shape))):
            in_chunks = [
                t.cix[get_index(out_index[-t.ndim :], t)]
                if t.ndim != 0
                else t.chunks[0]
                for t in inputs
            ]
            chunk_shape = broadcast_shape(*(c.shape for c in in_chunks))
            out_chunk = (
                op.copy()
                .reset_key()
                .new_chunk(
                    in_chunks, shape=chunk_shape, index=out_index, order=output.order
                )
            )
            out_chunks.append(out_chunk)
            for i, idx, s in zip(itertools.count(0), out_index, out_chunk.shape):
                nsplits[i][idx] = s

        new_op = op.copy()
        return new_op.new_tensors(
            inputs, output.shape, order=output.order, chunks=out_chunks, nsplits=nsplits
        )

    @classmethod
    def execute(cls, ctx, op):
        (cond, x, y), device_id, xp = as_same_device(
            [ctx[c.key] for c in op.inputs], device=op.device, ret_extra=True
        )

        with device(device_id):
            ctx[op.outputs[0].key] = xp.where(cond, x, y)


[docs]def where(condition, x=None, y=None):
    """
    Return elements, either from `x` or `y`, depending on `condition`.

    If only `condition` is given, return ``condition.nonzero()``.

    Parameters
    ----------
    condition : array_like, bool
        When True, yield `x`, otherwise yield `y`.
    x, y : array_like, optional
        Values from which to choose. `x`, `y` and `condition` need to be
        broadcastable to some shape.

    Returns
    -------
    out : Tensor or tuple of Tensors
        If both `x` and `y` are specified, the output tensor contains
        elements of `x` where `condition` is True, and elements from
        `y` elsewhere.

        If only `condition` is given, return the tuple
        ``condition.nonzero()``, the indices where `condition` is True.

    See Also
    --------
    nonzero, choose

    Notes
    -----
    If `x` and `y` are given and input arrays are 1-D, `where` is
    equivalent to::

        [xv if c else yv for (c,xv,yv) in zip(condition,x,y)]

    Examples
    --------
    >>> import mars.tensor as mt

    >>> mt.where([[True, False], [True, True]],
    ...          [[1, 2], [3, 4]],
    ...          [[9, 8], [7, 6]]).execute()
    array([[1, 8],
           [3, 4]])

    >>> mt.where([[0, 1], [1, 0]]).execute()
    (array([0, 1]), array([1, 0]))

    >>> x = mt.arange(9.).reshape(3, 3)
    >>> mt.where( x > 5 ).execute()
    (array([2, 2, 2]), array([0, 1, 2]))
    >>> mt.where(x < 5, x, -1).execute()               # Note: broadcasting.
    array([[ 0.,  1.,  2.],
           [ 3.,  4., -1.],
           [-1., -1., -1.]])

    Find the indices of elements of `x` that are in `goodvalues`.

    >>> goodvalues = [3, 4, 7]
    >>> ix = mt.isin(x, goodvalues)
    >>> ix.execute()
    array([[False, False, False],
           [ True,  True, False],
           [False,  True, False]])
    >>> mt.where(ix).execute()
    (array([1, 1, 2]), array([0, 1, 1]))
    """
    if (x is None) != (y is None):
        raise ValueError("either both or neither of x and y should be given")

    if x is None and y is None:
        return astensor(condition).nonzero()

    x, y = astensor(x), astensor(y)
    dtype = np.result_type(x.dtype, y.dtype)
    shape = broadcast_shape(x.shape, y.shape)

    if np.isscalar(condition):
        return broadcast_to(x if condition else y, shape).astype(dtype)
    else:
        condition = astensor(condition)
        op = TensorWhere(dtype=dtype)
        return op(condition, x, y, shape=shape)