# Source code for xorbits._mars.tensor.base.isin

# Copyright 2022-2023 XProbe Inc.
# derived from copyright 1999-2021 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Union

import numpy as np

from ... import opcodes as OperandDef
from ...serialization.serializables import BoolField
from ...typing import TileableType
from ..array_utils import as_same_device, device
from ..core import TensorOrder
from ..datasource import tensor as astensor
from ..operands import TensorOperand, TensorOperandMixin


class TensorIsIn(TensorOperand, TensorOperandMixin):
    """Operand producing a boolean membership mask.

    The operand takes two inputs, ``element`` and ``test_elements``, and
    yields a boolean tensor with the shape of ``element``.
    """

    _op_type_ = OperandDef.ISIN

    # Passed through unchanged to the backend ``isin`` call in ``execute``.
    assume_unique = BoolField("assume_unique")
    # Passed through to the backend ``isin`` call; also selects how partial
    # chunk results are combined in ``tile``.
    invert = BoolField("invert")

    def __call__(self, element, test_elements):
        """Create the output tensor for ``element`` and ``test_elements``.

        The output dtype is ``bool``, its shape is ``element.shape`` and its
        order is ``TensorOrder.C_ORDER``.
        """
        self.dtype = np.dtype(bool)
        operands = [element, test_elements]
        return self.new_tensor(
            operands, shape=element.shape, order=TensorOrder.C_ORDER
        )

    @classmethod
    def tile(cls, op):
        """Expand ``op`` into chunk-level operands.

        Every chunk of the first input is paired with every chunk of the
        second input. When the second input has more than one chunk, the
        partial masks are stacked along a new leading axis and reduced over
        that axis. The output keeps the ``nsplits`` of the first input.
        """
        from ..merge.stack import TensorStack
        from ..reduction import TensorAll, TensorAny

        element, test_elements = op.inputs
        result = op.outputs[0]
        # With ``invert`` each partial mask means "absent from this test
        # chunk", so the partial masks are combined with ``all``; without it
        # a hit in any test chunk suffices, so they are combined with ``any``.
        reducer_cls = TensorAll if op.invert else TensorAny

        tiled_chunks = []
        for elem_chunk in element.chunks:
            # Keyword arguments shared by every chunk shaped like
            # ``elem_chunk``.
            like_elem = dict(
                shape=elem_chunk.shape,
                dtype=result.dtype,
                order=result.order,
                index=elem_chunk.index,
            )
            partial_masks = [
                op.copy().reset_key().new_chunk([elem_chunk, test_chunk], **like_elem)
                for test_chunk in test_elements.chunks
            ]

            if len(partial_masks) == 1:
                # A single test chunk already gives the final mask.
                tiled_chunks.append(partial_masks[0])
                continue

            # Stack the partial masks on a new axis 0, then reduce it away.
            stacked = TensorStack(axis=0).new_chunk(
                partial_masks,
                shape=(len(partial_masks),) + elem_chunk.shape,
                dtype=result.dtype,
                order=result.order,
            )
            reducer = reducer_cls(axis=(0,), dtype=result.dtype)
            tiled_chunks.append(reducer.new_chunk([stacked], **like_elem))

        params = result.params.copy()
        params.update(nsplits=element.nsplits, chunks=tiled_chunks)
        return op.copy().new_tensors(op.inputs, kws=[params])

    @classmethod
    def execute(cls, ctx, op):
        """Compute one chunk by delegating to ``xp.isin``.

        The input data are looked up in ``ctx`` by chunk key and the result
        is stored in ``ctx`` under the key of the first output.
        """
        raw_inputs = [ctx[inp.key] for inp in op.inputs]
        # ``as_same_device`` hands back the inputs together with a device id
        # and the array module (``xp``) used for the computation.
        (element, test_elements), device_id, xp = as_same_device(
            raw_inputs, device=op.device, ret_extra=True
        )

        with device(device_id):
            mask = xp.isin(
                element,
                test_elements,
                assume_unique=op.assume_unique,
                invert=op.invert,
            )
            ctx[op.outputs[0].key] = mask


def isin(
    element: Union[TileableType, np.ndarray],
    test_elements: Union[TileableType, np.ndarray, list],
    assume_unique: bool = False,
    invert: bool = False,
):
    """
    Calculates `element in test_elements`, broadcasting over `element` only.
    Returns a boolean array of the same shape as `element` that is True
    where an element of `element` is in `test_elements` and False otherwise.

    Parameters
    ----------
    element : array_like
        Input tensor.
    test_elements : array_like
        The values against which to test each value of `element`.
        This argument is flattened if it is a tensor or array_like.
        See notes for behavior with non-array-like parameters.
    assume_unique : bool, optional
        If True, the input tensors are both assumed to be unique, which
        can speed up the calculation.  Default is False.
    invert : bool, optional
        If True, the values in the returned tensor are inverted, as if
        calculating `element not in test_elements`. Default is False.
        ``mt.isin(a, b, invert=True)`` is equivalent to (but faster
        than) ``mt.invert(mt.isin(a, b))``.

    Returns
    -------
    isin : Tensor, bool
        Has the same shape as `element`. The values `element[isin]`
        are in `test_elements`.

    See Also
    --------
    in1d                  : Flattened version of this function.

    Notes
    -----

    `isin` is an element-wise function version of the python keyword `in`.
    ``isin(a, b)`` is roughly equivalent to
    ``mt.array([item in b for item in a])`` if `a` and `b` are 1-D sequences.

    `element` and `test_elements` are converted to tensors if they are not
    already. If `test_elements` is a set (or other non-sequence collection)
    it will be converted to an object tensor with one element, rather than a
    tensor of the values contained in `test_elements`. This is a consequence
    of the `tensor` constructor's way of handling non-sequence collections.
    Converting the set to a list usually gives the desired behavior.

    Examples
    --------
    >>> import mars.tensor as mt

    >>> element = 2*mt.arange(4).reshape((2, 2))
    >>> element.execute()
    array([[0, 2],
           [4, 6]])
    >>> test_elements = [1, 2, 4, 8]
    >>> mask = mt.isin(element, test_elements)
    >>> mask.execute()
    array([[ False,  True],
           [ True,  False]])
    >>> element[mask].execute()
    array([2, 4])
    >>> mask = mt.isin(element, test_elements, invert=True)
    >>> mask.execute()
    array([[ True, False],
           [ False, True]])
    >>> element[mask].execute()
    array([0, 6])

    Because of how `array` handles sets, the following does not
    work as expected:

    >>> test_set = {1, 2, 4, 8}
    >>> mt.isin(element, test_set).execute()
    array([[ False, False],
           [ False, False]])

    Casting the set to a list gives the expected result:

    >>> mt.isin(element, list(test_set)).execute()
    array([[ False,  True],
           [ True,  False]])
    """
    element = astensor(element)
    # Only the values of `test_elements` matter, not their layout, so the
    # tensor is flattened before it is handed to the operand.
    test_elements = astensor(test_elements).ravel()
    op = TensorIsIn(assume_unique=assume_unique, invert=invert)
    return op(element, test_elements)