Source code for xorbits._mars.tensor.indexing.nonzero

# Copyright 2022-2023 XProbe Inc.
# derived from copyright 1999-2021 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np

from ... import opcodes as OperandDef
from ...core import ExecutableTuple, recursive_tile
from ...serialization.serializables import KeyField
from ..core import TensorOrder
from ..datasource import tensor as astensor
from ..operands import TensorHasInput, TensorOperandMixin
from .unravel_index import unravel_index


class TensorNonzero(TensorHasInput, TensorOperandMixin):
    _op_type_ = OperandDef.NONZERO

    _input = KeyField("input")

    @property
    def output_limit(self):
        return float("inf")

    def __call__(self, a):
        kws = [
            {"shape": (np.nan,), "order": TensorOrder.C_ORDER, "_idx_": i}
            for i in range(a.ndim)
        ]
        return ExecutableTuple(self.new_tensors([a], kws=kws, output_limit=len(kws)))

    @classmethod
    def tile(cls, op):
        from ..datasource import arange

        in_tensor = astensor(op.input)

        flattened = in_tensor.astype(bool).flatten()
        flattened = yield from recursive_tile(flattened)
        indices = arange(flattened.size, dtype=np.intp, chunk_size=flattened.nsplits)
        indices = indices[flattened]
        dim_indices = unravel_index(indices, in_tensor.shape)
        dim_indices = yield from recursive_tile(dim_indices)

        kws = [
            {"nsplits": ind.nsplits, "chunks": ind.chunks, "shape": o.shape}
            for ind, o in zip(dim_indices, op.outputs)
        ]
        new_op = op.copy()
        return new_op.new_tensors(op.inputs, kws=kws, output_limit=len(kws))


[docs]def nonzero(a):
    """
    Return the indices of the elements that are non-zero.

    Returns a tuple of tensors, one for each dimension of `a`,
    containing the indices of the non-zero elements in that
    dimension. The values in `a` are always tested and returned.
    The corresponding non-zero
    values can be obtained with::

        a[nonzero(a)]

    To group the indices by element, rather than dimension, use::

        transpose(nonzero(a))

    The result of this is always a 2-D array, with a row for
    each non-zero element.

    Parameters
    ----------
    a : array_like
        Input tensor.

    Returns
    -------
    tuple_of_arrays : tuple
        Indices of elements that are non-zero.

    See Also
    --------
    flatnonzero :
        Return indices that are non-zero in the flattened version of the input
        tensor.
    Tensor.nonzero :
        Equivalent tensor method.
    count_nonzero :
        Counts the number of non-zero elements in the input tensor.

    Examples
    --------
    >>> import mars.tensor as mt

    >>> x = mt.array([[1,0,0], [0,2,0], [1,1,0]])
    >>> x.execute()
    array([[1, 0, 0],
           [0, 2, 0],
           [1, 1, 0]])
    >>> mt.nonzero(x).execute()
    (array([0, 1, 2, 2]), array([0, 1, 0, 1]))

    >>> x[mt.nonzero(x)].execute()  # TODO(jisheng): accomplish this after fancy indexing is supported

    >>> mt.transpose(mt.nonzero(x)).execute() # TODO(jisheng): accomplish this later

    A common use for ``nonzero`` is to find the indices of an array, where
    a condition is True.  Given an array `a`, the condition `a` > 3 is a
    boolean array and since False is interpreted as 0, np.nonzero(a > 3)
    yields the indices of the `a` where the condition is true.

    >>> a = mt.array([[1,2,3],[4,5,6],[7,8,9]])
    >>> (a > 3).execute()
    array([[False, False, False],
           [ True,  True,  True],
           [ True,  True,  True]])
    >>> mt.nonzero(a > 3).execute()
    (array([1, 1, 1, 2, 2, 2]), array([0, 1, 2, 0, 1, 2]))

    The ``nonzero`` method of the boolean array can also be called.

    >>> (a > 3).nonzero().execute()
    (array([1, 1, 1, 2, 2, 2]), array([0, 1, 2, 0, 1, 2]))

    """
    a = astensor(a)
    op = TensorNonzero(dtype=np.dtype(np.intp))
    return op(a)