Source code for xorbits._mars.tensor.reduction.nanmean

# Copyright 2022-2023 XProbe Inc.
# derived from copyright 1999-2021 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np

from ... import opcodes as OperandDef
from ..array_utils import as_same_device, device
from ..datasource import tensor as astensor
from .core import TensorReduction, TensorReductionMixin, nannumel
from .mean import TensorMean


class TensorNanMean(TensorReduction, TensorReductionMixin):
    _op_type_ = OperandDef.NANMEAN

    def __init__(self, axis=None, keepdims=None, combine_size=None, stage=None, **kw):
        stage = self._rewrite_stage(stage)
        super().__init__(
            _axis=axis,
            _keepdims=keepdims,
            _combine_size=combine_size,
            stage=stage,
            **kw
        )

    @classmethod
    def execute_map(cls, ctx, op):
        (in_chunk,), device_id, xp = as_same_device(
            [ctx[c.key] for c in op.inputs], device=op.device, ret_extra=True
        )

        axis = cls.get_axis(op.axis)

        with device(device_id):
            chunk_count = nannumel(
                in_chunk, axis=axis, dtype=np.int64, keepdims=bool(op.keepdims)
            )
            chunk_sum = xp.nansum(
                in_chunk, axis=axis, dtype=op.dtype, keepdims=bool(op.keepdims)
            )
            ctx[op.outputs[0].key] = (chunk_sum, chunk_count)

    @classmethod
    def execute_agg(cls, ctx, op):
        axis = cls.get_axis(op.axis)

        a = ctx[op.inputs[0].key]
        if not isinstance(a, (list, tuple)):
            (inp,), device_id, xp = as_same_device(
                [a], device=op.device, ret_extra=True
            )

            with device(device_id):
                ctx[op.outputs[0].key] = xp.nanmean(
                    inp, axis=axis, dtype=op.dtype, keepdims=bool(op.keepdims)
                )
        else:
            (_data, _count), device_id, xp = as_same_device(
                a, device=op.device, ret_extra=True
            )

            with device(device_id):
                chunk_count = xp.sum(
                    _count, axis=axis, dtype=op.dtype, keepdims=bool(op.keepdims)
                )
                chunk_sum = xp.sum(
                    _data, axis=axis, dtype=op.dtype, keepdims=bool(op.keepdims)
                )
                ctx[op.outputs[0].key] = xp.true_divide(
                    chunk_sum, chunk_count, dtype=op.dtype
                )

    @classmethod
    def execute_combine(cls, ctx, op):
        TensorMean.execute_combine(ctx, op)


[docs]def nanmean(a, axis=None, dtype=None, out=None, keepdims=None, combine_size=None):
    """
    Compute the arithmetic mean along the specified axis, ignoring NaNs.

    Returns the average of the tensor elements.  The average is taken over
    the flattened tensor by default, otherwise over the specified axis.
    `float64` intermediate and return values are used for integer inputs.

    For all-NaN slices, NaN is returned and a `RuntimeWarning` is raised.

    Parameters
    ----------
    a : array_like
        Tensor containing numbers whose mean is desired. If `a` is not an
        tensor, a conversion is attempted.
    axis : int, optional
        Axis along which the means are computed. The default is to compute
        the mean of the flattened tensor.
    dtype : data-type, optional
        Type to use in computing the mean.  For integer inputs, the default
        is `float64`; for inexact inputs, it is the same as the input
        dtype.
    out : Tensor, optional
        Alternate output tensor in which to place the result.  The default
        is ``None``; if provided, it must have the same shape as the
        expected output, but the type will be cast if necessary.  See
        `doc.ufuncs` for details.
    keepdims : bool, optional
        If this is set to True, the axes which are reduced are left
        in the result as dimensions with size one. With this option,
        the result will broadcast correctly against the original `a`.

        If the value is anything but the default, then
        `keepdims` will be passed through to the `mean` or `sum` methods
        of sub-classes of `Tensor`.  If the sub-classes methods
        does not implement `keepdims` any exceptions will be raised.
    combine_size: int, optional
        The number of chunks to combine.

    Returns
    -------
    m : Tensor, see dtype parameter above
        If `out=None`, returns a new array containing the mean values,
        otherwise a reference to the output array is returned. Nan is
        returned for slices that contain only NaNs.

    See Also
    --------
    average : Weighted average
    mean : Arithmetic mean taken while not ignoring NaNs
    var, nanvar

    Notes
    -----
    The arithmetic mean is the sum of the non-NaN elements along the axis
    divided by the number of non-NaN elements.

    Note that for floating-point input, the mean is computed using the same
    precision the input has.  Depending on the input data, this can cause
    the results to be inaccurate, especially for `float32`.  Specifying a
    higher-precision accumulator using the `dtype` keyword can alleviate
    this issue.

    Examples
    --------
    >>> import mars.tensor as mt

    >>> a = mt.array([[1, mt.nan], [3, 4]])
    >>> mt.nanmean(a).execute()
    2.6666666666666665
    >>> mt.nanmean(a, axis=0).execute()
    array([ 2.,  4.])
    >>> mt.nanmean(a, axis=1).execute()
    array([ 1.,  3.5])

    """
    a = astensor(a)
    if dtype is None:
        dtype = np.nanmean(np.empty((1,), dtype=a.dtype)).dtype
    op = TensorNanMean(
        axis=axis, dtype=dtype, keepdims=keepdims, combine_size=combine_size
    )
    return op(a, out=out)