Source code for xorbits._mars.tensor.reduction.nanmean

# Copyright 2022-2023 XProbe Inc.
# derived from copyright 1999-2021 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np

from ... import opcodes as OperandDef
from ..array_utils import as_same_device, device
from ..datasource import tensor as astensor
from .core import TensorReduction, TensorReductionMixin, nannumel
from .mean import TensorMean


class TensorNanMean(TensorReduction, TensorReductionMixin):
    """Reduction operand behind :func:`nanmean`.

    The map stage stores a ``(nansum, count)`` pair per chunk, the combine
    stage is delegated to ``TensorMean.execute_combine`` and the agg stage
    produces the final quotient (or calls ``xp.nanmean`` directly when it
    receives a plain array instead of a pair).
    """

    _op_type_ = OperandDef.NANMEAN

    def __init__(self, axis=None, keepdims=None, combine_size=None, stage=None, **kw):
        # The stage goes through ``_rewrite_stage`` (inherited; not defined in
        # this class) before it reaches the base constructor.
        rewritten_stage = self._rewrite_stage(stage)
        super().__init__(
            _axis=axis,
            _keepdims=keepdims,
            _combine_size=combine_size,
            stage=rewritten_stage,
            **kw
        )

    @classmethod
    def execute_map(cls, ctx, op):
        """Store ``(nansum, count)`` of the single input chunk in ``ctx``.

        The count comes from ``nannumel`` with an ``int64`` dtype
        (presumably the number of non-NaN elements -- confirm against
        ``.core.nannumel``); the sum uses ``op.dtype``.
        """
        raw_inputs = [ctx[chunk.key] for chunk in op.inputs]
        (block,), device_id, xp = as_same_device(
            raw_inputs, device=op.device, ret_extra=True
        )

        reduce_axis = cls.get_axis(op.axis)

        with device(device_id):
            keep = bool(op.keepdims)
            valid_count = nannumel(
                block, axis=reduce_axis, dtype=np.int64, keepdims=keep
            )
            partial_sum = xp.nansum(
                block, axis=reduce_axis, dtype=op.dtype, keepdims=keep
            )
            ctx[op.outputs[0].key] = (partial_sum, valid_count)

    @classmethod
    def execute_agg(cls, ctx, op):
        """Write the final mean for ``op`` into ``ctx``.

        A list/tuple input is unpacked as ``(data, count)``: both parts are
        summed along the axis in ``op.dtype`` and then divided.  Any other
        input is reduced directly with ``xp.nanmean``.
        """
        reduce_axis = cls.get_axis(op.axis)
        payload = ctx[op.inputs[0].key]

        if isinstance(payload, (list, tuple)):
            (sums, counts), device_id, xp = as_same_device(
                payload, device=op.device, ret_extra=True
            )

            with device(device_id):
                reduce_kwargs = dict(
                    axis=reduce_axis, dtype=op.dtype, keepdims=bool(op.keepdims)
                )
                # Counts are reduced first, then the sums, both in op.dtype.
                total_count = xp.sum(counts, **reduce_kwargs)
                total_sum = xp.sum(sums, **reduce_kwargs)
                ctx[op.outputs[0].key] = xp.true_divide(
                    total_sum, total_count, dtype=op.dtype
                )
            return

        # Plain array input: reduce it in a single NaN-aware call.
        (inp,), device_id, xp = as_same_device(
            [payload], device=op.device, ret_extra=True
        )

        with device(device_id):
            ctx[op.outputs[0].key] = xp.nanmean(
                inp, axis=reduce_axis, dtype=op.dtype, keepdims=bool(op.keepdims)
            )

    @classmethod
    def execute_combine(cls, ctx, op):
        """Delegate the combine stage to ``TensorMean.execute_combine``."""
        TensorMean.execute_combine(ctx, op)


def nanmean(a, axis=None, dtype=None, out=None, keepdims=None, combine_size=None):
    """
    Compute the arithmetic mean along the specified axis, ignoring NaNs.

    Returns the average of the tensor elements.  The average is taken over
    the flattened tensor by default, otherwise over the specified axis.
    `float64` intermediate and return values are used for integer inputs.

    For all-NaN slices, NaN is returned and a `RuntimeWarning` is raised.

    Parameters
    ----------
    a : array_like
        Tensor containing numbers whose mean is desired. If `a` is not a
        tensor, a conversion is attempted.
    axis : int, optional
        Axis along which the means are computed. The default is to compute
        the mean of the flattened tensor.
    dtype : data-type, optional
        Type to use in computing the mean.  For integer inputs, the default
        is `float64`; for inexact inputs, it is the same as the input
        dtype.
    out : Tensor, optional
        Alternate output tensor in which to place the result.  The default
        is ``None``; if provided, it must have the same shape as the
        expected output, but the type will be cast if necessary.  See
        `doc.ufuncs` for details.
    keepdims : bool, optional
        If this is set to True, the axes which are reduced are left
        in the result as dimensions with size one. With this option,
        the result will broadcast correctly against the original `a`.

        If the value is anything but the default, then
        `keepdims` will be passed through to the `mean` or `sum` methods
        of sub-classes of `Tensor`.  If the sub-class methods
        do not implement `keepdims`, an exception will be raised.
    combine_size: int, optional
        The number of chunks to combine.

    Returns
    -------
    m : Tensor, see dtype parameter above
        If `out=None`, returns a new array containing the mean values,
        otherwise a reference to the output array is returned. Nan is
        returned for slices that contain only NaNs.

    See Also
    --------
    average : Weighted average
    mean : Arithmetic mean taken while not ignoring NaNs
    var, nanvar

    Notes
    -----
    The arithmetic mean is the sum of the non-NaN elements along the axis
    divided by the number of non-NaN elements.

    Note that for floating-point input, the mean is computed using the same
    precision the input has.  Depending on the input data, this can cause
    the results to be inaccurate, especially for `float32`.  Specifying a
    higher-precision accumulator using the `dtype` keyword can alleviate
    this issue.

    Examples
    --------
    >>> import mars.tensor as mt

    >>> a = mt.array([[1, mt.nan], [3, 4]])
    >>> mt.nanmean(a).execute()
    2.6666666666666665
    >>> mt.nanmean(a, axis=0).execute()
    array([ 2.,  4.])
    >>> mt.nanmean(a, axis=1).execute()
    array([ 1.,  3.5])

    """
    a = astensor(a)
    if dtype is None:
        # Infer the result dtype by running NumPy's own nanmean on a
        # one-element probe.  The probe is zero-filled rather than
        # uninitialized: np.empty may hold a NaN bit pattern, which would make
        # np.nanmean emit a spurious "Mean of empty slice" RuntimeWarning.
        dtype = np.nanmean(np.zeros((1,), dtype=a.dtype)).dtype
    op = TensorNanMean(
        axis=axis, dtype=dtype, keepdims=keepdims, combine_size=combine_size
    )
    return op(a, out=out)