# Source code for xorbits._mars.tensor.statistics.average

# Copyright 2022-2023 XProbe Inc.
# derived from copyright 1999-2021 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np

from ..base.broadcast_to import broadcast_to
from ..base.swapaxes import swapaxes
from ..datasource import tensor as astensor


def average(a, axis=None, weights=None, returned=False):
    """
    Compute the weighted average along the specified axis.

    Parameters
    ----------
    a : array_like
        Tensor containing data to be averaged. If `a` is not a
        tensor, a conversion is attempted.
    axis : None or int or tuple of ints, optional
        Axis or axes along which to average `a`.  The default,
        axis=None, will average over all of the elements of the input tensor.
        If axis is negative it counts from the last to the first axis.

        If axis is a tuple of ints, averaging is performed on all of the axes
        specified in the tuple instead of a single axis or all the axes as
        before.
    weights : array_like, optional
        A tensor of weights associated with the values in `a`. Each value in
        `a` contributes to the average according to its associated weight.
        The weights tensor can either be 1-D (in which case its length must be
        the size of `a` along the given axis) or of the same shape as `a`.
        If `weights=None`, then all data in `a` are assumed to have a
        weight equal to one.

        When 1-D weights are used with an `a` of a different shape, `axis`
        must be a single integer: the length check indexes ``a.shape[axis]``.
    returned : bool, optional
        Default is `False`. If `True`, the tuple (`average`, `sum_of_weights`)
        is returned, otherwise only the average is returned.
        If `weights=None`, `sum_of_weights` is equivalent to the number of
        elements over which the average is taken.

    Returns
    -------
    average, [sum_of_weights] : tensor_type or double
        Return the average along the specified axis. When returned is `True`,
        return a tuple with the average as the first element and the sum
        of the weights as the second element. When weights are given, the
        computation is carried out in ``np.result_type(a.dtype, wgt.dtype)``,
        additionally promoted with ``"f8"`` if `a` is of integer or boolean
        type. `sum_of_weights` is broadcast to the shape of `average` when
        their shapes differ.

    Raises
    ------
    TypeError
        When `a` and `weights` have different shapes and `axis` is None, or
        when they have different shapes and `weights` is not 1-D.
    ValueError
        When the length of 1-D `weights` is not the same as the size of `a`
        along `axis`.

    Notes
    -----
    NOTE(review): earlier revisions of this docstring promised a
    ``ZeroDivisionError`` when all weights along the axis are zero. This
    function performs no explicit zero check; it only wraps the ``truediv``
    call in ``np.errstate(divide="raise")``. Confirm what (if anything) is
    raised when the result is actually computed. See `numpy.ma.average` for a
    version robust to this type of error.

    See Also
    --------
    mean

    Examples
    --------
    >>> import mars.tensor as mt

    >>> data = list(range(1,5))
    >>> data
    [1, 2, 3, 4]
    >>> mt.average(data).execute()
    2.5
    >>> mt.average(range(1,11), weights=range(10,0,-1)).execute()
    4.0

    >>> data = mt.arange(6).reshape((3,2))
    >>> data.execute()
    array([[0, 1],
           [2, 3],
           [4, 5]])
    >>> mt.average(data, axis=1, weights=[1./4, 3./4]).execute()
    array([ 0.75,  2.75,  4.75])
    >>> mt.average(data, weights=[1./4, 3./4]).execute()
    Traceback (most recent call last):
    ...
    TypeError: Axis must be specified when shapes of a and weights differ.

    """
    # Imported at call time rather than at module top; presumably this avoids
    # a circular import with the arithmetic package -- TODO confirm.
    from ..arithmetic import multiply, truediv

    a = astensor(a)

    if weights is None:
        # Unweighted: plain mean, and the "sum of weights" is the number of
        # input elements that contribute to each output element.
        avg = a.mean(axis)
        scl = avg.dtype.type(a.size / avg.size)
    else:
        wgt = astensor(weights)

        # Integer/boolean data is promoted with "f8" so the weighted sum and
        # the division are not carried out in an integer dtype.
        if issubclass(a.dtype.type, (np.integer, np.bool_)):
            result_dtype = np.result_type(a.dtype, wgt.dtype, "f8")
        else:
            result_dtype = np.result_type(a.dtype, wgt.dtype)

        # sanity checks
        if a.shape != wgt.shape:
            if axis is None:
                raise TypeError(
                    "Axis must be specified when shapes of a and weights differ."
                )
            if wgt.ndim != 1:
                raise TypeError(
                    "1D weights expected when shapes of a and weights differ."
                )
            if wgt.shape[0] != a.shape[axis]:
                raise ValueError(
                    "Length of weights not compatible with specified axis."
                )

            # setup wgt to broadcast along axis: give it a.ndim dimensions
            # with the weights on the last one, then move that dimension to
            # the position of `axis`.
            wgt = broadcast_to(wgt, (a.ndim - 1) * (1,) + wgt.shape)
            wgt = swapaxes(wgt, -1, axis)

        scl = wgt.sum(axis=axis, dtype=result_dtype)
        with np.errstate(divide="raise"):
            avg = truediv(multiply(a, wgt, dtype=result_dtype).sum(axis), scl)

    if returned:
        # Make the returned sum of weights match the shape of the average.
        if scl.shape != avg.shape:
            scl = broadcast_to(scl, avg.shape)
        return avg, scl
    else:
        return avg