# Copyright 2022-2023 XProbe Inc.
# derived from copyright 1999-2021 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
from ... import opcodes as OperandDef
from ...serialization.serializables import AnyField
from ..utils import gen_random_seeds
from .core import TensorDistribution, TensorRandomOperandMixin, handle_array
class TensorStandardT(TensorDistribution, TensorRandomOperandMixin):
_input_fields_ = ["df"]
_op_type_ = OperandDef.RAND_STANDARD_T
_fields_ = "df", "size"
df = AnyField("df")
_func_name = "standard_t"
def __call__(self, df, chunk_size=None):
return self.new_tensor([df], None, raw_chunk_size=chunk_size)
[docs]def standard_t(random_state, df, size=None, chunk_size=None, gpu=None, dtype=None):
r"""
Draw samples from a standard Student's t distribution with `df` degrees
of freedom.
A special case of the hyperbolic distribution. As `df` gets
large, the result resembles that of the standard normal
distribution (`standard_normal`).
Parameters
----------
df : float or array_like of floats
Degrees of freedom, should be > 0.
size : int or tuple of ints, optional
Output shape. If the given shape is, e.g., ``(m, n, k)``, then
``m * n * k`` samples are drawn. If size is ``None`` (default),
a single value is returned if ``df`` is a scalar. Otherwise,
``mt.array(df).size`` samples are drawn.
chunk_size : int or tuple of int or tuple of ints, optional
Desired chunk size on each dimension
gpu : bool, optional
Allocate the tensor on GPU if True, False as default
dtype : data-type, optional
Data-type of the returned tensor.
Returns
-------
out : Tensor or scalar
Drawn samples from the parameterized standard Student's t distribution.
Notes
-----
The probability density function for the t distribution is
.. math:: P(x, df) = \frac{\Gamma(\frac{df+1}{2})}{\sqrt{\pi df}
\Gamma(\frac{df}{2})}\Bigl( 1+\frac{x^2}{df} \Bigr)^{-(df+1)/2}
The t test is based on an assumption that the data come from a
Normal distribution. The t test provides a way to test whether
the sample mean (that is the mean calculated from the data) is
a good estimate of the true mean.
The derivation of the t-distribution was first published in
1908 by William Gosset while working for the Guinness Brewery
in Dublin. Due to proprietary issues, he had to publish under
a pseudonym, and so he used the name Student.
References
----------
.. [1] Dalgaard, Peter, "Introductory Statistics With R",
Springer, 2002.
.. [2] Wikipedia, "Student's t-distribution"
http://en.wikipedia.org/wiki/Student's_t-distribution
Examples
--------
From Dalgaard page 83 [1]_, suppose the daily energy intake for 11
women in Kj is:
>>> import mars.tensor as mt
>>> intake = mt.array([5260., 5470, 5640, 6180, 6390, 6515, 6805, 7515, \
... 7515, 8230, 8770])
Does their energy intake deviate systematically from the recommended
value of 7725 kJ?
We have 10 degrees of freedom, so is the sample mean within 95% of the
recommended value?
>>> s = mt.random.standard_t(10, size=100000)
>>> mt.mean(intake).execute()
6753.636363636364
>>> intake.std(ddof=1).execute()
1142.1232221373727
Calculate the t statistic, setting the ddof parameter to the unbiased
value so the divisor in the standard deviation will be degrees of
freedom, N-1.
>>> t = (mt.mean(intake)-7725)/(intake.std(ddof=1)/mt.sqrt(len(intake)))
>>> import matplotlib.pyplot as plt
>>> h = plt.hist(s.execute(), bins=100, normed=True)
For a one-sided t-test, how far out in the distribution does the t
statistic appear?
>>> (mt.sum(s<t) / float(len(s))).execute()
0.0090699999999999999 #random
So the p-value is about 0.009, which says the null hypothesis has a
probability of about 99% of being true.
"""
if dtype is None:
dtype = np.random.RandomState().standard_t(handle_array(df), size=(0,)).dtype
size = random_state._handle_size(size)
seed = gen_random_seeds(1, random_state.to_numpy())[0]
op = TensorStandardT(size=size, seed=seed, gpu=gpu, dtype=dtype)
return op(df, chunk_size=chunk_size)