# Source code for xorbits._mars.dataframe.missing.checkna

from typing import Any

import numpy as np
import pandas as pd

from ... import dataframe as md
from ... import opcodes
from ... import tensor as mt
from ...core import OutputType
from ...serialization.serializables import BoolField
from ..operands import (

class DataFrameCheckNA(DataFrameOperand, DataFrameOperandMixin):
    """Operand producing an element-wise boolean missing-value mask.

    When ``positive`` is true the mask is computed with ``isna()``;
    otherwise it is computed with ``notna()``.
    """

    _op_type_ = opcodes.CHECK_NA

    # True -> mask flags missing values; False -> mask flags present values.
    _positive = BoolField("positive")

    def __init__(self, positive=None, sparse=None, output_types=None, **kw):
        # NOTE(review): the constructor body was absent from the visible
        # source. It is reconstructed here to forward every argument to the
        # base operand, using the ``_positive`` field declared above.
        # Confirm the ``_output_types`` keyword against DataFrameOperand.
        super().__init__(
            _positive=positive, _output_types=output_types, sparse=sparse, **kw
        )

    @property
    def positive(self) -> bool:
        """Whether the mask marks missing (``True``) or present values."""
        return self._positive

    def __call__(self, df):
        """Build the output tileable describing the boolean mask of ``df``.

        Parameters
        ----------
        df
            An instance of ``DATAFRAME_TYPE``, ``SERIES_TYPE``,
            ``INDEX_TYPE`` or ``TENSOR_TYPE``.

        Returns
        -------
        The tileable created by ``new_tileable`` with ``df``'s params and
        boolean dtype(s).

        Raises
        ------
        TypeError
            If ``df`` is none of the supported types.
        """
        if isinstance(df, DATAFRAME_TYPE):
            self.output_types = [OutputType.dataframe]
        elif isinstance(df, SERIES_TYPE):
            self.output_types = [OutputType.series]
        elif isinstance(df, (TENSOR_TYPE, INDEX_TYPE)):
            # Index inputs are reported with a tensor output type.
            self.output_types = [OutputType.tensor]
        else:
            raise TypeError(
                f"Expecting mars dataframe, series, index, or tensor, got {type(df)}"
            )

        params = df.params.copy()
        if self.output_types[0] == OutputType.dataframe:
            # Every column of the result is boolean; keep the column labels.
            params["dtypes"] = pd.Series(
                [np.dtype("bool")] * len(df.dtypes), index=df.columns_value.to_pandas()
            )
        else:
            params["dtype"] = np.dtype("bool")
        return self.new_tileable([df], **params)

    @classmethod
    def tile(cls, op: "DataFrameCheckNA"):
        """Create one output chunk per input chunk, keeping ``nsplits``.

        Each output chunk copies the params of its input chunk with the
        dtype information replaced by boolean dtype(s).
        """
        in_df = op.inputs[0]
        out_df = op.outputs[0]

        chunks = []
        for c in in_df.chunks:
            params = c.params.copy()
            if op.output_types[0] == OutputType.dataframe:
                # NOTE(review): the ``index`` argument was missing from the
                # visible source; reconstructed to mirror ``__call__``.
                params["dtypes"] = pd.Series(
                    [np.dtype("bool")] * len(c.dtypes),
                    index=c.columns_value.to_pandas(),
                )
            else:
                params["dtype"] = np.dtype("bool")
            new_op = op.copy().reset_key()
            chunks.append(new_op.new_chunk([c], **params))

        new_op = op.copy().reset_key()
        params = out_df.params.copy()
        params.update(dict(chunks=chunks, nsplits=in_df.nsplits))
        return new_op.new_tileables([in_df], **params)

    @classmethod
    def execute(cls, ctx, op: "DataFrameCheckNA"):
        """Compute the mask for one chunk and store it in ``ctx``.

        Reads the input data from ``ctx`` by the input's key and writes
        ``isna()`` or ``notna()`` of it under the output's key, depending
        on ``op.positive``.
        """
        in_data = ctx[op.inputs[0].key]
        if op.positive:
            ctx[op.outputs[0].key] = in_data.isna()
        else:
            ctx[op.outputs[0].key] = in_data.notna()

def _from_pandas(obj: Any):
    if isinstance(obj, pd.DataFrame):
        from ..datasource.dataframe import from_pandas

        return from_pandas(obj)
    elif isinstance(obj, pd.Series):
        from ..datasource.series import from_pandas

        return from_pandas(obj)
    elif isinstance(obj, np.ndarray):
        return mt.tensor(obj)
        return obj

def isna(obj):
    """
    Detect missing values.

    Return a boolean same-sized object indicating if the values are NA.
    NA values, such as None or :attr:`numpy.NaN`, get mapped to True
    values. Everything else gets mapped to False values. Characters such
    as empty strings ``''`` or :attr:`numpy.inf` are not considered NA
    values (unless you set ``pandas.options.mode.use_inf_as_na = True``).

    Parameters
    ----------
    obj
        Object to check. Tensor entities are handled with ``mt.isnan``,
        other entities with a ``DataFrameCheckNA`` operand, and any other
        object is passed to ``pandas.isna``.

    Returns
    -------
    DataFrame
        Mask of bool values for each element in DataFrame that
        indicates whether an element is an NA value.

    Raises
    ------
    NotImplementedError
        If ``obj`` is a ``MultiIndex``.

    See Also
    --------
    DataFrame.isnull : Alias of isna.
    DataFrame.notna : Boolean inverse of isna.
    DataFrame.dropna : Omit axes labels with missing values.
    isna : Top-level isna.

    Examples
    --------
    Show which entries in a DataFrame are NA.

    >>> import numpy as np
    >>> import mars.dataframe as md
    >>> df = md.DataFrame({'age': [5, 6, np.NaN],
    ...                    'born': [md.NaT, md.Timestamp('1939-05-27'),
    ...                             md.Timestamp('1940-04-25')],
    ...                    'name': ['Alfred', 'Batman', ''],
    ...                    'toy': [None, 'Batmobile', 'Joker']})
    >>> df.execute()
       age       born    name        toy
    0  5.0        NaT  Alfred       None
    1  6.0 1939-05-27  Batman  Batmobile
    2  NaN 1940-04-25              Joker

    >>> df.isna().execute()
         age   born   name    toy
    0  False   True  False   True
    1  False  False  False  False
    2   True  False  False  False

    Show which entries in a Series are NA.

    >>> ser = md.Series([5, 6, np.NaN])
    >>> ser.execute()
    0    5.0
    1    6.0
    2    NaN
    dtype: float64

    >>> ser.isna().execute()
    0    False
    1    False
    2     True
    dtype: bool
    """
    if isinstance(obj, md.MultiIndex):
        raise NotImplementedError("isna is not defined for MultiIndex")
    elif isinstance(obj, ENTITY_TYPE):
        if isinstance(obj, TENSOR_TYPE):
            return mt.isnan(obj)
        else:
            op = DataFrameCheckNA(positive=True)
            return op(obj)
    else:
        # Non-entity input: evaluate with pandas, then wrap the result.
        return _from_pandas(pd.isna(obj))
def notna(obj):
    """
    Detect existing (non-missing) values.

    Return a boolean same-sized object indicating if the values are not NA.
    Non-missing values get mapped to True. Characters such as empty
    strings ``''`` or :attr:`numpy.inf` are not considered NA values
    (unless you set ``pandas.options.mode.use_inf_as_na = True``).
    NA values, such as None or :attr:`numpy.NaN`, get mapped to False
    values.

    Parameters
    ----------
    obj
        Object to check. Tensor entities are handled with ``~mt.isnan``,
        other entities with a ``DataFrameCheckNA`` operand, and any other
        object is passed to ``pandas.notna``.

    Returns
    -------
    DataFrame
        Mask of bool values for each element in DataFrame that
        indicates whether an element is not an NA value.

    Raises
    ------
    NotImplementedError
        If ``obj`` is a ``MultiIndex``.

    See Also
    --------
    DataFrame.notnull : Alias of notna.
    DataFrame.isna : Boolean inverse of notna.
    DataFrame.dropna : Omit axes labels with missing values.
    notna : Top-level notna.

    Examples
    --------
    Show which entries in a DataFrame are not NA.

    >>> import numpy as np
    >>> import mars.dataframe as md
    >>> df = md.DataFrame({'age': [5, 6, np.NaN],
    ...                    'born': [md.NaT, md.Timestamp('1939-05-27'),
    ...                             md.Timestamp('1940-04-25')],
    ...                    'name': ['Alfred', 'Batman', ''],
    ...                    'toy': [None, 'Batmobile', 'Joker']})
    >>> df.execute()
       age       born    name        toy
    0  5.0        NaT  Alfred       None
    1  6.0 1939-05-27  Batman  Batmobile
    2  NaN 1940-04-25              Joker

    >>> df.notna().execute()
         age   born  name    toy
    0   True  False  True  False
    1   True   True  True   True
    2  False   True  True   True

    Show which entries in a Series are not NA.

    >>> ser = md.Series([5, 6, np.NaN])
    >>> ser.execute()
    0    5.0
    1    6.0
    2    NaN
    dtype: float64

    >>> ser.notna().execute()
    0     True
    1     True
    2    False
    dtype: bool
    """
    if isinstance(obj, md.MultiIndex):
        # Message text is kept unchanged so existing callers matching on it
        # keep working.
        raise NotImplementedError("isna is not defined for MultiIndex")
    elif isinstance(obj, ENTITY_TYPE):
        if isinstance(obj, TENSOR_TYPE):
            return ~mt.isnan(obj)
        else:
            op = DataFrameCheckNA(positive=False)
            return op(obj)
    else:
        # Non-entity input: evaluate with pandas, then wrap the result.
        return _from_pandas(pd.notna(obj))
# Alternative names bound to the same functions.
isnull = isna
notnull = notna