# Source code for xorbits._mars.learn.contrib.xgboost.classifier

# Copyright 2022-2023 XProbe Inc.
# derived from copyright 1999-2021 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from ..utils import make_import_error_func
from .core import XGBScikitLearnBase, xgboost

if not xgboost:
    XGBClassifier = make_import_error_func("xgboost")
else:
    from xgboost.sklearn import XGBClassifierBase

    from .... import tensor as mt
    from .core import wrap_evaluation_matrices
    from .predict import predict
    from .train import train

    class XGBClassifier(XGBScikitLearnBase, XGBClassifierBase):
        """
        Implementation of the scikit-learn API for XGBoost classification.
        """

        def fit(
            self,
            X,
            y,
            sample_weight=None,
            base_margin=None,
            eval_set=None,
            sample_weight_eval_set=None,
            base_margin_eval_set=None,
            **kw,
        ):
            """Fit a gradient-boosted classifier on (possibly distributed) data.

            Parameters mirror ``xgboost.XGBClassifier.fit``; ``X``/``y`` and the
            evaluation sets are wrapped into DMatrix-like objects by
            ``wrap_evaluation_matrices``. Extra keywords ``session`` and
            ``run_kwargs`` control execution; any other keyword raises.

            Returns ``self`` to allow scikit-learn style chaining.
            """
            session = kw.pop("session", None)
            run_kwargs = kw.pop("run_kwargs", dict())
            # Anything left in **kw at this point is an unknown argument.
            if kw:
                raise TypeError(
                    f"fit got an unexpected keyword argument '{next(iter(kw))}'"
                )

            dtrain, evals = wrap_evaluation_matrices(
                None,
                X,
                y,
                sample_weight,
                base_margin,
                eval_set,
                sample_weight_eval_set,
                base_margin_eval_set,
            )

            params = self.get_xgb_params()
            # Materialize the distinct labels eagerly; the class count decides
            # between the binary and multi-class objectives below.
            self.classes_ = mt.unique(y, aggregate_size=1).to_numpy(
                session=session, **run_kwargs
            )
            self.n_classes_ = len(self.classes_)
            if self.n_classes_ > 2:
                params["objective"] = "multi:softprob"
                params["num_class"] = self.n_classes_
            else:
                params["objective"] = "binary:logistic"

            self.evals_result_ = dict()
            booster = train(
                params,
                dtrain,
                num_boost_round=self.get_num_boosting_rounds(),
                evals=evals,
                evals_result=self.evals_result_,
                session=session,
                run_kwargs=run_kwargs,
            )
            self._Booster = booster
            return self

        def predict(self, data, **kw):
            """Predict class labels for ``data``.

            Probabilities are computed lazily, then reduced to labels:
            argmax over classes for multi-class output, a 0.5 threshold for
            binary output. When ``run`` is true (the default) the resulting
            tensor is executed before being returned.
            """
            session = kw.pop("session", None)
            run_kwargs = kw.pop("run_kwargs", dict())
            run = kw.pop("run", True)

            # Defer execution (run=False) so the label reduction fuses into
            # the same computation graph as the probability prediction.
            probabilities = predict(self.get_booster(), data, run=False, **kw)
            labels = (
                mt.argmax(probabilities, axis=1)
                if probabilities.ndim > 1
                else (probabilities > 0.5).astype(mt.int64)
            )
            if run:
                labels.execute(session=session, **run_kwargs)
            return labels

        def predict_proba(self, data, ntree_limit=None, **kw):
            """Return class probability predictions for ``data``.

            ``ntree_limit`` is accepted for API compatibility only and must
            be ``None``; passing a value raises ``NotImplementedError``.
            """
            if ntree_limit is not None:
                raise NotImplementedError("ntree_limit is not currently supported")
            return predict(self.get_booster(), data, **kw)