"""
The :mod:`physlearn.loss` module enables computation of the average loss
or the negative gradient in either the single-target or the multi-target
regression setting, whereby data can be represented heterogeneously with
NumPy or pandas. It includes the :class:`physlearn.LeastSquaresError`,
:class:`physlearn.LeastAbsoluteError`, :class:`physlearn.HuberLossFunction`,
:class:`physlearn.QuantileLossFunction` classes, and the helper
:func:`physlearn.loss._difference` function.
"""
# Author: Alex Wozniakowski
# License: MIT
import typing
import numpy as np
import pandas as pd
import sklearn.ensemble
import sklearn.metrics
# Annotation shared by the loss functions below: any of the pandas or NumPy
# containers in which targets and raw predictions may be supplied.
pandas_or_numpy = typing.Union[
    pd.DataFrame,
    pd.Series,
    np.ndarray,
]
def _difference(y: pandas_or_numpy, raw_predictions: pandas_or_numpy) -> pandas_or_numpy:
    """Return ``y`` minus ``raw_predictions``.

    NumPy and pandas containers may be mixed freely between the two
    arguments.

    Parameters
    ----------
    y : array-like of shape = [n_samples] or shape = [n_samples, n_targets]
        The target matrix, where each row corresponds to an example and the
        column(s) correspond to the single-target(s).

    raw_predictions : array-like of shape = [n_samples] or shape = [n_samples, n_targets]
        The estimate matrix, where each row corresponds to an example and the
        column(s) correspond to the prediction(s) for the single-target(s).

    Returns
    -------
    diff : DataFrame, Series, or ndarray
        The difference between the single-target(s) and the raw predictions.

    Examples
    --------
    >>> import pandas as pd
    >>> from sklearn.datasets import load_linnerud
    >>> from physlearn.loss import _difference
    >>> X, y = load_linnerud(return_X_y=True)
    >>> _difference(y=pd.DataFrame(y), raw_predictions=X).iloc[:2]
           0      1     2
    0  186.0 -126.0 -10.0
    1  187.0  -73.0  -8.0
    """

    if not isinstance(y, (pd.DataFrame, pd.Series)):
        # Plain operator path; a pandas ``raw_predictions`` takes over
        # through its reflected subtraction.
        return y - raw_predictions

    # For a pandas target, hand over the bare ``values`` of the predictions
    # whenever they expose that attribute, so the labels of ``y`` are kept.
    subtrahend = getattr(raw_predictions, 'values', raw_predictions)
    return y.subtract(subtrahend)
class LeastSquaresError(sklearn.ensemble._gb_losses.LeastSquaresError):
    """Least squares loss function.

    Subclass of the Scikit-learn LeastSquaresError in which the average loss
    and the pseudo-residuals are computed with routines that accept NumPy and
    pandas data representations interchangeably, for single-target as well
    as multi-target data.

    References
    ----------
    - Alex Wozniakowski, Jayne Thompson, Mile Gu, and Felix C. Binder.
      "A new formulation of gradient boosting",
      Machine Learning: Science and Technology, 2 045022 (2021).

    - Jerome Friedman. "Greedy function approximation: A gradient boosting machine,"
      Annals of Statistics, 29(5):1189–1232 (2001).
    """

    def __call__(self, y: pandas_or_numpy, raw_predictions: pandas_or_numpy,
                 sample_weight=None) -> pandas_or_numpy:
        """Compute the average squared-error loss.

        Parameters
        ----------
        y : array-like of shape = [n_samples] or shape = [n_samples, n_targets]
            The target matrix, where each row corresponds to an example and the
            column(s) correspond to the single-target(s).

        raw_predictions : array-like of shape = [n_samples] or shape = [n_samples, n_targets]
            The estimate matrix, where each row corresponds to an example and the
            column(s) correspond to the prediction(s) for the single-target(s).

        sample_weight : float, ndarray, or None, optional (default=None)
            Weights forwarded to :func:`sklearn.metrics.mean_squared_error`.
            When ``None``, the keyword is not passed at all.

        Returns
        -------
        mse
            The value returned by :func:`sklearn.metrics.mean_squared_error`.

        Examples
        --------
        >>> from sklearn.datasets import load_linnerud
        >>> from physlearn import LeastSquaresError
        >>> X, y = load_linnerud(return_X_y=True)
        >>> ls = LeastSquaresError()
        >>> ls(y=y, raw_predictions=X)
        16048.6
        """

        # Only forward the weights when the caller actually supplied them.
        weight_kwargs = {} if sample_weight is None else {'sample_weight': sample_weight}
        return sklearn.metrics.mean_squared_error(y_true=y, y_pred=raw_predictions,
                                                  **weight_kwargs)

    def negative_gradient(self, y: pandas_or_numpy,
                          raw_predictions: pandas_or_numpy) -> pandas_or_numpy:
        """Compute the pseudo-residuals, i.e., ``y`` minus ``raw_predictions``.

        Parameters
        ----------
        y : array-like of shape = [n_samples] or shape = [n_samples, n_targets]
            The target matrix, where each row corresponds to an example and the
            column(s) correspond to the single-target(s).

        raw_predictions : array-like of shape = [n_samples] or shape = [n_samples, n_targets]
            The estimate matrix, where each row corresponds to an example and the
            column(s) correspond to the prediction(s) for the single-target(s).

        Returns
        -------
        residual : DataFrame, Series, or ndarray

        Examples
        --------
        >>> import pandas as pd
        >>> from sklearn.datasets import load_linnerud
        >>> from physlearn import LeastSquaresError
        >>> X, y = load_linnerud(return_X_y=True)
        >>> ls = LeastSquaresError()
        >>> ls.negative_gradient(y=pd.DataFrame(y), raw_predictions=X).iloc[:2]
               0      1     2
        0  186.0 -126.0 -10.0
        1  187.0  -73.0  -8.0
        """

        residual = _difference(y=y, raw_predictions=raw_predictions)
        return residual
class LeastAbsoluteError(sklearn.ensemble._gb_losses.LeastAbsoluteError):
    """Absolute error loss function.

    Subclass of the Scikit-learn LeastAbsoluteError in which the average loss
    and the pseudo-residuals are computed with routines that accept NumPy and
    pandas data representations interchangeably, for single-target as well
    as multi-target data.

    References
    ----------
    - Alex Wozniakowski, Jayne Thompson, Mile Gu, and Felix C. Binder.
      "A new formulation of gradient boosting",
      Machine Learning: Science and Technology, 2 045022 (2021).

    - Jerome Friedman. "Greedy function approximation: A gradient boosting machine,"
      Annals of Statistics, 29(5):1189–1232 (2001).
    """

    def __call__(self, y: pandas_or_numpy, raw_predictions: pandas_or_numpy,
                 sample_weight=None) -> pandas_or_numpy:
        """Compute the average absolute-error loss.

        Parameters
        ----------
        y : array-like of shape = [n_samples] or shape = [n_samples, n_targets]
            The target matrix, where each row corresponds to an example and the
            column(s) correspond to the single-target(s).

        raw_predictions : array-like of shape = [n_samples] or shape = [n_samples, n_targets]
            The estimate matrix, where each row corresponds to an example and the
            column(s) correspond to the prediction(s) for the single-target(s).

        sample_weight : float, ndarray, or None, optional (default=None)
            Weights forwarded to :func:`sklearn.metrics.mean_absolute_error`.
            When ``None``, the keyword is not passed at all.

        Returns
        -------
        mae
            The value returned by :func:`sklearn.metrics.mean_absolute_error`.

        Examples
        --------
        >>> from sklearn.datasets import load_linnerud
        >>> from physlearn import LeastAbsoluteError
        >>> X, y = load_linnerud(return_X_y=True)
        >>> lad = LeastAbsoluteError()
        >>> lad(y=y, raw_predictions=X)
        104.23333333333333
        """

        # Only forward the weights when the caller actually supplied them.
        weight_kwargs = {} if sample_weight is None else {'sample_weight': sample_weight}
        return sklearn.metrics.mean_absolute_error(y_true=y, y_pred=raw_predictions,
                                                   **weight_kwargs)

    def negative_gradient(self, y: pandas_or_numpy,
                          raw_predictions: pandas_or_numpy) -> pandas_or_numpy:
        """Compute the pseudo-residuals, i.e., the sign of ``y - raw_predictions``.

        Parameters
        ----------
        y : array-like of shape = [n_samples] or shape = [n_samples, n_targets]
            The target matrix, where each row corresponds to an example and the
            column(s) correspond to the single-target(s).

        raw_predictions : array-like of shape = [n_samples] or shape = [n_samples, n_targets]
            The estimate matrix, where each row corresponds to an example and the
            column(s) correspond to the prediction(s) for the single-target(s).

        Returns
        -------
        residual : DataFrame, Series, or ndarray

        Examples
        --------
        >>> import pandas as pd
        >>> from sklearn.datasets import load_linnerud
        >>> from physlearn import LeastAbsoluteError
        >>> X, y = load_linnerud(return_X_y=True)
        >>> lad = LeastAbsoluteError()
        >>> lad.negative_gradient(y=pd.DataFrame(y), raw_predictions=X).iloc[:2]
             0    1    2
        0  1.0 -1.0 -1.0
        1  1.0 -1.0 -1.0
        """

        signed_error = _difference(y=y, raw_predictions=raw_predictions)
        if not isinstance(y, (pd.DataFrame, pd.Series)):
            return np.sign(signed_error)
        # A pandas target goes through ``apply``.
        return signed_error.apply(np.sign)
class HuberLossFunction(sklearn.ensemble._gb_losses.HuberLossFunction):
    """Huber loss function.

    The object modifies the original Scikit-learn HuberLossFunction such that
    the average loss and pseudo-residual computations support heterogeneous
    usage of NumPy and pandas data representations. Moreover, the modification
    supports both single-target and multi-target data.

    References
    ----------
    - Alex Wozniakowski, Jayne Thompson, Mile Gu, and Felix C. Binder.
      "A new formulation of gradient boosting",
      Machine Learning: Science and Technology, 2 045022 (2021).

    - Jerome Friedman. "Greedy function approximation: A gradient boosting machine,"
      Annals of Statistics, 29(5):1189–1232 (2001).
    """

    @staticmethod
    def _as_sample_weights(sample_weight, n_samples: int) -> np.ndarray:
        """Expand ``sample_weight`` into a float vector of length ``n_samples``."""

        flat = np.asarray(sample_weight, dtype=np.float64).ravel()
        # ``broadcast_to`` repeats a scalar weight for every sample and raises
        # ``ValueError`` when an array of weights does not match the sample count.
        return np.broadcast_to(flat, (n_samples,))

    def _delta(self, difference: pandas_or_numpy,
               sample_weight=None) -> np.float64:
        """Computes the delta threshold.

        This threshold determines whether to use the squared error or
        the absolute error loss function. It is the ``self.alpha * 100``
        percentile of the absolute differences, taken over all samples and
        targets together.

        Parameters
        ----------
        difference : array-like of shape = [n_samples] or shape = [n_samples, n_targets]
            The difference between the single-target(s) and the raw prediction(s).

        sample_weight : float, ndarray, or None, optional (default=None)
            Individual weights for each sample. If the weight is a float, then
            every sample will have the same weight.

        Returns
        -------
        delta : np.float64
        """

        # Work on a bare array so that the percentile routines index by
        # position and never by pandas labels.
        abs_diff = np.abs(np.atleast_1d(np.asarray(difference)))
        percentile = self.alpha * 100
        if sample_weight is None:
            return np.percentile(abs_diff, percentile)

        # Imported here so the submodule is guaranteed to be loaded; the file
        # header only imports ``sklearn.ensemble`` and ``sklearn.metrics``.
        from sklearn.utils.stats import _weighted_percentile

        weights = self._as_sample_weights(sample_weight, abs_diff.shape[0])
        # Flatten row by row and repeat each sample weight once per target, so
        # the weighted threshold is a single scalar just like the unweighted one.
        n_targets = int(np.prod(abs_diff.shape[1:]))
        return _weighted_percentile(abs_diff.ravel(), np.repeat(weights, n_targets),
                                    percentile)

    def __call__(self, y: pandas_or_numpy, raw_predictions: pandas_or_numpy,
                 sample_weight=None) -> pandas_or_numpy:
        """Computes the average loss.

        With ``d = y - raw_predictions``, each entry contributes
        ``0.5 * d**2`` when ``|d| <= delta`` and
        ``delta * (|d| - delta / 2)`` otherwise.

        Parameters
        ----------
        y : array-like of shape = [n_samples] or shape = [n_samples, n_targets]
            The target matrix, where each row corresponds to an example and the
            column(s) correspond to the single-target(s).

        raw_predictions : array-like of shape = [n_samples] or shape = [n_samples, n_targets]
            The estimate matrix, where each row corresponds to an example and the
            column(s) correspond to the prediction(s) for the single-target(s).

        sample_weight : float, ndarray, or None, optional (default=None)
            Individual weights for each sample. If the weight is a float, then
            every sample will have the same weight.

        Returns
        -------
        huber : DataFrame, Series, or ndarray
            The (weighted) mean over every entry when the difference is an
            ndarray or a Series, and the (weighted) mean of each column when
            it is a DataFrame.

        Examples
        --------
        >>> from sklearn.datasets import load_linnerud
        >>> from physlearn import HuberLossFunction
        >>> X, y = load_linnerud(return_X_y=True)
        >>> huber = HuberLossFunction()
        >>> huber(y=y, raw_predictions=X)
        7989.893
        """

        diff = _difference(y=y, raw_predictions=raw_predictions)
        delta = self._delta(difference=diff, sample_weight=sample_weight)

        abs_diff = np.abs(diff)
        is_outlier = abs_diff > delta
        linear = delta * (abs_diff - delta / 2)
        quadratic = 0.5 * diff ** 2

        # The loss is assembled in a fresh object rather than written back
        # into ``diff``: in-place assignment would truncate the float loss
        # whenever the inputs are integer typed.
        is_pandas = isinstance(diff, (pd.DataFrame, pd.Series))
        if is_pandas:
            loss = quadratic.where(~is_outlier, linear)
        else:
            loss = np.where(is_outlier, linear, quadratic)

        if sample_weight is None:
            return loss.mean()

        # Each sample weight scales the complete per-entry loss, so uniform
        # weights reproduce the unweighted result.
        weights = self._as_sample_weights(sample_weight, loss.shape[0])
        if is_pandas:
            return loss.multiply(weights, axis=0).sum() / weights.sum()
        return np.average(loss, axis=0, weights=weights).mean()

    def negative_gradient(self, y: pandas_or_numpy, raw_predictions: pandas_or_numpy,
                          sample_weight=None) -> pandas_or_numpy:
        """Computes the pseudo-residuals.

        With ``d = y - raw_predictions``, the pseudo-residual is ``d`` when
        ``|d| <= delta`` and ``delta * sign(d)`` otherwise.

        Parameters
        ----------
        y : array-like of shape = [n_samples] or shape = [n_samples, n_targets]
            The target matrix, where each row corresponds to an example and the
            column(s) correspond to the single-target(s).

        raw_predictions : array-like of shape = [n_samples] or shape = [n_samples, n_targets]
            The estimate matrix, where each row corresponds to an example and the
            column(s) correspond to the prediction(s) for the single-target(s).

        sample_weight : float, ndarray, or None, optional (default=None)
            Individual weights for each sample, used only to determine the
            delta threshold. If the weight is a float, then every sample will
            have the same weight.

        Returns
        -------
        residual : DataFrame, Series, or ndarray

        Examples
        --------
        >>> import pandas as pd
        >>> from sklearn.datasets import load_linnerud
        >>> from physlearn import HuberLossFunction
        >>> X, y = load_linnerud(return_X_y=True)
        >>> huber = HuberLossFunction()
        >>> huber.negative_gradient(y=pd.DataFrame(y), raw_predictions=X).iloc[:2]
               0      1     2
        0  186.0 -126.0 -10.0
        1  187.0  -73.0  -8.0
        """

        diff = _difference(y=y, raw_predictions=raw_predictions)
        delta = self._delta(difference=diff, sample_weight=sample_weight)

        is_outlier = np.abs(diff) > delta
        clipped = delta * np.sign(diff)
        # Select instead of assigning in place, so an integer-typed difference
        # is promoted to float rather than truncating ``delta``.
        if isinstance(diff, (pd.DataFrame, pd.Series)):
            return diff.where(~is_outlier, clipped)
        return np.where(is_outlier, clipped, diff)
class QuantileLossFunction(sklearn.ensemble._gb_losses.QuantileLossFunction):
    """Quantile loss function.

    The object modifies the original Scikit-learn QuantileLossFunction such that
    the average loss and pseudo-residual computations support heterogeneous
    usage of NumPy and pandas data representations. Moreover, the modification
    supports both single-target and multi-target data.

    References
    ----------
    - Alex Wozniakowski, Jayne Thompson, Mile Gu, and Felix C. Binder.
      "A new formulation of gradient boosting",
      Machine Learning: Science and Technology, 2 045022 (2021).

    - Jerome Friedman. "Greedy function approximation: A gradient boosting machine,"
      Annals of Statistics, 29(5):1189–1232 (2001).
    """

    @staticmethod
    def _as_sample_weights(sample_weight, n_samples: int) -> np.ndarray:
        """Expand ``sample_weight`` into a float vector of length ``n_samples``."""

        flat = np.asarray(sample_weight, dtype=np.float64).ravel()
        # ``broadcast_to`` repeats a scalar weight for every sample and raises
        # ``ValueError`` when an array of weights does not match the sample count.
        return np.broadcast_to(flat, (n_samples,))

    @staticmethod
    def _exceeds(y: pandas_or_numpy, raw_predictions: pandas_or_numpy) -> pandas_or_numpy:
        """Return the boolean mask of entries where ``y > raw_predictions``."""

        # Mirror ``_difference``: a pandas target is compared against the bare
        # values of the predictions, so differing labels cannot raise. The
        # operator form also works when ``y`` is an ndarray, which has no
        # ``gt`` method.
        if isinstance(y, (pd.DataFrame, pd.Series)):
            raw_predictions = getattr(raw_predictions, 'values', raw_predictions)
        return y > raw_predictions

    def __call__(self, y: pandas_or_numpy, raw_predictions: pandas_or_numpy,
                 sample_weight=None) -> pandas_or_numpy:
        """Computes the average loss.

        With ``d = y - raw_predictions``, each entry contributes
        ``self.alpha * d`` when ``y > raw_predictions`` and
        ``-(1 - self.alpha) * d`` otherwise.

        Parameters
        ----------
        y : array-like of shape = [n_samples] or shape = [n_samples, n_targets]
            The target matrix, where each row corresponds to an example and the
            column(s) correspond to the single-target(s).

        raw_predictions : array-like of shape = [n_samples] or shape = [n_samples, n_targets]
            The estimate matrix, where each row corresponds to an example and the
            column(s) correspond to the prediction(s) for the single-target(s).

        sample_weight : float, ndarray, or None, optional (default=None)
            Individual weights for each sample. If the weight is a float, then
            every sample will have the same weight.

        Returns
        -------
        quantile : DataFrame, Series, or ndarray
            The summed loss divided by the number of samples, or by the sum of
            the sample weights when weights are given. The sum runs over every
            entry when the difference is an ndarray or a Series, and over each
            column separately when it is a DataFrame.

        Examples
        --------
        >>> from sklearn.datasets import load_linnerud
        >>> from physlearn import QuantileLossFunction
        >>> X, y = load_linnerud(return_X_y=True)
        >>> quantile = QuantileLossFunction()
        >>> quantile(y=y, raw_predictions=X)
        174.27
        """

        diff = _difference(y=y, raw_predictions=raw_predictions)
        above = self._exceeds(y, raw_predictions)

        under_scale = self.alpha * diff
        over_scale = (self.alpha - 1) * diff  # equals -(1 - alpha) * diff
        is_pandas = isinstance(diff, (pd.DataFrame, pd.Series))
        if is_pandas:
            loss = under_scale.where(above, over_scale)
        else:
            loss = np.where(above, under_scale, over_scale)

        if sample_weight is None:
            return loss.sum() / diff.shape[0]

        # Weights are applied per sample (row), so uniform weights reproduce
        # the unweighted result for single-target and multi-target data alike.
        weights = self._as_sample_weights(sample_weight, diff.shape[0])
        if is_pandas:
            return loss.multiply(weights, axis=0).sum() / weights.sum()
        return np.average(loss, axis=0, weights=weights).sum()

    def negative_gradient(self, y: pandas_or_numpy,
                          raw_predictions: pandas_or_numpy) -> pandas_or_numpy:
        """Computes the pseudo-residuals.

        The pseudo-residual is ``self.alpha`` where ``y > raw_predictions``
        and ``-(1 - self.alpha)`` elsewhere.

        Parameters
        ----------
        y : array-like of shape = [n_samples] or shape = [n_samples, n_targets]
            The target matrix, where each row corresponds to an example and the
            column(s) correspond to the single-target(s).

        raw_predictions : array-like of shape = [n_samples] or shape = [n_samples, n_targets]
            The estimate matrix, where each row corresponds to an example and the
            column(s) correspond to the prediction(s) for the single-target(s).

        Returns
        -------
        residual : DataFrame, Series, or ndarray

        Examples
        --------
        >>> import pandas as pd
        >>> from sklearn.datasets import load_linnerud
        >>> from physlearn import QuantileLossFunction
        >>> X, y = load_linnerud(return_X_y=True)
        >>> quantile = QuantileLossFunction()
        >>> quantile.negative_gradient(y=pd.DataFrame(y), raw_predictions=X).iloc[:2]
             0    1    2
        0  0.9 -0.1 -0.1
        1  0.9 -0.1 -0.1
        """

        above = self._exceeds(y, raw_predictions)
        return self.alpha * above - (1 - self.alpha) * ~above
# Registry that maps each loss abbreviation to the class implementing it.
LOSS_FUNCTIONS = {
    'ls': LeastSquaresError,
    'lad': LeastAbsoluteError,
    'huber': HuberLossFunction,
    'quantile': QuantileLossFunction,
}