Source code for skfolio.distribution.univariate._johnson_su

"""Johnson SU Estimator."""

# Copyright (c) 2025
# Authors: The skfolio developers
# Credits: Matteo Manzi, Vincent Maladière, Carlo Nicolini
# SPDX-License-Identifier: BSD-3-Clause

import numpy.typing as npt
import scipy.stats as st

from skfolio.distribution.univariate._base import BaseUnivariateDist


class JohnsonSU(BaseUnivariateDist):
    r"""Univariate Johnson SU distribution estimator.

    Fits a Johnson SU distribution to a single column of data. Because the
    Johnson SU family can represent both skewness and fat tails, it is a
    suitable choice for modeling financial time series.

    In its "standardized" form, the probability density function is:

    .. math::

        f(x, a, b) = \frac{b}{\sqrt{x^2 + 1}}
                     \phi(a + b \log(x + \sqrt{x^2 + 1}))

    where :math:`x`, :math:`a`, and :math:`b` are real scalars with
    :math:`b > 0`, and :math:`\phi` is the pdf of the normal distribution.

    The loc and scale parameters shift and/or scale the distribution:
    `pdf(x, a, b, loc, scale)` is equivalent to `pdf(y, a, b) / scale` with
    `y = (x - loc) / scale`.

    For more information, you can refer to the
    `scipy documentation <https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.johnsonsu.html#scipy.stats.johnsonsu>`_

    Parameters
    ----------
    loc : float, optional
        If provided, the location parameter is held fixed at this value
        during fitting. Otherwise, it is estimated from the data.
        `loc` and `scale` cannot both be provided: `fit` raises a
        `ValueError` in that case.

    scale : float, optional
        If provided, the scale parameter is held fixed at this value
        during fitting. Otherwise, it is estimated from the data.
        `loc` and `scale` cannot both be provided: `fit` raises a
        `ValueError` in that case.

    random_state : int, RandomState instance or None, default=None
        Seed or random state to ensure reproducibility.

    Attributes
    ----------
    a_ : float
        The fitted first shape parameter of the Johnson SU distribution.

    b_ : float
        The fitted second shape parameter of the Johnson SU distribution.

    loc_ : float
        The fitted location parameter.

    scale_ : float
        The fitted scale parameter.

    Examples
    --------
    >>> from skfolio.datasets import load_sp500_index
    >>> from skfolio.preprocessing import prices_to_returns
    >>> from skfolio.distribution.univariate import JohnsonSU
    >>>
    >>> # Load historical prices and convert them to returns
    >>> prices = load_sp500_index()
    >>> X = prices_to_returns(prices)
    >>>
    >>> # Initialize the estimator.
    >>> model = JohnsonSU()
    >>>
    >>> # Fit the model to the data.
    >>> model.fit(X)
    >>>
    >>> # Display the fitted parameters.
    >>> print(model.fitted_repr)
    JohnsonSU(0.0742, 1.08, 0.00115, 0.00774)
    >>>
    >>> # Compute the log-likelihood, total log-likelihood, CDF, PPF, AIC, and BIC
    >>> log_likelihood = model.score_samples(X)
    >>> score = model.score(X)
    >>> cdf = model.cdf(X)
    >>> ppf = model.ppf(X)
    >>> aic = model.aic(X)
    >>> bic = model.bic(X)
    >>>
    >>> # Generate 5 new samples from the fitted distribution.
    >>> samples = model.sample(n_samples=5)
    >>>
    >>> # Plot the estimated probability density function (PDF).
    >>> fig = model.plot_pdf()
    >>> fig.show()
    """

    a_: float
    b_: float
    loc_: float
    scale_: float
    _scipy_model = st.johnsonsu

    def __init__(
        self,
        loc: float | None = None,
        scale: float | None = None,
        random_state: int | None = None,
    ):
        super().__init__(random_state=random_state)
        self.loc = loc
        self.scale = scale

    @property
    def _scipy_params(self) -> dict[str, float]:
        """Fitted parameters keyed by the SciPy distribution's argument names."""
        # Each SciPy argument name maps to the fitted attribute carrying the
        # same name plus a trailing underscore.
        return {name: getattr(self, f"{name}_") for name in ("a", "b", "loc", "scale")}

    def fit(self, X: npt.ArrayLike, y=None) -> "JohnsonSU":
        """Estimate the Johnson SU parameters from the data.

        Parameters
        ----------
        X : array-like of shape (n_observations, 1)
            The input data. X must contain a single column.

        y : None
            Ignored. Provided for compatibility with scikit-learn's API.

        Returns
        -------
        self : JohnsonSU
            Returns the instance itself.

        Raises
        ------
        ValueError
            If both `loc` and `scale` were provided at construction.
        """
        X = self._validate_X(X, reset=True)

        # Keep only the parameters the user asked to hold fixed, keyed by the
        # keyword names (`floc`, `fscale`) passed to the SciPy `fit` call.
        requested = {"floc": self.loc, "fscale": self.scale}
        constraints = {
            keyword: value for keyword, value in requested.items() if value is not None
        }

        # Fixing loc and scale at the same time is not allowed.
        if len(constraints) == len(requested):
            raise ValueError("Either loc or scale must be None to be fitted")

        estimates = self._scipy_model.fit(X, **constraints)
        self.a_, self.b_, self.loc_, self.scale_ = estimates
        return self