"""Bivariate Student's t Copula Estimation."""
# Copyright (c) 2025
# Author: Hugo Delatte <delatte.hugo@gmail.com>
# Credits: Matteo Manzi, Vincent Maladière, Carlo Nicolini
# SPDX-License-Identifier: BSD-3-Clause
import numpy as np
import numpy.typing as npt
import scipy.optimize as so
import scipy.special as sp
import scipy.stats as st
import sklearn.utils.validation as skv
from skfolio.distribution.copula._base import _RHO_BOUNDS, BaseBivariateCopula
from skfolio.distribution.copula._utils import _apply_margin_swap
# A Student's t copula with dof below 2.0 is so heavy-tailed that even the mean
# (and many higher moments) of the distribution do not exist, making it impractical.
# Conversely, above 50 dof the t copula becomes nearly indistinguishable from a
# Gaussian copula, so we restrict the dof to the interval [2, 50] for improved
# stability and robustness.
_DOF_BOUNDS = (2.0, 50.0)
class StudentTCopula(BaseBivariateCopula):
r"""Bivariate Student's t Copula Estimation.
The bivariate Student's t copula is defined as:
.. math::
C_{\nu, \rho}(u, v) = T_{\nu, \rho} \Bigl(t_{\nu}^{-1}(u),\;t_{\nu}^{-1}(v)\Bigr)
where:
- :math:`\nu > 0` is the degrees of freedom.
- :math:`\rho \in (-1, 1)` is the correlation coefficient.
- :math:`T_{\nu, \rho}(x, y)` is the CDF of the bivariate t-distribution.
- :math:`t_{\nu}^{-1}(p)` is the quantile function (inverse CDF) of the
univariate t-distribution.
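The copula density used in `score_samples` follows as the ratio of the joint
bivariate t density to the product of its univariate t marginals, evaluated at
the t quantiles of the inputs:
.. math::
c_{\nu, \rho}(u, v) = \frac{f_{\nu, \rho}\bigl(t_{\nu}^{-1}(u),\, t_{\nu}^{-1}(v)\bigr)}{f_{\nu}\bigl(t_{\nu}^{-1}(u)\bigr)\, f_{\nu}\bigl(t_{\nu}^{-1}(v)\bigr)}
where :math:`f_{\nu, \rho}` is the bivariate t density with correlation
:math:`\rho` and :math:`f_{\nu}` the univariate t density.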
Student's t copula with degrees of freedom (dof) less than 2.0 is extremely
heavy-tailed, to the extent that even the mean (and many moments) do not exist,
rendering it impractical. Conversely, for dof above 50 the t copula behaves
similarly to a Gaussian copula. Thus, for improved stability and robustness,
the dof is limited to the interval [2, 50].
.. note::
Rotations are not needed for elliptical copulas (e.g., Gaussian or Student-t)
because their correlation parameter :math:`\rho \in (-1, 1)` naturally covers
both positive and negative dependence, and they exhibit symmetric tail behavior.
Parameters
----------
itau : bool, default=True
If True, :math:`\rho` is estimated using the Kendall's tau inversion method;
otherwise, we use the MLE (Maximum Likelihood Estimation) method. The MLE is
slower but more accurate.
kendall_tau : float, optional
If `itau` is True and `kendall_tau` is provided, this value is used directly;
otherwise, Kendall's tau is computed from the data.
tolerance : float, default=1e-4
Convergence tolerance for the MLE optimization.
random_state : int, RandomState instance or None, default=None
Seed or random state to ensure reproducibility.
Attributes
----------
rho_ : float
Fitted correlation coefficient (:math:`\rho`) in (-1, 1).
dof_ : float
Fitted degrees of freedom (:math:`\nu`) in [2, 50].
Examples
--------
>>> from skfolio.datasets import load_sp500_dataset
>>> from skfolio.preprocessing import prices_to_returns
>>> from skfolio.distribution import StudentTCopula, compute_pseudo_observations
>>>
>>> # Load historical prices and convert them to returns
>>> prices = load_sp500_dataset()
>>> X = prices_to_returns(prices)
>>> X = X[["AAPL", "JPM"]]
>>>
>>> # Convert returns to pseudo observation in the interval [0,1]
>>> X = compute_pseudo_observations(X)
>>>
>>> # Initialize the Copula estimator
>>> model = StudentTCopula()
>>>
>>> # Fit the model to the data.
>>> model.fit(X)
StudentTCopula()
>>>
>>> # Display the fitted parameter and tail dependence coefficients
>>> print(model.fitted_repr)
StudentTCopula(rho=0.327, dof=5.14)
>>> print(model.lower_tail_dependence)
0.1270
>>> print(model.upper_tail_dependence)
0.1270
>>>
>>> # Compute the log-likelihood, total log-likelihood, CDF, Partial Derivative,
>>> # Inverse Partial Derivative, AIC, and BIC
>>> log_likelihood = model.score_samples(X)
>>> score = model.score(X)
>>> cdf = model.cdf(X)
>>> p = model.partial_derivative(X)
>>> u = model.inverse_partial_derivative(X)
>>> aic = model.aic(X)
>>> bic = model.bic(X)
>>>
>>> # Generate 5 new samples
>>> samples = model.sample(n_samples=5)
>>>
>>> # Plot the tail concentration function.
>>> fig = model.plot_tail_concentration()
>>> fig.show()
>>>
>>> # Plot a 2D contour of the estimated PDF.
>>> fig = model.plot_pdf_2d()
>>> fig.show()
>>>
>>> # Plot a 3D surface of the estimated PDF.
>>> fig = model.plot_pdf_3d()
>>> fig.show()
References
----------
.. [1] "An Introduction to Copulas (2nd ed.)",
Nelsen (2006)
.. [2] "Multivariate Models and Dependence Concepts",
Joe, Chapman & Hall (1997)
.. [3] "Quantitative Risk Management: Concepts, Techniques and Tools",
McNeil, Frey & Embrechts (2005)
.. [4] "The t Copula and Related Copulas",
Demarta & McNeil (2005)
.. [5] "Copula Methods in Finance",
Cherubini, Luciano & Vecchiato (2004)
"""
rho_: float
dof_: float
_n_params = 2
def __init__(
self,
itau: bool = True,
kendall_tau: float | None = None,
tolerance: float = 1e-4,
random_state: int | None = None,
):
super().__init__(random_state=random_state)
self.itau = itau
self.kendall_tau = kendall_tau
self.tolerance = tolerance
def fit(self, X: npt.ArrayLike, y=None) -> "StudentTCopula":
r"""Fit the Bivariate Student's t Copula.
If `itau` is True, it uses a Kendall-based two-step method:
- Estimates the correlation parameter (:math:`\rho`) from Kendall's
tau inversion.
- Optimizes the degrees of freedom (:math:`\nu`) by maximizing the
log-likelihood.
Otherwise, it uses the full MLE method: optimizes both :math:`\rho` and
:math:`\nu` by maximizing the log-likelihood.
Parameters
----------
X : array-like of shape (n_observations, 2)
An array of bivariate inputs `(u, v)` where each row represents a
bivariate observation. Both `u` and `v` must be in the interval [0, 1],
having been transformed to uniform marginals.
y : None
Ignored. Provided for compatibility with scikit-learn's API.
Returns
-------
self : StudentTCopula
Returns the instance itself.
"""
X = self._validate_X(X, reset=True)
if self.kendall_tau is None:
kendall_tau = st.kendalltau(X[:, 0], X[:, 1]).statistic
else:
kendall_tau = self.kendall_tau
# Either used directly or for initial guess
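# For elliptical copulas, Kendall's tau satisfies tau = (2 / pi) * arcsin(rho),
# so inverting gives rho = sin(pi * tau / 2).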
rho_from_tau = np.clip(
np.sin((np.pi * kendall_tau) / 2.0),
a_min=_RHO_BOUNDS[0],
a_max=_RHO_BOUNDS[1],
)
if self.itau:
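# Two-step (itau) estimation: hold rho fixed at the tau-inversion estimate
# and maximize the profile log-likelihood over the dof alone.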
res = so.minimize_scalar(
_neg_log_likelihood,
args=(
rho_from_tau,
X,
),
bounds=_DOF_BOUNDS,
method="bounded",
options={"xatol": self.tolerance},
)
if not res.success:
raise RuntimeError(f"Optimization failed: {res.message}")
self.dof_ = res.x
self.rho_ = rho_from_tau
else:
# We'll use L-BFGS-B for the optimization because:
# 1) The bivariate Student-t copula's negative log-likelihood is smooth,
# making gradient-based methods more efficient than derivative-free
# methods.
# 2) L-BFGS-B directly supports simple box bounds (e.g., -1 < rho < 1,
# 2 <= dof <= 50).
# 3) It's typically faster and more stable for small-dimensional problems
# than more general constraint solvers (like trust-constr or SLSQP).
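# The parameter vector is x = [dof, rho]; we start from a moderate dof of 3.0
# and the Kendall tau-inversion estimate of rho.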
result = so.minimize(
fun=lambda x: _neg_log_likelihood(dof=x[0], rho=x[1], X=X),
x0=np.array([3.0, rho_from_tau]),
bounds=(_DOF_BOUNDS, _RHO_BOUNDS),
method="L-BFGS-B",
tol=self.tolerance,
)
if not result.success:
raise RuntimeError(f"Optimization failed: {result.message}")
self.dof_, self.rho_ = result.x
return self
def cdf(self, X: npt.ArrayLike) -> np.ndarray:
"""Compute the CDF of the bivariate Student-t copula.
Parameters
----------
X : array-like of shape (n_observations, 2)
An array of bivariate inputs `(u, v)` where each row represents a
bivariate observation. Both `u` and `v` must be in the interval `[0, 1]`,
having been transformed to uniform marginals.
Returns
-------
cdf : ndarray of shape (n_observations,)
CDF values for each observation in X.
"""
skv.check_is_fitted(self)
X = self._validate_X(X, reset=False)
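# Map (u, v) to Student's t quantiles via stdtrit (inverse CDF), then evaluate
# the bivariate t CDF with correlation rho and dof degrees of freedom.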
cdf = st.multivariate_t.cdf(
x=sp.stdtrit(self.dof_, X),
loc=np.array([0, 0]),
shape=np.array([[1, self.rho_], [self.rho_, 1]]),
df=self.dof_,
)
return cdf
def partial_derivative(
self, X: npt.ArrayLike, first_margin: bool = False
) -> np.ndarray:
r"""Compute the h-function (partial derivative) for the bivariate Student's t
copula.
The h-function with respect to the second margin represents the conditional
distribution function of :math:`u` given :math:`v`:
.. math:: \begin{aligned}
h(u \mid v) &= \frac{\partial C(u,v)}{\partial v} \\
&= t_{\nu+1}\!\left(\frac{t_\nu^{-1}(u) - \rho\,t_\nu^{-1}(v)}
{\sqrt{\frac{(1-\rho^2)\left(\nu + \left(t_\nu^{-1}(v)\right)^2\right)}{\nu+1}}}\right).
\end{aligned}
where:
- :math:`\nu > 0` is the degrees of freedom.
- :math:`\rho \in (-1, 1)` is the correlation coefficient.
- :math:`t_{\nu}^{-1}(p)` is the quantile function (inverse CDF) of the
univariate t-distribution.
Parameters
----------
X : array-like of shape (n_observations, 2)
An array of bivariate inputs `(u, v)` where each row represents a
bivariate observation. Both `u` and `v` must be in the interval `[0, 1]`,
having been transformed to uniform marginals.
first_margin : bool, default=False
If True, compute the partial derivative with respect to the first
margin `u`; otherwise, compute the partial derivative with respect to the
second margin `v`.
Returns
-------
p : ndarray of shape (n_observations,)
h-function values :math:`h(u \mid v) \;=\; p` for each observation in X.
"""
skv.check_is_fitted(self)
X = self._validate_X(X, reset=False)
X = _apply_margin_swap(X, first_margin=first_margin)
# Compute the inverse CDF (percent point function) using stdtrit for better
# performance
u_inv, v_inv = sp.stdtrit(self.dof_, X).T
# Standardized argument: z = (x - rho * y) / sqrt((1 - rho^2) * (nu + y^2) / (nu + 1)),
# with x = t_nu^{-1}(u) and y = t_nu^{-1}(v)
z = (u_inv - self.rho_ * v_inv) / (
np.sqrt((1 - self.rho_**2) * (self.dof_ + v_inv**2) / (self.dof_ + 1))
)
# Student's t CDF with (nu+1) degrees of freedom using stdtr for better
# performance
p = sp.stdtr(self.dof_ + 1, z)
return p
def inverse_partial_derivative(
self, X: npt.ArrayLike, first_margin: bool = False
) -> np.ndarray:
r"""Compute the inverse of the bivariate copula's partial derivative, commonly
known as the inverse h-function [1]_.
Let :math:`C(u, v)` be a bivariate copula. The h-function with respect to the
second margin is defined by
.. math::
h(u \mid v) \;=\; \frac{\partial\,C(u, v)}{\partial\,v},
which is the conditional distribution of :math:`U` given :math:`V = v`.
The **inverse h-function**, denoted :math:`h^{-1}(p \mid v)`, is the unique
value :math:`u \in [0,1]` such that
.. math::
h(u \mid v) \;=\; p,
\quad \text{where } p \in [0,1].
In practical terms, given :math:`(p, v)` in :math:`[0, 1]^2`,
:math:`h^{-1}(p \mid v)` solves for the :math:`u` satisfying
:math:`p = \partial C(u, v)/\partial v`.
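For the Student's t copula, this inverse has the closed form implemented below:
.. math::
h^{-1}(p \mid v) = t_{\nu}\!\left( t_{\nu+1}^{-1}(p)\,\sqrt{\frac{\bigl(\nu + \left(t_{\nu}^{-1}(v)\right)^2\bigr)\bigl(1-\rho^2\bigr)}{\nu+1}} \;+\; \rho\, t_{\nu}^{-1}(v) \right).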
Parameters
----------
X : array-like of shape (n_observations, 2)
An array of bivariate inputs `(p, v)`, each in the interval `[0, 1]`.
- The first column `p` corresponds to the value of the h-function.
- The second column `v` is the conditioning variable.
first_margin : bool, default=False
If True, compute the inverse partial derivative with respect to the first
margin `u`; otherwise, compute the inverse partial derivative with respect
to the second margin `v`.
Returns
-------
u : ndarray of shape (n_observations,)
A 1D-array of length `n_observations`, where each element is the computed
:math:`u = h^{-1}(p \mid v)` for the corresponding pair in `X`.
References
----------
.. [1] "Multivariate Models and Dependence Concepts", Joe, H. (1997)
.. [2] "An Introduction to Copulas", Nelsen, R. B. (2006)
"""
skv.check_is_fitted(self)
X = self._validate_X(X, reset=False)
X = _apply_margin_swap(X, first_margin=first_margin)
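# Closed-form inversion of the h-function:
# u = t_nu( t_{nu+1}^{-1}(p) * sqrt((nu + t_nu^{-1}(v)^2) * (1 - rho^2) / (nu + 1))
#           + rho * t_nu^{-1}(v) )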
p_inv = sp.stdtrit(self.dof_ + 1, X[:, 0])
v_inv = sp.stdtrit(self.dof_, X[:, 1])
u_inv = (
p_inv
* np.sqrt((self.dof_ + v_inv**2) / (self.dof_ + 1) * (1 - self.rho_**2))
+ self.rho_ * v_inv
)
u = sp.stdtr(self.dof_, u_inv)
return u
def score_samples(self, X: npt.ArrayLike) -> np.ndarray:
"""Compute the log-likelihood of each sample (log-pdf) under the model.
Parameters
----------
X : array-like of shape (n_observations, 2)
An array of bivariate inputs `(u, v)` where each row represents a
bivariate observation. Both `u` and `v` must be in the interval `[0, 1]`,
having been transformed to uniform marginals.
Returns
-------
density : ndarray of shape (n_observations,)
The log-likelihood of each sample under the fitted copula.
"""
skv.check_is_fitted(self)
X = self._validate_X(X, reset=False)
log_density = _sample_scores(X=X, rho=self.rho_, dof=self.dof_)
return log_density
@property
def lower_tail_dependence(self) -> float:
"""Theoretical lower tail dependence coefficient."""
skv.check_is_fitted(self)
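# For the Student's t copula, lambda_lower = lambda_upper
# = 2 * t_{nu+1}(-sqrt((nu + 1) * (1 - rho) / (1 + rho))),
# see Demarta & McNeil (2005), reference [4] in the class docstring.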
arg = -np.sqrt((self.dof_ + 1) * (1 - self.rho_) / (1 + self.rho_))
return 2 * sp.stdtr(self.dof_ + 1, arg)
@property
def upper_tail_dependence(self) -> float:
"""Theoretical upper tail dependence coefficient."""
return self.lower_tail_dependence
@property
def fitted_repr(self) -> str:
"""String representation of the fitted copula."""
return f"{self.__class__.__name__}(rho={self.rho_:0.3f}, dof={self.dof_:0.2f})"
def _neg_log_likelihood(dof: float, rho: float, X: np.ndarray) -> float:
"""Negative log-likelihood function for optimization.
Parameters
----------
X : array-like of shape (n_observations, 2)
An array of bivariate inputs `(u, v)` where each row represents a
bivariate observation. Both `u` and `v` must be in the interval `[0, 1]`,
having been transformed to uniform marginals.
rho : float
Correlation copula parameter.
dof : float
Degree of freedom copula parameter.
Returns
-------
value : float
The negative log-likelihood value.
"""
return -np.sum(_sample_scores(X=X, rho=rho, dof=dof))
def _sample_scores(X: np.ndarray, rho: float, dof: float) -> np.ndarray:
"""Compute the log-likelihood of each sample (log-pdf) under the bivariate
Gaussian copula model.
Parameters
----------
X : array-like of shape (n_observations, 2)
An array of bivariate inputs `(u, v)` where each row represents a
bivariate observation. Both `u` and `v` must be in the interval `[0, 1]`,
having been transformed to uniform marginals.
rho : float
Correlation parameter of the Student's t copula.
dof : float
Degrees of freedom of the Student's t copula.
Returns
-------
density : ndarray of shape (n_observations,)
The log-likelihood of each sample under the fitted copula.
Raises
------
ValueError
If `rho` is not in [-1, 1] or `dof` is not in [1, 50].
"""
if not (-1.0 <= rho <= 1.0):
raise ValueError("rho must be between -1 and 1.")
if not 1.0 <= dof <= 50:
raise ValueError("Degrees of freedom `dof` must be between 1 and 50.")
# Inverse CDF (ppf) using stdtrit for better performance
x, y = sp.stdtrit(dof, X).T
a = 1.0 - rho**2
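# The copula log-density is the joint bivariate t log-density minus the two
# marginal t log-densities; the marginal terms therefore enter with a positive
# sign (the log1p terms below), while the joint term enters negatively.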
log_density = (
sp.gammaln((dof + 2.0) / 2.0)
+ sp.gammaln(dof / 2.0)
- 2.0 * sp.gammaln((dof + 1.0) / 2.0)
- np.log(a) / 2
+ (dof + 1.0) / 2.0 * (np.log1p(x**2 / dof) + np.log1p(y**2 / dof))
- (dof + 2.0) / 2.0 * np.log1p((x**2 - 2 * rho * x * y + y**2) / a / dof)
)
return log_density