"""Bivariate Student's t Copula Estimation."""
# Copyright (c) 2025
# Author: Hugo Delatte <delatte.hugo@gmail.com>
# Credits: Matteo Manzi, Vincent Maladière, Carlo Nicolini
# SPDX-License-Identifier: BSD-3-Clause
import numpy as np
import numpy.typing as npt
import scipy.optimize as so
import scipy.special as sp
import scipy.stats as st
import sklearn.utils.validation as skv
from skfolio.distribution.copula._base import _RHO_BOUNDS, BaseBivariateCopula
from skfolio.distribution.copula._utils import _apply_margin_swap
# A Student's t copula with dof below 2.0 is so heavy-tailed that even the mean
# (and many higher moments) of the distribution do not exist, making it impractical.
# Conversely, above 50 dof the t copula becomes nearly indistinguishable from a
# Gaussian copula, so we restrict the dof to the interval [2, 50] for improved
# stability and robustness.
_DOF_BOUNDS = (2.0, 50.0)
class StudentTCopula(BaseBivariateCopula):
r"""Bivariate Student's t Copula Estimation.
The bivariate Student's t copula is defined as:
.. math::
C_{\nu, \rho}(u, v) = T_{\nu, \rho} \Bigl(t_{\nu}^{-1}(u),\;t_{\nu}^{-1}(v)\Bigr)
where:
- :math:`\nu > 0` is the degrees of freedom.
- :math:`\rho \in (-1, 1)` is the correlation coefficient.
- :math:`T_{\nu, \rho}(x, y)` is the CDF of the bivariate t-distribution.
- :math:`t_{\nu}^{-1}(p)` is the quantile function (inverse CDF) of the
univariate t-distribution.
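The copula density used in `score_samples` follows as the ratio of the joint
bivariate t density to the product of its univariate t marginals, evaluated at
the t quantiles of the inputs:
.. math::
c_{\nu, \rho}(u, v) = \frac{f_{\nu, \rho}\bigl(t_{\nu}^{-1}(u),\, t_{\nu}^{-1}(v)\bigr)}{f_{\nu}\bigl(t_{\nu}^{-1}(u)\bigr)\, f_{\nu}\bigl(t_{\nu}^{-1}(v)\bigr)}
where :math:`f_{\nu, \rho}` is the bivariate t density with correlation
:math:`\rho` and :math:`f_{\nu}` the univariate t density.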
Student's t copula with degrees of freedom (dof) less than 2.0 is extremely
heavy-tailed, to the extent that even the mean (and many moments) do not exist,
rendering it impractical. Conversely, for dof above 50 the t copula behaves
similarly to a Gaussian copula. Thus, for improved stability and robustness,
the dof is limited to the interval [2, 50].
.. note::
Rotations are not needed for elliptical copulas (e.g., Gaussian or Student-t)
because their correlation parameter :math:`\rho \in (-1, 1)` naturally covers
both positive and negative dependence, and they exhibit symmetric tail behavior.
Parameters
----------
itau : bool, default=True
If True, :math:`\rho` is estimated using the Kendall's tau inversion method;
otherwise, we use the MLE (Maximum Likelihood Estimation) method. The MLE is
slower but more accurate.
kendall_tau : float, optional
If `itau` is True and `kendall_tau` is provided, this value is used directly;
otherwise, Kendall's tau is computed from the data.
tolerance : float, default=1e-4
Convergence tolerance for the MLE optimization.
random_state : int, RandomState instance or None, default=None
Seed or random state to ensure reproducibility.
Attributes
----------
rho_ : float
Fitted correlation coefficient (:math:`\rho`) in (-1, 1).
dof_ : float
Fitted degrees of freedom (:math:`\nu`) in [2, 50].
Examples
--------
>>> from skfolio.datasets import load_sp500_dataset
>>> from skfolio.preprocessing import prices_to_returns
>>> from skfolio.distribution import StudentTCopula, compute_pseudo_observations
>>>
>>> # Load historical prices and convert them to returns
>>> prices = load_sp500_dataset()
>>> X = prices_to_returns(prices)
>>> X = X[["AAPL", "JPM"]]
>>>
>>> # Convert returns to pseudo observation in the interval [0,1]
>>> X = compute_pseudo_observations(X)
>>>
>>> # Initialize the Copula estimator
>>> model = StudentTCopula()
>>>
>>> # Fit the model to the data.
>>> model.fit(X)
StudentTCopula()
>>>
>>> # Display the fitted parameter and tail dependence coefficients
>>> print(model.fitted_repr)
StudentTCopula(rho=0.327, dof=5.14)
>>> print(model.lower_tail_dependence)
0.1270
>>> print(model.upper_tail_dependence)
0.1270
>>>
>>> # Compute the log-likelihood, total log-likelihood, CDF, Partial Derivative,
>>> # Inverse Partial Derivative, AIC, and BIC
>>> log_likelihood = model.score_samples(X)
>>> score = model.score(X)
>>> cdf = model.cdf(X)
>>> p = model.partial_derivative(X)
>>> u = model.inverse_partial_derivative(X)
>>> aic = model.aic(X)
>>> bic = model.bic(X)
>>>
>>> # Generate 5 new samples
>>> samples = model.sample(n_samples=5)
>>>
>>> # Plot the tail concentration function.
>>> fig = model.plot_tail_concentration()
>>> fig.show()
>>>
>>> # Plot a 2D contour of the estimated PDF.
>>> fig = model.plot_pdf_2d()
>>> fig.show()
>>>
>>> # Plot a 3D surface of the estimated PDF.
>>> fig = model.plot_pdf_3d()
>>> fig.show()
References
----------
.. [1] "An Introduction to Copulas (2nd ed.)",
Nelsen (2006)
.. [2] "Multivariate Models and Dependence Concepts",
Joe, Chapman & Hall (1997)
.. [3] "Quantitative Risk Management: Concepts, Techniques and Tools",
McNeil, Frey & Embrechts (2005)
.. [4] "The t Copula and Related Copulas",
Demarta & McNeil (2005)
.. [5] "Copula Methods in Finance",
Cherubini, Luciano & Vecchiato (2004)
"""
rho_: float
dof_: float
_n_params = 2
def __init__(
self,
itau: bool = True,
kendall_tau: float | None = None,
tolerance: float = 1e-4,
random_state: int | None = None,
):
super().__init__(random_state=random_state)
self.itau = itau
self.kendall_tau = kendall_tau
self.tolerance = tolerance
def fit(self, X: npt.ArrayLike, y=None) -> "StudentTCopula":
r"""Fit the Bivariate Student's t Copula.
If `itau` is True, it uses a Kendall-based two-step method:
- Estimates the correlation parameter (:math:`\rho`) from Kendall's
tau inversion.
- Optimizes the degrees of freedom (:math:`\nu`) by maximizing the
log-likelihood.
Otherwise, it uses the full MLE method: optimizes both :math:`\rho` and
:math:`\nu` by maximizing the log-likelihood.
Parameters
----------
X : array-like of shape (n_observations, 2)
An array of bivariate inputs `(u, v)` where each row represents a
bivariate observation. Both `u` and `v` must be in the interval [0, 1],
having been transformed to uniform marginals.
y : None
Ignored. Provided for compatibility with scikit-learn's API.
Returns
-------
self : StudentTCopula
Returns the instance itself.
"""
X = self._validate_X(X, reset=True)
if self.kendall_tau is None:
kendall_tau = st.kendalltau(X[:, 0], X[:, 1]).statistic
else:
kendall_tau = self.kendall_tau
# Either used directly or for initial guess
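# For elliptical copulas, Kendall's tau satisfies tau = (2 / pi) * arcsin(rho),
# so inverting gives rho = sin(pi * tau / 2).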
rho_from_tau = np.clip(
np.sin((np.pi * kendall_tau) / 2.0),
a_min=_RHO_BOUNDS[0],
a_max=_RHO_BOUNDS[1],
)
if self.itau:
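# Two-step (itau) estimation: hold rho fixed at the tau-inversion estimate
# and maximize the profile log-likelihood over the dof alone.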
res = so.minimize_scalar(
_neg_log_likelihood,
args=(
rho_from_tau,
X,
),
bounds=_DOF_BOUNDS,
method="bounded",
options={"xatol": self.tolerance},
)
if not res.success:
raise RuntimeError(f"Optimization failed: {res.message}")
self.dof_ = res.x
self.rho_ = rho_from_tau
else:
# We'll use L-BFGS-B for the optimization because:
# 1) The bivariate Student-t copula's negative log-likelihood is smooth,
# making gradient-based methods more efficient than derivative-free
# methods.
# 2) L-BFGS-B directly supports simple box bounds (e.g., -1 < rho < 1,
# 2 <= dof <= 50).
# 3) It's typically faster and more stable for small-dimensional problems
# than more general constraint solvers (like trust-constr or SLSQP).
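# The parameter vector is x = [dof, rho]; we start from a moderate dof of 3.0
# and the Kendall tau-inversion estimate of rho.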
result = so.minimize(
fun=lambda x: _neg_log_likelihood(dof=x[0], rho=x[1], X=X),
x0=np.array([3.0, rho_from_tau]),
bounds=(_DOF_BOUNDS, _RHO_BOUNDS),
method="L-BFGS-B",
tol=self.tolerance,
)
if not result.success:
raise RuntimeError(f"Optimization failed: {result.message}")
self.dof_, self.rho_ = result.x
return self
def cdf(self, X: npt.ArrayLike) -> np.ndarray:
"""Compute the CDF of the bivariate Student-t copula.
Parameters
----------
X : array-like of shape (n_observations, 2)
An array of bivariate inputs `(u, v)` where each row represents a
bivariate observation. Both `u` and `v` must be in the interval `[0, 1]`,
having been transformed to uniform marginals.
Returns
-------
cdf : ndarray of shape (n_observations,)
CDF values for each observation in X.
"""
skv.check_is_fitted(self)
X = self._validate_X(X, reset=False)
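# Map (u, v) to Student's t quantiles via stdtrit (inverse CDF), then evaluate
# the bivariate t CDF with correlation rho and dof degrees of freedom.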
cdf = st.multivariate_t.cdf(
x=sp.stdtrit(self.dof_, X),
loc=np.array([0, 0]),
shape=np.array([[1, self.rho_], [self.rho_, 1]]),
df=self.dof_,
)
return cdf
def partial_derivative(
self, X: npt.ArrayLike, first_margin: bool = False
) -> np.ndarray:
r"""Compute the h-function (partial derivative) for the bivariate Student's t
copula.
The h-function with respect to the second margin represents the conditional
distribution function of :math:`u` given :math:`v`:
.. math:: \begin{aligned}
h(u \mid v) &= \frac{\partial C(u,v)}{\partial v} \\
&= t_{\nu+1}\!\left(\frac{t_\nu^{-1}(u) - \rho\,t_\nu^{-1}(v)}
{\sqrt{\frac{(1-\rho^2)\left(\nu + \left(t_\nu^{-1}(v)\right)^2\right)}{\nu+1}}}\right).
\end{aligned}
where:
- :math:`\nu > 0` is the degrees of freedom.
- :math:`\rho \in (-1, 1)` is the correlation coefficient.
- :math:`t_{\nu}^{-1}(p)` is the quantile function (inverse CDF) of the
univariate t-distribution.
Parameters
----------
X : array-like of shape (n_observations, 2)
An array of bivariate inputs `(u, v)` where each row represents a
bivariate observation. Both `u` and `v` must be in the interval `[0, 1]`,
having been transformed to uniform marginals.
first_margin : bool, default=False
If True, compute the partial derivative with respect to the first
margin `u`; otherwise, compute the partial derivative with respect to the
second margin `v`.
Returns
-------
p : ndarray of shape (n_observations,)
h-function values :math:`h(u \mid v) \;=\; p` for each observation in X.
"""
skv.check_is_fitted(self)
X = self._validate_X(X, reset=False)
X = _apply_margin_swap(X, first_margin=first_margin)
# Compute the inverse CDF (percent point function) using stdtrit for better
# performance
u_inv, v_inv = sp.stdtrit(self.dof_, X).T
# Standardized argument: z = (x - rho * y) / sqrt((1 - rho^2) * (nu + y^2) / (nu + 1)),
# with x = t_nu^{-1}(u) and y = t_nu^{-1}(v)
z = (u_inv - self.rho_ * v_inv) / (
np.sqrt((1 - self.rho_**2) * (self.dof_ + v_inv**2) / (self.dof_ + 1))
)
# Student's t CDF with (nu+1) degrees of freedom using stdtr for better
# performance
p = sp.stdtr(self.dof_ + 1, z)
return p
def inverse_partial_derivative(
self, X: npt.ArrayLike, first_margin: bool = False
) -> np.ndarray:
r"""Compute the inverse of the bivariate copula's partial derivative, commonly
known as the inverse h-function [1]_.
Let :math:`C(u, v)` be a bivariate copula. The h-function with respect to the
second margin is defined by
.. math::
h(u \mid v) \;=\; \frac{\partial\,C(u, v)}{\partial\,v},
which is the conditional distribution of :math:`U` given :math:`V = v`.
The **inverse h-function**, denoted :math:`h^{-1}(p \mid v)`, is the unique
value :math:`u \in [0,1]` such that
.. math::
h(u \mid v) \;=\; p,
\quad \text{where } p \in [0,1].
In practical terms, given :math:`(p, v)` in :math:`[0, 1]^2`,
:math:`h^{-1}(p \mid v)` solves for the :math:`u` satisfying
:math:`p = \partial C(u, v)/\partial v`.
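For the Student's t copula, this inverse has the closed form implemented below:
.. math::
h^{-1}(p \mid v) = t_{\nu}\!\left( t_{\nu+1}^{-1}(p)\,\sqrt{\frac{\bigl(\nu + \left(t_{\nu}^{-1}(v)\right)^2\bigr)\bigl(1-\rho^2\bigr)}{\nu+1}} \;+\; \rho\, t_{\nu}^{-1}(v) \right).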
Parameters
----------
X : array-like of shape (n_observations, 2)
An array of bivariate inputs `(p, v)`, each in the interval `[0, 1]`.
- The first column `p` corresponds to the value of the h-function.
- The second column `v` is the conditioning variable.
first_margin : bool, default=False
If True, compute the inverse partial derivative with respect to the first
margin `u`; otherwise, compute the inverse partial derivative with respect
to the second margin `v`.
Returns
-------
u : ndarray of shape (n_observations,)
A 1D-array of length `n_observations`, where each element is the computed
:math:`u = h^{-1}(p \mid v)` for the corresponding pair in `X`.
References
----------
.. [1] "Multivariate Models and Dependence Concepts", Joe, H. (1997)
.. [2] "An Introduction to Copulas", Nelsen, R. B. (2006)
"""
skv.check_is_fitted(self)
X = self._validate_X(X, reset=False)
X = _apply_margin_swap(X, first_margin=first_margin)
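# Closed-form inversion of the h-function:
# u = t_nu( t_{nu+1}^{-1}(p) * sqrt((nu + t_nu^{-1}(v)^2) * (1 - rho^2) / (nu + 1))
#           + rho * t_nu^{-1}(v) )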
p_inv = sp.stdtrit(self.dof_ + 1, X[:, 0])
v_inv = sp.stdtrit(self.dof_, X[:, 1])
u_inv = (
p_inv
* np.sqrt((self.dof_ + v_inv**2) / (self.dof_ + 1) * (1 - self.rho_**2))
+ self.rho_ * v_inv
)
u = sp.stdtr(self.dof_, u_inv)
return u
def score_samples(self, X: npt.ArrayLike) -> np.ndarray:
"""Compute the log-likelihood of each sample (log-pdf) under the model.
Parameters
----------
X : array-like of shape (n_observations, 2)
An array of bivariate inputs `(u, v)` where each row represents a
bivariate observation. Both `u` and `v` must be in the interval `[0, 1]`,
having been transformed to uniform marginals.
Returns
-------
density : ndarray of shape (n_observations,)
The log-likelihood of each sample under the fitted copula.
"""
skv.check_is_fitted(self)
X = self._validate_X(X, reset=False)
log_density = _sample_scores(X=X, rho=self.rho_, dof=self.dof_)
return log_density
@property
def lower_tail_dependence(self) -> float:
"""Theoretical lower tail dependence coefficient."""
skv.check_is_fitted(self)
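# For the Student's t copula, lambda_lower = lambda_upper
# = 2 * t_{nu+1}(-sqrt((nu + 1) * (1 - rho) / (1 + rho))),
# see Demarta & McNeil (2005), reference [4] in the class docstring.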
arg = -np.sqrt((self.dof_ + 1) * (1 - self.rho_) / (1 + self.rho_))
return 2 * sp.stdtr(self.dof_ + 1, arg)
@property
def upper_tail_dependence(self) -> float:
"""Theoretical upper tail dependence coefficient."""
return self.lower_tail_dependence
@property
def fitted_repr(self) -> str:
"""String representation of the fitted copula."""
return f"{self.__class__.__name__}(rho={self.rho_:0.3f}, dof={self.dof_:0.2f})"
def _neg_log_likelihood(dof: float, rho: float, X: np.ndarray) -> float:
"""Negative log-likelihood function for optimization.
Parameters
----------
X : array-like of shape (n_observations, 2)
An array of bivariate inputs `(u, v)` where each row represents a
bivariate observation. Both `u` and `v` must be in the interval `[0, 1]`,
having been transformed to uniform marginals.
rho : float
Correlation copula parameter.
dof : float
Degree of freedom copula parameter.
Returns
-------
value : float
The negative log-likelihood value.
"""
return -np.sum(_sample_scores(X=X, rho=rho, dof=dof))
def _sample_scores(X: np.ndarray, rho: float, dof: float) -> np.ndarray:
"""Compute the log-likelihood of each sample (log-pdf) under the bivariate
Gaussian copula model.
Parameters
----------
X : array-like of shape (n_observations, 2)
An array of bivariate inputs `(u, v)` where each row represents a
bivariate observation. Both `u` and `v` must be in the interval `[0, 1]`,
having been transformed to uniform marginals.
rho : float
Correlation parameter of the Student's t copula.
dof : float
Degrees of freedom of the Student's t copula.
Returns
-------
density : ndarray of shape (n_observations,)
The log-likelihood of each sample under the fitted copula.
Raises
------
ValueError
If `rho` is not in [-1, 1] or `dof` is not in [1, 50].
"""
if not (-1.0 <= rho <= 1.0):
raise ValueError("rho must be between -1 and 1.")
if not 1.0 <= dof <= 50:
raise ValueError("Degrees of freedom `dof` must be between 1 and 50.")
# Inverse CDF (ppf) using stdtrit for better performance
x, y = sp.stdtrit(dof, X).T
a = 1.0 - rho**2
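# The copula log-density is the joint bivariate t log-density minus the two
# marginal t log-densities; the marginal terms therefore enter with a positive
# sign (the log1p terms below), while the joint term enters negatively.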
log_density = (
sp.gammaln((dof + 2.0) / 2.0)
+ sp.gammaln(dof / 2.0)
- 2.0 * sp.gammaln((dof + 1.0) / 2.0)
- np.log(a) / 2
+ (dof + 1.0) / 2.0 * (np.log1p(x**2 / dof) + np.log1p(y**2 / dof))
- (dof + 2.0) / 2.0 * np.log1p((x**2 - 2 * rho * x * y + y**2) / a / dof)
)
return log_density