# Source code for skfolio.moments.covariance._ledoit_wolf

"""LedoitWolf Covariance Estimators."""

# Copyright (c) 2023
# Author: Hugo Delatte <delatte.hugo@gmail.com>
# License: BSD 3 clause
# Implementation derived from:
# scikit-learn, Copyright (c) 2007-2010 David Cournapeau, Fabian Pedregosa, Olivier
# Grisel Licensed under BSD 3 clause.

import numpy.typing as npt
import sklearn.covariance as skc

from skfolio.moments.covariance._base import BaseCovariance


class LedoitWolf(BaseCovariance, skc.LedoitWolf):
    """LedoitWolf Covariance Estimator.

    Ledoit-Wolf is a particular form of shrinkage, where the shrinkage
    coefficient is computed using O. Ledoit and M. Wolf's formula as described
    in [1]_.

    Read more in `scikit-learn
    <https://scikit-learn.org/stable/modules/generated/sklearn.covariance.LedoitWolf.html>`_.

    Parameters
    ----------
    store_precision : bool, default=True
        Specify if the estimated precision is stored.

    assume_centered : bool, default=False
        If True, data will not be centered before computation.
        Useful when working with data whose mean is almost, but not exactly
        zero.
        If False (default), data will be centered before computation.

    block_size : int, default=1000
        Size of blocks into which the covariance matrix will be split
        during its Ledoit-Wolf estimation. This is purely a memory
        optimization and does not affect results.

    nearest : bool, default=True
        If this is set to True, the covariance is replaced by the nearest
        covariance matrix that is positive definite and with a Cholesky
        decomposition that can be computed. The variance is left unchanged.
        A covariance matrix that is not positive definite often occurs in high
        dimensional problems. It can be due to multicollinearity, floating-point
        inaccuracies, or when the number of observations is smaller than the
        number of assets. For more details, see
        :func:`~skfolio.utils.stats.cov_nearest`.
        The default is `True`.

    higham : bool, default=False
        If this is set to True, the Higham & Nick (2002) algorithm is used to
        find the nearest PD covariance, otherwise the eigenvalues are clipped to
        a threshold above zero (1e-13). The default is `False` and uses the
        clipping method as the Higham & Nick algorithm can be slow for large
        datasets.

    higham_max_iteration : int, default=100
        Maximum number of iterations of the Higham & Nick (2002) algorithm.
        The default value is `100`.

    Attributes
    ----------
    covariance_ : ndarray of shape (n_assets, n_assets)
        Estimated covariance.

    location_ : ndarray of shape (n_assets,)
        Estimated location, i.e. the estimated mean.

    precision_ : ndarray of shape (n_assets, n_assets)
        Estimated pseudo inverse matrix.
        (stored only if store_precision is True)

    shrinkage_ : float
        Coefficient in the convex combination used for the computation
        of the shrunk estimate. Range is [0, 1].

    n_features_in_ : int
        Number of assets seen during `fit`.

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during `fit`. Defined only when `X`
        has feature names that are all strings.

    Notes
    -----
    The regularised covariance is:

        (1 - shrinkage) * cov + shrinkage * mu * np.identity(n_features)

    where mu = trace(cov) / n_features and shrinkage is given by the Ledoit and
    Wolf formula (see References)

    References
    ----------
    .. [1] "A Well-Conditioned Estimator for Large-Dimensional Covariance
        Matrices".
        Ledoit and Wolf, Journal of Multivariate Analysis, Volume 88, Issue 2.
        February 2004, pages 365-411.
    """

    def __init__(
        self,
        store_precision: bool = True,
        assume_centered: bool = False,
        block_size: int = 1000,
        nearest: bool = True,
        higham: bool = False,
        higham_max_iteration: int = 100,
    ):
        # The two parents take disjoint sets of hyper-parameters, so each
        # initializer is invoked with only the arguments it owns.
        super().__init__(
            nearest=nearest,
            higham=higham,
            higham_max_iteration=higham_max_iteration,
        )
        skc.LedoitWolf.__init__(
            self,
            store_precision=store_precision,
            assume_centered=assume_centered,
            block_size=block_size,
        )

    def fit(self, X: npt.ArrayLike, y=None) -> "LedoitWolf":
        """Fit the Ledoit-Wolf shrunk covariance model to X.

        Parameters
        ----------
        X : array-like of shape (n_observations, n_assets)
            Price returns of the assets.

        y : Ignored
            Not used, present for API consistency by convention.

        Returns
        -------
        self : LedoitWolf
            Fitted estimator.
        """
        # Call the scikit-learn implementation explicitly (not via super()) so
        # the shrinkage estimate is the one computed by skc.LedoitWolf.
        skc.LedoitWolf.fit(self, X)
        # Hand the fitted covariance to the BaseCovariance hook.
        # NOTE(review): `_set_covariance` is defined outside this file;
        # presumably it applies the `nearest`/`higham` post-processing
        # described in the class docstring - confirm against BaseCovariance.
        self._set_covariance(self.covariance_)
        return self