Source code for skfolio.moments.covariance._ew_covariance

"""Exponentially Weighted Covariance Estimators."""

# Copyright (c) 2023
# Author: Hugo Delatte <delatte.hugo@gmail.com>
# License: BSD 3 clause
# Implementation derived from:
# scikit-learn, Copyright (c) 2007-2010 David Cournapeau, Fabian Pedregosa, Olivier
# Grisel Licensed under BSD 3 clause.

import numpy.typing as npt
import pandas as pd

from skfolio.moments.covariance._base import BaseCovariance


class EWCovariance(BaseCovariance):
    r"""Exponentially Weighted Covariance estimator.

    Estimator of the covariance using the historical exponentially weighted returns.

    Parameters
    ----------
    window_size : int, optional
        Window size. The model is fitted on the last `window_size` observations.
        Must be greater than or equal to 1 when provided.
        The default (`None`) is to use all the data.

    alpha : float, default=0.2
        Exponential smoothing factor. The default value is `0.2`.

        :math:`0 < \alpha \leq 1`.

    nearest : bool, default=True
        If this is set to True, the covariance is replaced by the nearest covariance
        matrix that is positive definite and with a Cholesky decomposition that can be
        computed. The variance is left unchanged.
        A covariance matrix that is not positive definite often occurs in high
        dimensional problems. It can be due to multicollinearity, floating-point
        inaccuracies, or when the number of observations is smaller than the number of
        assets. For more details, see :func:`~skfolio.utils.stats.cov_nearest`.
        The default is `True`.

    higham : bool, default=False
        If this is set to True, the Higham & Nick (2002) algorithm is used to find the
        nearest PD covariance, otherwise the eigenvalues are clipped to a threshold
        above zero (1e-13). The default is `False` and uses the clipping method as the
        Higham & Nick algorithm can be slow for large datasets.

    higham_max_iteration : int, default=100
        Maximum number of iterations of the Higham & Nick (2002) algorithm.
        The default value is `100`.

    Attributes
    ----------
    covariance_ : ndarray of shape (n_assets, n_assets)
        Estimated covariance.

    n_features_in_ : int
        Number of assets seen during `fit`.

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during `fit`. Defined only when `X`
        has feature names that are all strings.
    """

    def __init__(
        self,
        window_size: int | None = None,
        alpha: float = 0.2,
        nearest: bool = True,
        higham: bool = False,
        higham_max_iteration: int = 100,
    ):
        super().__init__(
            nearest=nearest,
            higham=higham,
            higham_max_iteration=higham_max_iteration,
        )
        # Hyper-parameters are stored untouched; validation happens in `fit`.
        self.window_size = window_size
        self.alpha = alpha

    def fit(self, X: npt.ArrayLike, y=None) -> "EWCovariance":
        """Fit the Exponentially Weighted Covariance estimator.

        Parameters
        ----------
        X : array-like of shape (n_observations, n_assets)
            Price returns of the assets.

        y : Ignored
            Not used, present for API consistency by convention.

        Returns
        -------
        self : EWCovariance
            Fitted estimator.

        Raises
        ------
        ValueError
            If `window_size` is provided and is smaller than 1.
        """
        # Inherited validation helper (defined outside this module).
        X = self._validate_data(X)

        if self.window_size is not None:
            window_size = int(self.window_size)
            # Guard against `X[-0:]`, which returns *all* rows, and against
            # negative values, which would drop the oldest rows instead of
            # keeping the most recent ones.
            if window_size < 1:
                raise ValueError(
                    "`window_size` must be a strictly positive integer, "
                    f"got {self.window_size!r}"
                )
            X = X[-window_size:]

        n_observations = X.shape[0]
        # `ewm(...).cov()` stacks one covariance matrix per observation under a
        # (observation, asset) MultiIndex; the estimate is the matrix attached
        # to the last observation.
        covariance = (
            pd.DataFrame(X)
            .ewm(alpha=self.alpha)
            .cov()
            .loc[(n_observations - 1, slice(None)), :]
            .to_numpy()
        )
        # Inherited helper (defined outside this module) that stores the result.
        self._set_covariance(covariance)
        return self