Source code for skfolio.pre_selection._drop_zero_variance
"""Pre-selection DropZeroVariance module."""
# Copyright (c) 2025
# Author: Vincent Maladiere <maladiere.vincent@gmail.com>
# SPDX-License-Identifier: BSD-3-Clause
import numpy as np
import numpy.typing as npt
import sklearn.base as skb
import sklearn.feature_selection as skf
import sklearn.utils.validation as skv
[docs]
class DropZeroVariance(skf.SelectorMixin, skb.BaseEstimator):
    """Pre-selection transformer that removes assets with (near-)zero variance.

    Over a short estimation window an asset may barely move, which yields a
    degenerate covariance matrix that is unsuitable for optimization. This
    selector keeps only the assets whose variance, computed column-wise on
    the data passed to `fit`, is strictly greater than `threshold`.

    Parameters
    ----------
    threshold : float, default=1e-8
        Variance cut-off; it must not be negative. An asset is kept only when
        its variance is strictly above this value, so an asset whose variance
        equals the threshold is dropped. With daily returns, the default of
        1e-8 removes assets whose daily standard deviation is below 1e-4
        (0.01%), i.e. roughly 0.16% annualized under a 252 trading-day year.

    Attributes
    ----------
    to_keep_ : ndarray of shape (n_assets,)
        Boolean mask, `True` for every asset that is retained.

    n_features_in_ : int
        Number of assets seen during `fit`.

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of assets seen during `fit`. Defined only when `X`
        has assets names that are all strings.
    """

    to_keep_: np.ndarray

    def __init__(self, threshold: float = 1e-8):
        self.threshold = threshold

    def fit(self, X: npt.ArrayLike, y=None):
        """Learn which assets have a variance above the threshold.

        Parameters
        ----------
        X : array-like of shape (n_observations, n_assets)
            Price returns of the assets.

        y : Ignored
            Not used, present for API consistency by convention.

        Returns
        -------
        self : DropZeroVariance
            Fitted estimator.

        Raises
        ------
        ValueError
            If `threshold` is negative.
        """
        # The input is validated before the hyper-parameter so that data
        # errors surface first, as they always have.
        X = skv.validate_data(self, X)

        threshold = self.threshold
        if threshold < 0:
            raise ValueError(
                f"`threshold` must be higher than 0, got {threshold}."
            )

        # One variance per asset (column); the comparison is strict.
        asset_variances = np.var(X, axis=0)
        self.to_keep_ = asset_variances > threshold
        return self

    def _get_support_mask(self):
        """Return the boolean mask of retained assets computed by `fit`."""
        skv.check_is_fitted(self)
        return self.to_keep_