MCRE-BE/cramersv.py

## cramersv.py
"""CramersV detection and suppression algorithm for OHE columns.

See Also
--------
`Multicollinearity impact <https://www.kaggle.com/code/ffisegydd/sklearn-multicollinearity-class/notebook>`_
"""

# %%
#############
# Libraries #
#############
from itertools import combinations
from typing import Self

import pandas as pd
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.exceptions import NotFittedError
from tqdm.auto import tqdm


############
# CramersV #
############
class CramersV(BaseEstimator, TransformerMixin):

    """Cramér's V detection and suppression algorithm for OHE columns.

    In statistics, Cramér's V (sometimes referred to as Cramér's phi and
    denoted as φc) is a measure of association between two nominal variables,
    giving a value between 0 and +1 (inclusive). It is based on Pearson's
    chi-squared statistic and was published by Harald Cramér in 1946.

    The class follows the scikit-learn fit/transform pattern: ``fit``
    computes the pairwise association between the categorical columns and
    records which columns to drop, ``transform`` drops them.

    See Also
    --------
    `Wikipedia Cramer's V <https://www.wikiwand.com/en/Cram%C3%A9r's_V>`_
    `Association Metrics on Github <https://github.com/HeberTU/association_metrics>`_

    Parameters
    ----------
    thresh : float, by default 0.5

    Attributes
    ----------
    thresh : float
        The threshold above which two features are assumed to be related,
        in which case one of them is dropped during fitting. The following
        thresholds are often assumed :

        * Unrelated : 0 to 0.33
        * Medium relation : 0.33 to 0.5
        * Strongly related : 0.5 to 1
    matrix : Dict[Tuple[str, str], float]
        Dict with the calculated Cramér's V associations. The dictionary is
        filled in both directions, (i, j) and (j, i), with tuples as keys.
        Pairs involving an already dropped feature are not calculated.
    data : pd.DataFrame
        A copy of the data used during fitting
    features_to_drop_ : List[str]
        List of all features to drop, in the order they were selected
    _fitted : bool
        Whether the .fit function has been called
    """

    def __init__(
        self: Self,
        thresh: float = 0.5,
    ):

        self.thresh = thresh
        self.features_to_drop_ = []
        self._fitted = False
        self.data = None
        self.matrix = None

    def fit(
        self: Self,
        X: pd.DataFrame,
        y: pd.Series = None,
    ) -> Self:
        """Fits the object based on Cramér's V selection method.

        Calculates the Cramér's V association for each pair of categorical
        columns in X and stores it in the ``matrix`` dict. Whenever the
        association of a pair is strictly above ``thresh``, the first
        feature of the pair is marked as dropped and no further pair
        involving it is calculated.

        Only columns with dtype "category" are considered.

        Parameters
        ----------
        X : pd.DataFrame
            Feature dataframe with all columns.
        y : pd.Series, by default None
            The target variable. Not used, only added for SKLearn compatibility.

        Attributes
        ----------
        matrix : dict
        data : pd.DataFrame
        features_to_drop_ : list
        _fitted : bool

        Returns
        -------
        Self
            The fitted object.

        Raises
        ------
        KeyError
            In case no columns with dtype "category" are present in the passed
            dataframe.
        """

        # --- Import ---
        from scipy.stats.contingency import association

        # --- Variables ---
        data = X.copy()
        thresh = self.thresh

        # --- Script ---
        # Select only categorical variables
        categorical = data.select_dtypes(include=['category']).columns
        if len(categorical) == 0:
            raise KeyError("No categorical variables found")
        pairs = list(combinations(categorical, r=2))

        # Fill the matrix
        # A dict is used as an insertion-ordered set: O(1) membership tests
        # and a deterministic order for features_to_drop_.
        dropped = {}
        matrix = {}
        progress = tqdm(pairs, desc="Combo", leave=False)
        for i, j in progress:
            progress.set_postfix_str(f"{i} : {j}")

            # If one of the features is already dropped, don't calculate
            if i in dropped or j in dropped:
                continue

            input_tab = pd.crosstab(data[i], data[j])
            res_cramer = association(input_tab, method='cramer')
            matrix[(i, j)] = matrix[(j, i)] = res_cramer
            if res_cramer > thresh:
                dropped[i] = None

        # Save
        self._fitted = True
        self.matrix = matrix
        self.data = data
        self.features_to_drop_ = list(dropped)

        return self

    def transform(
        self: Self,
        X: pd.DataFrame,
    ) -> pd.DataFrame:
        """Transform the dataframe based on Cramér's V selection.

        Drops the columns selected during fitting from the dataframe.
        Selected columns that are absent from X are silently ignored; the
        columns of X are not otherwise checked against the fitted data.

        Parameters
        ----------
        X : pd.DataFrame
            Feature dataframe with all columns.

        Returns
        -------
        pd.DataFrame
            Transformed dataframe

        Raises
        ------
        NotFittedError
            If the object has not been fitted
        """

        # --- Check ---
        if not self._fitted:
            raise NotFittedError("Object has not been fitted")

        return X.drop(columns=self.features_to_drop_, errors="ignore").copy()

## reducevif.py
"""Different kinds of transformers used in the FeatureTransformer.

Module containing several kinds of SKLearn compatible transformers to select
and filter out specific types of features that could potentially reduce the
accuracy of our forecasts.

See Also
--------
`Multicollinearity impact <https://www.kaggle.com/code/ffisegydd/sklearn-multicollinearity-class/notebook>`_

"""

# %%
#############
# Libraries #
#############
from typing import List, Self, Tuple

import numpy as np
import pandas as pd
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.exceptions import NotFittedError
from sklearn.impute import SimpleImputer


#############
# ReduceVIF #
#############
class ReduceVIF(BaseEstimator, TransformerMixin):

    """Variance Inflation Factors detection and suppression algorithm.

    Algorithm to iteratively calculate the Variance Inflation Factor (VIF)
    of each feature in the provided dataframe and drop the feature with the
    highest VIF until no VIF exceeds the threshold. This reduces the
    multicollinearity in the model and should improve the accuracy or
    training speed.

    See Also
    --------
    statsmodels.stats.outliers_influence.variance_inflation_factor
    sklearn.impute.SimpleImputer

    Parameters
    ----------
    thresh : float, by default 5.0
    impute : bool, by default False
    impute_strategy : str, {'median', 'mean', 'most_frequent'}

    Attributes
    ----------
    thresh : float
        The threshold above which a column is dropped. From documentation
        5 to 10 : is ok / Above 10 : to drop.
    impute : bool
        Whether to impute the NaN values before calculating the VIF.
    impute_strategy : str
        The strategy passed to the SimpleImputer to fill any available NaN.
    imputer : SimpleImputer
        SimpleImputer initialized with the chosen impute_strategy. Only
        present when imputation is enabled.
    data : pd.DataFrame
        A copy of the data used during fitting
    features_to_drop_ : List[str]
        List of all features to drop, in the order they were selected
    _fitted : bool
        Whether the .fit function has been called
    """

    def __init__(
        self: Self,
        thresh: float = 5.0,
        impute: bool = False,
        impute_strategy: str = 'median',
    ):

        self.thresh = thresh
        self.impute_strategy = impute_strategy
        self.features_to_drop_ = []
        self._fitted = False
        self.impute = impute
        if impute:
            self.imputer = SimpleImputer(strategy=impute_strategy)

    @staticmethod
    def calculate_vif(
        X: pd.DataFrame,
        thresh: float = 5.0,
    ) -> Tuple[pd.DataFrame, List[str]]:
        """Iteratively drop the column with the highest VIF above ``thresh``.

        At each step the VIF of every remaining column is calculated and, if
        the highest one is strictly above ``thresh``, that column is dropped.
        The loop stops when no VIF exceeds the threshold or no column is left.

        Parameters
        ----------
        X : pd.DataFrame
            Feature dataframe with all columns.
        thresh : float, by default 5.0
            The threshold over which we decide to drop the chosen column.

        See Also
        --------
        `Inspiration of function <https://stats.stackexchange.com/a/253620/53565>`_

        Returns
        -------
        pd.DataFrame
            The transformed DataFrame with all high VIF columns dropped.
        List[str]
            The list of all dropped columns, in dropping order.
        """

        # --- Import ---
        from statsmodels.stats.outliers_influence import variance_inflation_factor

        # --- Setting Variables ---
        to_drop = []

        # --- Script ---
        # Every iteration either drops at least one column or breaks, so the
        # loop always terminates without an explicit iteration counter.
        while len(X.columns) > 0:
            values = X.values

            # Calculate the VIF for each and every column. Positions are used
            # instead of labels so duplicated column names are handled.
            vif = np.array(
                [
                    variance_inflation_factor(values, position)
                    for position in range(values.shape[1])
                ],
                dtype=float,
            )

            # NaN VIFs cannot be compared with the threshold; ignore them.
            if np.isnan(vif).all():
                break

            # Find the highest VIF (possibly inf) and drop its column.
            worst = int(np.nanargmax(vif))
            if not vif[worst] > thresh:
                break

            col = X.columns[worst]
            to_drop.append(col)
            X = X.drop(columns=col, errors="ignore")

        return X, to_drop

    def fit(
        self: Self,
        X: pd.DataFrame,
        y: pd.Series = None,
    ) -> Self:
        """Fits the object by selecting the high VIF columns to drop.

        When ``impute`` is True, a SimpleImputer is built from the current
        ``impute_strategy`` and used to fill the NaN values before the VIF
        calculation, so parameters changed after construction are honoured.

        Parameters
        ----------
        X : pd.DataFrame
            Feature dataframe with all columns.
        y : pd.Series, by default None
            The target variable. Not used, only added for SKLearn compatibility.

        Attributes
        ----------
        data : pd.DataFrame
        features_to_drop_ : List[str]
        _fitted : bool

        Returns
        -------
        Self
            The fitted object.

        See Also
        --------
        ReduceVIF.calculate_vif
        """

        # --- Variables ---
        self.data = X.copy()
        columns = X.columns.tolist()

        # --- Script ---
        if self.impute:
            self.imputer = SimpleImputer(strategy=self.impute_strategy)
            X = pd.DataFrame(
                self.imputer.fit_transform(X),
                columns=columns,
                index=X.index,
            )

        _, to_drop = ReduceVIF.calculate_vif(X, self.thresh)
        self.features_to_drop_ = list(to_drop)
        self._fitted = True
        return self

    def transform(
        self: Self,
        X: pd.DataFrame,
    ) -> pd.DataFrame:
        """Drops the columns selected during fitting.

        Only columns are dropped; NaN values are not imputed here. Selected
        columns that are absent from X are silently ignored.

        Parameters
        ----------
        X : pd.DataFrame
            Feature dataframe with all columns.

        Returns
        -------
        pd.DataFrame
            The transformed dataframe.

        Raises
        ------
        NotFittedError
            If the object has not been fitted
        """

        # --- Checks ---
        if not self._fitted:
            raise NotFittedError("Model has not been fitted yet")

        return X.drop(columns=self.features_to_drop_, errors="ignore").copy()

## regexselector.py
"""RegexSelector selection and suppression algorithm."""

# %%
#############
# Libraries #
#############
import re
from typing import List, Self

import pandas as pd
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.exceptions import NotFittedError


#################
# RegexSelector #
#################
# ? : to remove as unused?
class RegexSelector(BaseEstimator, TransformerMixin):

    """RegexSelector selection and suppression algorithm.

    Object that selects the feature columns from a dataframe based on a
    regex string and only keeps the relevant columns.

    Parameters
    ----------
    regex : List[Tuple[str, str | None]]

    Attributes
    ----------
    regex : List[Tuple[str, str | None]]
        A list of lists or tuples with the following pattern:

        * Regex string to select all relevant features
        * String to match in the set. Matching are kept. When None, all
          features selected by the regex string are dropped.

    features : List[str]
        List of all features seen during fitting
    data : pd.DataFrame
        A copy of the data used during fitting
    features_to_drop_ : List[str]
        List of all features to drop, in the order they were selected
    _fitted : bool
        Whether the .fit function has been called

    Example
    -------
    List formatting ::

        regex = [
          ["^(SIX_Lead_time_|SIX_t_btwn_o_).*", "100"],
          ["^(Holidays_).*(_Cat_).*", "001"],
          ["^(Holidays_DC_|Holidays_XPO_).*", "DCA"],
        ]
    """

    def __init__(
        self: Self,
        regex: List[tuple[str, str | None]],
    ):

        self.regex = regex
        self._fitted = False
        self.data = None
        self.features = None
        self.features_to_drop_ = []

    def fit(
        self: Self,
        X: pd.DataFrame,
        y: pd.Series = None,
    ) -> Self:
        """Fits the object and sets the needed attributes.

        For each (pattern, what) pair provided during initialization, the
        columns whose name matches ``pattern`` are selected. Among those,
        the columns whose name does not contain ``what`` are marked to be
        dropped; when ``what`` is None all selected columns are dropped.

        Parameters
        ----------
        X : pd.DataFrame
            Feature dataframe with all columns.
        y : pd.Series, by default None
            The target variable. Not used, only added for SKLearn compatibility.

        Attributes
        ----------
        features : List[str]
        data : pd.DataFrame
        features_to_drop_ : List[str]
        _fitted : bool

        Returns
        -------
        Self
            The fitted object.
        """

        # --- Variables ---
        features = list(X.columns)
        self.features = features
        self.data = X.copy()

        # --- Script ---
        # A dict is used as an insertion-ordered set so the result is
        # de-duplicated with a deterministic order.
        dropped = {}

        for pattern, what in self.regex:
            matcher = re.compile(pattern)

            # Labels are converted with str() so non-string column labels
            # do not raise a TypeError.
            selected = [x for x in features if matcher.search(str(x))]
            if what is not None:
                keep = str(what)
                selected = [x for x in selected if keep not in str(x)]

            dropped.update(dict.fromkeys(selected))

        # Saving
        self.features_to_drop_ = list(dropped)
        self._fitted = True

        return self

    def transform(
        self: Self,
        X: pd.DataFrame,
    ) -> pd.DataFrame:
        """Transforms the dataframe by removing columns chosen in fit.

        Drops the columns selected during fitting from the dataframe.
        Selected columns that are absent from X are silently ignored; the
        columns of X are not otherwise checked against the fitted data.

        Parameters
        ----------
        X : pd.DataFrame
            Feature dataframe with all columns.

        Returns
        -------
        pd.DataFrame
            Transformed dataframe

        Raises
        ------
        NotFittedError
            If the object has not been fitted
        """
        # --- Check ---
        if not self._fitted:
            raise NotFittedError("Object has not been fitted")

        return X.drop(columns=self.features_to_drop_, errors="ignore").copy()
	"""CramersV detection and suppression algorithm for OHE columns.

	See Also
	--------
	`Multicollinearity impact <https://www.kaggle.com/code/ffisegydd/sklearn-multicollinearity-class/notebook>`_
	"""

	# %%
	#############
	# Libraries #
	#############
	from itertools import combinations
	from typing import Self

	import pandas as pd
	from sklearn.base import BaseEstimator, TransformerMixin
	from sklearn.exceptions import NotFittedError
	from tqdm.auto import tqdm


	############
	# CramersV #
	############
	class CramersV(BaseEstimator, TransformerMixin):

	    """Cramér's V detection and suppression algorithm for OHE columns.

	    In statistics, Cramér's V (sometimes referred to as Cramér's phi and
	    denoted as φc) is a measure of association between two nominal variables,
	    giving a value between 0 and +1 (inclusive). It is based on Pearson's
	    chi-squared statistic and was published by Harald Cramér in 1946.

	    The class follows the scikit-learn fit/transform pattern: ``fit``
	    computes the pairwise association between the categorical columns and
	    records which columns to drop, ``transform`` drops them.

	    See Also
	    --------
	    `Wikipedia Cramer's V <https://www.wikiwand.com/en/Cram%C3%A9r's_V>`_
	    `Association Metrics on Github <https://github.com/HeberTU/association_metrics>`_

	    Parameters
	    ----------
	    thresh : float, by default 0.5

	    Attributes
	    ----------
	    thresh : float
	        The threshold above which two features are assumed to be related,
	        in which case one of them is dropped during fitting. The following
	        thresholds are often assumed :

	        * Unrelated : 0 to 0.33
	        * Medium relation : 0.33 to 0.5
	        * Strongly related : 0.5 to 1
	    matrix : Dict[Tuple[str, str], float]
	        Dict with the calculated Cramér's V associations. The dictionary is
	        filled in both directions, (i, j) and (j, i), with tuples as keys.
	        Pairs involving an already dropped feature are not calculated.
	    data : pd.DataFrame
	        A copy of the data used during fitting
	    features_to_drop_ : List[str]
	        List of all features to drop, in the order they were selected
	    _fitted : bool
	        Whether the .fit function has been called
	    """

	    def __init__(
	        self: Self,
	        thresh: float = 0.5,
	    ):

	        self.thresh = thresh
	        self.features_to_drop_ = []
	        self._fitted = False
	        self.data = None
	        self.matrix = None

	    def fit(
	        self: Self,
	        X: pd.DataFrame,
	        y: pd.Series = None,
	    ) -> Self:
	        """Fits the object based on Cramér's V selection method.

	        Calculates the Cramér's V association for each pair of categorical
	        columns in X and stores it in the ``matrix`` dict. Whenever the
	        association of a pair is strictly above ``thresh``, the first
	        feature of the pair is marked as dropped and no further pair
	        involving it is calculated.

	        Only columns with dtype "category" are considered.

	        Parameters
	        ----------
	        X : pd.DataFrame
	            Feature dataframe with all columns.
	        y : pd.Series, by default None
	            The target variable. Not used, only added for SKLearn compatibility.

	        Attributes
	        ----------
	        matrix : dict
	        data : pd.DataFrame
	        features_to_drop_ : list
	        _fitted : bool

	        Returns
	        -------
	        Self
	            The fitted object.

	        Raises
	        ------
	        KeyError
	            In case no columns with dtype "category" are present in the passed
	            dataframe.
	        """

	        # --- Import ---
	        from scipy.stats.contingency import association

	        # --- Variables ---
	        data = X.copy()
	        thresh = self.thresh

	        # --- Script ---
	        # Select only categorical variables
	        categorical = data.select_dtypes(include=['category']).columns
	        if len(categorical) == 0:
	            raise KeyError("No categorical variables found")
	        pairs = list(combinations(categorical, r=2))

	        # Fill the matrix
	        # A dict is used as an insertion-ordered set: O(1) membership tests
	        # and a deterministic order for features_to_drop_.
	        dropped = {}
	        matrix = {}
	        progress = tqdm(pairs, desc="Combo", leave=False)
	        for i, j in progress:
	            progress.set_postfix_str(f"{i} : {j}")

	            # If one of the features is already dropped, don't calculate
	            if i in dropped or j in dropped:
	                continue

	            input_tab = pd.crosstab(data[i], data[j])
	            res_cramer = association(input_tab, method='cramer')
	            matrix[(i, j)] = matrix[(j, i)] = res_cramer
	            if res_cramer > thresh:
	                dropped[i] = None

	        # Save
	        self._fitted = True
	        self.matrix = matrix
	        self.data = data
	        self.features_to_drop_ = list(dropped)

	        return self

	    def transform(
	        self: Self,
	        X: pd.DataFrame,
	    ) -> pd.DataFrame:
	        """Transform the dataframe based on Cramér's V selection.

	        Drops the columns selected during fitting from the dataframe.
	        Selected columns that are absent from X are silently ignored; the
	        columns of X are not otherwise checked against the fitted data.

	        Parameters
	        ----------
	        X : pd.DataFrame
	            Feature dataframe with all columns.

	        Returns
	        -------
	        pd.DataFrame
	            Transformed dataframe

	        Raises
	        ------
	        NotFittedError
	            If the object has not been fitted
	        """

	        # --- Check ---
	        if not self._fitted:
	            raise NotFittedError("Object has not been fitted")

	        return X.drop(columns=self.features_to_drop_, errors="ignore").copy()
	"""Different kinds of transformers used in the FeatureTransformer.

	Module containing several kinds of SKLearn compatible transformers to select
	and filter out specific types of features that could potentially reduce the
	accuracy of our forecasts.

	See Also
	--------
	`Multicollinearity impact <https://www.kaggle.com/code/ffisegydd/sklearn-multicollinearity-class/notebook>`_

	"""

	# %%
	#############
	# Libraries #
	#############
	from typing import List, Self, Tuple

	import numpy as np
	import pandas as pd
	from sklearn.base import BaseEstimator, TransformerMixin
	from sklearn.exceptions import NotFittedError
	from sklearn.impute import SimpleImputer


	#############
	# ReduceVIF #
	#############
	class ReduceVIF(BaseEstimator, TransformerMixin):

	    """Variance Inflation Factors detection and suppression algorithm.

	    Algorithm to iteratively calculate the Variance Inflation Factor (VIF)
	    of each feature in the provided dataframe and drop the feature with the
	    highest VIF until no VIF exceeds the threshold. This reduces the
	    multicollinearity in the model and should improve the accuracy or
	    training speed.

	    See Also
	    --------
	    statsmodels.stats.outliers_influence.variance_inflation_factor
	    sklearn.impute.SimpleImputer

	    Parameters
	    ----------
	    thresh : float, by default 5.0
	    impute : bool, by default False
	    impute_strategy : str, {'median', 'mean', 'most_frequent'}

	    Attributes
	    ----------
	    thresh : float
	        The threshold above which a column is dropped. From documentation
	        5 to 10 : is ok / Above 10 : to drop.
	    impute : bool
	        Whether to impute the NaN values before calculating the VIF.
	    impute_strategy : str
	        The strategy passed to the SimpleImputer to fill any available NaN.
	    imputer : SimpleImputer
	        SimpleImputer initialized with the chosen impute_strategy. Only
	        present when imputation is enabled.
	    data : pd.DataFrame
	        A copy of the data used during fitting
	    features_to_drop_ : List[str]
	        List of all features to drop, in the order they were selected
	    _fitted : bool
	        Whether the .fit function has been called
	    """

	    def __init__(
	        self: Self,
	        thresh: float = 5.0,
	        impute: bool = False,
	        impute_strategy: str = 'median',
	    ):

	        self.thresh = thresh
	        self.impute_strategy = impute_strategy
	        self.features_to_drop_ = []
	        self._fitted = False
	        self.impute = impute
	        if impute:
	            self.imputer = SimpleImputer(strategy=impute_strategy)

	    @staticmethod
	    def calculate_vif(
	        X: pd.DataFrame,
	        thresh: float = 5.0,
	    ) -> Tuple[pd.DataFrame, List[str]]:
	        """Iteratively drop the column with the highest VIF above ``thresh``.

	        At each step the VIF of every remaining column is calculated and, if
	        the highest one is strictly above ``thresh``, that column is dropped.
	        The loop stops when no VIF exceeds the threshold or no column is left.

	        Parameters
	        ----------
	        X : pd.DataFrame
	            Feature dataframe with all columns.
	        thresh : float, by default 5.0
	            The threshold over which we decide to drop the chosen column.

	        See Also
	        --------
	        `Inspiration of function <https://stats.stackexchange.com/a/253620/53565>`_

	        Returns
	        -------
	        pd.DataFrame
	            The transformed DataFrame with all high VIF columns dropped.
	        List[str]
	            The list of all dropped columns, in dropping order.
	        """

	        # --- Import ---
	        from statsmodels.stats.outliers_influence import variance_inflation_factor

	        # --- Setting Variables ---
	        to_drop = []

	        # --- Script ---
	        # Every iteration either drops at least one column or breaks, so the
	        # loop always terminates without an explicit iteration counter.
	        while len(X.columns) > 0:
	            values = X.values

	            # Calculate the VIF for each and every column. Positions are used
	            # instead of labels so duplicated column names are handled.
	            vif = np.array(
	                [
	                    variance_inflation_factor(values, position)
	                    for position in range(values.shape[1])
	                ],
	                dtype=float,
	            )

	            # NaN VIFs cannot be compared with the threshold; ignore them.
	            if np.isnan(vif).all():
	                break

	            # Find the highest VIF (possibly inf) and drop its column.
	            worst = int(np.nanargmax(vif))
	            if not vif[worst] > thresh:
	                break

	            col = X.columns[worst]
	            to_drop.append(col)
	            X = X.drop(columns=col, errors="ignore")

	        return X, to_drop

	    def fit(
	        self: Self,
	        X: pd.DataFrame,
	        y: pd.Series = None,
	    ) -> Self:
	        """Fits the object by selecting the high VIF columns to drop.

	        When ``impute`` is True, a SimpleImputer is built from the current
	        ``impute_strategy`` and used to fill the NaN values before the VIF
	        calculation, so parameters changed after construction are honoured.

	        Parameters
	        ----------
	        X : pd.DataFrame
	            Feature dataframe with all columns.
	        y : pd.Series, by default None
	            The target variable. Not used, only added for SKLearn compatibility.

	        Attributes
	        ----------
	        data : pd.DataFrame
	        features_to_drop_ : List[str]
	        _fitted : bool

	        Returns
	        -------
	        Self
	            The fitted object.

	        See Also
	        --------
	        ReduceVIF.calculate_vif
	        """

	        # --- Variables ---
	        self.data = X.copy()
	        columns = X.columns.tolist()

	        # --- Script ---
	        if self.impute:
	            self.imputer = SimpleImputer(strategy=self.impute_strategy)
	            X = pd.DataFrame(
	                self.imputer.fit_transform(X),
	                columns=columns,
	                index=X.index,
	            )

	        _, to_drop = ReduceVIF.calculate_vif(X, self.thresh)
	        self.features_to_drop_ = list(to_drop)
	        self._fitted = True
	        return self

	    def transform(
	        self: Self,
	        X: pd.DataFrame,
	    ) -> pd.DataFrame:
	        """Drops the columns selected during fitting.

	        Only columns are dropped; NaN values are not imputed here. Selected
	        columns that are absent from X are silently ignored.

	        Parameters
	        ----------
	        X : pd.DataFrame
	            Feature dataframe with all columns.

	        Returns
	        -------
	        pd.DataFrame
	            The transformed dataframe.

	        Raises
	        ------
	        NotFittedError
	            If the object has not been fitted
	        """

	        # --- Checks ---
	        if not self._fitted:
	            raise NotFittedError("Model has not been fitted yet")

	        return X.drop(columns=self.features_to_drop_, errors="ignore").copy()
	"""RegexSelector selection and suppression algorithm."""

	# %%
	#############
	# Libraries #
	#############
	import re
	from typing import List, Self

	import pandas as pd
	from sklearn.base import BaseEstimator, TransformerMixin
	from sklearn.exceptions import NotFittedError


	#################
	# RegexSelector #
	#################
	# ? : to remove as unused?
	class RegexSelector(BaseEstimator, TransformerMixin):

	    """RegexSelector selection and suppression algorithm.

	    Object that selects the feature columns from a dataframe based on a
	    regex string and only keeps the relevant columns.

	    Parameters
	    ----------
	    regex : List[Tuple[str, str | None]]

	    Attributes
	    ----------
	    regex : List[Tuple[str, str | None]]
	        A list of lists or tuples with the following pattern:

	        * Regex string to select all relevant features
	        * String to match in the set. Matching are kept. When None, all
	          features selected by the regex string are dropped.

	    features : List[str]
	        List of all features seen during fitting
	    data : pd.DataFrame
	        A copy of the data used during fitting
	    features_to_drop_ : List[str]
	        List of all features to drop, in the order they were selected
	    _fitted : bool
	        Whether the .fit function has been called

	    Example
	    -------
	    List formatting ::

	        regex = [
	          ["^(SIX_Lead_time_|SIX_t_btwn_o_).*", "100"],
	          ["^(Holidays_).*(_Cat_).*", "001"],
	          ["^(Holidays_DC_|Holidays_XPO_).*", "DCA"],
	        ]
	    """

	    def __init__(
	        self: Self,
	        regex: List[tuple[str, str | None]],
	    ):

	        self.regex = regex
	        self._fitted = False
	        self.data = None
	        self.features = None
	        self.features_to_drop_ = []

	    def fit(
	        self: Self,
	        X: pd.DataFrame,
	        y: pd.Series = None,
	    ) -> Self:
	        """Fits the object and sets the needed attributes.

	        For each (pattern, what) pair provided during initialization, the
	        columns whose name matches ``pattern`` are selected. Among those,
	        the columns whose name does not contain ``what`` are marked to be
	        dropped; when ``what`` is None all selected columns are dropped.

	        Parameters
	        ----------
	        X : pd.DataFrame
	            Feature dataframe with all columns.
	        y : pd.Series, by default None
	            The target variable. Not used, only added for SKLearn compatibility.

	        Attributes
	        ----------
	        features : List[str]
	        data : pd.DataFrame
	        features_to_drop_ : List[str]
	        _fitted : bool

	        Returns
	        -------
	        Self
	            The fitted object.
	        """

	        # --- Variables ---
	        features = list(X.columns)
	        self.features = features
	        self.data = X.copy()

	        # --- Script ---
	        # A dict is used as an insertion-ordered set so the result is
	        # de-duplicated with a deterministic order.
	        dropped = {}

	        for pattern, what in self.regex:
	            matcher = re.compile(pattern)

	            # Labels are converted with str() so non-string column labels
	            # do not raise a TypeError.
	            selected = [x for x in features if matcher.search(str(x))]
	            if what is not None:
	                keep = str(what)
	                selected = [x for x in selected if keep not in str(x)]

	            dropped.update(dict.fromkeys(selected))

	        # Saving
	        self.features_to_drop_ = list(dropped)
	        self._fitted = True

	        return self

	    def transform(
	        self: Self,
	        X: pd.DataFrame,
	    ) -> pd.DataFrame:
	        """Transforms the dataframe by removing columns chosen in fit.

	        Drops the columns selected during fitting from the dataframe.
	        Selected columns that are absent from X are silently ignored; the
	        columns of X are not otherwise checked against the fitted data.

	        Parameters
	        ----------
	        X : pd.DataFrame
	            Feature dataframe with all columns.

	        Returns
	        -------
	        pd.DataFrame
	            Transformed dataframe

	        Raises
	        ------
	        NotFittedError
	            If the object has not been fitted
	        """
	        # --- Check ---
	        if not self._fitted:
	            raise NotFittedError("Object has not been fitted")

	        return X.drop(columns=self.features_to_drop_, errors="ignore").copy()