"""Strategies for handling missing values in a dataframe column."""

from abc import ABC, abstractmethod
from typing import Any, Union

from pandas import DataFrame, Series
from pandas.api.types import is_numeric_dtype


class MVStrategy(ABC):
    """A way to handle missing values in a dataframe."""

    @abstractmethod
    def apply(self, df: DataFrame, label: str, series: Series) -> DataFrame:
        """Apply the current strategy to the given series.

        The series is described by its label and dataframe.

        Args:
            df: Dataframe that holds the column.
            label: Label of the column inside ``df``.
            series: Values of the column.

        Returns:
            The dataframe once the strategy has been applied.
        """
        return df

    @staticmethod
    def list_available(series: Series) -> list['MVStrategy']:
        """Get all the strategies that can be used.

        Args:
            series: Column the strategies would be applied to.

        Returns:
            Drop and mode strategies for every series, followed by mean and
            median strategies when the series has a numeric dtype.
        """
        choices = [DropStrategy(), ModeStrategy()]
        if is_numeric_dtype(series):
            choices.extend((MeanStrategy(), MedianStrategy()))
        return choices


class DropStrategy(MVStrategy):
    """Remove every row whose value in the column is missing."""

    def apply(self, df: DataFrame, label: str, series: Series) -> DataFrame:
        """Drop, in place, the rows of ``df`` where column ``label`` is missing.

        Args:
            df: Dataframe to modify in place.
            label: Label of the column to inspect.
            series: Unused; kept for interface compatibility.

        Returns:
            The same ``df`` object, without the dropped rows.
        """
        # A one-element list is accepted by every pandas version, whereas a
        # bare label is only supported by recent ones.
        df.dropna(subset=[label], inplace=True)
        return df

    def __str__(self) -> str:
        return "Drop"


class PositionStrategy(MVStrategy):
    """Replace missing values with a single value computed from the series."""

    def apply(self, df: DataFrame, label: str, series: Series) -> DataFrame:
        """Fill the missing values of column ``label`` with ``get_value(series)``.

        Args:
            df: Dataframe to modify in place.
            label: Label of the column to fill.
            series: Values the replacement is computed from and filled into.

        Returns:
            The same ``df`` object, with column ``label`` replaced by the
            filled series. ``df`` is returned untouched when ``get_value``
            yields ``None`` (no replacement value could be determined).
        """
        value = self.get_value(series)
        if value is None:
            return df
        # Write the result back through the dataframe: filling ``series`` in
        # place does not reach ``df`` when the series is a copy of the column
        # (always the case with pandas Copy-on-Write).
        df[label] = series.fillna(value)
        return df

    @abstractmethod
    def get_value(self, series: Series) -> Any:
        """Return the value used to replace missing entries of ``series``.

        Returning ``None`` means that no replacement value is available.
        """


class MeanStrategy(PositionStrategy):
    """Replace missing values with the mean of the series."""

    def get_value(self, series: Series) -> Union[int, float]:
        """Return the mean of ``series``."""
        return series.mean()

    def __str__(self) -> str:
        return "Use mean"


class MedianStrategy(PositionStrategy):
    """Replace missing values with the median of the series."""

    def get_value(self, series: Series) -> Union[int, float]:
        """Return the median of ``series``."""
        return series.median()

    def __str__(self) -> str:
        return "Use median"


class ModeStrategy(PositionStrategy):
    """Replace missing values with the mode of the series."""

    def get_value(self, series: Series) -> Any:
        """Return the first mode of ``series``.

        Returns:
            The first value reported by ``Series.mode``, or ``None`` when it
            reports no mode at all (e.g. every value is missing).
        """
        modes = series.mode()
        if modes.empty:
            return None
        return modes.iloc[0]

    def __str__(self) -> str:
        return "Use mode"