diff --git a/frontend/normstrategy.py b/frontend/normstrategy.py
index af4dde3..5a50bab 100644
--- a/frontend/normstrategy.py
+++ b/frontend/normstrategy.py
@@ -130,6 +130,18 @@ class KNNStrategy(MVStrategy):
         df.fillna(usable_data, inplace=True)
         return df
 
+    def count_max(self, df: DataFrame, label: str) -> int:
+        """Return the upper bound offered for the "Number of neighbors" input.
+
+        Rows with a missing value in any of ``self.training_features`` are
+        dropped first; the result is the number of remaining rows whose
+        ``label`` value is not missing.
+        """
+        usable_data = df.dropna(subset=self.training_features)
+        # Series.count() may yield a NumPy integer; convert it so the
+        # annotated return type actually holds.
+        return int(usable_data[label].count())
+
     def __str__(self) -> str:
         return "kNN"
 
diff --git a/frontend/pages/normalization.py b/frontend/pages/normalization.py
index b543f87..34de383 100644
--- a/frontend/pages/normalization.py
+++ b/frontend/pages/normalization.py
@@ -16,9 +16,11 @@ if "data" in st.session_state:
             key=f"mv-{column}",
         )
         if isinstance(option, KNNStrategy):
-            print(option.available_features)
             option.training_features = st.multiselect("Training columns", option.training_features, default=option.available_features, key=f"cols-{column}")
-            option.n_neighbors = st.number_input("Number of neighbors", min_value=1, value=option.n_neighbors, key=f"neighbors-{column}")
+            # number_input rejects a value outside [min_value, max_value], so keep
+            # the upper bound at least 1 and clamp the current setting into range.
+            max_neighbors = max(1, option.count_max(data, column))
+            option.n_neighbors = st.number_input("Number of neighbors", min_value=1, max_value=max_neighbors, value=min(option.n_neighbors, max_neighbors), key=f"neighbors-{column}")
 
         # Always re-get the series to avoid reusing an invalidated series pointer
         data = option.apply(data, column, data[column])