Limit the number of neighbors based on the dataframe

knn
Clément FRÉVILLE 10 months ago
parent cd0c85ea44
commit 06adc742eb

@ -130,6 +130,10 @@ class KNNStrategy(MVStrategy):
df.fillna(usable_data, inplace=True) df.fillna(usable_data, inplace=True)
return df return df
def count_max(self, df: DataFrame, label: str) -> int:
    """Return how many usable, non-null values of ``label`` exist in ``df``.

    A row is considered usable when none of the columns listed in
    ``self.training_features`` is missing. Among those rows, the non-null
    entries of the ``label`` column are counted.

    Args:
        df: The dataframe to inspect; it is not modified.
        label: Name of the column whose non-null values are counted.

    Returns:
        The count as a built-in ``int``.
    """
    # Drop every row that lacks at least one of the training features.
    usable_data = df.dropna(subset=self.training_features)
    # Series.count() yields a NumPy integer; convert it so the annotated
    # return type actually holds for callers.
    return int(usable_data[label].count())
def __str__(self) -> str: def __str__(self) -> str:
return "kNN" return "kNN"

@ -16,9 +16,8 @@ if "data" in st.session_state:
key=f"mv-{column}", key=f"mv-{column}",
) )
if isinstance(option, KNNStrategy): if isinstance(option, KNNStrategy):
print(option.available_features)
option.training_features = st.multiselect("Training columns", option.training_features, default=option.available_features, key=f"cols-{column}") option.training_features = st.multiselect("Training columns", option.training_features, default=option.available_features, key=f"cols-{column}")
option.n_neighbors = st.number_input("Number of neighbors", min_value=1, value=option.n_neighbors, key=f"neighbors-{column}") option.n_neighbors = st.number_input("Number of neighbors", min_value=1, max_value=option.count_max(data, column), value=option.n_neighbors, key=f"neighbors-{column}")
# Always re-get the series to avoid reusing an invalidated series pointer # Always re-get the series to avoid reusing an invalidated series pointer
data = option.apply(data, column, data[column]) data = option.apply(data, column, data[column])

Loading…
Cancel
Save