Merge remote-tracking branch 'origin' into streamlit

pull/21/head
remrem 10 months ago
commit 9f54e72183

@ -33,14 +33,23 @@ def impute_with_knn(data, n_neighbors=5):
return pd.DataFrame(imputer.fit_transform(data), columns=data.columns) return pd.DataFrame(imputer.fit_transform(data), columns=data.columns)
def impute_with_regression(data):
    """Fill missing values column by column using linear regression.

    For each column that contains nulls, a ``LinearRegression`` model is
    fitted on the rows where that column is present, using only the columns
    that have no missing values at all as predictors. The model's
    predictions are then written into the rows where the column is null.

    A column is left untouched when:

    * it has no non-null rows to train on,
    * more than half of the rows in ``data`` are missing for that column, or
    * there is no fully populated column to use as a predictor.

    Args:
        data: DataFrame to impute. It is modified in place.

    Returns:
        The same ``data`` object, with the imputable columns filled.
    """
    missing_columns = data.columns[data.isnull().any()].tolist()

    # Predictors are the columns without any nulls; they are the same for
    # every target column, so work them out once.
    predictor_columns = data.columns.difference(missing_columns, sort=False)
    if not missing_columns or predictor_columns.empty:
        # Nothing to impute, or nothing to regress on (a model cannot be
        # fitted on a feature matrix with zero columns).
        return data

    for col in missing_columns:
        is_missing = data[col].isnull()
        missing_data = data[is_missing]
        complete_data = data[~is_missing]
        if missing_data.empty or complete_data.empty:
            continue
        # Too little observed data to trust a regression for this column.
        if missing_data.shape[0] > 0.5 * data.shape[0]:
            continue

        model = LinearRegression()
        model.fit(complete_data[predictor_columns], complete_data[col])
        predictions = model.predict(missing_data[predictor_columns])
        # Write back through ``data`` (the original referenced an undefined
        # ``df`` here). Predictions are in the row order of ``missing_data``,
        # which matches the order selected by ``is_missing``.
        data.loc[is_missing, col] = predictions
    return data
@ -63,5 +72,4 @@ def handle_missing_values(data, method, n_neighbors=5):
elif method == 'regression': elif method == 'regression':
return impute_with_regression(data) return impute_with_regression(data)
else: else:
raise ValueError("Unknown method") raise ValueError("Unknown method")
Loading…
Cancel
Save