From 1a862c1ed66e48e622c88e5d3cca9dd5a7876ea5 Mon Sep 17 00:00:00 2001 From: rem Date: Mon, 24 Jun 2024 23:45:52 +0200 Subject: [PATCH] fix robust normalize --- src/back/load_csv.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/back/load_csv.py b/src/back/load_csv.py index b1ce91a..8b05ea2 100644 --- a/src/back/load_csv.py +++ b/src/back/load_csv.py @@ -1,6 +1,7 @@ import pandas as pd import numpy as np import matplotlib.pyplot as plt +from sklearn.preprocessing import RobustScaler def return_csv(path): df = pd.read_csv(path) @@ -32,17 +33,16 @@ def csv_standardisation_Z(df, col): df[col] = (df[col] - mean_col1) / std_col1 return df[col] -def csv_robust_normalize(df, col): - # Calcul de la médiane et de l'IQR - median = df[col].median() - q1 = df[col].quantile(0.25) - q3 = df[col].quantile(0.75) - iqr = q3 - q1 - - # Application de la normalisation robuste - normalized_column = (df[col] - median) / iqr - df[col] = normalized_column - return normalized_column +def robust_normalize_column(df, column_name): + # Extract the column data + column_data = df[column_name].values.reshape(-1, 1) + + # Fit and transform the column data + scaler = RobustScaler() + normalized_data = scaler.fit_transform(column_data) + df[column_name] = normalized_data + + return normalized_data def handle_normalization(df, norm_method): for col_name in df: @@ -51,7 +51,7 @@ def handle_normalization(df, norm_method): elif norm_method == "z-score": df[col_name] = csv_standardisation_Z(df, col_name) elif norm_method == "robust": - df[col_name] = csv_robust_normalize(df, col_name) + df[col_name] = robust_normalize_column(df, col_name) else: raise ValueError("Unknown method") return df