fix robust normalize

12 months ago · 1a862c1ed6
parent 0910dfae21
commit 1a862c1ed6
1 changed files with 12 additions and 12 deletions
--- a/src/back/load_csv.py
+++ b/src/back/load_csv.py
@ -1,6 +1,7 @@
 import pandas as pd
 import numpy  as np
 import matplotlib.pyplot as plt
+from sklearn.preprocessing import RobustScaler

 def return_csv(path):
    df = pd.read_csv(path)
@ -32,17 +33,16 @@ def csv_standardisation_Z(df, col):
    df[col] = (df[col] - mean_col1) / std_col1
    return df[col]

-def csv_robust_normalize(df, col):
-    # Calcul de la médiane et de l'IQR
-    median = df[col].median()
-    q1 = df[col].quantile(0.25)
-    q3 = df[col].quantile(0.75)
-    iqr = q3 - q1
+def robust_normalize_column(df, column_name):
+    # Extract the column values as a 2-D (n, 1) array for the scaler
+    column_data = df[column_name].values.reshape(-1, 1)
    
-    # Application de la normalisation robuste
-    normalized_column = (df[col] - median) / iqr
-    df[col] = normalized_column
-    return normalized_column
+    # Fit the scaler on the column data and transform it
+    scaler = RobustScaler()
+    normalized_data = scaler.fit_transform(column_data)
+    df[column_name] = normalized_data
+    
+    return normalized_data

 def handle_normalization(df, norm_method):
    for col_name in df:
@ -51,7 +51,7 @@ def handle_normalization(df, norm_method):
        elif norm_method == "z-score":
            df[col_name] = csv_standardisation_Z(df, col_name)
        elif norm_method == "robust":
-            df[col_name] = csv_robust_normalize(df, col_name)
+            df[col_name] = robust_normalize_column(df, col_name)
        else:
            raise ValueError("Unknown method")
    return df