def csv_robust_normalize(df, column):
    """Robustly scale ``df[column]`` in place using its median and IQR.

    Every value is replaced by ``(value - median) / IQR``, where IQR is the
    difference between the 0.75 and 0.25 quantiles of the column.

    Args:
        df: DataFrame holding the column; it is modified in place.
        column: Label of the column to normalise.

    Returns:
        The normalised column (the same values written to ``df[column]``).
    """
    series = df[column]

    # Median and interquartile range of the column.
    median = series.median()
    iqr = series.quantile(0.75) - series.quantile(0.25)

    # When the 25th and 75th percentiles coincide the IQR is 0 and the
    # division below would fill the column with inf/NaN. Fall back to a
    # unit scale so the data is only centred on the median.
    if iqr == 0:
        iqr = 1.0

    # Apply the robust normalisation and store it back in the frame.
    normalized_column = (series - median) / iqr
    df[column] = normalized_column
    print(normalized_column)
    return normalized_column