|
|
|
@ -1,6 +1,7 @@
|
|
|
|
|
import pandas as pd
|
|
|
|
|
import numpy as np
|
|
|
|
|
import matplotlib.pyplot as plt
|
|
|
|
|
from sklearn.preprocessing import RobustScaler
|
|
|
|
|
|
|
|
|
|
def return_csv(path):
|
|
|
|
|
df = pd.read_csv(path)
|
|
|
|
@ -32,17 +33,16 @@ def csv_standardisation_Z(df, col):
|
|
|
|
|
df[col] = (df[col] - mean_col1) / std_col1
|
|
|
|
|
return df[col]
|
|
|
|
|
|
|
|
|
|
# Robust-normalize df[col] as (value - median) / IQR, write the result back
# into df[col], and return the normalized column.
# NOTE(review): the lines of csv_robust_normalize and robust_normalize_column
# are interleaved in this span -- this looks like a rendered diff whose +/-
# markers were lost; confirm which definition is the current one.
def csv_robust_normalize(df, col):
|
|
|
|
|
# Compute the median and the interquartile range (IQR = Q3 - Q1)
|
|
|
|
|
median = df[col].median()
|
|
|
|
|
q1 = df[col].quantile(0.25)
|
|
|
|
|
q3 = df[col].quantile(0.75)
|
|
|
|
|
iqr = q3 - q1
|
|
|
|
|
# Scale df[column_name] with a RobustScaler, write the scaled values back into
# df[column_name], and return the scaled array.
def robust_normalize_column(df, column_name):
|
|
|
|
|
# Extract the column data as a 2-D array with a single column (reshape(-1, 1))
|
|
|
|
|
column_data = df[column_name].values.reshape(-1, 1)
|
|
|
|
|
|
|
|
|
|
# Apply the robust normalization (belongs to csv_robust_normalize)
# NOTE(review): iqr is not checked for zero before this division.
|
|
|
|
|
normalized_column = (df[col] - median) / iqr
|
|
|
|
|
df[col] = normalized_column
|
|
|
|
|
return normalized_column
|
|
|
|
|
# Fit the scaler on the column data and transform it in one call
|
|
|
|
|
scaler = RobustScaler()
|
|
|
|
|
normalized_data = scaler.fit_transform(column_data)
|
|
|
|
|
# The caller's DataFrame is modified in place here
df[column_name] = normalized_data
|
|
|
|
|
|
|
|
|
|
# NOTE(review): presumably still shaped (n, 1) like column_data -- confirm
# that callers expect a 2-D array rather than a 1-D column.
return normalized_data
|
|
|
|
|
|
|
|
|
|
# Apply the normalizer selected by norm_method to every column of df and
# return df; an unrecognized norm_method raises ValueError("Unknown method").
def handle_normalization(df, norm_method):
|
|
|
|
|
# Iterating a DataFrame yields its column labels
for col_name in df:
|
|
|
|
# NOTE(review): the diff hunk boundary below hides the branch(es) that
# precede the first visible `elif`.
@ -51,7 +51,7 @@ def handle_normalization(df, norm_method):
|
|
|
|
|
elif norm_method == "z-score":
|
|
|
|
|
df[col_name] = csv_standardisation_Z(df, col_name)
|
|
|
|
|
elif norm_method == "robust":
|
|
|
|
|
# NOTE(review): the next two assignments look like the old and new versions
# of the same line from a diff; if both ran, the second would re-normalize
# and overwrite the result of the first -- confirm only one is live.
df[col_name] = csv_robust_normalize(df, col_name)
|
|
|
|
|
df[col_name] = robust_normalize_column(df, col_name)
|
|
|
|
|
else:
|
|
|
|
|
raise ValueError("Unknown method")
|
|
|
|
|
return df
|
|
|
|
|