From f83f35c498fcc7f8e58368d050b349b914625197 Mon Sep 17 00:00:00 2001 From: "aurian.jault" Date: Fri, 7 Jun 2024 10:56:19 +0200 Subject: [PATCH] added normalizing datas --- main.py | 7 ++++++- src/back/load_csv.py | 41 ++++++++++++++++++++++++++++------------- 2 files changed, 34 insertions(+), 14 deletions(-) diff --git a/main.py b/main.py index 1f987b5..8a8212c 100755 --- a/main.py +++ b/main.py @@ -4,4 +4,9 @@ sys.path.append('./src/back/') import load_csv as l -l.csv_value() +df = l.return_csv("./data.csv") +l.csv_value(df) + +l.csv_value(df) + +l.csv_stadadisation_Z(df,"Vehicle Year") diff --git a/src/back/load_csv.py b/src/back/load_csv.py index 03db63a..4438e2b 100644 --- a/src/back/load_csv.py +++ b/src/back/load_csv.py @@ -1,20 +1,35 @@ import pandas as pd +import numpy as np +def return_csv(path): + df = pd.read_csv(path) + return df -def csv_value(): - df = pd.read_csv('./data.csv') - # print(df.head()) - +def csv_value(df): #print all detail - # df.info() - + df.info() # Print number of missing value for each column - # print(df.isna().sum()) - + print(df.isna().sum()) # Useless values - # Off-Road Description -> 156170 - # Municipality -> 152979 - # Related Non-Motorist -> 166642 - # Non-Motorist Substance Abuse -> 167788 - # Circumstance -> 140746 + + +def csv_check(df): + for col in df: + print("-"*12) + print(col) + print("-"*12) + print(df[col].unique()) + + +def csv_norm_min_max(df,col): + maValue = df[col].max + miValue = df[col].min + df[col] = (df[col] - df[col].min()) / (df[col].max() - df[col].min()) return df + +def csv_stadadisation_Z(df,col): + mean_col1 = df[col].mean() + std_col1 = df[col].std() + df[col] = (df[col] - mean_col1) / std_col1 + return df[col] + -- 2.36.3