Added data normalization

pull/12/head
Aurian JAULT 11 months ago
parent 7d53ad02e8
commit f83f35c498

@ -4,4 +4,9 @@ sys.path.append('./src/back/')
import load_csv as l import load_csv as l
l.csv_value() df = l.return_csv("./data.csv")
l.csv_value(df)
l.csv_value(df)
l.csv_stadadisation_Z(df,"Vehicle Year")

@ -1,20 +1,35 @@
import pandas as pd import pandas as pd
import numpy as np
def return_csv(path, **read_csv_kwargs):
    """Load a CSV file into a pandas DataFrame.

    Args:
        path: Location of the CSV file, passed straight to ``pd.read_csv``.
        **read_csv_kwargs: Optional keyword arguments forwarded unchanged to
            ``pd.read_csv`` (for example ``sep`` or ``usecols``).

    Returns:
        The DataFrame produced by ``pd.read_csv``.
    """
    return pd.read_csv(path, **read_csv_kwargs)
def csv_value(df):
    """Print a structural summary of ``df`` and its per-column missing counts.

    Args:
        df: DataFrame to inspect. It is only read, never modified.
    """
    # Print all details (index range, columns, dtypes, non-null counts).
    df.info()
    # Print the number of missing values for each column.
    print(df.isna().sum())
    # NOTE(review): columns previously flagged as "useless values"; the
    # numbers are presumably missing-value counts from an earlier run of this
    # function on data.csv -- confirm against the current data:
    #   Off-Road Description         -> 156170
    #   Municipality                 -> 152979
    #   Related Non-Motorist         -> 166642
    #   Non-Motorist Substance Abuse -> 167788
    #   Circumstance                 -> 140746


def csv_check(df):
    """Print every column name of ``df`` followed by its distinct values.

    Args:
        df: DataFrame to inspect. It is only read, never modified.
    """
    separator = "-" * 12  # visual delimiter printed around each column name
    for col in df:
        print(separator)
        print(col)
        print(separator)
        print(df[col].unique())
def csv_norm_min_max(df, col):
    """Rescale column ``col`` of ``df`` to the [0, 1] range (min-max scaling).

    Each value ``x`` becomes ``(x - min) / (max - min)``. The column is
    overwritten in place on ``df``.

    Args:
        df: DataFrame holding the column; modified in place.
        col: Label of the numeric column to normalise.

    Returns:
        The same ``df`` object, with ``col`` replaced by its scaled values.
    """
    min_value = df[col].min()
    max_value = df[col].max()
    value_range = max_value - min_value
    # A constant column has a zero range; divide by 1 instead so it maps to
    # 0.0 rather than becoming all-NaN through 0/0.
    scale = value_range if value_range != 0 else 1
    df[col] = (df[col] - min_value) / scale
    return df
def csv_stadadisation_Z(df, col):
    """Standardise column ``col`` of ``df`` to z-scores.

    Each value ``x`` becomes ``(x - mean) / std``, using the column's own
    ``mean()`` and ``std()``. The column is overwritten in place on ``df``.

    Args:
        df: DataFrame holding the column; modified in place.
        col: Label of the numeric column to standardise.

    Returns:
        The standardised column (``df[col]``), not the whole DataFrame.
    """
    mean_value = df[col].mean()
    std_value = df[col].std()
    # A constant column has std == 0 and a single-row column has std NaN;
    # dividing by either would turn the column into NaN, so fall back to 1
    # and let the centred values (all zeros) stand.
    if pd.isna(std_value) or std_value == 0:
        std_value = 1
    df[col] = (df[col] - mean_value) / std_value
    return df[col]

Loading…
Cancel
Save