added normalizing datas #12
Merged
aurian.jault
merged 1 commits from data_treatment
into master
11 months ago
@ -1,20 +1,35 @@
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
|
||||
def return_csv(path):
    """Load the CSV found at ``path`` and hand it back as a pandas DataFrame.

    Args:
        path: Anything ``pd.read_csv`` accepts as its first argument.

    Returns:
        The DataFrame produced by ``pd.read_csv(path)``.
    """
    return pd.read_csv(path)
|
||||
|
||||
def csv_value(df):
    """Print a structural summary of ``df`` and its missing values per column.

    Args:
        df: DataFrame to inspect.

    Returns:
        None. The summary and the per-column missing-value counts are
        printed.
    """
    # Print all details about the frame (what ``DataFrame.info`` reports).
    df.info()
    # Print the number of missing values for each column.
    print(df.isna().sum())
    # Useless values -- presumably the missing-value counts observed on the
    # project's data set for columns judged too sparse to use (TODO confirm):
    #   Off-Road Description         -> 156170
    #   Municipality                 -> 152979
    #   Related Non-Motorist         -> 166642
    #   Non-Motorist Substance Abuse -> 167788
    #   Circumstance                 -> 140746
|
||||
|
||||
|
||||
def csv_check(df):
    """Print each column label of ``df`` framed by dashed rules, then its unique values.

    Args:
        df: DataFrame whose columns are listed.

    Returns:
        None. Output goes through ``print``.
    """
    rule = "-" * 12
    for name in df.columns:
        # One call emits the rule, the label and the rule on separate lines.
        print(rule, name, rule, sep="\n")
        print(df[name].unique())
|
||||
|
||||
|
||||
def csv_norm_min_max(df, col):
    """Rescale column ``col`` of ``df`` with min-max normalization.

    The column is overwritten in place with ``(x - min) / (max - min)``, so
    its smallest value becomes 0 and its largest becomes 1.

    Args:
        df: DataFrame holding the column; it is modified in place.
        col: Label of the column to normalize.

    Returns:
        The same ``df`` object, with ``col`` normalized.
    """
    # Compute each bound once instead of re-scanning the column per use.
    low = df[col].min()
    high = df[col].max()
    span = high - low
    if span == 0:
        # Constant column: 0 / 0 would turn every value into NaN. Dividing by
        # 1 instead maps the whole column to 0 and keeps existing NaNs as NaN.
        span = 1
    df[col] = (df[col] - low) / span
    return df
|
||||
|
||||
def csv_stadadisation_Z(df, col):
    """Standardize column ``col`` of ``df`` to z-scores.

    The column is overwritten in place with ``(x - mean) / std``, where
    ``mean`` and ``std`` come from ``Series.mean()`` and ``Series.std()``.

    Args:
        df: DataFrame holding the column; it is modified in place.
        col: Label of the column to standardize.

    Returns:
        The standardized column (``df[col]``), not the whole DataFrame.
    """
    center = df[col].mean()
    spread = df[col].std()
    if spread == 0:
        # Constant column: dividing by a zero standard deviation would turn
        # every value into NaN. Dividing by 1 instead maps the column to 0.
        spread = 1
    df[col] = (df[col] - center) / spread
    return df[col]
|
||||
|
||||
|
Loading…
Reference in new issue