You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
pow/src/pages/clean.py

45 lines
1.4 KiB

import streamlit as st
import sys
sys.path.append('./back/')
import managing_missing_values as mmv
import load_csv as lc
# Page body: render the cleaning UI only when a dataset has been stored in the
# session under ``original_df``; otherwise ask the user to upload one.
if 'original_df' in st.session_state:
    # Start from the stored original; the cleaned result is written to the
    # separate ``df`` session key further down.
    df = st.session_state.original_df

    st.write("# 🧼 Data cleaning")

    # --- Missing-data options -------------------------------------------
    st.write("## Missing data")
    rm_empty_rows_or_cols = st.checkbox("Remove empty rows or columns", value=True)

    st.write("#### Replace missing values")
    replace_methods = ["mean", "median", "mode", "knn", "regression"]
    replace_method = st.radio('Choose an option:', replace_methods)

    # --- Normalization options ------------------------------------------
    st.write("## Normalize data")
    normalize_methods = ["min-max", "z-score", "robust"]
    normalize_method = st.radio('Choose an option:', normalize_methods)

    # The pipeline below runs only on the script run in which the button
    # was clicked.
    is_cleaned = st.button("Clean dataset")
    if is_cleaned:
        # Step 1 (optional): drop rows/columns via the project helper.
        if rm_empty_rows_or_cols:
            st.write("- Removing high null percentage values")
            df = mmv.drop_high_null_percentage(df)
            st.dataframe(df)

        # Step 2: fill the remaining missing values with the chosen method.
        st.write("- Handle missing values with method:", replace_method)
        df = mmv.handle_missing_values(df, replace_method)
        # Stored before normalization as well, so the session keeps this
        # intermediate frame if the next step raises.
        st.session_state.df = df
        st.dataframe(df)

        # Step 3: normalize with the chosen method and store the final frame.
        st.write("- Normalize with method:", normalize_method)
        df = lc.handle_normalization(df, normalize_method)
        st.session_state.df = df
        st.dataframe(df)

        # Hand over to the visualization page.
        # NOTE(review): switch_page leaves this page right away, so the
        # previews rendered above are probably not visible for long —
        # confirm this is intended.
        st.switch_page("pages/visualize.py")
else:
    st.write("Please upload your dataset.")