parent b7bcb629db
commit 84adfb5a96
@@ -1,18 +1,44 @@
 import streamlit as st
 import sys
 sys.path.append('./back/')
 
-st.write("# 🧼 Data cleaning")
+import managing_missing_values as mmv
+import load_csv as lc
 
-st.write("## Missing data")
-rm_empty_rows_or_cols = st.checkbox("Remove empty rows or columns", True)
+if 'original_df' in st.session_state:
+    df = st.session_state.original_df
 
-st.write("#### Replace missing values")
-replace_methods = ["Mean","Median","Mode","KNN","Regression"]
-replace_method = st.radio('Choose an option:', replace_methods)
+    st.write("# 🧼 Data cleaning")
 
-st.write("## Normalize data")
-normalize_methods = ["Min-Max","Z-Score","Another One"]
-normalize_method = st.radio('Choose an option:', normalize_methods)
+    st.write("## Missing data")
+    rm_empty_rows_or_cols = st.checkbox("Remove empty rows or columns", True)
 
-if st.button("Clean dataset"):
-    # TODO: Actual processing
-    st.write("TODO")
+    st.write("#### Replace missing values")
+    replace_methods = ["mean","median","mode","knn","regression"]
+    replace_method = st.radio('Choose an option:', replace_methods)
+
+    st.write("## Normalize data")
+    normalize_methods = ["min-max","z-score","robust"]
+    normalize_method = st.radio('Choose an option:', normalize_methods)
+
+    is_cleaned = st.button("Clean dataset")
+    if is_cleaned:
+        if rm_empty_rows_or_cols:
+            st.write("- Removing high null percentage values")
+            df = mmv.drop_high_null_percentage(df)
+            st.dataframe(df)
+
+        st.write("- Handle missing values with method:", replace_method)
+        df = mmv.handle_missing_values(df, replace_method)
+        st.session_state.df = df
+        st.dataframe(df)
+
+        st.write("- Normalize with method:", normalize_method)
+        df = lc.handle_normalization(df, normalize_method)
+        st.session_state.df = df
+        st.dataframe(df)
+
+        st.switch_page("pages/visualize.py")
+else:
+    st.write("Please upload your dataset.")
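Note: the back/ helpers called above (managing_missing_values.drop_high_null_percentage, managing_missing_values.handle_missing_values, load_csv.handle_normalization) are not part of this diff. A minimal sketch of what they could look like, assuming pandas/scikit-learn and the lowercase method names used by the page; the actual implementations in back/ may differ:

# Hypothetical sketch of the back/ helpers used by the cleaning page.
# Assumes pandas and scikit-learn; not taken from this commit.
import pandas as pd
from sklearn.impute import KNNImputer

def drop_high_null_percentage(df: pd.DataFrame, threshold: float = 0.5) -> pd.DataFrame:
    # Drop columns whose share of missing values exceeds the threshold,
    # then drop rows that are entirely empty.
    keep = df.columns[df.isna().mean() <= threshold]
    return df[keep].dropna(how="all")

def handle_missing_values(df: pd.DataFrame, method: str) -> pd.DataFrame:
    numeric = df.select_dtypes("number").columns
    df = df.copy()
    if method == "mean":
        df[numeric] = df[numeric].fillna(df[numeric].mean())
    elif method == "median":
        df[numeric] = df[numeric].fillna(df[numeric].median())
    elif method == "mode":
        df = df.fillna(df.mode().iloc[0])
    elif method == "knn":
        df[numeric] = KNNImputer(n_neighbors=5).fit_transform(df[numeric])
    # The page's "regression" option would need its own imputation branch.
    return df

def handle_normalization(df: pd.DataFrame, method: str) -> pd.DataFrame:
    numeric = df.select_dtypes("number").columns
    df = df.copy()
    if method == "min-max":
        df[numeric] = (df[numeric] - df[numeric].min()) / (df[numeric].max() - df[numeric].min())
    elif method == "z-score":
        df[numeric] = (df[numeric] - df[numeric].mean()) / df[numeric].std()
    elif method == "robust":
        iqr = df[numeric].quantile(0.75) - df[numeric].quantile(0.25)
        df[numeric] = (df[numeric] - df[numeric].median()) / iqr
    return df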
@@ -0,0 +1,41 @@
+import streamlit as st
+import pandas as pd
+import sys
+sys.path.append('./back/')
+
+import clustering_csv as cc
+import prediction as p
+
+if 'df' in st.session_state:
+
+    df = st.session_state.df
+    df_cols = df.columns.tolist()
+
+    st.write("# 🔮 Prediction")
+
+    if st.button("K-means"):
+        st.pyplot(cc.launch_cluster_knn(df, ["Route Type", "Traffic Control"]))
+
+    if st.button("DBSCAN"):
+        st.pyplot(cc.launch_cluster_dbscan(df, ["Route Type", "Traffic Control"]))
+
+    if st.button("Linear Regression"):
+        col = "Route Type"
+        df_cols.remove(col)
+        original_col = df[col]
+        predicted_col = p.getColumnsForPredictionAndPredict(df, df_cols, "Route Type", "Linear Regression")
+
+    if st.button("Random Forest"):
+        col = "Route Type"
+        df_cols.remove(col)
+        original_col = df[col]
+        predicted_col = p.getColumnsForPredictionAndPredict(df, df_cols, "Route Type", "Random Forest")
+
+        ndf = pd.DataFrame()
+        ndf['Original'] = original_col
+        ndf['Predicted'] = predicted_col
+
+        st.dataframe(ndf)
+
+else:
+    st.write("Please clean your dataset.")
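Note: clustering_csv and prediction also live in back/ and are outside this diff. The sketch below is inferred only from the call sites above (a figure returned for st.pyplot, and feature columns plus a target column and model name for prediction); everything beyond those call sites is an assumption. launch_cluster_dbscan would follow the same pattern as launch_cluster_knn, using sklearn's DBSCAN.

# Hypothetical sketches of the back/ helpers used by the prediction page.
# Names match the call sites above; bodies are assumptions, not part of this commit.
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression

def launch_cluster_knn(df: pd.DataFrame, cols, n_clusters: int = 3):
    # Cluster two numeric columns with k-means and return a figure for st.pyplot().
    data = df[cols].dropna()
    labels = KMeans(n_clusters=n_clusters, n_init=10).fit_predict(data)
    fig, ax = plt.subplots()
    ax.scatter(data[cols[0]], data[cols[1]], c=labels)
    ax.set_xlabel(cols[0])
    ax.set_ylabel(cols[1])
    return fig

def getColumnsForPredictionAndPredict(df, feature_cols, target_col, model_name):
    # Fit the chosen model on the numeric features and predict the target column
    # for the same rows, so the page can compare original vs predicted values.
    X = df[feature_cols].select_dtypes("number").fillna(0)
    y = df[target_col]
    model = LinearRegression() if model_name == "Linear Regression" else RandomForestRegressor()
    model.fit(X, y)
    return pd.Series(model.predict(X), index=X.index)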
@@ -1,23 +1,32 @@
 import streamlit as st
 import matplotlib.pyplot as plt
 
-df = st.session_state.orig_df
-df_columns = df.columns.tolist()
+import sys
+sys.path.append('./back/')
 
-st.write("# 📊 Visualization")
+import show_csv as sc
 
-st.write("## Histograms")
-hist_tabs = st.tabs(df_columns)
+if 'df' in st.session_state:
 
-for idx, tab in enumerate(hist_tabs):
-    tab.write("##### "+df_columns[idx])
-    tab.bar_chart(df[df_columns[idx]])
+    df = st.session_state.df
+    df_columns = df.columns.tolist()
 
-st.write("## Box & Whisker")
-baw_tabs = st.tabs(df_columns)
+    st.write("# 📊 Visualization")
 
-for idx, tab in enumerate(baw_tabs):
-    tab.write("##### "+df_columns[idx])
-    fig, ax = plt.subplots()
-    df[df_columns[idx]].plot(kind='box')
-    st.pyplot(fig)
+    st.write("## Histograms")
+    hist_tabs = st.tabs(df_columns)
+
+    for idx, tab in enumerate(hist_tabs):
+        tab.write("##### "+df_columns[idx])
+        tab.pyplot(sc.histo_col(df, df_columns[idx]))
+
+    st.write("## Box & Whisker")
+    baw_tabs = st.tabs(df_columns)
+
+    for idx, tab in enumerate(baw_tabs):
+        tab.write("##### "+df_columns[idx])
+        fig, ax = plt.subplots()
+        df[df_columns[idx]].plot(kind='box')
+        tab.pyplot(fig)
+else:
+    st.write('Please clean your dataset.')
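Note: show_csv.histo_col is likewise not shown in this diff; from tab.pyplot(sc.histo_col(df, df_columns[idx])) it presumably returns a matplotlib figure containing a histogram of one column. A minimal sketch under that assumption:

# Hypothetical sketch of back/show_csv.histo_col, inferred from its call site above.
import matplotlib.pyplot as plt
import pandas as pd

def histo_col(df: pd.DataFrame, col: str):
    # Build a histogram of one column and return the figure for st.pyplot()/tab.pyplot().
    fig, ax = plt.subplots()
    ax.hist(df[col].dropna(), bins=20)
    ax.set_title(col)
    ax.set_xlabel(col)
    ax.set_ylabel("Count")
    return fig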