|
|
|
@ -1,6 +1,8 @@
|
|
|
|
|
import streamlit as st
|
|
|
|
|
import pandas as pd
|
|
|
|
|
import sys
|
|
|
|
|
import matplotlib.pyplot as plt
|
|
|
|
|
import numpy as np
|
|
|
|
|
sys.path.append('./back/')
|
|
|
|
|
|
|
|
|
|
import clustering_csv as cc
|
|
|
|
@ -10,15 +12,15 @@ def handle_column_multiselect(df, method_name):
|
|
|
|
|
selected_columns = st.multiselect(f"Select the columns you want for {method_name}:", df.columns.tolist(), placeholder="Select dataset columns")
|
|
|
|
|
return selected_columns
|
|
|
|
|
|
|
|
|
|
def df_prediction_results(df, targetCol, sourceColumns, method):
    """Build a table comparing the actual target values with predictions.

    Args:
        df: DataFrame holding both the source columns and the target column.
        targetCol: Name of the column in ``df`` to predict.
        sourceColumns: Column names handed to the predictor as inputs.
        method: Predictor name forwarded unchanged to
            ``p.getColumnsForPredictionAndPredict`` (callers on this page
            pass the label of the selected tab).

    Returns:
        A new DataFrame with two columns: ``'Original'`` (``df[targetCol]``)
        and ``'Predicted'`` (whatever the predictor returned).

    Raises:
        KeyError: If ``targetCol`` is not a column of ``df``.
    """
    # Look the target up first so a bad column name fails before any
    # prediction work is started.
    original_col = df[targetCol]

    # Run the predictor exactly once; the call may be expensive.
    predicted_col = p.getColumnsForPredictionAndPredict(df, sourceColumns, targetCol, method)

    new_df = pd.DataFrame()
    new_df['Original'] = original_col
    new_df['Predicted'] = predicted_col

    return new_df


def display_prediction_results(df, targetCol, sourceColumns, method):
    """Render the prediction comparison table in the Streamlit page.

    Kept for call sites that still use the older name. Takes the same
    arguments as ``df_prediction_results``, shows the resulting table with
    ``st.dataframe`` and also returns it.
    """
    new_df = df_prediction_results(df, targetCol, sourceColumns, method)
    st.dataframe(new_df)
    return new_df
|
|
|
|
|
|
|
|
|
|
if 'df' in st.session_state:
|
|
|
|
|
df = st.session_state.df
|
|
|
|
@ -37,15 +39,16 @@ if 'df' in st.session_state:
|
|
|
|
|
# Number of dimensions passed to both clustering launchers.
dimensions = 2

# One tab per clustering method; the tab label doubles as the method name.
tab_names = ["K-means", "DBSCAN"]
cluster_tabs = st.tabs(tab_names)

# Create the tabs and their "Start" buttons exactly once. Building them both
# through per-tab `with` blocks and through this loop would register every
# button twice.
for tab_name, tab in zip(tab_names, cluster_tabs):
    if tab.button(f"Start {tab_name}"):
        if tab_name == "K-means":
            fig = cc.launch_cluster_knn(df, selected_columns, dimensions=dimensions)
        else:
            fig = cc.launch_cluster_dbscan(df, selected_columns, dimensions)

        # Draw inside the tab whose button was pressed, and only once a
        # figure has actually been produced.
        tab.pyplot(fig)
|
|
|
|
|
|
|
|
|
|
with tab2:
|
|
|
|
|
st.header("Predictions")
|
|
|
|
@ -60,16 +63,12 @@ if 'df' in st.session_state:
|
|
|
|
|
# Columns the user picked as predictor inputs.
selected_columns_p = handle_column_multiselect(df, "predictions")

# One tab per prediction method; the tab label is forwarded as the method name.
tab_names = ["Linear Regression", "Random Forest"]
prediction_tabs = st.tabs(tab_names)

# Create the tabs and their "Start" buttons exactly once. Building them both
# through per-tab `with` blocks and through this loop would register every
# button twice.
for tab_name, tab in zip(tab_names, prediction_tabs):
    if tab.button(f"Start {tab_name}"):
        # Correlation matrix over the chosen inputs plus the target column.
        tab.pyplot(p.correlation_matrix(df, selected_columns_p + [target_column]))

        # Original-vs-predicted table, rendered inside the pressed tab.
        tmp_df = df_prediction_results(df, target_column, selected_columns_p, tab_name)
        tab.dataframe(tmp_df)
|
|
|
|
|
else:
|
|
|
|
|
st.write("Please clean your dataset.")
|
|
|
|
|