# pow/src/pages/prediction.py

import streamlit as st
import pandas as pd
import sys
sys.path.append('./back/')

import clustering_csv as cc
import prediction as p

def handle_column_multiselect(df, method_name):
    """Render a column multiselect for ``df`` and return the current selection.

    Args:
        df: Object whose ``columns.tolist()`` supplies the selectable options.
        method_name: Text interpolated into the widget label.

    Returns:
        The value returned by ``st.multiselect`` for this run.
    """
    label = f"Select the columns you want for {method_name}:"
    available = df.columns.tolist()
    return st.multiselect(label, available, placeholder="Select dataset columns")

def display_prediction_results(df, targetCol, sourceColumns, method):
    """Show the target column next to its predicted values in a table.

    Args:
        df: Dataset indexed by ``targetCol`` and handed to the predictor.
        targetCol: Name of the column whose values are shown as 'Original'.
        sourceColumns: Columns forwarded to the predictor as its inputs.
        method: Method identifier forwarded to the predictor unchanged.
    """
    # Read the target values before the predictor runs, so the 'Original'
    # column is taken from ``df`` as it was on entry.
    actual = df[targetCol]
    predicted = p.getColumnsForPredictionAndPredict(
        df, sourceColumns, targetCol, method
    )

    # ``assign`` inserts the columns one after another in keyword order, which
    # is the same as assigning them in sequence on an empty frame.
    comparison = pd.DataFrame().assign(Original=actual, Predicted=predicted)
    st.dataframe(comparison)

# Page body: rendered only when a dataset is present in the session state;
# otherwise the user is asked to prepare one first.
if 'df' in st.session_state:
    df = st.session_state.df

    st.write("# 🔮 Prediction")

    tab1, tab2 = st.tabs(["Clustering", "Predictions"])

    with tab1:
        st.header("Clustering")
        selected_columns = handle_column_multiselect(df, "clustering")

        tab_names = ["K-means", "DBSCAN"]
        tab11, tab12 = st.tabs(tab_names)

        with tab11:
            if st.button(f"Start {tab_names[0]}"):
                st.pyplot(cc.launch_cluster_knn(df, selected_columns))

        with tab12:
            if st.button(f"Start {tab_names[1]}"):
                st.pyplot(cc.launch_cluster_dbscan(df, selected_columns))

    with tab2:
        st.header("Predictions")
        # With index=None the selectbox starts empty and yields None until
        # the user picks a column.
        target_column = st.selectbox(
            "Target column:",
            df.columns.tolist(),
            index=None,
            placeholder="Select target column",
        )

        # Always bind the name: the source-column widget is only rendered once
        # a target is chosen, but the buttons below exist either way.
        selected_columns_p = []
        if target_column is not None:
            selected_columns_p = handle_column_multiselect(df, "predictions")

        tab_names = ["Linear Regression", "Random Forest"]
        tab21, tab22 = st.tabs(tab_names)

        with tab21:
            if st.button(f"Start {tab_names[0]}"):
                if target_column is None:
                    # Previously this path crashed with NameError on
                    # selected_columns_p.
                    st.warning("Please select a target column first.")
                else:
                    st.write(target_column)
                    st.write(selected_columns_p)
                    display_prediction_results(
                        df, target_column, selected_columns_p, tab_names[0]
                    )

        with tab22:
            if st.button(f"Start {tab_names[1]}"):
                if target_column is None:
                    st.warning("Please select a target column first.")
                else:
                    display_prediction_results(
                        df, target_column, selected_columns_p, tab_names[1]
                    )
else:
    st.write("Please clean your dataset.")