Merge pull request 'streamlit' (#24) from streamlit into master

Reviewed-on: #24
1 year ago · 40be24a556
parent 59289df22f f180558394
commit 40be24a556
3 changed files with 40 additions and 22 deletions
--- a/src/back/clustering_csv.py
+++ b/src/back/clustering_csv.py
@@ -90,7 +90,6 @@ def launch_cluster_dbscan(df, array_columns, dimensions=2):
        return visualize_clusters_3d(X, labels_dbscan, title="DBSCAN Clustering 3D")
    else:
        return visualize_clusters_2d(X, labels_dbscan, title="DBSCAN Clustering")
    return stats_dbscan
 def launch_cluster(df, array_columns):
    X = df[array_columns].values
--- a/src/back/prediction.py
+++ b/src/back/prediction.py
@@ -1,6 +1,10 @@
 from sklearn.model_selection import train_test_split
 from sklearn.linear_model import LinearRegression
 from sklearn.ensemble import RandomForestRegressor
 from sklearn.metrics import f1_score
 from sklearn.metrics import accuracy_score
 import numpy as np
 import matplotlib.pyplot as plt
 def getColumnsForPredictionAndPredict(df,columns, columnGoal, algoOfPrediction):
    predictors = df[columns]
@@ -14,4 +18,20 @@ def getColumnsForPredictionAndPredict(df,columns, columnGoal, algoOfPrediction):
        raise NameError("No method name : \"" + algoOfPrediction + "\"")
    model.fit(predictors, target)
-    return model.predict(predictors)
+    prediction = model.predict(predictors)
    return prediction
 def correlation_matrix(df, columns):
    """Draw the correlation matrix of the selected columns of ``df``.

    The selection ``df[columns]`` is passed through ``DataFrame.corr()``;
    the resulting table is printed to stdout and rendered with ``matshow``
    on a freshly created matplotlib figure, with the colour scale pinned
    to the range [-1, 1] and a colour bar attached.

    Both axes are labelled with the column names of the selection.

    Returns the matplotlib figure holding the plot.
    """
    subset = df[columns]
    corr_table = subset.corr()
    print(corr_table)

    figure = plt.figure()
    axes = figure.add_subplot(111)
    figure.colorbar(axes.matshow(corr_table, vmin=-1, vmax=1))

    # Tick positions come from the selection's column count (not from the
    # correlation table), one tick per selected column.
    positions = np.arange(0, subset.shape[1], 1)
    labels = list(subset)
    axes.set_xticks(positions)
    axes.set_yticks(positions)
    axes.set_xticklabels(labels)
    axes.set_yticklabels(labels)
    return figure
--- a/src/pages/prediction.py
+++ b/src/pages/prediction.py
@@ -1,6 +1,8 @@
 import streamlit as st
 import pandas as pd
 import sys
 import matplotlib.pyplot as plt
 import numpy as np
 sys.path.append('./back/')
 import clustering_csv as cc
@@ -10,7 +12,7 @@ def handle_column_multiselect(df, method_name):
    selected_columns = st.multiselect(f"Select the columns you want for {method_name}:", df.columns.tolist(), placeholder="Select dataset columns")
    return selected_columns
-def display_prediction_results(df, targetCol, sourceColumns, method):
+def df_prediction_results(df, targetCol, sourceColumns, method):
    original_col = df[targetCol]
    predicted_col = p.getColumnsForPredictionAndPredict(df, sourceColumns, targetCol, method)
@@ -18,7 +20,7 @@ def display_prediction_results(df, targetCol, sourceColumns, method):
    new_df['Original'] = original_col
    new_df['Predicted'] = predicted_col
-    st.dataframe(new_df)    
+    return new_df
 if 'df' in st.session_state:
    df = st.session_state.df
@@ -37,15 +39,16 @@ if 'df' in st.session_state:
            dimensions = 2
        tab_names = ["K-means", "DBSCAN"] 
-        tab11, tab12 = st.tabs(tab_names)
+        cluster_tabs = st.tabs(tab_names)
-        with tab11:
+        for idx, tab in enumerate(cluster_tabs):
-            if st.button(f"Start {tab_names[0]}"):
+            if tab.button(f"Start {tab_names[idx]}"):
-                st.pyplot(cc.launch_cluster_knn(df, selected_columns, dimensions=dimensions))
+                if tab_names[idx] == "K-means":
                    fig = cc.launch_cluster_knn(df, selected_columns, dimensions=dimensions)
                else:
                    fig = cc.launch_cluster_dbscan(df, selected_columns, dimensions)
-        with tab12:
+                tab.pyplot(fig)
            if st.button(f"Start {tab_names[1]}"):
                st.pyplot(cc.launch_cluster_dbscan(df, selected_columns, dimensions))
    with tab2:
        st.header("Predictions")
@@ -60,16 +63,12 @@ if 'df' in st.session_state:
            selected_columns_p = handle_column_multiselect(df, "predictions")
        tab_names = ["Linear Regression", "Random Forest"] 
-        tab21, tab22 = st.tabs(tab_names)
+        prediction_tabs = st.tabs(tab_names)
        with tab21:
            if st.button(f"Start {tab_names[0]}"):
                st.write(target_column)
                st.write(selected_columns_p)
                display_prediction_results(df, target_column, selected_columns_p, tab_names[0])
-        with tab22:
+        for idx, tab in enumerate(prediction_tabs):
-            if st.button(f"Start {tab_names[1]}"):
+            if tab.button(f"Start {tab_names[idx]}"):
-                display_prediction_results(df, target_column, selected_columns_p, tab_names[1])
+                tab.pyplot(p.correlation_matrix(df, selected_columns_p+[target_column]))
                tmp_df = df_prediction_results(df, target_column, selected_columns_p, tab_names[idx])
                tab.dataframe(tmp_df)
 else:
    st.write("Please clean your dataset.")