From d317fd3ad9822cd989b072b10f3e9035b5fb0f8a Mon Sep 17 00:00:00 2001
From: rem <remi.arnal@etu.uca.fr>
Date: Tue, 25 Jun 2024 02:26:54 +0200
Subject: [PATCH] correlation_matrix view + code reorganisation

---
 src/back/prediction.py  | 22 +++++++++++++++++++++-
 src/pages/prediction.py | 36 ++++++++++++++++--------------------
 2 files changed, 37 insertions(+), 21 deletions(-)

diff --git a/src/back/prediction.py b/src/back/prediction.py
index 1700d72..4a679f1 100644
--- a/src/back/prediction.py
+++ b/src/back/prediction.py
@@ -1,6 +1,10 @@
 from sklearn.model_selection import train_test_split
 from sklearn.linear_model import LinearRegression
 from sklearn.ensemble import RandomForestRegressor
+from sklearn.metrics import f1_score
+from sklearn.metrics import accuracy_score
+import numpy as np
+import matplotlib.pyplot as plt
 
 def getColumnsForPredictionAndPredict(df,columns, columnGoal, algoOfPrediction):
     predictors = df[columns]
@@ -14,4 +18,20 @@ def getColumnsForPredictionAndPredict(df,columns, columnGoal, algoOfPrediction):
         raise NameError("No method name : \"" + algoOfPrediction + "\"")
 
     model.fit(predictors, target)
-    return model.predict(predictors)
+    prediction = model.predict(predictors)
+    return prediction
+
+def correlation_matrix(df, columns):
+    new_df = df[columns]
+    correlations = new_df.corr()
+    print(correlations)
+    fig = plt.figure()
+    ax = fig.add_subplot(111)
+    cax = ax.matshow(correlations, vmin=-1, vmax=1)
+    fig.colorbar(cax)
+    ticks = np.arange(0,new_df.shape[1],1)
+    ax.set_xticks(ticks)
+    ax.set_yticks(ticks)
+    ax.set_xticklabels(list(new_df))
+    ax.set_yticklabels(list(new_df))
+    return fig
diff --git a/src/pages/prediction.py b/src/pages/prediction.py
index d2b8805..f25008c 100644
--- a/src/pages/prediction.py
+++ b/src/pages/prediction.py
@@ -1,6 +1,8 @@
 import streamlit as st
 import pandas as pd
 import sys
+import matplotlib.pyplot as plt
+import numpy as np
 sys.path.append('./back/')
 
 import clustering_csv as cc
@@ -10,7 +12,7 @@ def handle_column_multiselect(df, method_name):
     selected_columns = st.multiselect(f"Select the columns you want for {method_name}:", df.columns.tolist(), placeholder="Select dataset columns")
     return selected_columns
     
-def display_prediction_results(df, targetCol, sourceColumns, method):
+def df_prediction_results(df, targetCol, sourceColumns, method):
     original_col = df[targetCol]
     predicted_col = p.getColumnsForPredictionAndPredict(df, sourceColumns, targetCol, method)    
     
@@ -18,7 +20,7 @@ def display_prediction_results(df, targetCol, sourceColumns, method):
     new_df['Original'] = original_col
     new_df['Predicted'] = predicted_col
 
-    st.dataframe(new_df)    
+    return new_df    
 
 if 'df' in st.session_state:
     df = st.session_state.df
@@ -37,15 +39,14 @@ if 'df' in st.session_state:
             dimensions = 2
         
         tab_names = ["K-means", "DBSCAN"] 
-        tab11, tab12 = st.tabs(tab_names)
+        cluster_tabs = st.tabs(tab_names)
 
-        with tab11:
-            if st.button(f"Start {tab_names[0]}"):
-                st.pyplot(cc.launch_cluster_knn(df, selected_columns, dimensions=dimensions))
-
-        with tab12:
-            if st.button(f"Start {tab_names[1]}"):
-                st.pyplot(cc.launch_cluster_dbscan(df, selected_columns, dimensions))
+        for idx, tab in enumerate(cluster_tabs):
+            if tab.button(f"Start {tab_names[idx]}"):
+                if tab_names[idx] == "K-means":
+                    tab.pyplot(cc.launch_cluster_knn(df, selected_columns, dimensions=dimensions))
+                else:
+                    tab.pyplot(cc.launch_cluster_dbscan(df, selected_columns, dimensions))
 
     with tab2:
         st.header("Predictions")
@@ -60,16 +61,11 @@ if 'df' in st.session_state:
             selected_columns_p = handle_column_multiselect(df, "predictions")
         
         tab_names = ["Linear Regression", "Random Forest"] 
-        tab21, tab22 = st.tabs(tab_names)
-
-        with tab21:
-            if st.button(f"Start {tab_names[0]}"):
-                st.write(target_column)
-                st.write(selected_columns_p)
-                display_prediction_results(df, target_column, selected_columns_p, tab_names[0])
+        prediction_tabs = st.tabs(tab_names)
 
-        with tab22:
-            if st.button(f"Start {tab_names[1]}"):
-                display_prediction_results(df, target_column, selected_columns_p, tab_names[1])
+        for idx, tab in enumerate(prediction_tabs):
+            if tab.button(f"Start {tab_names[idx]}"):
+                tab.pyplot(p.correlation_matrix(df, selected_columns_p+[target_column]))
+                tab.dataframe(df_prediction_results(df, target_column, selected_columns_p, tab_names[idx]))
 else:
     st.write("Please clean your dataset.")