From 75d2de0889c2ab627b883271611a596c5ac784d7 Mon Sep 17 00:00:00 2001
From: "hugo.pradier2"
Date: Wed, 19 Jun 2024 09:50:28 +0200
Subject: [PATCH] prediction de regression terminee

---
Review notes (ignored by git-am; everything up to the first diff is commentary):

* prediction_classification.py: every feature column and the target now get
  their own LabelEncoder. The single shared encoder only remembered the
  classes of the last column it was fitted on (the target), so encoding the
  values chosen for prediction failed or produced wrong codes.
* prediction_classification.py: training/prediction is gated on the selected
  features and target instead of the one-shot submit flag, matching
  prediction_regression.py. With the flag, the prediction widgets vanished
  as soon as one of them was changed.
* prediction_classification.py: the prediction row is passed as a DataFrame
  with the training column names.
* Line structure and indentation of this patch were reconstructed from a
  whitespace-collapsed copy. The removed hunk must match the existing
  prediction.py to apply; if it does not, retry with
  `git apply --ignore-whitespace`. Blob ids on the `index` lines were not
  recomputed.

 frontend/pages/prediction.py                | 56 ---------------------
 frontend/pages/prediction_classification.py | 48 ++++++++++++++++++
 frontend/pages/prediction_regression.py     | 28 +++++++++++
 3 files changed, 76 insertions(+), 56 deletions(-)
 delete mode 100644 frontend/pages/prediction.py
 create mode 100644 frontend/pages/prediction_classification.py
 create mode 100644 frontend/pages/prediction_regression.py

diff --git a/frontend/pages/prediction.py b/frontend/pages/prediction.py
deleted file mode 100644
index 25c074e..0000000
--- a/frontend/pages/prediction.py
+++ /dev/null
@@ -1,56 +0,0 @@
-import streamlit as st
-from sklearn.ensemble import RandomForestClassifier
-from sklearn.ensemble import RandomForestRegressor
-from sklearn.linear_model import LogisticRegression
-from sklearn.linear_model import LinearRegression
-from sklearn.model_selection import train_test_split
-from sklearn.metrics import accuracy_score
-from sklearn import datasets
-from sklearn.impute import SimpleImputer # Add this line
-import pandas as pd
-import numpy as np
-st.header("Prediction")
-
-if "data" in st.session_state:
-    data = st.session_state.data
-
-    with st.form("my_form"):
-        header = st.columns([2,1,2])
-        header[0].subheader("Model")
-        header[1].subheader("Data Name")
-
-        row1 = st.columns([2,1,2])
-        model = row1[0].selectbox("", ["Random Forest Classifier", "Random Forest Regressor", "Logistic Regression", "Linear Regression"])
-        data_name = row1[1].selectbox("", data.columns)
-
-        st.form_submit_button('launch')
-
-    if model == "Random Forest Classifier":
-        model = RandomForestClassifier()
-    elif model == "Random Forest Regressor":
-        model = RandomForestRegressor()
-    elif model == "Logistic Regression":
-        model = LogisticRegression()
-    elif model == "Linear Regression":
-        model = LinearRegression()
-
-    x = data.drop(data_name, axis=1)
-    y = data[data_name]
-
-    # Convert categorical data to numerical values
-    x = pd.get_dummies(x)
-
-    # Handle missing values
-    imputer = SimpleImputer()
-    x = imputer.fit_transform(x)
-    x = pd.get_dummies(x)
-
-    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)
-    model.fit(x_train, y_train)
-    y_pred = model.predict(x_test)
-    if model == "Random Forest Classifier":
-        st.write("Accuracy: ", accuracy_score(y_test, y_pred))
-    elif model == "Random Forest Regressor" or model == "Logistic Regression" or model == "Linear Regression":
-        st.write("Mean Squared Error: ", np.mean((y_pred - y_test) ** 2))
-else:
-    st.error("file not loaded")
diff --git a/frontend/pages/prediction_classification.py b/frontend/pages/prediction_classification.py
new file mode 100644
index 0000000..8d368d4
--- /dev/null
+++ b/frontend/pages/prediction_classification.py
@@ -0,0 +1,48 @@
+import streamlit as st
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.preprocessing import LabelEncoder
+import pandas as pd
+
+st.header("Prediction: Classification")
+
+if "data" in st.session_state:
+    data = st.session_state.data
+
+    with st.form("classification_form"):
+        st.subheader("Random Forest Parameters")
+
+        data_name = st.multiselect("Features", data.select_dtypes(include="object").columns, help="Sélectionnez les caractéristiques pour l'entraînement.")
+        target_name = st.selectbox("Target", data.columns, help="Sélectionnez la variable cible pour l'entraînement.")
+
+        n_estimators = st.number_input("Number of estimators", step=1, min_value=1, value=100, help="Nombre d'arbres dans la forêt.")
+        max_depth = st.number_input("Max depth", step=1, min_value=1, value=10, help="Profondeur maximale des arbres.")
+
+        st.form_submit_button('Train and Predict')
+
+    # Form values persist across reruns, so gate on the selections rather than
+    # on the one-shot submit flag; otherwise the prediction widgets below
+    # vanish as soon as one of them is changed.
+    if data_name and target_name:
+        # One encoder per column: a single shared LabelEncoder only remembers
+        # the classes of the last column it was fitted on.
+        feature_encoders = {feature: LabelEncoder().fit(data[feature]) for feature in data_name}
+        target_encoder = LabelEncoder()
+
+        X = pd.DataFrame({feature: feature_encoders[feature].transform(data[feature]) for feature in data_name}, index=data.index)
+        y = target_encoder.fit_transform(data[target_name])
+
+        model = RandomForestClassifier(n_estimators=n_estimators, max_depth=max_depth, random_state=111)
+        model.fit(X, y)
+
+        st.subheader("Enter values for prediction")
+
+        pred_values = [st.selectbox(f"Value for {feature}", options=data[feature].unique(), key=f"value_{feature}") for feature in data_name]
+        pred_values_encoded = [feature_encoders[feature].transform([val])[0] for feature, val in zip(data_name, pred_values)]
+
+        prediction = model.predict(pd.DataFrame([pred_values_encoded], columns=data_name))
+
+        prediction_decoded = target_encoder.inverse_transform(prediction)
+
+        st.write("Prediction:", prediction_decoded[0])
+else:
+    st.error("File not loaded")
diff --git a/frontend/pages/prediction_regression.py b/frontend/pages/prediction_regression.py
new file mode 100644
index 0000000..c6a6a38
--- /dev/null
+++ b/frontend/pages/prediction_regression.py
@@ -0,0 +1,28 @@
+import streamlit as st
+from sklearn.linear_model import LinearRegression
+
+st.header("Prediction: Regression")
+
+if "data" in st.session_state:
+    data = st.session_state.data
+
+    with st.form("regression_form"):
+        st.subheader("Linear Regression Parameters")
+        data_name = st.multiselect("Features", data.select_dtypes(include="number").columns)
+        target_name = st.selectbox("Target", data.select_dtypes(include="number").columns)
+        st.form_submit_button('Train and Predict')
+
+    if data_name and target_name:
+        X = data[data_name]
+        y = data[target_name]
+
+        model = LinearRegression()
+        model.fit(X, y)
+
+        st.subheader("Enter values for prediction")
+        pred_values = [st.number_input(f"Value for {feature}", value=0.0) for feature in data_name]
+        prediction = model.predict([pred_values])
+
+        st.write("Prediction:", prediction[0])
+else:
+    st.error("File not loaded")