Merge pull request 'streamlit' (#22) from streamlit into master
continuous-integration/drone/push Build is passing Details

Reviewed-on: #22
pull/23/head
remrem 10 months ago
commit 69aa8c58b2

@ -24,7 +24,7 @@ def visualize_clusters_3d(X, labels, centers=None, title="Clusters"):
ax.set_xlabel("Feature 1")
ax.set_ylabel("Feature 2")
ax.set_zlabel("Feature 3")
plt.show()
return plt.gcf()
def calculate_cluster_statistics_kmeans(X, labels, centers):
unique_labels = np.unique(labels)

@ -1,6 +1,7 @@
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import RobustScaler
def return_csv(path):
df = pd.read_csv(path)
@ -20,11 +21,6 @@ def csv_check(df):
print("-"*12)
print(df[col].unique())
def do_for_columns(df):
for col_name in df:
df[col_name] = function(df[col_name])
def csv_norm_min_max(df, col):
max = df[col].max()
min = df[col].min()
@ -37,30 +33,25 @@ def csv_standardisation_Z(df, col):
df[col] = (df[col] - mean_col1) / std_col1
return df[col]
def csv_robust_normalize(df, col):
# Calcul de la médiane et de l'IQR
median = df[col].median()
q1 = df[col].quantile(0.25)
q3 = df[col].quantile(0.75)
iqr = q3 - q1
def robust_normalize_column(df, column_name):
# Extract the column data
column_data = df[column_name].values.reshape(-1, 1)
# Application de la normalisation robuste
normalized_column = (df[col] - median) / iqr
df[col] = normalized_column
return normalized_column
# Fit and transform the column data
scaler = RobustScaler()
normalized_data = scaler.fit_transform(column_data)
df[column_name] = normalized_data
return normalized_data
def handle_normalization(df, norm_method):
if norm_method == "min-max":
for col_name in df:
if norm_method == "min-max":
df[col_name] = csv_norm_min_max(df, col_name)
return df
elif norm_method == "z-score":
for col_name in df:
df[col_name] = csv_standardisation_Z(df, col_name)
return df
elif norm_method == "robust":
for col_name in df:
df[col_name] = csv_robust_normalize(df, col_name)
return df
df[col_name] = robust_normalize_column(df, col_name)
else:
raise ValueError("Unknown method")
return df

@ -18,7 +18,6 @@ def drop_high_null_percentage(data, threshold=0.5):
data = data.loc[:, missing_percentage <= threshold]
return data
def replace_with_mean(data):
    """Fill missing values in numeric columns with that column's mean.

    A column is treated as numeric when its dtype kind is one of ``b``,
    ``i``, ``u``, ``f`` or ``c``; every other column is passed through
    unchanged.

    Parameters:
    - data: Pandas DataFrame with the data

    Returns:
    - The DataFrame produced by ``data.apply``; the filling itself uses
      ``fillna`` without ``inplace``, so ``data`` is not modified.
    """
    numeric_kinds = 'biufc'

    def _fill_column(col):
        # Only numeric-like columns have a meaningful mean to fill with.
        if col.dtype.kind in numeric_kinds:
            return col.fillna(col.mean())
        return col

    return data.apply(_fill_column)
@ -48,11 +47,10 @@ def impute_with_regression(data):
model = LinearRegression()
model.fit(X_complete, y_complete)
y_pred = model.predict(X_missing)
data.loc[df[col].isnull(), col] = y_pred
data.loc[data[col].isnull(), col] = y_pred
return data
"""
Parameters:
- data: Pandas DataFrame with the data

@ -1,6 +1,6 @@
import streamlit as st
from io import StringIO
from ydata_profiling import ProfileReport
# from ydata_profiling import ProfileReport
import pandas as pd
def statistics(df):
@ -23,7 +23,6 @@ def nav_bar():
st.page_link("pages/clean.py", label="Clean", icon="🧼", help=None)
st.page_link("pages/visualize.py", label="Visualize", icon="👁️", help=None)
st.page_link("pages/prediction.py", label="Predict", icon="🔮", help=None)
st.page_link("pages/evaluate.py", label="Evaluate", icon=None, help=None)
def clean_dataframe(line):
# Call to function to clean data

@ -13,7 +13,6 @@ if 'original_df' in st.session_state:
st.write("## Missing data")
rm_empty_rows_or_cols = st.checkbox("Remove empty rows or columns", True)
st.write("#### Replace missing values")
replace_methods = ["mean","median","mode","knn","regression"]
replace_method = st.radio('Choose an option:', replace_methods)

@ -6,36 +6,66 @@ sys.path.append('./back/')
import clustering_csv as cc
import prediction as p
if 'df' in st.session_state:
def handle_column_multiselect(df, method_name):
    """Render a multiselect over the columns of ``df`` and return the selection.

    Parameters:
    - df: DataFrame whose column names are offered as options
    - method_name: text inserted into the widget label

    Returns:
    - the value returned by ``st.multiselect`` for the current run
    """
    return st.multiselect(
        f"Select the columns you want for {method_name}:",
        df.columns.tolist(),
        placeholder="Select dataset columns",
    )
def display_prediction_results(df, targetCol, sourceColumns, method):
    """Display the target column next to the values predicted for it.

    Parameters:
    - df: DataFrame holding the target and the source columns
    - targetCol: name of the column to predict
    - sourceColumns: column names forwarded as the prediction inputs
    - method: method name forwarded to ``p.getColumnsForPredictionAndPredict``
    """
    # Read the target before predicting so the displayed "Original" values
    # are taken prior to the call into the prediction module.
    actual_values = df[targetCol]
    predicted_values = p.getColumnsForPredictionAndPredict(df, sourceColumns, targetCol, method)

    comparison = pd.DataFrame()
    comparison['Original'] = actual_values
    comparison['Predicted'] = predicted_values
    st.dataframe(comparison)
if 'df' in st.session_state:
df = st.session_state.df
df_cols = df.columns.tolist()
st.write("# 🔮 Prediction")
if st.button("K-means"):
st.pyplot(cc.launch_cluster_knn(df, ["Route Type", "Traffic Control"]))
tab1, tab2 = st.tabs(["Clustering", "Predictions"])
with tab1:
st.header("Clustering")
selected_columns = handle_column_multiselect(df, "clustering")
tab_names = ["K-means", "DBSCAN"]
tab11, tab12 = st.tabs(tab_names)
with tab11:
if st.button(f"Start {tab_names[0]}"):
st.pyplot(cc.launch_cluster_knn(df, selected_columns))
if st.button("DBSCAN"):
st.pyplot(cc.launch_cluster_dbscan(df, ["Route Type", "Traffic Control"]))
with tab12:
if st.button(f"Start {tab_names[1]}"):
st.pyplot(cc.launch_cluster_dbscan(df, selected_columns))
if st.button("Linear Regression"):
col = "Route Type"
df_cols.remove(col)
original_col = df[col]
predicted_col = p.getColumnsForPredictionAndPredict(df, df_cols, "Route Type", "Linear Regression")
with tab2:
st.header("Predictions")
target_column = st.selectbox(
"Target column:",
df.columns.tolist(),
index=None,
placeholder="Select target column"
)
if st.button("Random Forest"):
col = "Route Type"
df_cols.remove(col)
original_col = df[col]
predicted_col = p.getColumnsForPredictionAndPredict(df, df_cols, "Route Type", "Random Forest")
if target_column != None:
selected_columns_p = handle_column_multiselect(df, "predictions")
ndf = pd.DataFrame()
ndf['Original'] = original_col
ndf['Predicted'] = predicted_col
tab_names = ["Linear Regression", "Random Forest"]
tab21, tab22 = st.tabs(tab_names)
st.dataframe(ndf)
with tab21:
if st.button(f"Start {tab_names[0]}"):
st.write(target_column)
st.write(selected_columns_p)
display_prediction_results(df, target_column, selected_columns_p, tab_names[0])
with tab22:
if st.button(f"Start {tab_names[1]}"):
display_prediction_results(df, target_column, selected_columns_p, tab_names[1])
else:
st.write("Please clean your dataset.")

Loading…
Cancel
Save