Merge pull request 'streamlit' (#24) from streamlit into master
continuous-integration/drone/push Build is passing Details

Reviewed-on: #24
master
remrem 5 months ago
commit 40be24a556

@ -90,7 +90,6 @@ def launch_cluster_dbscan(df, array_columns, dimensions=2):
return visualize_clusters_3d(X, labels_dbscan, title="DBSCAN Clustering 3D") return visualize_clusters_3d(X, labels_dbscan, title="DBSCAN Clustering 3D")
else: else:
return visualize_clusters_2d(X, labels_dbscan, title="DBSCAN Clustering") return visualize_clusters_2d(X, labels_dbscan, title="DBSCAN Clustering")
return stats_dbscan
def launch_cluster(df, array_columns): def launch_cluster(df, array_columns):
X = df[array_columns].values X = df[array_columns].values

@ -1,6 +1,10 @@
from sklearn.model_selection import train_test_split from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score
import numpy as np
import matplotlib.pyplot as plt
def getColumnsForPredictionAndPredict(df,columns, columnGoal, algoOfPrediction): def getColumnsForPredictionAndPredict(df,columns, columnGoal, algoOfPrediction):
predictors = df[columns] predictors = df[columns]
@ -14,4 +18,20 @@ def getColumnsForPredictionAndPredict(df,columns, columnGoal, algoOfPrediction):
raise NameError("No method name : \"" + algoOfPrediction + "\"") raise NameError("No method name : \"" + algoOfPrediction + "\"")
model.fit(predictors, target) model.fit(predictors, target)
return model.predict(predictors) prediction = model.predict(predictors)
return prediction
def correlation_matrix(df, columns):
    """Draw the pairwise correlation matrix of the selected columns.

    Args:
        df: DataFrame holding the data.
        columns: Labels of the columns to include in the matrix.

    Returns:
        The matplotlib figure containing the correlation plot with a
        colorbar and one tick per selected column on both axes.
    """
    subset = df[columns]
    corr = subset.corr()
    # The matrix is also echoed to standard output.
    print(corr)

    figure = plt.figure()
    axes = figure.add_subplot(111)
    # Pin the colour scale to the [-1, 1] range so plots are comparable.
    image = axes.matshow(corr, vmin=-1, vmax=1)
    figure.colorbar(image)

    # One tick per selected column, labelled with the column name.
    positions = np.arange(0, subset.shape[1], 1)
    labels = list(subset)
    axes.set_xticks(positions)
    axes.set_yticks(positions)
    axes.set_xticklabels(labels)
    axes.set_yticklabels(labels)
    return figure

@ -1,6 +1,8 @@
import streamlit as st import streamlit as st
import pandas as pd import pandas as pd
import sys import sys
import matplotlib.pyplot as plt
import numpy as np
sys.path.append('./back/') sys.path.append('./back/')
import clustering_csv as cc import clustering_csv as cc
@ -10,7 +12,7 @@ def handle_column_multiselect(df, method_name):
selected_columns = st.multiselect(f"Select the columns you want for {method_name}:", df.columns.tolist(), placeholder="Select dataset columns") selected_columns = st.multiselect(f"Select the columns you want for {method_name}:", df.columns.tolist(), placeholder="Select dataset columns")
return selected_columns return selected_columns
def display_prediction_results(df, targetCol, sourceColumns, method): def df_prediction_results(df, targetCol, sourceColumns, method):
original_col = df[targetCol] original_col = df[targetCol]
predicted_col = p.getColumnsForPredictionAndPredict(df, sourceColumns, targetCol, method) predicted_col = p.getColumnsForPredictionAndPredict(df, sourceColumns, targetCol, method)
@ -18,7 +20,7 @@ def display_prediction_results(df, targetCol, sourceColumns, method):
new_df['Original'] = original_col new_df['Original'] = original_col
new_df['Predicted'] = predicted_col new_df['Predicted'] = predicted_col
st.dataframe(new_df) return new_df
if 'df' in st.session_state: if 'df' in st.session_state:
df = st.session_state.df df = st.session_state.df
@ -37,15 +39,16 @@ if 'df' in st.session_state:
dimensions = 2 dimensions = 2
tab_names = ["K-means", "DBSCAN"] tab_names = ["K-means", "DBSCAN"]
tab11, tab12 = st.tabs(tab_names) cluster_tabs = st.tabs(tab_names)
with tab11: for idx, tab in enumerate(cluster_tabs):
if st.button(f"Start {tab_names[0]}"): if tab.button(f"Start {tab_names[idx]}"):
st.pyplot(cc.launch_cluster_knn(df, selected_columns, dimensions=dimensions)) if tab_names[idx] == "K-means":
fig = cc.launch_cluster_knn(df, selected_columns, dimensions=dimensions)
else:
fig = cc.launch_cluster_dbscan(df, selected_columns, dimensions)
with tab12: tab.pyplot(fig)
if st.button(f"Start {tab_names[1]}"):
st.pyplot(cc.launch_cluster_dbscan(df, selected_columns, dimensions))
with tab2: with tab2:
st.header("Predictions") st.header("Predictions")
@ -60,16 +63,12 @@ if 'df' in st.session_state:
selected_columns_p = handle_column_multiselect(df, "predictions") selected_columns_p = handle_column_multiselect(df, "predictions")
tab_names = ["Linear Regression", "Random Forest"] tab_names = ["Linear Regression", "Random Forest"]
tab21, tab22 = st.tabs(tab_names) prediction_tabs = st.tabs(tab_names)
with tab21:
if st.button(f"Start {tab_names[0]}"):
st.write(target_column)
st.write(selected_columns_p)
display_prediction_results(df, target_column, selected_columns_p, tab_names[0])
with tab22: for idx, tab in enumerate(prediction_tabs):
if st.button(f"Start {tab_names[1]}"): if tab.button(f"Start {tab_names[idx]}"):
display_prediction_results(df, target_column, selected_columns_p, tab_names[1]) tab.pyplot(p.correlation_matrix(df, selected_columns_p+[target_column]))
tmp_df = df_prediction_results(df, target_column, selected_columns_p, tab_names[idx])
tab.dataframe(tmp_df)
else: else:
st.write("Please clean your dataset.") st.write("Please clean your dataset.")

Loading…
Cancel
Save