Merge pull request 'streamlit' (#24) from streamlit into master
continuous-integration/drone/push Build is passing Details

Reviewed-on: #24
master
remrem 5 months ago
commit 40be24a556

@ -90,7 +90,6 @@ def launch_cluster_dbscan(df, array_columns, dimensions=2):
return visualize_clusters_3d(X, labels_dbscan, title="DBSCAN Clustering 3D")
else:
return visualize_clusters_2d(X, labels_dbscan, title="DBSCAN Clustering")
return stats_dbscan
def launch_cluster(df, array_columns):
X = df[array_columns].values

@ -1,6 +1,10 @@
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score
import numpy as np
import matplotlib.pyplot as plt
def getColumnsForPredictionAndPredict(df,columns, columnGoal, algoOfPrediction):
predictors = df[columns]
@ -14,4 +18,20 @@ def getColumnsForPredictionAndPredict(df,columns, columnGoal, algoOfPrediction):
raise NameError("No method name : \"" + algoOfPrediction + "\"")
model.fit(predictors, target)
return model.predict(predictors)
prediction = model.predict(predictors)
return prediction
def correlation_matrix(df, columns):
    """Draw the pairwise correlation matrix of the selected columns.

    The selected columns are extracted from ``df``, their correlation
    table is computed with ``.corr()`` and printed to stdout, and the
    table is then rendered with ``matshow`` on a colour scale pinned to
    the range [-1, 1], with a colour bar next to it.

    Args:
        df: Table to read from; presumably a pandas DataFrame (it must
            support ``df[columns]`` and ``.corr()``).
        columns: Column labels to include in the matrix.

    Returns:
        The figure created through ``plt.figure()`` that holds the plot.
    """
    subset = df[columns]
    corr_table = subset.corr()
    print(corr_table)

    # One tick per selected column, labelled with whatever iterating the
    # subset yields (the column labels for a pandas DataFrame).
    axis_labels = list(subset)
    tick_positions = np.arange(0, subset.shape[1], 1)

    fig = plt.figure()
    axes = fig.add_subplot(111)

    # Pin the colour scale to the full correlation range so that colours
    # stay comparable from one plot to the next.
    heatmap = axes.matshow(corr_table, vmin=-1, vmax=1)
    fig.colorbar(heatmap)

    axes.set_xticks(tick_positions)
    axes.set_yticks(tick_positions)
    axes.set_xticklabels(axis_labels)
    axes.set_yticklabels(axis_labels)
    return fig

@ -1,6 +1,8 @@
import streamlit as st
import pandas as pd
import sys
import matplotlib.pyplot as plt
import numpy as np
sys.path.append('./back/')
import clustering_csv as cc
@ -10,7 +12,7 @@ def handle_column_multiselect(df, method_name):
selected_columns = st.multiselect(f"Select the columns you want for {method_name}:", df.columns.tolist(), placeholder="Select dataset columns")
return selected_columns
def display_prediction_results(df, targetCol, sourceColumns, method):
def df_prediction_results(df, targetCol, sourceColumns, method):
original_col = df[targetCol]
predicted_col = p.getColumnsForPredictionAndPredict(df, sourceColumns, targetCol, method)
@ -18,7 +20,7 @@ def display_prediction_results(df, targetCol, sourceColumns, method):
new_df['Original'] = original_col
new_df['Predicted'] = predicted_col
st.dataframe(new_df)
return new_df
if 'df' in st.session_state:
df = st.session_state.df
@ -37,15 +39,16 @@ if 'df' in st.session_state:
dimensions = 2
tab_names = ["K-means", "DBSCAN"]
tab11, tab12 = st.tabs(tab_names)
cluster_tabs = st.tabs(tab_names)
with tab11:
if st.button(f"Start {tab_names[0]}"):
st.pyplot(cc.launch_cluster_knn(df, selected_columns, dimensions=dimensions))
for idx, tab in enumerate(cluster_tabs):
if tab.button(f"Start {tab_names[idx]}"):
if tab_names[idx] == "K-means":
fig = cc.launch_cluster_knn(df, selected_columns, dimensions=dimensions)
else:
fig = cc.launch_cluster_dbscan(df, selected_columns, dimensions)
with tab12:
if st.button(f"Start {tab_names[1]}"):
st.pyplot(cc.launch_cluster_dbscan(df, selected_columns, dimensions))
tab.pyplot(fig)
with tab2:
st.header("Predictions")
@ -60,16 +63,12 @@ if 'df' in st.session_state:
selected_columns_p = handle_column_multiselect(df, "predictions")
tab_names = ["Linear Regression", "Random Forest"]
tab21, tab22 = st.tabs(tab_names)
with tab21:
if st.button(f"Start {tab_names[0]}"):
st.write(target_column)
st.write(selected_columns_p)
display_prediction_results(df, target_column, selected_columns_p, tab_names[0])
prediction_tabs = st.tabs(tab_names)
with tab22:
if st.button(f"Start {tab_names[1]}"):
display_prediction_results(df, target_column, selected_columns_p, tab_names[1])
for idx, tab in enumerate(prediction_tabs):
if tab.button(f"Start {tab_names[idx]}"):
tab.pyplot(p.correlation_matrix(df, selected_columns_p+[target_column]))
tmp_df = df_prediction_results(df, target_column, selected_columns_p, tab_names[idx])
tab.dataframe(tmp_df)
else:
st.write("Please clean your dataset.")

Loading…
Cancel
Save