From 4ae8512dcb661c184b8bf88b72242d18c89c2b4d Mon Sep 17 00:00:00 2001 From: bastien ollier Date: Fri, 7 Jun 2024 11:29:18 +0200 Subject: [PATCH 1/8] add form --- frontend/pages/clustering.py | 48 ++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 frontend/pages/clustering.py diff --git a/frontend/pages/clustering.py b/frontend/pages/clustering.py new file mode 100644 index 0000000..037780d --- /dev/null +++ b/frontend/pages/clustering.py @@ -0,0 +1,48 @@ +import streamlit as st +from sklearn.cluster import KMeans +import matplotlib.pyplot as plt + +st.header("clustering et Prediction") + + +if "data" in st.session_state: + data = st.session_state.data + + with st.form("my_form"): + header = st.columns([2,1,2]) + header[0].subheader("Dispersion") + header[1].subheader("Number of clusters") + header[2].subheader("Data Name") + + row1 = st.columns([2,1,2]) + cluster_std = row1[0].slider("", 0.2, 3.0, 0.2, 0.2) + n_clusters = row1[1].selectbox("", range(1, 10)) + data_name = row1[2].selectbox("", data.columns) + + st.form_submit_button('launch') + + from sklearn.datasets import make_blobs + from sklearn.cluster import KMeans + import matplotlib.pyplot as plt + import streamlit as st + import random + + # Points generator + x, _ = make_blobs(n_samples=200, n_features=2, centers=5, cluster_std=cluster_std, shuffle=True, random_state=10) + + x = data[["Unit Price","Unit Cost"]].to_numpy() + + # k-means algorithm + kmeans = KMeans(n_clusters=n_clusters, init='random', n_init=10, max_iter=300, random_state=111) + y_kmeans = kmeans.fit_predict(x) + + # Plotting colored clusters + fig, ax = plt.subplots(figsize=(12,8)) + plt.scatter(x[:, 0], x[:, 1], s=100, c=kmeans.labels_, cmap='Set1') + plt.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1], s=400, marker='*', color='k') + st.pyplot(fig) + +else: + st.error("file not loaded") + +# Cached function that returns a mutable object with a random number in the range 0-10 \ No newline at end of file From 5bf5f507a595acb12bb3835b00058a0349bdc9c1 Mon Sep 17 00:00:00 2001 From: bastien ollier Date: Fri, 7 Jun 2024 11:56:38 +0200 Subject: [PATCH 2/8] end clustering --- frontend/pages/clustering.py | 45 +++++++++++++----------------------- 1 file changed, 16 insertions(+), 29 deletions(-) diff --git a/frontend/pages/clustering.py b/frontend/pages/clustering.py index 037780d..97698ec 100644 --- a/frontend/pages/clustering.py +++ b/frontend/pages/clustering.py @@ -2,47 +2,34 @@ import streamlit as st from sklearn.cluster import KMeans import matplotlib.pyplot as plt -st.header("clustering et Prediction") +st.header("Clustering") if "data" in st.session_state: data = st.session_state.data with st.form("my_form"): - header = st.columns([2,1,2]) - header[0].subheader("Dispersion") - header[1].subheader("Number of clusters") - header[2].subheader("Data Name") + row1 = st.columns([1,1,1]) + n_clusters = row1[0].selectbox("Number of clusters", range(1, 10)) + data_name = row1[1].multiselect("Data Name",data.select_dtypes(include="number").columns, max_selections=2) + n_init = row1[2].number_input("n_init",step=1,min_value=1) - row1 = st.columns([2,1,2]) - cluster_std = row1[0].slider("", 0.2, 3.0, 0.2, 0.2) - n_clusters = row1[1].selectbox("", range(1, 10)) - data_name = row1[2].selectbox("", data.columns) + row2 = st.columns([1,1]) + max_iter = row1[0].number_input("max_iter",step=1,min_value=1) - st.form_submit_button('launch') - - from sklearn.datasets import make_blobs - from sklearn.cluster import KMeans - import matplotlib.pyplot as plt - import streamlit as st - import random - # Points generator - x, _ = make_blobs(n_samples=200, n_features=2, centers=5, cluster_std=cluster_std, shuffle=True, random_state=10) + st.form_submit_button('launch') - x = data[["Unit Price","Unit Cost"]].to_numpy() + if len(data_name) == 2: + x = data[data_name].to_numpy() - # k-means algorithm - kmeans = KMeans(n_clusters=n_clusters, init='random', n_init=10, max_iter=300, random_state=111) - y_kmeans = kmeans.fit_predict(x) + kmeans = KMeans(n_clusters=n_clusters, init='random', n_init=n_init, max_iter=max_iter, random_state=111) + y_kmeans = kmeans.fit_predict(x) - # Plotting colored clusters - fig, ax = plt.subplots(figsize=(12,8)) - plt.scatter(x[:, 0], x[:, 1], s=100, c=kmeans.labels_, cmap='Set1') - plt.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1], s=400, marker='*', color='k') - st.pyplot(fig) + fig, ax = plt.subplots(figsize=(12,8)) + plt.scatter(x[:, 0], x[:, 1], s=100, c=kmeans.labels_, cmap='Set1') + plt.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1], s=400, marker='*', color='k') + st.pyplot(fig) else: st.error("file not loaded") - -# Cached function that returns a mutable object with a random number in the range 0-10 \ No newline at end of file From 197939555c6ac4403ddb30c4389ab117219c47a2 Mon Sep 17 00:00:00 2001 From: bastien ollier Date: Wed, 19 Jun 2024 08:45:34 +0200 Subject: [PATCH 3/8] debut dbscan --- frontend/pages/clustering:_dbscan.py | 18 ++++++++++++++++++ .../{clustering.py => clustering:_kmeans.py} | 7 ++++--- 2 files changed, 22 insertions(+), 3 deletions(-) create mode 100644 frontend/pages/clustering:_dbscan.py rename frontend/pages/{clustering.py => clustering:_kmeans.py} (80%) diff --git a/frontend/pages/clustering:_dbscan.py b/frontend/pages/clustering:_dbscan.py new file mode 100644 index 0000000..02fde08 --- /dev/null +++ b/frontend/pages/clustering:_dbscan.py @@ -0,0 +1,18 @@ +import streamlit as st +import matplotlib.pyplot as plt +from sklearn.cluster import DBSCAN +import numpy as np + +st.header("Clustering: dbscan") + + +if "data" in st.session_state: + data = st.session_state.data + + with st.form("my_form"): + data_name = st.multiselect("Data Name",data.select_dtypes(include="number").columns, max_selections=2) + st.form_submit_button('launch') + + +else: + st.error("file not loaded") \ No newline at end of file diff --git a/frontend/pages/clustering.py b/frontend/pages/clustering:_kmeans.py similarity index 80% rename from frontend/pages/clustering.py rename to frontend/pages/clustering:_kmeans.py index 97698ec..ce34e66 100644 --- a/frontend/pages/clustering.py +++ b/frontend/pages/clustering:_kmeans.py @@ -2,7 +2,7 @@ import streamlit as st from sklearn.cluster import KMeans import matplotlib.pyplot as plt -st.header("Clustering") +st.header("Clustering: kmeans") if "data" in st.session_state: @@ -27,8 +27,9 @@ if "data" in st.session_state: y_kmeans = kmeans.fit_predict(x) fig, ax = plt.subplots(figsize=(12,8)) - plt.scatter(x[:, 0], x[:, 1], s=100, c=kmeans.labels_, cmap='Set1') - plt.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1], s=400, marker='*', color='k') + plt.scatter(x[:, 0], x[:, 1], c=y_kmeans, s=50, cmap='viridis') + centers = kmeans.cluster_centers_ + plt.scatter(centers[:, 0], centers[:, 1], c='black', s=200, marker='X') st.pyplot(fig) else: From 9fc6d7d2d160a262938066d5d9a2c1a54291a998 Mon Sep 17 00:00:00 2001 From: bastien ollier Date: Wed, 19 Jun 2024 09:16:10 +0200 Subject: [PATCH 4/8] add dbscan --- frontend/pages/clustering:_dbscan.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/frontend/pages/clustering:_dbscan.py b/frontend/pages/clustering:_dbscan.py index 02fde08..2cb4920 100644 --- a/frontend/pages/clustering:_dbscan.py +++ b/frontend/pages/clustering:_dbscan.py @@ -11,8 +11,20 @@ if "data" in st.session_state: with st.form("my_form"): data_name = st.multiselect("Data Name",data.select_dtypes(include="number").columns, max_selections=2) + eps = st.slider("eps", min_value=0.0, max_value=1.0,value=0.5,step=0.01) + min_samples = st.number_input("min_samples",step=1,min_value=1,value=5) st.form_submit_button('launch') + if len(data_name) == 2: + x = data[data_name].to_numpy() + + dbscan = DBSCAN(eps=eps, min_samples=min_samples) + y_dbscan = dbscan.fit_predict(x) + + + fig, ax = plt.subplots(figsize=(12,8)) + plt.scatter(x[:, 0], x[:, 1], c=y_dbscan, s=50, cmap='viridis') + st.pyplot(fig) else: st.error("file not loaded") \ No newline at end of file From 72dcc8ff1cda806bca9416204d51226ba6d3f18a Mon Sep 17 00:00:00 2001 From: bastien ollier Date: Wed, 19 Jun 2024 09:17:12 +0200 Subject: [PATCH 5/8] add dbscan --- frontend/pages/clustering:_dbscan.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/frontend/pages/clustering:_dbscan.py b/frontend/pages/clustering:_dbscan.py index 2cb4920..6a8ca22 100644 --- a/frontend/pages/clustering:_dbscan.py +++ b/frontend/pages/clustering:_dbscan.py @@ -10,20 +10,20 @@ if "data" in st.session_state: data = st.session_state.data with st.form("my_form"): - data_name = st.multiselect("Data Name",data.select_dtypes(include="number").columns, max_selections=2) - eps = st.slider("eps", min_value=0.0, max_value=1.0,value=0.5,step=0.01) - min_samples = st.number_input("min_samples",step=1,min_value=1,value=5) - st.form_submit_button('launch') + data_name = st.multiselect("Data Name", data.select_dtypes(include="number").columns, max_selections=2) + eps = st.slider("eps", min_value=0.0, max_value=1.0, value=0.5, step=0.01) + min_samples = st.number_input("min_samples", step=1, min_value=1, value=5) + st.form_submit_button("launch") if len(data_name) == 2: x = data[data_name].to_numpy() - + dbscan = DBSCAN(eps=eps, min_samples=min_samples) y_dbscan = dbscan.fit_predict(x) fig, ax = plt.subplots(figsize=(12,8)) - plt.scatter(x[:, 0], x[:, 1], c=y_dbscan, s=50, cmap='viridis') + plt.scatter(x[:, 0], x[:, 1], c=y_dbscan, s=50, cmap="viridis") st.pyplot(fig) else: From d4e33e7367bef7a1ce47f119abdf9833cd3bc9b6 Mon Sep 17 00:00:00 2001 From: bastien ollier Date: Wed, 19 Jun 2024 09:20:59 +0200 Subject: [PATCH 6/8] dbscan --- frontend/pages/clustering:_kmeans.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/frontend/pages/clustering:_kmeans.py b/frontend/pages/clustering:_kmeans.py index ce34e66..824e173 100644 --- a/frontend/pages/clustering:_kmeans.py +++ b/frontend/pages/clustering:_kmeans.py @@ -18,18 +18,18 @@ if "data" in st.session_state: max_iter = row1[0].number_input("max_iter",step=1,min_value=1) - st.form_submit_button('launch') + st.form_submit_button("launch") if len(data_name) == 2: x = data[data_name].to_numpy() - kmeans = KMeans(n_clusters=n_clusters, init='random', n_init=n_init, max_iter=max_iter, random_state=111) + kmeans = KMeans(n_clusters=n_clusters, init="random", n_init=n_init, max_iter=max_iter, random_state=111) y_kmeans = kmeans.fit_predict(x) fig, ax = plt.subplots(figsize=(12,8)) - plt.scatter(x[:, 0], x[:, 1], c=y_kmeans, s=50, cmap='viridis') + plt.scatter(x[:, 0], x[:, 1], c=y_kmeans, s=50, cmap="viridis") centers = kmeans.cluster_centers_ - plt.scatter(centers[:, 0], centers[:, 1], c='black', s=200, marker='X') + plt.scatter(centers[:, 0], centers[:, 1], c="black", s=200, marker="X") st.pyplot(fig) else: From 64cf65a4170b8e1cc2e37d9e830ad1bf97090e68 Mon Sep 17 00:00:00 2001 From: bastien ollier Date: Wed, 19 Jun 2024 09:28:25 +0200 Subject: [PATCH 7/8] max nb cluster to nb line --- frontend/pages/clustering:_kmeans.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/frontend/pages/clustering:_kmeans.py b/frontend/pages/clustering:_kmeans.py index 824e173..7be976b 100644 --- a/frontend/pages/clustering:_kmeans.py +++ b/frontend/pages/clustering:_kmeans.py @@ -10,7 +10,7 @@ if "data" in st.session_state: with st.form("my_form"): row1 = st.columns([1,1,1]) - n_clusters = row1[0].selectbox("Number of clusters", range(1, 10)) + n_clusters = row1[0].selectbox("Number of clusters", range(1, data.shape[0])) data_name = row1[1].multiselect("Data Name",data.select_dtypes(include="number").columns, max_selections=2) n_init = row1[2].number_input("n_init",step=1,min_value=1) From 34f70b4d792d9bb36f653b9e837f209b98ca1965 Mon Sep 17 00:00:00 2001 From: bastien ollier Date: Wed, 19 Jun 2024 09:34:52 +0200 Subject: [PATCH 8/8] delete np --- frontend/pages/clustering:_dbscan.py | 1 - frontend/pages/clustering:_kmeans.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/frontend/pages/clustering:_dbscan.py b/frontend/pages/clustering:_dbscan.py index 6a8ca22..da51aa9 100644 --- a/frontend/pages/clustering:_dbscan.py +++ b/frontend/pages/clustering:_dbscan.py @@ -1,7 +1,6 @@ import streamlit as st import matplotlib.pyplot as plt from sklearn.cluster import DBSCAN -import numpy as np st.header("Clustering: dbscan") diff --git a/frontend/pages/clustering:_kmeans.py b/frontend/pages/clustering:_kmeans.py index 7be976b..69d9920 100644 --- a/frontend/pages/clustering:_kmeans.py +++ b/frontend/pages/clustering:_kmeans.py @@ -10,7 +10,7 @@ if "data" in st.session_state: with st.form("my_form"): row1 = st.columns([1,1,1]) - n_clusters = row1[0].selectbox("Number of clusters", range(1, data.shape[0])) + n_clusters = row1[0].selectbox("Number of clusters", range(1,data.shape[0])) data_name = row1[1].multiselect("Data Name",data.select_dtypes(include="number").columns, max_selections=2) n_init = row1[2].number_input("n_init",step=1,min_value=1)