You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
112 lines
4.4 KiB
112 lines
4.4 KiB
import numpy as np
|
|
import matplotlib.pyplot as plt
|
|
from sklearn.cluster import KMeans, DBSCAN
|
|
from sklearn.datasets import make_blobs, make_moons
|
|
from mpl_toolkits.mplot3d import Axes3D
|
|
|
|
def visualize_clusters_2d(X, labels, centers=None, title="Clusters"):
    """Draw a 2D scatter plot of labelled points and return its figure.

    Args:
        X: Array indexed as ``X[:, 0]`` and ``X[:, 1]``; only these first two
            columns are plotted.
        labels: Per-point values used to colour the points (``viridis``
            colormap).
        centers: Optional array of cluster centres. When given, its first two
            columns are overlaid as large, semi-transparent red markers.
        title: Title shown above the plot.

    Returns:
        The newly created matplotlib figure.
    """
    figure = plt.figure(figsize=(10, 7))

    first_feature, second_feature = X[:, 0], X[:, 1]
    plt.scatter(first_feature, second_feature, c=labels, s=50, cmap='viridis')

    has_centers = centers is not None
    if has_centers:
        plt.scatter(
            centers[:, 0],
            centers[:, 1],
            c='red',
            s=200,
            alpha=0.75,
        )

    plt.title(title)
    plt.xlabel("Feature 1")
    plt.ylabel("Feature 2")

    # The figure created above is still pyplot's current figure at this point.
    return figure
|
|
|
|
def visualize_clusters_3d(X, labels, centers=None, title="Clusters"):
    """Draw a 3D scatter plot of labelled points and return its figure.

    Args:
        X: Array indexed as ``X[:, 0]``, ``X[:, 1]`` and ``X[:, 2]``; these
            three columns become the x, y and z coordinates.
        labels: Per-point values used to colour the points (``viridis``
            colormap).
        centers: Optional array of cluster centres. When given, its first
            three columns are overlaid as large, semi-transparent red markers.
        title: Title shown above the plot.

    Returns:
        The newly created matplotlib figure.
    """
    figure = plt.figure(figsize=(10, 7))
    axes = figure.add_subplot(111, projection='3d')

    xs, ys, zs = X[:, 0], X[:, 1], X[:, 2]
    axes.scatter(xs, ys, zs, c=labels, s=50, cmap='viridis')

    if centers is not None:
        cx, cy, cz = centers[:, 0], centers[:, 1], centers[:, 2]
        axes.scatter(cx, cy, cz, c='red', s=200, alpha=0.75)

    axes.set_title(title)
    axes.set_xlabel("Feature 1")
    axes.set_ylabel("Feature 2")
    axes.set_zlabel("Feature 3")

    # The figure created above is still pyplot's current figure at this point.
    return figure
|
|
|
|
def calculate_cluster_statistics_kmeans(X, labels, centers):
    """Summarise the size and centre of every cluster.

    Args:
        X: Array of points; the members of a cluster are selected with the
            boolean mask ``labels == cluster_id``.
        labels: Cluster label of each point. Every distinct label is also
            used as an index into ``centers``.
        centers: Cluster centres, indexable by label.

    Returns:
        A list with one dict per distinct label (in the sorted order produced
        by ``np.unique``), each holding ``'cluster'``, ``'num_points'`` and
        ``'center'``.
    """
    return [
        {
            'cluster': cluster_id,
            'num_points': len(X[labels == cluster_id]),
            'center': centers[cluster_id],
        }
        for cluster_id in np.unique(labels)
    ]
|
|
|
|
def calculate_cluster_statistics_dbscan(X, labels):
    """Summarise the size and bounding-box density of every cluster.

    Density is the number of points divided by the volume of the cluster's
    axis-aligned bounding box (the product of ``max - min`` over each
    feature).

    Args:
        X: Array of points; the members of a cluster are selected with the
            boolean mask ``labels == label``.
        labels: Cluster label of each point. Points labelled ``-1`` are
            treated as noise and skipped.

    Returns:
        A list with one dict per non-noise label (in the sorted order
        produced by ``np.unique``), each holding ``'cluster'``,
        ``'num_points'`` and ``'density'``. ``'density'`` is ``inf`` when the
        bounding box has zero volume.
    """
    stats = []
    for label in np.unique(labels):
        if label == -1:
            continue  # Ignore noise

        cluster_points = X[labels == label]
        num_points = len(cluster_points)

        extents = np.max(cluster_points, axis=0) - np.min(cluster_points, axis=0)
        volume = extents.prod()
        # A cluster that is flat along any feature has a zero-volume bounding
        # box. Report infinite density explicitly rather than dividing by
        # zero, which gives the same value but emits a RuntimeWarning (or
        # raises under a strict numpy error state).
        density = float('inf') if volume == 0 else num_points / volume

        stats.append({
            'cluster': label,
            'num_points': num_points,
            'density': density,
        })
    return stats
|
|
|
|
def launch_cluster_knn(df, array_columns, n=3):
    """Cluster the selected columns with K-Means and return the plot.

    Despite the ``knn`` in its name, this function runs
    ``sklearn.cluster.KMeans``.

    Args:
        df: Object supporting ``df[array_columns].values`` (presumably a
            pandas DataFrame).
        array_columns: Columns used as features. Exactly three columns give a
            3D plot; any other count is sent to the 2D plot, which draws only
            the first two features.
        n: Number of clusters to fit.

    Returns:
        The matplotlib figure showing the points coloured by cluster, with
        the cluster centres overlaid.
    """
    X = df[array_columns].values

    # random_state is fixed so repeated runs give the same clustering.
    kmeans = KMeans(n_clusters=n, random_state=42)
    labels_kmeans = kmeans.fit_predict(X)
    centers_kmeans = kmeans.cluster_centers_

    if len(array_columns) == 3:
        return visualize_clusters_3d(
            X, labels_kmeans, centers_kmeans, title="K-Means Clustering 3D"
        )
    return visualize_clusters_2d(
        X, labels_kmeans, centers_kmeans, title="K-Means Clustering"
    )
|
|
|
|
def launch_cluster_dbscan(df, array_columns, eps=0.2, min_samples=5):
    """Cluster the selected columns with DBSCAN and return the plot.

    Args:
        df: Object supporting ``df[array_columns].values`` (presumably a
            pandas DataFrame).
        array_columns: Columns used as features. Exactly three columns give a
            3D plot; any other count is sent to the 2D plot, which draws only
            the first two features.
        eps: Neighbourhood radius passed to ``DBSCAN``.
        min_samples: Minimum neighbourhood size passed to ``DBSCAN``.

    Returns:
        The matplotlib figure showing the points coloured by DBSCAN label.
    """
    X = df[array_columns].values

    dbscan = DBSCAN(eps=eps, min_samples=min_samples)
    labels_dbscan = dbscan.fit_predict(X)

    # Both branches return the figure, so per-cluster statistics were never
    # reachable by callers; they are no longer computed here.
    if len(array_columns) == 3:
        return visualize_clusters_3d(X, labels_dbscan, title="DBSCAN Clustering 3D")
    return visualize_clusters_2d(X, labels_dbscan, title="DBSCAN Clustering")
|
|
|
|
def launch_cluster(df, array_columns, n_clusters=4, eps=0.2, min_samples=5):
    """Run K-Means and DBSCAN on the selected columns, plot both, return stats.

    Two figures are created (K-Means first, then DBSCAN) but not returned;
    they remain registered with pyplot for the caller to show or save.

    Args:
        df: Object supporting ``df[array_columns].values`` (presumably a
            pandas DataFrame).
        array_columns: Columns used as features. Exactly three columns give
            3D plots; any other count is sent to the 2D plot, which draws
            only the first two features.
        n_clusters: Number of clusters for K-Means.
        eps: Neighbourhood radius passed to ``DBSCAN``.
        min_samples: Minimum neighbourhood size passed to ``DBSCAN``.

    Returns:
        A ``(stats_kmeans, stats_dbscan)`` tuple: the per-cluster statistics
        lists produced by ``calculate_cluster_statistics_kmeans`` and
        ``calculate_cluster_statistics_dbscan``.
    """
    X = df[array_columns].values

    # K-Means; random_state is fixed so repeated runs give the same clustering.
    kmeans = KMeans(n_clusters=n_clusters, random_state=42)
    labels_kmeans = kmeans.fit_predict(X)
    centers_kmeans = kmeans.cluster_centers_
    stats_kmeans = calculate_cluster_statistics_kmeans(X, labels_kmeans, centers_kmeans)

    # Apply DBSCAN
    dbscan = DBSCAN(eps=eps, min_samples=min_samples)
    labels_dbscan = dbscan.fit_predict(X)
    stats_dbscan = calculate_cluster_statistics_dbscan(X, labels_dbscan)

    # Pick the plotting routine once; the 3D variants only differ by a title
    # suffix.
    is_3d = len(array_columns) == 3
    visualize = visualize_clusters_3d if is_3d else visualize_clusters_2d
    title_suffix = " 3D" if is_3d else ""

    visualize(X, labels_kmeans, centers_kmeans, title=f"K-Means Clustering{title_suffix}")
    visualize(X, labels_dbscan, title=f"DBSCAN Clustering{title_suffix}")

    return stats_kmeans, stats_dbscan
|
|
|