clustering 2D + 3D

pull/14/head
Aurian JAULT 1 year ago
parent c1dfaa7b68
commit 13d26c716b

@ -1,17 +1,21 @@
#!/usr/bin/env python3
# Driver script: reads ./data.csv through the project's load_csv helpers,
# calls the show_csv plotting helpers, then launches clustering on two columns.
# NOTE(review): this listing looks like a rendered diff, so several statements
# appear twice (old and new version of the same line) -- confirm against the
# real file before relying on the duplication.
import sys
# Make the back-end modules under ./src/back/ importable by bare name.
sys.path.append('./src/back/')
import load_csv as l
import show_csv as s
import clustering_csv as c
# return_csv is defined in load_csv (not visible here); presumably it returns
# a pandas DataFrame, since the result is later indexed by column names.
df = l.return_csv("./data.csv")
l.csv_value(df)
l.csv_value(df)
# l.csv_stadardisation_Z(df,"Vehicle Year")
# l.csv_standardisation_Z(df,"Vehicle Year")
# s.histo_col(df,"Speed Limit")
s.histo_col(df,"Speed Limit")
# s.plotBoxWhisker(df)
s.plotBoxWhisker(df)
# Cluster on the 'Speed Limit' and 'Vehicle Year' columns.
c.launch_cluster(df,['Speed Limit','Vehicle Year'])

@ -0,0 +1,82 @@
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans, DBSCAN
from sklearn.datasets import make_blobs, make_moons
from mpl_toolkits.mplot3d import Axes3D
def visualize_clusters_2d(X, labels, centers=None, title="Clusters"):
    """Show a 2D scatter plot of the samples, coloured by cluster label.

    Args:
        X: 2D array; columns 0 and 1 are used as the x and y coordinates.
        labels: per-sample values handed to the scatter call as colours.
        centers: optional 2D array of cluster centres; when given, its
            first two columns are drawn as large red markers.
        title: title placed on the plot.

    Returns nothing; the figure is displayed with ``plt.show()``.
    """
    plt.figure(figsize=(10, 7))

    xs, ys = X[:, 0], X[:, 1]
    plt.scatter(xs, ys, c=labels, s=50, cmap='viridis')

    # Centres are overlaid after the samples so they stay visible on top.
    if centers is not None:
        center_xs, center_ys = centers[:, 0], centers[:, 1]
        plt.scatter(center_xs, center_ys, c='red', s=200, alpha=0.75)

    plt.title(title)
    plt.xlabel("Feature 1")
    plt.ylabel("Feature 2")
    plt.show()
def visualize_clusters_3d(X, labels, centers=None, title="Clusters"):
    """Show a 3D scatter plot of the samples, coloured by cluster label.

    Args:
        X: 2D array; columns 0, 1 and 2 are used as x, y and z coordinates.
        labels: per-sample values handed to the scatter call as colours.
        centers: optional 2D array of cluster centres; when given, its
            first three columns are drawn as large red markers.
        title: title placed on the 3D axes.

    Returns nothing; the figure is displayed with ``plt.show()``.
    """
    figure = plt.figure(figsize=(10, 7))
    axes = figure.add_subplot(111, projection='3d')

    xs, ys, zs = X[:, 0], X[:, 1], X[:, 2]
    axes.scatter(xs, ys, zs, c=labels, s=50, cmap='viridis')

    # Centres are overlaid after the samples so they stay visible on top.
    if centers is not None:
        center_xs, center_ys, center_zs = centers[:, 0], centers[:, 1], centers[:, 2]
        axes.scatter(center_xs, center_ys, center_zs, c='red', s=200, alpha=0.75)

    axes.set_title(title)
    axes.set_xlabel("Feature 1")
    axes.set_ylabel("Feature 2")
    axes.set_zlabel("Feature 3")
    plt.show()
def calculate_cluster_statistics_kmeans(X, labels, centers):
    """Summarise each cluster by its size and its centre.

    Args:
        X: array of samples, indexable by a boolean mask over ``labels``.
        labels: array with one cluster label per sample; each label is also
            used as an index into ``centers``.
        centers: array of cluster centres, indexed by label.

    Returns:
        A list with one dict per distinct label (in ``np.unique`` order)
        holding ``'cluster'``, ``'num_points'`` and ``'center'``.
    """
    def _summarise(cluster_id):
        # Boolean mask selects the samples assigned to this cluster.
        members = X[labels == cluster_id]
        return {
            'cluster': cluster_id,
            'num_points': len(members),
            'center': centers[cluster_id],
        }

    return [_summarise(cluster_id) for cluster_id in np.unique(labels)]
def calculate_cluster_statistics_dbscan(X, labels):
    """Summarise each non-noise cluster by its size and bounding-box density.

    Args:
        X: array of samples, indexable by a boolean mask over ``labels``.
        labels: array with one cluster label per sample; the label ``-1``
            is treated as noise and skipped.

    Returns:
        A list with one dict per distinct non-noise label (in ``np.unique``
        order) holding ``'cluster'``, ``'num_points'`` and ``'density'``.
        Density is the point count divided by the product of the
        per-feature extents (max - min) of the cluster; a cluster with zero
        extent along any feature therefore divides by zero.
    """
    def _summarise(cluster_id):
        members = X[labels == cluster_id]
        count = len(members)
        # Axis-aligned bounding box of the cluster, one extent per feature.
        extent = np.max(members, axis=0) - np.min(members, axis=0)
        return {
            'cluster': cluster_id,
            'num_points': count,
            'density': count / extent.prod(),
        }

    return [
        _summarise(cluster_id)
        for cluster_id in np.unique(labels)
        if cluster_id != -1  # ignore noise
    ]
def launch_cluster(df, array_columns, *, n_clusters=4, eps=0.2, min_samples=5):
    """Cluster the selected columns with K-Means and DBSCAN and plot both.

    Args:
        df: table supporting ``df[array_columns].values`` (the caller is
            expected to pass a pandas DataFrame).
        array_columns: list of column names used as clustering features.
        n_clusters: number of K-Means clusters (defaults to the previously
            hard-coded 4).
        eps: DBSCAN neighbourhood radius (defaults to the previously
            hard-coded 0.2).
        min_samples: DBSCAN ``min_samples`` (defaults to the previously
            hard-coded 5).

    With exactly three columns both results are drawn in 3D; for any other
    column count the 2D plot is used, which only shows the first two
    features. Returns nothing.
    """
    X = df[array_columns].values

    # Fixed random_state keeps the K-Means result reproducible across runs.
    kmeans = KMeans(n_clusters=n_clusters, random_state=42)
    labels_kmeans = kmeans.fit_predict(X)
    centers_kmeans = kmeans.cluster_centers_

    # Apply DBSCAN.
    # NOTE(review): X is used unscaled, so eps is measured in the raw units
    # of the columns -- confirm the default suits the data being clustered.
    dbscan = DBSCAN(eps=eps, min_samples=min_samples)
    labels_dbscan = dbscan.fit_predict(X)

    if len(array_columns) == 3:
        visualize_clusters_3d(X, labels_kmeans, centers_kmeans, title="K-Means Clustering 3D")
        visualize_clusters_3d(X, labels_dbscan, title="DBSCAN Clustering 3D")
    else:
        visualize_clusters_2d(X, labels_kmeans, centers_kmeans, title="K-Means Clustering")
        visualize_clusters_2d(X, labels_dbscan, title="DBSCAN Clustering")

@ -28,7 +28,7 @@ def csv_norm_min_max(df,col):
df[col] = (df[col] - df[col].min()) / (df[col].max() - df[col].min())
return df
def csv_stadardisation_Z(df,col):
def csv_standardisation_Z(df,col):
mean_col1 = df[col].mean()
std_col1 = df[col].std()
df[col] = (df[col] - mean_col1) / std_col1

Loading…
Cancel
Save