diff --git a/README.md b/README.md
index 76eb91c..9666161 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
-![PickaxePowBackground](Documentation/assets/PickaxePowBackground.png)
 
+![PickaxePowBackground](assets/PickaxePowBackground.png)
 
# **Pow**
diff --git a/Documentation/assets/PickaxePowBackground.png b/assets/PickaxePowBackground.png
similarity index 100%
rename from Documentation/assets/PickaxePowBackground.png
rename to assets/PickaxePowBackground.png
diff --git a/main.py b/main.py
index 1f987b5..51e80d2 100755
--- a/main.py
+++ b/main.py
@@ -1,7 +1,19 @@
#!/usr/bin/env python3
+
import sys
sys.path.append('./src/back/')
import load_csv as l
+import show_csv as s
+import clustering_csv as c
+
+df = l.return_csv("./data.csv")
+l.csv_value(df)
+
+# Optional preprocessing and visualization steps:
+# l.csv_standardisation_Z(df, "Vehicle Year")
+# l.csv_robust_normalize(df, "Speed Limit")
+# s.histo_col(df, "Speed Limit")
+# s.plotBoxWhisker(df)
-l.csv_value()
+c.launch_cluster(df, ['Speed Limit', 'Vehicle Year'])
diff --git a/src/back/clustering_csv.py b/src/back/clustering_csv.py
new file mode 100644
index 0000000..497d38f
--- /dev/null
+++ b/src/back/clustering_csv.py
@@ -0,0 +1,101 @@
+import numpy as np
+import matplotlib.pyplot as plt
+from sklearn.cluster import KMeans, DBSCAN
+from mpl_toolkits.mplot3d import Axes3D  # registers the '3d' projection on older Matplotlib
+
+def visualize_clusters_2d(X, labels, centers=None, title="Clusters"):
+    plt.figure(figsize=(10, 7))
+    plt.scatter(X[:, 0], X[:, 1], c=labels, s=50, cmap='viridis')
+    if centers is not None:
+        plt.scatter(centers[:, 0], centers[:, 1], c='red', s=200, alpha=0.75)
+    plt.title(title)
+    plt.xlabel("Feature 1")
+    plt.ylabel("Feature 2")
+    plt.show()
+
+def visualize_clusters_3d(X, labels, centers=None, title="Clusters"):
+    fig = plt.figure(figsize=(10, 7))
+    ax = fig.add_subplot(111, projection='3d')
+    ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=labels, s=50, cmap='viridis')
+    if centers is not None:
+        ax.scatter(centers[:, 0], centers[:, 1], centers[:, 2], c='red', s=200, alpha=0.75)
+    ax.set_title(title)
+    ax.set_xlabel("Feature 1")
+    ax.set_ylabel("Feature 2")
+    ax.set_zlabel("Feature 3")
+    plt.show()
+
+def calculate_cluster_statistics_kmeans(X, labels, centers):
+    unique_labels = np.unique(labels)
+    stats = []
+    for label in unique_labels:
+        cluster_points = X[labels == label]
+        num_points = len(cluster_points)
+        center = centers[label]
+        stats.append({
+            'cluster': label,
+            'num_points': num_points,
+            'center': center
+        })
+    return stats
+
+def calculate_cluster_statistics_dbscan(X, labels):
+    unique_labels = np.unique(labels)
+    stats = []
+    for label in unique_labels:
+        if label == -1:
+            continue  # ignore noise points
+        cluster_points = X[labels == label]
+        num_points = len(cluster_points)
+        # Density = points per unit of bounding-box volume; guard against a zero
+        # extent (e.g. a single-point cluster) to avoid dividing by zero.
+        extent = np.ptp(cluster_points, axis=0)
+        density = num_points / extent.prod() if extent.all() else float('inf')
+        stats.append({
+            'cluster': label,
+            'num_points': num_points,
+            'density': density
+        })
+    return stats
+
+def launch_cluster(df, array_columns):
+    X = df[array_columns].values
+
+    # Apply K-Means
+    kmeans = KMeans(n_clusters=4, random_state=42)
+    labels_kmeans = kmeans.fit_predict(X)
+    centers_kmeans = kmeans.cluster_centers_
+
+    stats_kmeans = calculate_cluster_statistics_kmeans(X, labels_kmeans, centers_kmeans)
+    # for stat in stats_kmeans:
+    #     print(f"Cluster {stat['cluster']}: {stat['num_points']} points, Center: {stat['center']}")
+
+    # Apply DBSCAN
+    dbscan = DBSCAN(eps=0.2, min_samples=5)  # eps is scale-sensitive; assumes roughly normalized features
+    labels_dbscan = dbscan.fit_predict(X)
+    stats_dbscan = calculate_cluster_statistics_dbscan(X, labels_dbscan)
+    # for stat in stats_dbscan:
+    #     print(f"Cluster {stat['cluster']}: {stat['num_points']} points, Density: {stat['density']}")
+
+    if len(array_columns) == 3:
+        visualize_clusters_3d(X, labels_kmeans, centers_kmeans, title="K-Means Clustering 3D")
+        visualize_clusters_3d(X, labels_dbscan, title="DBSCAN Clustering 3D")
+    else:
+        visualize_clusters_2d(X, labels_kmeans, centers_kmeans, title="K-Means Clustering")
+        visualize_clusters_2d(X, labels_dbscan, title="DBSCAN Clustering")
+    return stats_kmeans, stats_dbscan
+
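+# Hypothetical helper, not part of the original pipeline: a minimal elbow-method
+# sketch for choosing n_clusters instead of hard-coding 4 in launch_cluster.
+# It assumes X is the same feature matrix built as df[array_columns].values.
+def plot_elbow(X, k_max=10):
+    inertias = []
+    for k in range(1, k_max + 1):
+        # inertia_ = within-cluster sum of squared distances of the fitted model
+        inertias.append(KMeans(n_clusters=k, random_state=42).fit(X).inertia_)
+    plt.figure(figsize=(8, 5))
+    plt.plot(range(1, k_max + 1), inertias, marker='o')
+    plt.xlabel("Number of clusters k")
+    plt.ylabel("Inertia")
+    plt.title("Elbow method for choosing k")
+    plt.show()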
diff --git a/src/back/load_csv.py b/src/back/load_csv.py
index 03db63a..5aa10f2 100644
--- a/src/back/load_csv.py
+++ b/src/back/load_csv.py
@@ -1,20 +1,53 @@
import pandas as pd
+
+def return_csv(path):
+    df = pd.read_csv(path)
+    return df
-def csv_value():
-    df = pd.read_csv('./data.csv')
-    # print(df.head())
-
+def csv_value(df):
     #print all detail
-    # df.info()
-
+    df.info()
     # Print number of missing value for each column
-    # print(df.isna().sum())
-
+    print(df.isna().sum())
     # Useless values
-    # Off-Road Description -> 156170
-    # Municipality -> 152979
-    # Related Non-Motorist -> 166642
-    # Non-Motorist Substance Abuse -> 167788
-    # Circumstance -> 140746
+
+
+def csv_check(df):
+    for col in df:
+        print("-" * 12)
+        print(col)
+        print("-" * 12)
+        print(df[col].unique())
+
+
+def csv_norm_min_max(df, col):
+    max_value = df[col].max()
+    min_value = df[col].min()
+    df[col] = (df[col] - min_value) / (max_value - min_value)
     return df
+
+def csv_standardisation_Z(df, col):
+    mean_col = df[col].mean()
+    std_col = df[col].std()
+    df[col] = (df[col] - mean_col) / std_col
+    return df[col]
+
+def csv_robust_normalize(df, column):
+    # Compute the median and the interquartile range (IQR)
+    median = df[column].median()
+    q1 = df[column].quantile(0.25)
+    q3 = df[column].quantile(0.75)
+    iqr = q3 - q1
+
+    # Apply the robust normalization
+    normalized_column = (df[column] - median) / iqr
+    df[column] = normalized_column
+    print(normalized_column)
+    return normalized_column
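+
+# Hypothetical usage sketch ("value" is an illustrative column name, not from the project data):
+# df_demo = pd.DataFrame({"value": [1.0, 2.0, 2.5, 3.0, 100.0]})
+# csv_norm_min_max(df_demo.copy(), "value")       # rescales into [0, 1]
+# csv_standardisation_Z(df_demo.copy(), "value")  # mean 0, std 1; the outlier skews mean/std
+# csv_robust_normalize(df_demo.copy(), "value")   # median/IQR, robust to the outlier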
diff --git a/src/back/show_csv.py b/src/back/show_csv.py
new file mode 100644
index 0000000..93d9973
--- /dev/null
+++ b/src/back/show_csv.py
@@ -0,0 +1,22 @@
+import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
+
+def histo_col(df, colonne):
+    plt.figure()
+    # At least one bin, even for columns with fewer than 4 distinct values
+    bins = max(1, df[colonne].nunique() // 4)
+    plt.hist(df[colonne], bins=bins, alpha=0.7, color='blue', edgecolor='black')
+    plt.title(f"Histogram of column '{colonne}'")
+    plt.xlabel(colonne)
+    plt.ylabel("Frequency")
+    plt.grid(True)
+    plt.show()
+
+def plotBoxWhisker(df):
+    df.plot(kind='box', subplots=True, sharex=False, sharey=False)
+    plt.show()
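+
+# Hypothetical usage (assumes the loaded DataFrame has a 'Speed Limit' column):
+# histo_col(df, "Speed Limit")  # distribution of a single column
+# plotBoxWhisker(df)            # box-and-whisker plot for every numeric column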