diff --git a/.drone.yml b/.drone.yml
new file mode 100644
index 0000000..028ee22
--- /dev/null
+++ b/.drone.yml
@@ -0,0 +1,31 @@
+kind: pipeline
+type: docker
+name: Pow
+
+trigger:
+  event:
+    - push
+
+steps:
+
+  - name: build-pow
+    image: plugins/docker
+    settings:
+      dockerfile: ./src/Dockerfile
+      context: ./src
+      registry: hub.codefirst.iut.uca.fr
+      repo: hub.codefirst.iut.uca.fr/dorian.hodin/pow
+      username:
+        from_secret: SECRET_USERNAME
+      password:
+        from_secret: SECRET_PASSWD
+
+  - name: deploy-pow
+    image: hub.codefirst.iut.uca.fr/thomas.bellembois/codefirst-dockerproxy-clientdrone:latest
+    environment:
+      IMAGENAME: hub.codefirst.iut.uca.fr/dorian.hodin/pow:latest
+      CONTAINERNAME: pow
+      COMMAND: create
+      OVERWRITE: true
+      ADMINS: dorianhodin,aurianjault,remiarnal
+    depends_on: [ build-pow ]
diff --git a/.gitignore b/.gitignore
index 4df8670..2672e85 100644
--- a/.gitignore
+++ b/.gitignore
@@ -159,4 +159,4 @@ cython_debug/
 # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
 # and can be added to the global gitignore or merged into this file. For a more nuclear
 # option (not recommended) you can uncomment the following to ignore the entire idea folder.
-#.idea/
+#.idea/
\ No newline at end of file
diff --git a/.streamlit/config.toml b/.streamlit/config.toml
new file mode 100644
index 0000000..7c595d0
--- /dev/null
+++ b/.streamlit/config.toml
@@ -0,0 +1,2 @@
+[client]
+showSidebarNavigation = false
diff --git a/src/Dockerfile b/src/Dockerfile
new file mode 100644
index 0000000..7107b12
--- /dev/null
+++ b/src/Dockerfile
@@ -0,0 +1,11 @@
+FROM python:3.9
+
+WORKDIR /app
+COPY . .
+
+RUN pip install --upgrade pip
+RUN pip install streamlit matplotlib pandas scikit-learn ydata-profiling
+
+EXPOSE 8501
+
+ENTRYPOINT ["streamlit", "run", "home.py", "--server.port=8501", "--server.address=0.0.0.0"]
diff --git a/src/back/clustering_csv.py b/src/back/clustering_csv.py
index dcb170d..fb93b4e 100644
--- a/src/back/clustering_csv.py
+++ b/src/back/clustering_csv.py
@@ -12,7 +12,7 @@ def visualize_clusters_2d(X, labels, centers=None, title="Clusters"):
     plt.title(title)
     plt.xlabel("Feature 1")
     plt.ylabel("Feature 2")
-    plt.show()
+    return plt.gcf()
 
 def visualize_clusters_3d(X, labels, centers=None, title="Clusters"):
     fig = plt.figure(figsize=(10, 7))
@@ -56,7 +56,7 @@ def calculate_cluster_statistics_dbscan(X, labels):
         })
     return stats
 
-def launch_cluster_knn(df,array_columns,n):
+def launch_cluster_knn(df, array_columns, n=3):
     X = df[array_columns].values
 
     kmeans = KMeans(n_clusters=n, random_state=42)
@@ -67,12 +67,11 @@ def launch_cluster_knn(df, array_columns, n=3):
     stats_kmeans = calculate_cluster_statistics_kmeans(X, labels_kmeans, centers_kmeans)
 
     if len(array_columns) == 3:
-        visualize_clusters_3d(X, labels_kmeans, centers_kmeans, title="K-Means Clustering 3D")
+        return visualize_clusters_3d(X, labels_kmeans, centers_kmeans, title="K-Means Clustering 3D")
     else:
-        visualize_clusters_2d(X, labels_kmeans, centers_kmeans, title="K-Means Clustering")
-    return stats_kmeans
+        return visualize_clusters_2d(X, labels_kmeans, centers_kmeans, title="K-Means Clustering")
 
-def launch_cluster_DBSCAN(df, array_columns):
+def launch_cluster_dbscan(df, array_columns):
     X = df[array_columns].values
     dbscan = DBSCAN(eps=0.2, min_samples=5)
     labels_dbscan = dbscan.fit_predict(X)
@@ -80,12 +79,11 @@ def launch_cluster_dbscan(df, array_columns):
     # for stat in stats_dbscan:
     #     print(f"Cluster {stat['cluster']}: {stat['num_points']} points, Density: {stat['density']}")
     if len(array_columns) == 3:
-        visualize_clusters_3d(X, labels_dbscan, title="DBSCAN Clustering 3D")
+        return visualize_clusters_3d(X, labels_dbscan, title="DBSCAN Clustering 3D")
     else:
-        visualize_clusters_2d(X, labels_dbscan, title="DBSCAN Clustering")
-    return stats_dbscan
+        return visualize_clusters_2d(X, labels_dbscan, title="DBSCAN Clustering")
 
-def launch_cluster(df,array_columns):
+def launch_cluster(df, array_columns):
     X = df[array_columns].values
 
     kmeans = KMeans(n_clusters=4, random_state=42)
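Note on the clustering helpers: they now return a Matplotlib figure (via `plt.gcf()`) instead of calling `plt.show()`, so a Streamlit page can pass the result straight to `st.pyplot`. A minimal sketch of how the two entry points might be exercised, assuming `src/back` is on the import path; the synthetic data and the `f1`/`f2` column names are illustrative, not from the app:

```python
# Sketch only: exercising launch_cluster_knn / launch_cluster_dbscan
# on throwaway two-feature data ("f1"/"f2" are made-up column names).
import pandas as pd
from sklearn.datasets import make_blobs

from clustering_csv import launch_cluster_knn, launch_cluster_dbscan

X, _ = make_blobs(n_samples=200, centers=3, n_features=2, random_state=42)
df = pd.DataFrame(X, columns=["f1", "f2"])

fig_kmeans = launch_cluster_knn(df, ["f1", "f2"], n=3)  # 2 columns -> visualize_clusters_2d
fig_dbscan = launch_cluster_dbscan(df, ["f1", "f2"])    # eps/min_samples are hard-coded in the helper
fig_kmeans.savefig("kmeans.png")
fig_dbscan.savefig("dbscan.png")
```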
diff --git a/src/back/load_csv.py b/src/back/load_csv.py
index 25d5ad9..83f111f 100644
--- a/src/back/load_csv.py
+++ b/src/back/load_csv.py
@@ -13,7 +13,6 @@ def csv_value(df):
     print(df.isna().sum())
 
 # Useless values
-
 def csv_check(df):
     for col in df:
         print("-"*12)
@@ -21,31 +20,47 @@ def csv_check(df):
     print("-"*12)
     print(df[col].unique())
 
+def do_for_columns(df, func):
+    for col_name in df:
+        df[col_name] = func(df[col_name])
 
-def csv_norm_min_max(df,col):
-    maValue = df[col].max
-    miValue = df[col].min
-    df[col] = (df[col] - df[col].min()) / (df[col].max() - df[col].min())
-    return df
-def csv_standardisation_Z(df,col):
+def csv_norm_min_max(df, col):
+    max_value = df[col].max()
+    min_value = df[col].min()
+    df[col] = (df[col] - min_value) / (max_value - min_value)
+    return df[col]
+
+def csv_standardisation_Z(df, col):
     mean_col1 = df[col].mean()
     std_col1 = df[col].std()
     df[col] = (df[col] - mean_col1) / std_col1
     return df[col]
 
-def csv_robust_normalize(df, column):
+def csv_robust_normalize(df, col):
     # Compute the median and the IQR
-    median = df[column].median()
-    q1 = df[column].quantile(0.25)
-    q3 = df[column].quantile(0.75)
+    median = df[col].median()
+    q1 = df[col].quantile(0.25)
+    q3 = df[col].quantile(0.75)
     iqr = q3 - q1
 
     # Apply the robust normalization
-    normalized_column = (df[column] - median) / iqr
-    df[column] = normalized_column
-    print (normalized_column)
+    normalized_column = (df[col] - median) / iqr
+    df[col] = normalized_column
     return normalized_column
-
-
+
+def handle_normalization(df, norm_method):
+    if norm_method == "min-max":
+        for col_name in df:
+            df[col_name] = csv_norm_min_max(df, col_name)
+        return df
+    elif norm_method == "z-score":
+        for col_name in df:
+            df[col_name] = csv_standardisation_Z(df, col_name)
+        return df
+    elif norm_method == "robust":
+        for col_name in df:
+            df[col_name] = csv_robust_normalize(df, col_name)
+        return df
+    else:
+        raise ValueError("Unknown method")
diff --git a/src/back/managing_missing_values.py b/src/back/managing_missing_values.py
index 7ee920f..24ac773 100644
--- a/src/back/managing_missing_values.py
+++ b/src/back/managing_missing_values.py
@@ -60,8 +60,6 @@ def impute_with_regression(data):
 - n_neighbors: Number of neighbors to use for KNN imputation (only used if method='knn')
 """
 def handle_missing_values(data, method, n_neighbors=5):
-
-    data = drop_high_null_percentage(data)
     data = convert_categorical_to_numeric(data)
     if method == 'mean':
         return replace_with_mean(data)
@@ -74,4 +72,4 @@ def handle_missing_values(data, method, n_neighbors=5):
     elif method == 'regression':
         return impute_with_regression(data)
     else:
-        raise ValueError("Unknown method")
\ No newline at end of file
+        raise ValueError("Unknown method")
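For context, `handle_normalization` dispatches to one of the three per-column normalizers added above. A quick sketch with throwaway data (all-numeric columns, which is what the cleaning page produces after `convert_categorical_to_numeric`):

```python
# Illustrative only: the three normalization modes on a tiny numeric frame.
import pandas as pd
from load_csv import handle_normalization  # assumes src/back is on the path

df = pd.DataFrame({"a": [1.0, 2.0, 3.0, 4.0],
                   "b": [10.0, 20.0, 30.0, 40.0]})

print(handle_normalization(df.copy(), "min-max"))  # each column rescaled to [0, 1]
print(handle_normalization(df.copy(), "z-score"))  # (x - mean) / std per column
print(handle_normalization(df.copy(), "robust"))   # (x - median) / IQR per column
```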
diff --git a/src/back/prediction.py b/src/back/prediction.py
index 09c7556..1700d72 100644
--- a/src/back/prediction.py
+++ b/src/back/prediction.py
@@ -2,18 +2,16 @@
 from sklearn.model_selection import train_test_split
 from sklearn.linear_model import LinearRegression
 from sklearn.ensemble import RandomForestRegressor
-
 def getColumnsForPredictionAndPredict(df,columns, columnGoal, algoOfPrediction):
     predictors = df[columns]
     target = df[columnGoal]
-    X_train, X_test, y_train, y_test = train_test_split(predictors, target, test_size=0.2, random_state=42)
 
-    if algoOfPrediction == "Régression Linéaire":
+    if algoOfPrediction == "Linear Regression":
         model = LinearRegression()
-    elif algoOfPrediction == "Forêt Aléatoire":
-        model = RandomForestRegressor(n_estimators=100)
+    elif algoOfPrediction == "Random Forest":
+        model = RandomForestRegressor(n_estimators=100)
     else:
         raise NameError("No method name : \"" + algoOfPrediction + "\"")
-
-    model.fit(X_train, y_train)
-    return model.predict(X_test)
\ No newline at end of file
+    model.fit(predictors, target)
+    return model.predict(predictors)
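Worth flagging: with the `train_test_split` call dropped, the model is fitted and evaluated on the same rows. That suits the "Original vs Predicted" table on the prediction page, but it measures in-sample fit only. A hedged sketch of a held-out variant (`predict_holdout` is a hypothetical helper, not part of this patch):

```python
# Hypothetical helper, not in the patch: keeps a held-out test split so the
# returned score reflects generalization rather than memorization.
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split

def predict_holdout(df, columns, column_goal):
    X_train, X_test, y_train, y_test = train_test_split(
        df[columns], df[column_goal], test_size=0.2, random_state=42
    )
    model = RandomForestRegressor(n_estimators=100)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    return y_pred, r2_score(y_test, y_pred)
```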
diff --git a/src/back/show_csv.py b/src/back/show_csv.py
index 93d9973..cd8a293 100644
--- a/src/back/show_csv.py
+++ b/src/back/show_csv.py
@@ -2,15 +2,15 @@ import pandas as pd
 import numpy as np
 import matplotlib.pyplot as plt
 
-def histo_col(df,colonne):
+def histo_col(df, col):
     plt.figure()
-    plt.hist(df[colonne], bins=int(df[colonne].nunique()/4), alpha=0.7, color='blue', edgecolor='black')
-    plt.title(f"Histogramme de la colonne '{colonne}'")
-    plt.xlabel(colonne)
+    plt.hist(df[col], bins=4, alpha=0.7, color='blue', edgecolor='black')
+    plt.title(f"Histogram of column '{col}'")
+    plt.xlabel(col)
     plt.ylabel("Frequency")
     plt.grid(True)
-    plt.show()
+    return plt.gcf()
 
-def plotBoxWhisker(df):
-    df.plot(kind='box', subplots=True, sharex=False, sharey=False)
-    plt.show()
+def plotBoxWhisker(df, col):
+    df[col].plot(kind='box', subplots=True, sharex=False, sharey=False)
+    return plt.gcf()
diff --git a/src/home.py b/src/home.py
new file mode 100644
index 0000000..754dc65
--- /dev/null
+++ b/src/home.py
@@ -0,0 +1,53 @@
+import streamlit as st
+import streamlit.components.v1 as components
+from ydata_profiling import ProfileReport
+import pandas as pd
+
+def statistics(df):
+    nan_counts = df.isnull().sum(axis=1).sum()
+
+    st.write("*Number of columns*:", len(df.columns))
+    st.write("*Number of rows*:", len(df.index))
+
+    st.write("*NaN count*: ", nan_counts)
+    st.write(df.isna().sum())
+
+def display_df_first_and_lasts_lines(df):
+    fl = df.head(10)
+    ll = df.tail(10)
+    concat = pd.concat([fl, ll])
+    st.dataframe(concat)
+
+def nav_bar():
+    st.page_link("./home.py", label="Import", icon="⬆️", help=None)
+    st.page_link("pages/clean.py", label="Clean", icon="🧼", help=None)
+    st.page_link("pages/visualize.py", label="Visualize", icon="👁️", help=None)
+    st.page_link("pages/prediction.py", label="Predict", icon="🔮", help=None)
+    st.page_link("pages/evaluate.py", label="Evaluate", icon=None, help=None)
+
+def clean_dataframe(line):
+    # Call to function to clean data
+    line.empty()
+    line.write("Dataframe has been cleaned")
+
+def main():
+    nav_bar()
+    st.write("# Pow: Your data analyser")
+
+    uploaded_file = st.file_uploader("Choose a file")
+    if uploaded_file is not None:
+        df = pd.read_csv(uploaded_file)
+        st.session_state.original_df = df
+        st.write("## Dataframe (10 first/last lines)")
+        display_df_first_and_lasts_lines(df)
+
+        st.write("## Statistics")
+        statistics(df)
+
+        profile = ProfileReport(df, title='Pandas Profiling Report', explorative=True)
+        components.html(profile.to_html(), height=600, scrolling=True)
+
+        if st.button("Next"):
+            st.switch_page("pages/clean.py")
+
+main()
diff --git a/src/pages/clean.py b/src/pages/clean.py
new file mode 100644
index 0000000..f64bd49
--- /dev/null
+++ b/src/pages/clean.py
@@ -0,0 +1,43 @@
+import streamlit as st
+import sys
+sys.path.append('./back/')
+
+import managing_missing_values as mmv
+import load_csv as lc
+
+if 'original_df' in st.session_state:
+    df = st.session_state.original_df
+
+    st.write("# 🧼 Data cleaning")
+
+    st.write("## Missing data")
+    rm_empty_rows_or_cols = st.checkbox("Remove empty rows or columns", True)
+
+    st.write("#### Replace missing values")
+    replace_methods = ["mean", "median", "mode", "knn", "regression"]
+    replace_method = st.radio('Choose an option:', replace_methods)
+
+    st.write("## Normalize data")
+    normalize_methods = ["min-max", "z-score", "robust"]
+    normalize_method = st.radio('Choose an option:', normalize_methods)
+
+    is_cleaned = st.button("Clean dataset")
+    if is_cleaned:
+        if rm_empty_rows_or_cols:
+            st.write("- Removing high null percentage values")
+            df = mmv.drop_high_null_percentage(df)
+            st.dataframe(df)
+
+        st.write("- Handle missing values with method:", replace_method)
+        df = mmv.handle_missing_values(df, replace_method)
+        st.session_state.df = df
+        st.dataframe(df)
+
+        st.write("- Normalize with method:", normalize_method)
+        df = lc.handle_normalization(df, normalize_method)
+        st.session_state.df = df
+        st.dataframe(df)
+
+        st.switch_page("pages/visualize.py")
+else:
+    st.write("Please upload your dataset.")
diff --git a/src/pages/evaluate.py b/src/pages/evaluate.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/pages/prediction.py b/src/pages/prediction.py
new file mode 100644
index 0000000..6892c69
--- /dev/null
+++ b/src/pages/prediction.py
@@ -0,0 +1,39 @@
+import streamlit as st
+import pandas as pd
+import sys
+sys.path.append('./back/')
+
+import clustering_csv as cc
+import prediction as p
+
+if 'df' in st.session_state:
+
+    df = st.session_state.df
+    df_cols = df.columns.tolist()
+
+    st.write("# 🔮 Prediction")
+
+    if st.button("K-means"):
+        st.pyplot(cc.launch_cluster_knn(df, ["Route Type", "Traffic Control"]))
+
+    if st.button("DBSCAN"):
+        st.pyplot(cc.launch_cluster_dbscan(df, ["Route Type", "Traffic Control"]))
+
+    if st.button("Linear Regression"):
+        col = "Route Type"
+        df_cols.remove(col)
+        original_col = df[col]
+        predicted_col = p.getColumnsForPredictionAndPredict(df, df_cols, col, "Linear Regression")
+        ndf = pd.DataFrame({'Original': original_col, 'Predicted': predicted_col})
+        st.dataframe(ndf)
+
+    if st.button("Random Forest"):
+        col = "Route Type"
+        df_cols.remove(col)
+        original_col = df[col]
+        predicted_col = p.getColumnsForPredictionAndPredict(df, df_cols, col, "Random Forest")
+        ndf = pd.DataFrame({'Original': original_col, 'Predicted': predicted_col})
+        st.dataframe(ndf)
+
+else:
+    st.write("Please clean your dataset.")
diff --git a/src/pages/visualize.py b/src/pages/visualize.py
new file mode 100644
index 0000000..d15ff23
--- /dev/null
+++ b/src/pages/visualize.py
@@ -0,0 +1,32 @@
+import streamlit as st
+import matplotlib.pyplot as plt
+
+import sys
+sys.path.append('./back/')
+
+import show_csv as sc
+
+if 'df' in st.session_state:
+
+    df = st.session_state.df
+    df_columns = df.columns.tolist()
+
+    st.write("# 📊 Visualization")
+
+    st.write("## Histograms")
+    hist_tabs = st.tabs(df_columns)
+
+    for idx, tab in enumerate(hist_tabs):
+        tab.write("##### " + df_columns[idx])
+        tab.pyplot(sc.histo_col(df, df_columns[idx]))
+
+    st.write("## Box & Whisker")
+    baw_tabs = st.tabs(df_columns)
+
+    for idx, tab in enumerate(baw_tabs):
+        tab.write("##### " + df_columns[idx])
+        fig, ax = plt.subplots()
+        df[df_columns[idx]].plot(kind='box', ax=ax)
+        tab.pyplot(fig)
+else:
+    st.write('Please clean your dataset.')
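`src/pages/evaluate.py` is added empty above, even though the nav bar already links to it. A sketch of what it might host, mirroring the session-state guard the other pages use; `st.session_state.evaluation` is an assumed key that nothing in this patch sets yet:

```python
# Hypothetical content for src/pages/evaluate.py -- not part of this patch.
import streamlit as st
from sklearn.metrics import mean_absolute_error, mean_squared_error

if 'evaluation' in st.session_state:
    # Assumed to be an (original, predicted) pair stored by the prediction page.
    original, predicted = st.session_state.evaluation
    st.write("# Evaluate")
    st.write("*MAE*:", mean_absolute_error(original, predicted))
    st.write("*MSE*:", mean_squared_error(original, predicted))
else:
    st.write("Please run a prediction first.")
```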