From d726f86c066da74cd6d5a36bc00cc1adeb8f2493 Mon Sep 17 00:00:00 2001 From: Hugo PRADIER Date: Wed, 5 Jun 2024 08:48:51 +0200 Subject: [PATCH 1/4] exploration initiale + debut visualisation --- frontend/main.py | 62 +++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 56 insertions(+), 6 deletions(-) diff --git a/frontend/main.py b/frontend/main.py index b51c053..bc9e4bf 100644 --- a/frontend/main.py +++ b/frontend/main.py @@ -1,10 +1,60 @@ import pandas as pd import streamlit as st +import matplotlib.pyplot as plt +import seaborn as sns -st.title("Hello world!") +st.title("Project Miner") -uploaded_file = st.file_uploader("Choose a file") -if uploaded_file is not None: - df = pd.read_csv(uploaded_file) - st.write(df.head(10)) - st.write(df.tail(10)) +# File uploader +uploaded_file = st.file_uploader("Upload your CSV file", type=["csv"]) + +if uploaded_file: + data = pd.read_csv(uploaded_file) + st.success("File loaded successfully!") + + # Data Preview + st.header("Data Preview") + + st.subheader("First 5 Rows") + st.write(data.head()) + + st.subheader("Last 5 Rows") + st.write(data.tail()) + + # Data Summary + st.header("Data Summary") + + st.subheader("Basic Information") + st.write(f"Number of Rows: {data.shape[0]}") + st.write(f"Number of Columns: {data.shape[1]}") + st.write(f"Column Names: {list(data.columns)}") + + st.subheader("Missing Values by Column") + missing_values = data.isnull().sum() + st.write(missing_values) + + st.subheader("Statistical Summary") + st.write(data.describe()) + + # Data Visualization + st.header("Data Visualization") + + # Histogram + st.subheader("Histogram") + column_to_plot = st.selectbox("Select Column for Histogram", data.columns) + if column_to_plot: + fig, ax = plt.subplots() + ax.hist(data[column_to_plot].dropna(), bins=20, edgecolor='k') + ax.set_title(f'Histogram of {column_to_plot}') + ax.set_xlabel(column_to_plot) + ax.set_ylabel('Frequency') + st.pyplot(fig) + + # Boxplot + st.subheader("Boxplot") + column_to_plot_box = st.selectbox("Select Column for Boxplot", data.columns, key="boxplot") + if column_to_plot_box: + fig, ax = plt.subplots() + sns.boxplot(y=data[column_to_plot_box].dropna(), ax=ax) + ax.set_title(f'Boxplot of {column_to_plot_box}') + st.pyplot(fig) From aa7b935de5f99d024267831fa43e2ed76828fc71 Mon Sep 17 00:00:00 2001 From: Hugo PRADIER Date: Wed, 5 Jun 2024 09:05:40 +0200 Subject: [PATCH 2/4] verif colonnes numeriques pour boite moustache --- frontend/main.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/frontend/main.py b/frontend/main.py index bc9e4bf..f37c936 100644 --- a/frontend/main.py +++ b/frontend/main.py @@ -5,14 +5,13 @@ import seaborn as sns st.title("Project Miner") -# File uploader +### Exploration uploaded_file = st.file_uploader("Upload your CSV file", type=["csv"]) if uploaded_file: data = pd.read_csv(uploaded_file) st.success("File loaded successfully!") - # Data Preview st.header("Data Preview") st.subheader("First 5 Rows") @@ -21,7 +20,6 @@ if uploaded_file: st.subheader("Last 5 Rows") st.write(data.tail()) - # Data Summary st.header("Data Summary") st.subheader("Basic Information") @@ -36,10 +34,10 @@ if uploaded_file: st.subheader("Statistical Summary") st.write(data.describe()) - # Data Visualization + ### Visualization + st.header("Data Visualization") - # Histogram st.subheader("Histogram") column_to_plot = st.selectbox("Select Column for Histogram", data.columns) if column_to_plot: @@ -50,7 +48,6 @@ if uploaded_file: ax.set_ylabel('Frequency') st.pyplot(fig) - # Boxplot st.subheader("Boxplot") column_to_plot_box = st.selectbox("Select Column for Boxplot", data.columns, key="boxplot") if column_to_plot_box: From 1fd219d4388adc797463e5faf698842d81934424 Mon Sep 17 00:00:00 2001 From: Hugo PRADIER Date: Wed, 5 Jun 2024 09:10:14 +0200 Subject: [PATCH 3/4] utilisation de is_numeric_dtype au lieu de verifier par type --- frontend/main.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/frontend/main.py b/frontend/main.py index f37c936..3de03a5 100644 --- a/frontend/main.py +++ b/frontend/main.py @@ -2,6 +2,7 @@ import pandas as pd import streamlit as st import matplotlib.pyplot as plt import seaborn as sns +from pandas.api.types import is_numeric_dtype st.title("Project Miner") @@ -49,9 +50,10 @@ if uploaded_file: st.pyplot(fig) st.subheader("Boxplot") - column_to_plot_box = st.selectbox("Select Column for Boxplot", data.columns, key="boxplot") - if column_to_plot_box: + dataNumeric = data.select_dtypes(include='number') + column_to_plot = st.selectbox("Select Column for Boxplot", dataNumeric.columns) + if column_to_plot: fig, ax = plt.subplots() - sns.boxplot(y=data[column_to_plot_box].dropna(), ax=ax) - ax.set_title(f'Boxplot of {column_to_plot_box}') - st.pyplot(fig) + sns.boxplot(data=data, x=column_to_plot, ax=ax) + ax.set_title(f'Boxplot of {column_to_plot}') + st.pyplot(fig) \ No newline at end of file From bf0c38b0d91efab29026dbabdb31e56338022513 Mon Sep 17 00:00:00 2001 From: bastien ollier Date: Wed, 5 Jun 2024 09:07:27 +0200 Subject: [PATCH 4/4] update visualisation --- frontend/main.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/frontend/main.py b/frontend/main.py index 3de03a5..79b9a64 100644 --- a/frontend/main.py +++ b/frontend/main.py @@ -4,7 +4,10 @@ import matplotlib.pyplot as plt import seaborn as sns from pandas.api.types import is_numeric_dtype -st.title("Project Miner") +st.set_page_config( + page_title="Project Miner", + layout="wide" +) ### Exploration uploaded_file = st.file_uploader("Upload your CSV file", type=["csv"]) @@ -24,8 +27,10 @@ if uploaded_file: st.header("Data Summary") st.subheader("Basic Information") - st.write(f"Number of Rows: {data.shape[0]}") - st.write(f"Number of Columns: {data.shape[1]}") + col1, col2 = st.columns(2) + col1.metric("Number of Rows", data.shape[0]) + col2.metric("Number of Columns", data.shape[1]) + st.write(f"Column Names: {list(data.columns)}") st.subheader("Missing Values by Column")