diff --git a/frontend/exploration.py b/frontend/exploration.py
new file mode 100644
index 0000000..123a22b
--- /dev/null
+++ b/frontend/exploration.py
@@ -0,0 +1,64 @@
+# Home page: upload a CSV file, keep it in the session, and show a preview
+# and summary of it.
+import pandas as pd
+import streamlit as st
+
+st.set_page_config(
+    page_title="Project Miner",
+    layout="wide"
+)
+
+st.title("Home")
+
+### Exploration
+uploaded_file = st.file_uploader("Upload your CSV file", type=["csv"])
+
+if uploaded_file is not None:
+    try:
+        # Stored in session state so pages/visualization.py can read it.
+        st.session_state.data = pd.read_csv(uploaded_file)
+    except (
+        pd.errors.EmptyDataError,
+        pd.errors.ParserError,
+        UnicodeDecodeError,
+    ) as exc:
+        # Report an unreadable upload instead of crashing the page, and drop
+        # any earlier frame so it is not shown as if it were this file.
+        if "data" in st.session_state:
+            del st.session_state["data"]
+        st.error(f"Could not read the CSV file: {exc}")
+    else:
+        st.success("File loaded successfully!")
+
+
+if "data" in st.session_state:
+    data = st.session_state.data
+    # NOTE(review): these two unlabelled 10-row tables overlap with the
+    # "Data Preview" section below -- confirm they are intended.
+    st.write(data.head(10))
+    st.write(data.tail(10))
+
+    st.header("Data Preview")
+
+    st.subheader("First 5 Rows")
+    st.write(data.head())
+
+    st.subheader("Last 5 Rows")
+    st.write(data.tail())
+
+    st.header("Data Summary")
+
+    st.subheader("Basic Information")
+    col1, col2 = st.columns(2)
+    col1.metric("Number of Rows", data.shape[0])
+    col2.metric("Number of Columns", data.shape[1])
+
+    st.write(f"Column Names: {list(data.columns)}")
+
+    st.subheader("Missing Values by Column")
+    missing_values = data.isnull().sum()
+    st.write(missing_values)
+
+    st.subheader("Statistical Summary")
+    st.write(data.describe())
+
diff --git a/frontend/main.py b/frontend/main.py
deleted file mode 100644
index 79b9a64..0000000
--- a/frontend/main.py
+++ /dev/null
@@ -1,64 +0,0 @@
-import pandas as pd
-import streamlit as st
-import matplotlib.pyplot as plt
-import seaborn as sns
-from pandas.api.types import is_numeric_dtype
-
-st.set_page_config(
-    page_title="Project Miner",
-    layout="wide"
-)
-
-### Exploration
-uploaded_file = st.file_uploader("Upload your CSV file", type=["csv"])
-
-if uploaded_file:
-    data = pd.read_csv(uploaded_file)
-    st.success("File loaded successfully!")
-
-    st.header("Data Preview")
-
-    st.subheader("First 5 Rows")
-    st.write(data.head())
-
-    st.subheader("Last 5 Rows")
-    st.write(data.tail())
-
-    st.header("Data Summary")
-
-    st.subheader("Basic Information")
-    col1, col2 = st.columns(2)
-    col1.metric("Number of Rows", data.shape[0])
-    col2.metric("Number of Columns", data.shape[1])
-
-    st.write(f"Column Names: {list(data.columns)}")
-
-    st.subheader("Missing Values by Column")
-    missing_values = data.isnull().sum()
-    st.write(missing_values)
-
-    st.subheader("Statistical Summary")
-    st.write(data.describe())
-
-    ### Visualization
-
-    st.header("Data Visualization")
-
-    st.subheader("Histogram")
-    column_to_plot = st.selectbox("Select Column for Histogram", data.columns)
-    if column_to_plot:
-        fig, ax = plt.subplots()
-        ax.hist(data[column_to_plot].dropna(), bins=20, edgecolor='k')
-        ax.set_title(f'Histogram of {column_to_plot}')
-        ax.set_xlabel(column_to_plot)
-        ax.set_ylabel('Frequency')
-        st.pyplot(fig)
-
-    st.subheader("Boxplot")
-    dataNumeric = data.select_dtypes(include='number')
-    column_to_plot = st.selectbox("Select Column for Boxplot", dataNumeric.columns)
-    if column_to_plot:
-        fig, ax = plt.subplots()
-        sns.boxplot(data=data, x=column_to_plot, ax=ax)
-        ax.set_title(f'Boxplot of {column_to_plot}')
-        st.pyplot(fig)
\ No newline at end of file
diff --git a/frontend/pages/visualization.py b/frontend/pages/visualization.py
new file mode 100644
index 0000000..057b0c9
--- /dev/null
+++ b/frontend/pages/visualization.py
@@ -0,0 +1,39 @@
+# Visualization page: histogram and boxplot of the frame uploaded on Home.
+import streamlit as st
+import matplotlib.pyplot as plt
+import seaborn as sns
+
+st.header("Data Visualization")
+
+
+if "data" in st.session_state:
+    data = st.session_state.data
+
+    st.subheader("Histogram")
+    column_to_plot = st.selectbox("Select Column for Histogram", data.columns)
+    # Compare against None: selectbox returns None when there are no options,
+    # while a falsy label such as 0 or "" is still a real column.
+    if column_to_plot is not None:
+        fig, ax = plt.subplots()
+        ax.hist(data[column_to_plot].dropna(), bins=20, edgecolor='k')
+        ax.set_title(f"Histogram of {column_to_plot}")
+        ax.set_xlabel(column_to_plot)
+        ax.set_ylabel("Frequency")
+        st.pyplot(fig)
+        # pyplot keeps every figure alive until closed; this script reruns on
+        # each interaction, so close it to avoid accumulating figures.
+        plt.close(fig)
+
+    st.subheader("Boxplot")
+    numeric_data = data.select_dtypes(include="number")
+    column_to_plot = st.selectbox(
+        "Select Column for Boxplot", numeric_data.columns
+    )
+    if column_to_plot is not None:
+        fig, ax = plt.subplots()
+        sns.boxplot(data=data, x=column_to_plot, ax=ax)
+        ax.set_title(f"Boxplot of {column_to_plot}")
+        st.pyplot(fig)
+        plt.close(fig)
+else:
+    st.error("file not loaded")
\ No newline at end of file