"""Project Miner: a Streamlit page for exploring an uploaded CSV file.

The page shows a preview of the data, basic summary information, and two
plots (a histogram and a boxplot) for user-selected columns.
"""

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import streamlit as st

# Kept ahead of every other Streamlit call in this script.
st.set_page_config(
    page_title="Project Miner",
    layout="wide",
)


def _load_csv(uploaded_file):
    """Parse the uploaded file as CSV.

    Returns the resulting DataFrame, or ``None`` (after showing an error
    message in the app) when the file is empty, malformed, or cannot be
    decoded.
    """
    try:
        return pd.read_csv(uploaded_file)
    except (
        pd.errors.EmptyDataError,
        pd.errors.ParserError,
        UnicodeDecodeError,
    ) as exc:
        st.error(f"Could not read the uploaded file as CSV: {exc}")
        return None


def _show_figure(fig):
    """Render *fig* in the app and then close it.

    Figures created through ``plt.subplots()`` stay registered with pyplot
    until closed; since the script is re-executed on every interaction,
    leaving them open would accumulate figures across reruns.
    """
    st.pyplot(fig)
    plt.close(fig)


def _render_exploration(data):
    """Show the head/tail preview, shape, missing values and describe()."""
    st.header("Data Preview")
    st.subheader("First 5 Rows")
    st.write(data.head())
    st.subheader("Last 5 Rows")
    st.write(data.tail())

    st.header("Data Summary")
    st.subheader("Basic Information")
    n_rows, n_cols = data.shape
    col1, col2 = st.columns(2)
    col1.metric("Number of Rows", n_rows)
    col2.metric("Number of Columns", n_cols)
    st.write(f"Column Names: {list(data.columns)}")

    st.subheader("Missing Values by Column")
    st.write(data.isnull().sum())

    st.subheader("Statistical Summary")
    st.write(data.describe())


def _render_histogram(data):
    """Let the user pick any column and plot a 20-bin histogram of it."""
    st.subheader("Histogram")
    hist_column = st.selectbox("Select Column for Histogram", data.columns)
    # Compare against None rather than truthiness so that a falsy column
    # label (e.g. 0 or "") is still plotted.
    if hist_column is None:
        return

    values = data[hist_column].dropna()
    if values.empty:
        st.info(f"Column '{hist_column}' has no non-missing values to plot.")
        return

    fig, ax = plt.subplots()
    ax.hist(values, bins=20, edgecolor='k')
    ax.set_title(f'Histogram of {hist_column}')
    ax.set_xlabel(hist_column)
    ax.set_ylabel('Frequency')
    _show_figure(fig)


def _render_boxplot(data):
    """Let the user pick a numeric column and draw its boxplot."""
    st.subheader("Boxplot")
    numeric_columns = data.select_dtypes(include='number').columns
    if numeric_columns.empty:
        st.info("No numeric columns available for a boxplot.")
        return

    box_column = st.selectbox("Select Column for Boxplot", numeric_columns)
    if box_column is None:
        return

    fig, ax = plt.subplots()
    sns.boxplot(data=data, x=box_column, ax=ax)
    ax.set_title(f'Boxplot of {box_column}')
    _show_figure(fig)


### Exploration
uploaded_file = st.file_uploader("Upload your CSV file", type=["csv"])
data = _load_csv(uploaded_file) if uploaded_file is not None else None

if data is not None:
    st.success("File loaded successfully!")
    _render_exploration(data)

    ### Visualization
    st.header("Data Visualization")
    _render_histogram(data)
    _render_boxplot(data)