Merge pull request 'streamlit' (#21) from streamlit into master
continuous-integration/drone/push Build is passing
Details
continuous-integration/drone/push Build is passing
Details
Reviewed-on: #21 (pull/23/head)
commit
522d016a2a
@ -0,0 +1,31 @@
|
|||||||
|
# Drone CI pipeline for "Pow": build the Docker image from ./src and
# deploy it through the codefirst docker proxy.
# NOTE(review): indentation was flattened by extraction; hierarchy
# reconstructed per the Drone docker-pipeline schema.
kind: pipeline
type: docker
name: Pow

trigger:
  event:
    - push

steps:
  # Build the application image and push it to the codefirst registry.
  - name: build-pow
    image: plugins/docker
    settings:
      dockerfile: ./src/Dockerfile
      context: ./src
      registry: hub.codefirst.iut.uca.fr
      repo: hub.codefirst.iut.uca.fr/dorian.hodin/pow
      username:
        from_secret: SECRET_USERNAME
      password:
        from_secret: SECRET_PASSWD

  # Deploy (or replace) the running container from the freshly built image.
  - name: deploy-pow
    image: hub.codefirst.iut.uca.fr/thomas.bellembois/codefirst-dockerproxy-clientdrone:latest
    environment:
      IMAGENAME: hub.codefirst.iut.uca.fr/dorian.hodin/pow:latest
      CONTAINERNAME: pow
      COMMAND: create
      OVERWRITE: true
      ADMINS: dorianhodin,aurianjault,remiarnal
    depends_on: [ build-pow ]
|
@ -0,0 +1,2 @@
|
|||||||
|
# Streamlit client configuration: hide the auto-generated sidebar page
# list — navigation is rendered manually by nav_bar() in home.py.
[client]
showSidebarNavigation = false
|
@ -0,0 +1,11 @@
|
|||||||
|
FROM python:3.9

WORKDIR /app
COPY . .

# --no-cache-dir keeps the image slim; pip's wheel cache is useless here.
RUN pip install --no-cache-dir --upgrade pip
# ydata-profiling added: home.py imports `ydata_profiling.ProfileReport`,
# but it was missing from the image, so the app crashed at startup.
RUN pip install --no-cache-dir streamlit matplotlib pandas scikit-learn ydata-profiling

# Default Streamlit port.
EXPOSE 8501

ENTRYPOINT ["streamlit", "run", "home.py", "--server.port=8501", "--server.address=0.0.0.0"]
|
@ -0,0 +1,53 @@
|
|||||||
|
import streamlit as st
|
||||||
|
from io import StringIO
|
||||||
|
from ydata_profiling import ProfileReport
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
def statistics(df):
    """Render basic shape and missing-value statistics for *df* on the page.

    Writes the column/row counts, the total number of NaN cells, and a
    per-column NaN breakdown via Streamlit.
    """
    rows, cols = df.shape
    # Total NaN cells across the whole frame (row-wise sum, then summed).
    nan_counts = df.isna().sum(axis=1).sum()

    st.write("*Number of columns*:", cols)
    st.write("*Number of rows*:", rows)

    st.write("*Nan Counts*: ", nan_counts)
    st.write(df.isna().sum())
||||||
|
|
||||||
|
def display_df_first_and_lasts_lines(df):
    """Display the first and last 10 rows of *df* as one Streamlit dataframe."""
    preview = pd.concat([df.head(10), df.tail(10)])
    st.dataframe(preview)
||||||
|
|
||||||
|
def nav_bar():
    """Render the app's page navigation links (sidebar auto-nav is disabled)."""
    pages = (
        ("./home.py", "Import", "⬆️"),
        ("pages/clean.py", "Clean", "🧼"),
        ("pages/visualize.py", "Visualize", "👁️"),
        ("pages/prediction.py", "Predict", "🔮"),
        ("pages/evaluate.py", "Evaluate", None),
    )
    for path, label, icon in pages:
        st.page_link(path, label=label, icon=icon, help=None)
||||||
|
|
||||||
|
def clean_dataframe(line):
    """Clear the given Streamlit element and confirm the cleaning step.

    *line* is expected to be a Streamlit element/container.
    # NOTE(review): the actual data-cleaning call is not implemented here —
    # this only clears the element and writes a confirmation message.
    """
    line.empty()
    line.write("Dataframe has been cleaned")
||||||
|
|
||||||
|
def main():
    """Entry page: upload a CSV, preview it, and show profiling statistics.

    Stores the uploaded dataframe in ``st.session_state`` for the other
    pages, then offers navigation to the cleaning page.
    """
    nav_bar()
    st.write("# Pow: Your data analyser")

    uploaded_file = st.file_uploader("Choose a file")
    if uploaded_file is not None:
        df = pd.read_csv(uploaded_file)
        # Bug fix: pages/clean.py reads `st.session_state.original_df`, but
        # this page only set `orig_df`, so the Clean page never saw the
        # data. Keep the old key too for backward compatibility.
        st.session_state.orig_df = df
        st.session_state.original_df = df

        st.write("## Dataframe (10 first/last lines)")
        display_df_first_and_lasts_lines(df)

        st.write("## Statistics")
        statistics(df)

        profile = ProfileReport(df, title='Pandas Profiling Report', explorative=True)
        # Bug fix: ProfileReport.to_widgets() targets Jupyter notebooks and
        # renders nothing inside a Streamlit app — embed the HTML report.
        import streamlit.components.v1 as components
        components.html(profile.to_html(), height=600, scrolling=True)

        if st.button("Next"):
            st.switch_page("pages/clean.py")


main()
|
@ -0,0 +1,44 @@
|
|||||||
|
"""Data-cleaning page: remove/replace missing values and normalize.

Reads the uploaded dataframe from ``st.session_state.original_df`` and
stores the cleaned result in ``st.session_state.df`` for the next pages.
"""
import streamlit as st
import sys
sys.path.append('./back/')

import managing_missing_values as mmv
import load_csv as lc

if 'original_df' in st.session_state:
    df = st.session_state.original_df

    st.write("# 🧼 Data cleaning")

    st.write("## Missing data")
    rm_empty_rows_or_cols = st.checkbox("Remove empty rows or columns", True)

    st.write("#### Replace missing values")
    replace_methods = ["mean", "median", "mode", "knn", "regression"]
    replace_method = st.radio('Choose an option:', replace_methods)

    st.write("## Normalize data")
    normalize_methods = ["min-max", "z-score", "robust"]
    normalize_method = st.radio('Choose an option:', normalize_methods)

    is_cleaned = st.button("Clean dataset")
    if is_cleaned:
        if rm_empty_rows_or_cols:
            # Typo fix in user-facing message: "hight" -> "high".
            st.write("- Removing high null percentage values")
            df = mmv.drop_high_null_percentage(df)
            st.dataframe(df)

        st.write("- Handle missing values with method:", replace_method)
        df = mmv.handle_missing_values(df, replace_method)
        st.session_state.df = df
        st.dataframe(df)

        st.write("- Normalize with method:", normalize_method)
        df = lc.handle_normalization(df, normalize_method)
        st.session_state.df = df
        st.dataframe(df)

        st.switch_page("pages/visualize.py")
else:
    # Typo fix in user-facing message: "you" -> "your".
    st.write("Please upload your dataset.")
|
@ -0,0 +1,41 @@
|
|||||||
|
"""Prediction page: clustering and regression on the cleaned dataframe.

Reads the cleaned dataframe from ``st.session_state.df``.
# NOTE(review): the target/feature columns ("Route Type",
# "Traffic Control") are hard-coded — confirm they exist in every dataset.
"""
import streamlit as st
import pandas as pd
import sys
sys.path.append('./back/')

import clustering_csv as cc
import prediction as p

if 'df' in st.session_state:

    df = st.session_state.df
    df_cols = df.columns.tolist()

    st.write("# 🔮 Prediction")

    if st.button("K-means"):
        st.pyplot(cc.launch_cluster_knn(df, ["Route Type", "Traffic Control"]))

    if st.button("DBSCAN"):
        st.pyplot(cc.launch_cluster_dbscan(df, ["Route Type", "Traffic Control"]))

    # Bug fix: original_col/predicted_col were only assigned inside the two
    # regression branches below, yet the result dataframe was built
    # unconditionally afterwards — raising NameError on every rerun where
    # neither regression button was clicked.
    original_col = None
    predicted_col = None

    if st.button("Linear Regression"):
        col = "Route Type"
        df_cols.remove(col)
        original_col = df[col]
        predicted_col = p.getColumnsForPredictionAndPredict(df, df_cols, "Route Type", "Linear Regression")

    if st.button("Random Forest"):
        col = "Route Type"
        df_cols.remove(col)
        original_col = df[col]
        predicted_col = p.getColumnsForPredictionAndPredict(df, df_cols, "Route Type", "Random Forest")

    # Only show the comparison table when a regression actually ran.
    if predicted_col is not None:
        ndf = pd.DataFrame()
        ndf['Original'] = original_col
        ndf['Predicted'] = predicted_col

        st.dataframe(ndf)

else:
    st.write("Please clean your dataset.")
|
@ -0,0 +1,32 @@
|
|||||||
|
"""Visualization page: per-column histograms and box-and-whisker plots.

Reads the cleaned dataframe from ``st.session_state.df``.
"""
import streamlit as st
import matplotlib.pyplot as plt

import sys
sys.path.append('./back/')

import show_csv as sc

if 'df' in st.session_state:

    df = st.session_state.df
    df_columns = df.columns.tolist()

    st.write("# 📊 Visualization")

    st.write("## Histograms")
    hist_tabs = st.tabs(df_columns)

    for idx, tab in enumerate(hist_tabs):
        tab.write("##### "+df_columns[idx])
        tab.pyplot(sc.histo_col(df, df_columns[idx]))

    st.write("## Box & Whisker")
    baw_tabs = st.tabs(df_columns)

    for idx, tab in enumerate(baw_tabs):
        tab.write("##### "+df_columns[idx])
        fig, ax = plt.subplots()
        # Bug fix: draw explicitly into this figure's axes. Previously the
        # series was plotted on matplotlib's implicit current axes, which
        # only coincidentally matched the freshly created figure.
        df[df_columns[idx]].plot(kind='box', ax=ax)
        tab.pyplot(fig)
else:
    st.write('Please clean your dataset.')
|
Loading…
Reference in new issue