diff --git a/test.py b/test.py index e4d5542..0e4e554 100644 --- a/test.py +++ b/test.py @@ -4,11 +4,7 @@ import numpy as np import seaborn as sns import matplotlib.pyplot as plt from sklearn.tree import export_graphviz -from io import StringIO -from IPython.display import Image from sklearn.tree import plot_tree -import pydotplus -from IPython.display import Image from sklearn.pipeline import Pipeline from sklearn.ensemble import RandomForestClassifier from sklearn.metrics import accuracy_score @@ -153,24 +149,31 @@ def build_df(df, fighters, i): return df_fights def build_df_all_but_last(df, fighters): - cols = [col for col in df] df_fights=pd.DataFrame(columns=cols) for f in range(len(fighters)): - for i in range(10000): + i=0 + while True: fight_row = select_fight_row(df, fighters[f], i) if fight_row is None: + if not df_fights.empty: + df_fights = df_fights.iloc[:-1] break fight_row = list(fight_row) dfTemp = pd.DataFrame(data=[fight_row], columns=cols) - df_fights = df_fights._append(dfTemp, ignore_index=True) + df_fights = df_fights.dropna(axis=1, how='all') + df_fights = pd.concat([df_fights, dfTemp], ignore_index=True) + i=i+1 df_fights.drop_duplicates(inplace=True) + df_fights = df_fights[~df_fights.apply(lambda row: 'Open Stance' in row.values, axis=1)].reset_index(drop=True) df_fights['title_bout'] = df_fights['title_bout'].map({True: 1, False: 0}) df_fights.drop(['R_fighter', 'B_fighter', 'date'], axis=1, inplace=True) + return df_fights + df_train = build_df_all_but_last(df, fighters) -df_test = build_df(df, fighters,-1) +df_test = build_df(df, fighters,0) preprocessor = make_column_transformer((OrdinalEncoder(), ['weight_class', 'B_Stance', 'R_Stance']), remainder='passthrough')