diff --git a/test.py b/test.py
index 8602b5d..700adc3 100644
--- a/test.py
+++ b/test.py
@@ -135,7 +135,7 @@ def list_fighters(df, limit_date):
     return fighters
 
 # Last year when data fight was not full and correct
-fighters = list_fighters(df,'2016-01-01')
+fighters = list_fighters(df,'2015-01-01')
 
 def build_df(df, fighters, i):
     arr = [select_fight_row(df, fighters[f], i) for f in range(len(fighters)) if select_fight_row(df, fighters[f], i) is not None]
@@ -145,11 +145,34 @@ def build_df(df, fighters, i):
     df_fights['title_bout'] = df_fights['title_bout'].map({True: 1, False: 0})
     df_fights.drop(['R_fighter', 'B_fighter', 'date'], axis=1, inplace=True)
     return df_fights
+
+def build_df_all_but_last(df, fighters, max_fights=10000):
+    """Build one frame from every fight row found for each fighter.
+
+    For each fighter, select_fight_row is queried with indices 0, 1, 2, ...
+    until it returns None (or max_fights indices have been tried); the
+    collected rows are de-duplicated and cleaned the same way build_df does.
+
+    NOTE(review): despite the name, no row is explicitly excluded here. If
+    select_fight_row(df, fighter, -1) returns a row this loop also reaches,
+    df_train and df_test overlap -- confirm against select_fight_row.
+    """
+    rows = []
+    for fighter in fighters:
+        for i in range(max_fights):
+            fight_row = select_fight_row(df, fighter, i)
+            if fight_row is None:
+                break
+            rows.append(list(fight_row))
+    # Build the frame once; appending row by row copies it on every step.
+    df_fights = pd.DataFrame(rows, columns=list(df.columns))
+    df_fights.drop_duplicates(inplace=True)
+    df_fights['title_bout'] = df_fights['title_bout'].map({True: 1, False: 0})
+    df_fights.drop(['R_fighter', 'B_fighter', 'date'], axis=1, inplace=True)
+    return df_fights
 
-df_train = build_df(df, fighters, 0)
-df_test = build_df(df, fighters, 1)
-
-# print(df_train.head(5))
+df_train = build_df_all_but_last(df, fighters)
+df_test = build_df(df, fighters, -1)
 
 preprocessor = make_column_transformer((OrdinalEncoder(), ['weight_class', 'B_Stance', 'R_Stance']), remainder='passthrough')
 