You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
60 lines
2.1 KiB
60 lines
2.1 KiB
import pandas as pd
|
|
import numpy as np
|
|
import matplotlib.pyplot as plt
|
|
from sklearn.tree import DecisionTreeClassifier
|
|
from sklearn.metrics import accuracy_score, confusion_matrix
|
|
from sklearn import metrics
|
|
from sklearn.model_selection import train_test_split
|
|
|
|
|
|
# Load the fight records from the local CSV export.
dataframe = pd.read_csv("archive/data.csv")

# Encode the outcome and the boolean flag as integers (0/1):
# 'Winner' becomes 'Blue_Corner_Win', which is 1 when the recorded winner is 'Blue'
# and 0 for every other value.
dataframe = dataframe.rename(columns={'Winner': 'Blue_Corner_Win'})
dataframe['Blue_Corner_Win'] = dataframe['Blue_Corner_Win'].eq('Blue').astype(int)
dataframe['title_bout'] = dataframe['title_bout'].astype(int)

# Feature columns fed to the model. The order is significant: it fixes the column
# order of the training matrix.
# NOTE(review): the 'B_' / 'R_' prefixes presumably stand for blue / red corner — confirm.
colonnes = [
    'B_avg_BODY_landed',
    'B_avg_HEAD_landed',
    'B_avg_TD_att',
    'B_avg_TOTAL_STR_landed',
    'B_avg_opp_BODY_att',
    'B_avg_opp_HEAD_landed',
    'B_avg_opp_LEG_landed',
    'B_avg_opp_SIG_STR_att',
    'B_avg_opp_TOTAL_STR_att',
    'R_avg_TD_att',
    'R_avg_opp_GROUND_att',
    'R_avg_opp_SIG_STR_landed',
    'B_age',
    'R_age',
]

# Target (did the blue corner win?) kept as a one-column frame, and the feature matrix.
Yi = dataframe.loc[:, ["Blue_Corner_Win"]]
Xi = dataframe.loc[:, colonnes]
|
|
|
|
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
Xtrain, Xtest, ytrain, ytest = train_test_split(Xi, Yi, test_size=0.20, random_state=42)
print(Xtrain.shape)
print(Xtest.shape)

# Fit on the training split only. The labels must be `ytrain` (the rows aligned with
# `Xtrain`): passing the full label set `Yi` hands fit() more labels than feature rows,
# which it rejects, and would also expose the held-out labels to the model.
Arbre_decision = DecisionTreeClassifier(random_state=0, max_depth=20)
clf = Arbre_decision.fit(Xtrain, ytrain)
|
|
|
|
# Score the fitted tree on the held-out split.
ypredict = clf.predict(Xtest)
accuracy = accuracy_score(ytest, ypredict)
matriceConfusion = confusion_matrix(ytest, ypredict)

# The two off-diagonal cells of the (binary) confusion matrix are the misclassified
# samples; the sum of all cells is the number of evaluated samples.
total = matriceConfusion.sum()
incorrect = matriceConfusion[0, 1] + matriceConfusion[1, 0]

# Report the error count, the sample count and the percentage classified correctly.
print(f"\nNumber of incorrect classifications: {incorrect}")
print(f"Number of classifications total: {total}")
print(f"Percent: {(total - incorrect) / total * 100}")
|
|
|
|
# --- Predict a single match-up: Adrian Yanez (red corner) vs Gustavo Lopez (blue corner) ---
# The classifier was fitted on exactly the columns listed in `colonnes`, so predict()
# needs ONE row holding those same columns in the same order. Concatenating two
# full-length mean vectors (2 * len(colonnes) values) does not match the fitted
# feature count and is rejected by predict().
# NOTE(review): assumes the 'R_' / 'B_' prefixes denote the red / blue corner and that
# the dataset has a 'B_fighter' column mirroring 'R_fighter' — confirm against data.csv.
red_fighter = 'Adrian Yanez'
blue_fighter = 'Gustavo Lopez'
red_columns = [col for col in colonnes if col.startswith('R_')]
blue_columns = [col for col in colonnes if col.startswith('B_')]

# Past fights of each fighter in the corner they are assigned for this prediction.
fighter_data = dataframe[dataframe['R_fighter'] == red_fighter]
fighter_data_2 = dataframe[dataframe['B_fighter'] == blue_fighter]
for fighter_name, fighter_rows, corner in (
    (red_fighter, fighter_data, 'red'),
    (blue_fighter, fighter_data_2, 'blue'),
):
    if fighter_rows.empty:
        # Without any rows the means below would be all-NaN and the prediction meaningless.
        raise ValueError(f"No {corner}-corner fights found for {fighter_name!r}")

# Average each fighter's stats over those fights, keeping only their corner's features.
average_fighter_data = fighter_data[red_columns].mean()
average_fighter_data_2 = fighter_data_2[blue_columns].mean()

# Merge both halves into one feature vector ordered like the training columns, then
# shape it as a 1-row DataFrame so the feature names match those seen during fit().
combined_features = pd.concat([average_fighter_data_2, average_fighter_data]).reindex(colonnes)
sample = combined_features.to_frame().T

prediction = clf.predict(sample)
# Label 1 means "blue corner wins" (see 'Blue_Corner_Win'); anything else is red.
if prediction[0] == 1:
    winner = "Blue Corner"
else:
    winner = "Red Corner"

print(f"The predicted winner is: {winner}")
|