|
|
@ -10,6 +10,7 @@ from models import *
|
|
|
|
import random
|
|
|
|
import random
|
|
|
|
from analise import *
|
|
|
|
from analise import *
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Defines every column to keep when cleaning the data
|
|
|
|
columns = ['B_fighter','R_fighter','title_bout',
|
|
|
|
columns = ['B_fighter','R_fighter','title_bout',
|
|
|
|
'B_avg_BODY_landed', 'B_avg_HEAD_landed', 'B_avg_TD_att', 'B_avg_TOTAL_STR_landed',
|
|
|
|
'B_avg_BODY_landed', 'B_avg_HEAD_landed', 'B_avg_TD_att', 'B_avg_TOTAL_STR_landed',
|
|
|
|
'B_avg_opp_BODY_att', 'B_avg_opp_HEAD_landed', 'B_avg_opp_LEG_landed',
|
|
|
|
'B_avg_opp_BODY_att', 'B_avg_opp_HEAD_landed', 'B_avg_opp_LEG_landed',
|
|
|
@ -22,12 +23,15 @@ columns = ['B_fighter','R_fighter','title_bout',
|
|
|
|
'B_age', 'R_age','date','Winner','weight_class','B_Stance','R_Stance']
|
|
|
|
'B_age', 'R_age','date','Winner','weight_class','B_Stance','R_Stance']
|
|
|
|
|
|
|
|
|
|
|
|
def swap_values(row):
    """Randomly exchange the ``B_*`` and ``R_*`` entries of one record.

    With a 1-in-2 chance, every name in the module-level ``columns`` list
    that starts with ``B_`` has its value exchanged with the matching
    ``R_`` entry, and the ``Winner`` value is mirrored (0 becomes 2 and
    2 becomes 0) so it keeps designating the same side after the exchange.
    Any other ``Winner`` value is left as it is.

    Args:
        row: Record indexable by column name (for instance the Series that
            ``DataFrame.apply(..., axis=1)`` passes in). It is modified in
            place when the swap happens.

    Returns:
        The same ``row`` object, swapped or untouched.

    Raises:
        KeyError: If a ``B_*`` name in ``columns`` has no ``R_*``
            counterpart in ``row`` (only when the swap is performed).
    """
    # Decide with a 1-in-2 chance whether the swap is performed; when it is
    # not, the row is returned unchanged.
    if random.random() <= 0.5:
        return row

    for column in columns:
        if column.startswith('B_'):
            # A column starting with B_ is exchanged with its R_ twin.
            opposite_column = 'R_' + column[2:]
            row[column], row[opposite_column] = row[opposite_column], row[column]
        elif column == 'Winner':
            # Mirror the outcome: 0 becomes 2 and vice versa. The branches
            # are mutually exclusive so a value is never flipped twice.
            # NOTE(review): 0 and 2 are presumably the label-encoded
            # corners - confirm against the encoder output.
            winner = row[column]
            if winner == 0:
                row[column] = 2
            elif winner == 2:
                row[column] = 0
    return row
|
|
|
|
|
|
|
|
|
|
|
|
def getData(path='archive/data.csv', limit_date='2001-04-01'):
    """Load the CSV, clean it and split it into features and target.

    The rows whose ``date`` is greater than ``limit_date`` are kept,
    restricted to the module-level ``columns``. String columns are turned
    into integer labels, each row goes through ``swap_values``, and the
    remaining NaN values are replaced by the median of their column.

    Args:
        path: Location of the CSV file to read. Defaults to the path that
            used to be hard-coded.
        limit_date: Only rows with ``date`` strictly greater than this
            value are used. The comparison is done on the raw column
            values (assumes ISO ``YYYY-MM-DD`` strings - TODO confirm).

    Returns:
        A tuple ``(X, y)``: ``X`` is the cleaned DataFrame without the
        ``Winner`` column, ``y`` is the ``Winner`` column, i.e. the value
        to predict.
    """
    df = pd.read_csv(path)

    # Keep only the data after the cut-off date. The explicit copy makes
    # the column assignments below act on an independent frame instead of
    # a selection of the frame that was read.
    df = df.loc[df['date'] > limit_date, columns].copy()

    # Convert strings to numbers: every object-typed column is encoded as
    # integer labels (the encoder is refitted for each column).
    label_encoder = LabelEncoder()
    for column in df.select_dtypes(include=['object']).columns:
        df[column] = label_encoder.fit_transform(df[column])

    df = df.apply(swap_values, axis=1)

    # Replace every NaN value by the median of its column.
    median_values = df.median()
    df.fillna(median_values, inplace=True)

    # Features: everything except Winner, since that is what is predicted.
    X = df.drop('Winner', axis=1)
    # Target: only the Winner column, used to check the predictions.
    y = df['Winner']
    return X, y
|
|
|
|
|
|
|
|
|
|
|
|