You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

62 lines
2.1 KiB

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sklearn as sk
from sklearn.preprocessing import LabelEncoder
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from models import *
import random
from analise import *
# Suffixes of the per-fighter averaged statistics. Each one appears twice in
# `columns`: once with the 'B_avg_' prefix and once with the 'R_avg_' prefix.
_AVG_STAT_SUFFIXES = (
    'BODY_landed',
    'HEAD_landed',
    'TD_att',
    'TOTAL_STR_landed',
    'opp_BODY_att',
    'opp_HEAD_landed',
    'opp_LEG_landed',
    'opp_SIG_STR_att',
    'opp_TOTAL_STR_att',
)

# Columns kept from the raw fight data, in the order they are selected.
# 'B_'/'R_' columns come in pairs (presumably blue/red corner -- confirm
# against the dataset); swap_values relies on every 'B_<name>' listed here
# having an 'R_<name>' counterpart.
columns = (
    ['B_fighter', 'R_fighter', 'title_bout']
    + ['B_avg_' + suffix for suffix in _AVG_STAT_SUFFIXES]
    + ['R_avg_' + suffix for suffix in _AVG_STAT_SUFFIXES]
    + ['B_age', 'R_age', 'date', 'Winner', 'weight_class', 'B_Stance', 'R_Stance']
)
def swap_values(row, swap_columns=None):
    """Randomly exchange the 'B_' and 'R_' sides of a single fight record.

    When ``random.random() > 0.5``, every ``'B_<name>'`` label in
    *swap_columns* trades values with ``'R_<name>'``, and any label starting
    with ``'Winner'`` has its code mirrored (0 becomes 2, 2 becomes 0, every
    other value is left alone). Otherwise the row is returned untouched.

    Args:
        row: Mutable, label-indexed record (for example the Series that
            ``DataFrame.apply(..., axis=1)`` passes in). It is modified in
            place.
        swap_columns: Iterable of column labels to process. Defaults to the
            module-level ``columns`` list.

    Returns:
        The same ``row`` object that was passed in.

    Raises:
        KeyError: If a processed label (or the 'R_' counterpart of a 'B_'
            label) is missing from ``row``.
    """
    # Guard clause: roughly half of the rows are left exactly as they are.
    if random.random() <= 0.5:
        return row

    if swap_columns is None:
        swap_columns = columns

    for column in swap_columns:
        # Only 'B_' labels trigger a swap, so each pair is exchanged once.
        if column.startswith('B_'):
            opposite_column = 'R_' + column[2:]
            row[column], row[opposite_column] = row[opposite_column], row[column]
        if column.startswith('Winner'):
            # NOTE(review): 0 and 2 are presumably the label-encoded codes of
            # the two corners produced in getData -- confirm the encoding.
            if row[column] == 0:
                row[column] = 2
            elif row[column] == 2:
                row[column] = 0
    return row
def getData(path='archive/data.csv', limit_date='2001-04-01'):
    """Load the fight CSV and return the feature frame and target column.

    Steps, in order:
      1. Read ``path`` and keep the rows whose 'date' value compares greater
         than ``limit_date``, restricted to the module-level ``columns``.
         (Assumes 'date' holds ISO-formatted strings so that ``>`` orders
         chronologically -- confirm against the data file.)
      2. Replace every object-dtype column with integer codes from
         ``LabelEncoder`` (the encoder is refit for each column).
      3. Apply ``swap_values`` to each row.
      4. Fill remaining missing values with the per-column median.

    Args:
        path: Location of the CSV file to read.
        limit_date: Rows must have a 'date' strictly greater than this value.

    Returns:
        A tuple ``(X, y)``: ``X`` is the frame without the 'Winner' column
        and ``y`` is the 'Winner' column.
    """
    raw = pd.read_csv(path)
    # Work on an explicit copy so the column assignments below never write
    # into a slice of the freshly loaded frame.
    df = raw.loc[raw['date'] > limit_date, columns].copy()

    label_encoder = LabelEncoder()
    # Convert string columns to numbers.
    # NOTE(review): each column is encoded on its own, so a 'B_' column and
    # its 'R_' counterpart may assign different codes to the same value; the
    # row swap below then mixes the two encodings. Consider a shared encoder
    # per B_/R_ pair.
    for column in df.select_dtypes(include=['object']).columns:
        df[column] = label_encoder.fit_transform(df[column])

    df = df.apply(swap_values, axis=1)
    df = df.fillna(df.median())

    X = df.drop('Winner', axis=1)
    y = df['Winner']
    return X, y