You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
46 lines
1.6 KiB
46 lines
1.6 KiB
import numpy as np
|
|
import pandas as pd
|
|
import matplotlib.pyplot as plt
|
|
import sklearn as sk
|
|
from sklearn.preprocessing import LabelEncoder
|
|
from sklearn.impute import SimpleImputer
|
|
from sklearn.model_selection import train_test_split
|
|
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
|
|
from models import *
|
|
from analise import *
|
|
|
|
|
|
def getData(path='archive/data.csv', limit_date='2001-04-01'):
    """Load the fight dataset and return model-ready features and target.

    Reads the CSV at ``path``, keeps a fixed subset of columns for the rows
    whose ``date`` is strictly greater than ``limit_date``, label-encodes
    every object-dtype column, and fills the remaining missing values with
    the per-column median.

    Args:
        path: Location of the CSV file. Defaults to the path this function
            used before it became a parameter.
        limit_date: Exclusive lower bound compared against the ``date``
            column as loaded (no date parsing is requested here).
            NOTE(review): presumably the file stores ISO ``YYYY-MM-DD``
            strings, so string order matches chronological order -- confirm
            against the data.

    Returns:
        A tuple ``(X, y)``: ``X`` is the DataFrame of selected columns
        without ``Winner``; ``y`` is the encoded ``Winner`` column.
    """
    columns = [
        'B_fighter', 'R_fighter', 'title_bout',
        'B_avg_BODY_landed', 'B_avg_HEAD_landed', 'B_avg_TD_att', 'B_avg_TOTAL_STR_landed',
        'B_avg_opp_BODY_att', 'B_avg_opp_HEAD_landed', 'B_avg_opp_LEG_landed',
        'B_avg_opp_SIG_STR_att', 'B_avg_opp_TOTAL_STR_att',

        'R_avg_BODY_landed', 'R_avg_HEAD_landed', 'R_avg_TD_att', 'R_avg_TOTAL_STR_landed',
        'R_avg_opp_BODY_att', 'R_avg_opp_HEAD_landed', 'R_avg_opp_LEG_landed',
        'R_avg_opp_SIG_STR_att', 'R_avg_opp_TOTAL_STR_att',

        'B_age', 'R_age', 'date', 'Winner', 'weight_class', 'B_Stance', 'R_Stance',
    ]

    df = pd.read_csv(path)

    # Take an explicit copy: the frame is modified column by column below,
    # and writing into a selection of another frame is what triggers
    # pandas' chained-assignment warnings.
    df = df.loc[df['date'] > limit_date, columns].copy()

    # Convert string columns to numbers.
    # NOTE(review): text columns are encoded before the median fill, so how
    # missing text values end up represented is decided by LabelEncoder, not
    # by the imputation step -- confirm this is intended.
    for column in df.select_dtypes(include=['object']).columns:
        # Each column gets its own fit; the encoder is not reused afterwards.
        df[column] = LabelEncoder().fit_transform(df[column])

    # Fill whatever is still missing with each column's median.
    df = df.fillna(df.median())

    X = df.drop('Winner', axis=1)
    y = df['Winner']
    return X, y
|
|
|