from flask import Flask, render_template, request
import pandas as pd

# NOTE(review): this wildcard import is presumably what supplies `predict`, the
# dataset helpers and the numpy / scikit-learn / plotting names used further
# down in this file -- confirm against test.py before narrowing it.
from test import *

app = Flask(__name__)

# Columns the DataFrame is restricted to before the train/test split.
colonnes = [
    # Bout identity
    'B_fighter', 'R_fighter', 'title_bout',
    # Blue-corner averages
    'B_avg_BODY_landed', 'B_avg_HEAD_landed', 'B_avg_TD_att',
    'B_avg_TOTAL_STR_landed', 'B_avg_opp_BODY_att', 'B_avg_opp_HEAD_landed',
    'B_avg_opp_LEG_landed', 'B_avg_opp_SIG_STR_att', 'B_avg_opp_TOTAL_STR_att',
    # Red-corner averages
    'R_avg_BODY_landed', 'R_avg_HEAD_landed', 'R_avg_TD_att',
    'R_avg_TOTAL_STR_landed', 'R_avg_opp_BODY_att', 'R_avg_opp_HEAD_landed',
    'R_avg_opp_LEG_landed', 'R_avg_opp_SIG_STR_att', 'R_avg_opp_TOTAL_STR_att',
    # Ages, date, target and categorical descriptors
    'B_age', 'R_age', 'date', 'Winner', 'weight_class', 'B_Stance', 'R_Stance',
]

# Load the DataFrame only once to save resources.
# Make sure this path points to your data file.
df = pd.read_csv('archive/data.csv')

# Before April 2001 there were almost no rules in the UFC (no judges, no time
# limits, no rounds, etc.). From that date on, the UFC started to implement a
# set of rules known as the "Unified Rules of Mixed Martial Arts".
# Therefore, all fights before this major update in the UFC's rules history
# are deleted. Using this old data would not be representative of current
# fights, especially since this sport has become one of the most regulated
# because of its mixed-discipline nature and complexity.
# Keep only the fights held after the cutoff date. The comparison is done on
# the raw 'date' values against this string.
# `.copy()` yields an independent frame, so the column assignments below do
# not write into a slice of the frame that was loaded from disk.
limit_date = '2001-04-01'
df = df[df['date'] > limit_date].copy()

# Display NaN values
displayNumberOfNaNValues(df)

# Important numeric features: missing values are replaced by the column median.
imp_features = ['R_Weight_lbs', 'R_Height_cms', 'B_Height_cms', 'R_age', 'B_age',
                'R_Reach_cms', 'B_Reach_cms']
imp_median = SimpleImputer(missing_values=np.nan, strategy='median')
for feature in imp_features:
    # The imputer is re-fitted for every feature; the result is written back
    # positionally as a flat array.
    df[feature] = imp_median.fit_transform(df[feature].to_numpy().reshape(-1, 1)).ravel()

# Stances: missing values are replaced by the most frequent stance of the column.
for stance_col in ('R_Stance', 'B_Stance'):
    imp_stance = SimpleImputer(missing_values=np.nan, strategy='most_frequent')
    imputed_stance = imp_stance.fit_transform(df[stance_col].to_numpy().reshape(-1, 1))
    # Assign the raw array, not a new DataFrame: a new DataFrame carries a
    # fresh 0..n-1 index and pandas aligns on index labels, which would
    # misplace values (or produce NaN) on this filtered frame.
    df[stance_col] = imputed_stance.ravel()

# Drop the 'Referee', 'location', 'B_draw' and 'R_draw' columns
df = df.drop(columns=['Referee', 'location', 'B_draw', 'R_draw'])

# Remove 'Draw' results and 'Catch Weight' fights
df = df[(df['Winner'] != 'Draw') & (df['weight_class'] != 'Catch Weight')]

# Correlation matrix over the float64 / int64 columns only
dfWithoutString = df.select_dtypes(include=['float64', 'int64'])
plt.figure(figsize=(50, 40))
corr_matrix = dfWithoutString.corr(method='pearson').abs()
sns.heatmap(corr_matrix, annot=True)
# NOTE(review): the figure is built on every start-up but never shown or saved
# while the line below stays commented out.
## Show the correlation matrix of the dataframe
## Very laggy feature
# plt.show()

# Per the original note, 2015-01-01 marks the last year when fight data was
# not full and correct; it is passed to list_fighters as the cutoff.
fighters = list_fighters(df, '2015-01-01')

df = df[colonnes]

# Training set: fights of every listed fighter, all but the last one
# (going by the helper's name -- confirm in its definition).
df_train = build_df_all_but_last(df, fighters)

# Test set: per the original note, the last fight of every fighter.
df_test = build_df(df, fighters, 0)

# Column transformer that ordinally encodes the listed categorical columns
# while leaving the other columns unchanged.
preprocessor = make_column_transformer(
    (OrdinalEncoder(), ['weight_class', 'B_Stance', 'R_Stance']),
    remainder='passthrough',
)

# Encode the 'Winner' column into numerical labels for both datasets (the
# encoder is fitted on the training labels only), then separate the features
# from the target.
label_encoder = LabelEncoder()
y_train = label_encoder.fit_transform(df_train['Winner'])
y_test = label_encoder.transform(df_test['Winner'])
X_train = df_train.drop(['Winner'], axis=1)
X_test = df_test.drop(['Winner'], axis=1)

# Random Forest composed of 100 decision trees. Per the original note, the
# parameters were optimized using cross-validation paired with GridSearch.
random_forest = RandomForestClassifier(
    n_estimators=100,
    criterion='entropy',
    max_depth=10,
    min_samples_split=2,
    min_samples_leaf=1,
    random_state=0,
)

# Train the encoding + classifier pipeline
model = Pipeline([('encoding', preprocessor), ('random_forest', random_forest)])
model.fit(X_train, y_train)

# 5-fold cross-validation on the training data for a more stable accuracy
# estimate (reduces variation)
accuracies = cross_val_score(estimator=model, X=X_train, y=y_train, cv=5)
print('Accuracy mean : ', accuracies.mean())
print('Accuracy standard deviation : ', accuracies.std())

# Evaluate on the held-out test set
y_pred = model.predict(X_test)
print('Testing accuracy : ', accuracy_score(y_test, y_pred), '\n')

# Class names reported for the encoded labels 0 and 1
target_names = ["Blue", "Red"]
print(classification_report(y_test, y_pred, labels=[0, 1], target_names=target_names))

# Input column names, in X_train order.
# NOTE(review): the forest is fitted on the column transformer's output, which
# places the encoded columns first; these names may therefore not line up
# one-to-one with `feature_importances` -- verify before plotting them together.
feature_names = list(X_train.columns)
# Importance of every feature seen by the forest
feature_importances = model['random_forest'].feature_importances_
# Feature positions sorted by decreasing importance
indices = np.argsort(feature_importances)[::-1]
n = 30  # maximum feature importances displayed
# Positions of the (at most) n most important features
idx = indices[0:n]
# Standard deviation of each feature's importance across the forest's trees
std = np.std([tree.feature_importances_ for tree in model['random_forest'].estimators_], axis=0)
# Select one tree from the model
tree_estimator = model['random_forest'].estimators_[10]


@app.route('/')
def index():
    """Render the landing page template."""
    return render_template('index.html')


@app.route('/predict', methods=['POST'])
def make_prediction():
    """Handle the prediction form and render the outcome.

    Reads ``blue_fighter``, ``red_fighter``, ``weightclass``, ``rounds`` and
    ``title_bout`` from the submitted form, passes them to ``predict`` with
    the module-level ``df`` and ``model``, and renders ``result.html``.

    Returns:
        The rendered ``result.html``. The ``result`` text is empty when
        ``predict`` returns ``None``. If ``rounds`` is missing or not an
        integer, the page carries an error message and the status is 400.
    """
    blue_fighter = request.form['blue_fighter']
    red_fighter = request.form['red_fighter']
    weightclass = request.form['weightclass']

    # `type=int` makes a missing or non-numeric value come back as None
    # instead of raising, so bad input gets a 400 rather than a server error.
    rounds = request.form.get('rounds', type=int)
    if rounds is None:
        error = "Invalid request: 'rounds' must be a whole number."
        return render_template('result.html', result=error), 400

    # An absent field counts as "not a title bout" rather than aborting the request.
    title_bout = request.form.get('title_bout') == 'True'

    prediction_proba = predict(df, model, blue_fighter, red_fighter, weightclass, rounds, title_bout)

    # Format the result for display in the browser. Column 0 of the first row
    # is reported for the blue fighter and column 1 for the red fighter.
    result = ""
    if prediction_proba is not None:
        blue_pct = round(prediction_proba[0][0] * 100, 2)
        red_pct = round(prediction_proba[0][1] * 100, 2)
        result = f"The predicted probability of {blue_fighter} winning is {blue_pct}% and the predicted probability of {red_fighter} winning is {red_pct}%"

    return render_template('result.html', result=result)


if __name__ == '__main__':
    # NOTE(review): debug=True enables the interactive debugger and the
    # reloader; keep it for local development only.
    app.run(debug=True)