From 80852f1d46ae9e13156a878a493981ed54555529 Mon Sep 17 00:00:00 2001 From: Paul Squizzato Date: Tue, 13 Feb 2024 01:32:50 +0100 Subject: [PATCH] l'appli console marche, bonne nuit aurel --- calculsIA.py | 63 +++++++++++++++++++++++++++++++++++----------------- interface.py | 25 ++++++++++++--------- 2 files changed, 58 insertions(+), 30 deletions(-) diff --git a/calculsIA.py b/calculsIA.py index 365dc12..7624d36 100644 --- a/calculsIA.py +++ b/calculsIA.py @@ -2,41 +2,64 @@ import pandas as pd import numpy as np import matplotlib.pyplot as plt import sklearn as sk +from sklearn.model_selection import train_test_split +from sklearn.linear_model import LinearRegression +from sklearn.metrics import mean_squared_error, mean_absolute_error -#dfRatingsTropGrand = pd.read_csv("processedData/actorsRatingsPerMovie.tsv",sep='\t') -#tconst ratings actorNames averageRatingMovie +# dfRatingsTropGrand = pd.read_csv("processedData/actorsRatingsPerMovie.tsv",sep='\t') +# tconst ratings actorNames averageRatingMovie -#dfRatings = dfRatingsTropGrand[dfRatingsTropGrand['ratings'].apply(lambda x: len(eval(x)) >= 4)] +# dfRatings = dfRatingsTropGrand[dfRatingsTropGrand['ratings'].apply(lambda x: len(eval(x)) >= 4)] -#dfRatings.to_csv("processedData/actorsRatingsPerMovieGoodToUse.tsv", index=False, sep="\t") +# dfRatings.to_csv("processedData/actorsRatingsPerMovieGoodToUse.tsv", index=False, sep="\t") dfRatings = pd.read_csv("processedData/actorsRatingsPerMovieGoodToUse.tsv", sep="\t") - -#listMovies = dfRatings.head(1000)['tconst'].values -listMovies = dfRatings['tconst'].values +dfActeurs = pd.read_csv("processedData/actorsRatingsGroupedWithName.tsv", sep="\t") +print("Veuillez entrer un entier positif inférieur ou égal à ",len(dfRatings)) +print("(Plus le nombre est petit, le temps de préparation sera moins long, mais la précision du modèle sera plus petite)") +val = input(": ") +val = int(val) +listMovies = dfRatings.sample(val)['tconst'].values +# listMovies = dfRatings['tconst'].values listRatingsA = [] listRatingsM = [] datas = [] nbDiese = 0 for i in range(len(listMovies)): - print(i/len(listMovies)*100,"%", end="\r") - film = listMovies[i] - bob = (dfRatings.averageRatingMovie.loc[dfRatings.tconst == film].values[0],eval(dfRatings.ratings.loc[dfRatings.tconst == film].values[0])) - listRatingsA.append(bob[1][:4]) - listRatingsM.append(bob[0]) + valPrct = i / len(listMovies) * 100 + print("{:.2f}".format(valPrct), "%", end="\r") + film = listMovies[i] + bob = (dfRatings.averageRatingMovie.loc[dfRatings.tconst == film].values[0], + eval(dfRatings.ratings.loc[dfRatings.tconst == film].values[0])) + listRatingsA.append(bob[1][:4]) + listRatingsM.append(bob[0]) print("") -from sklearn.model_selection import train_test_split +x = listRatingsA +y = listRatingsM -x=listRatingsA -y=listRatingsM +xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size=0.3) -xtrain, xtest, ytrain, ytest = train_test_split(x,y,test_size=0.3) +lnrg = LinearRegression() +# clf = lnrg.fit(xtrain,ytrain) xtrain = np.array(xtrain) -from sklearn.linear_model import LinearRegression +clf = lnrg.fit(x, y) +predictions = lnrg.predict(xtest) +print("\nPréparation du modèle de regréssion linéaire terminée\n") -lnrg = LinearRegression() +print('Erreur quadratique : ', mean_squared_error(ytest, predictions)) +print('Écart moyen : ', mean_absolute_error(ytest, predictions),"\n") -#clf = lnrg.fit(xtrain,ytrain) -clf = lnrg.fit(x,y) \ No newline at end of file +def calculPrevision(listNomsActeurs): + if len(listNomsActeurs) == 4: + print('\nPrédiction en cours...\n') + notesActeurs = [] + for nom in listNomsActeurs: + note = dfActeurs.loc[dfActeurs.primaryName == nom].averageRatingMean.values[0] + print(nom, " a pour note moyenne : ", note) + notesActeurs.append(note) + prediction = clf.predict([notesActeurs])[0] + print("\nNote prédite : ", "{:.2f}".format(prediction), "\n") + else: + print("La liste d'acteurs n'est pas de la bonne taille") diff --git a/interface.py b/interface.py index 758c35d..b026d51 100644 --- a/interface.py +++ b/interface.py @@ -3,6 +3,7 @@ import os from typing import List import numpy as np import pandas as pd +from calculsIA import * def levenshtein_distance(s1, s2): if len(s1) < len(s2): @@ -23,6 +24,7 @@ def levenshtein_distance(s1, s2): return previous_row[-1] + def find_closest_actor_name(input_name, actor_names): closest_name = None min_distance = float('inf') @@ -34,26 +36,28 @@ def find_closest_actor_name(input_name, actor_names): closest_name = actor_name return (closest_name) + def getUniqueActorNames(filePath): # Lire le fichier TSV df = pd.read_csv(filePath, sep='\t') actor_names = df['primaryName'].tolist() return (actor_names) + def saveUniqueActorsSorted(inputFilePath, outputFilePath): # Lire le fichier TSV df = pd.read_csv(inputFilePath, sep='\t') - + # Trier le DataFrame par 'primaryName' en ordre alphabétique df_sorted = df.sort_values(by='primaryName') - + # Écrire les données triées dans le fichier CSV with open(outputFilePath, mode='w', newline='', encoding='utf-8') as file: writer = csv.writer(file, delimiter='\t') - + # Écrire l'en-tête writer.writerow(['primaryName', 'nconst']) - + # Écrire chaque ligne du DataFrame trié dans le fichier CSV for index, row in df_sorted.iterrows(): writer.writerow([row['primaryName'], row['nconst']]) @@ -63,6 +67,7 @@ def ask_user_verification(actor_name): response = input(f"Est-ce que vous vouliez dire {actor_name}? (Oui/Non) ") return response.lower() in ['oui', 'o', 'yes', 'y'] + def add_actor(actor_names): user_input = input("Entrez le nom de l'acteur à ajouter : ") closest_name = find_closest_actor_name(user_input, actor_names) @@ -75,6 +80,7 @@ def add_actor(actor_names): else: return None + if __name__ == "__main__": mustContinue = True if not os.path.exists("processedData/uniqueActorNames.tsv"): @@ -89,14 +95,14 @@ if __name__ == "__main__": print("3: Vider la liste") print("4: Lancer la prévision") print("5: Quitter") - + choice = input("Faites votre choix : ") - + if choice == '1': result = add_actor(actor_names) if result: - selectedActorNames.append(result[0]) - print(f"{result[0]} a été ajouté à la liste.") + selectedActorNames.append(result) + print(f"{result} a été ajouté à la liste.") elif choice == '2': print("Liste des acteurs sélectionnés :") for actor in selectedActorNames: @@ -105,8 +111,7 @@ if __name__ == "__main__": selectedActorNames.clear() print("La liste a été vidée.") elif choice == '4': - # TODO: Implémenter la comparaison - print("Lancement de la prévision... (TODO)") + calculPrevision(selectedActorNames) elif choice == '5': print("Au revoir !") mustContinue = False