l'appli console marche, bonne nuit aurel

master
Paul Squizzato 1 year ago
parent 63365203fc
commit 80852f1d46

@ -2,41 +2,64 @@ import pandas as pd
import numpy as np import numpy as np
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
import sklearn as sk import sklearn as sk
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error
# One-off preprocessing that produced actorsRatingsPerMovieGoodToUse.tsv: it
# kept only the movies whose `ratings` list holds at least four entries.
# Source columns: tconst, ratings, actorNames, averageRatingMovie
# dfRatingsTropGrand = pd.read_csv("processedData/actorsRatingsPerMovie.tsv",sep='\t')
# dfRatings = dfRatingsTropGrand[dfRatingsTropGrand['ratings'].apply(lambda x: len(eval(x)) >= 4)]
# dfRatings.to_csv("processedData/actorsRatingsPerMovieGoodToUse.tsv", index=False, sep="\t")
dfRatings = pd.read_csv("processedData/actorsRatingsPerMovieGoodToUse.tsv", sep="\t")
dfActeurs = pd.read_csv("processedData/actorsRatingsGroupedWithName.tsv", sep="\t")

# Ask how many movies to sample for training. Re-prompt instead of crashing
# on a non-integer or out-of-range answer; at least two rows are needed so
# that both sides of the train/test split are non-empty.
print("Veuillez entrer un entier positif inférieur ou égal à ", len(dfRatings))
print("(Plus le nombre est petit, le temps de préparation sera moins long, mais la précision du modèle sera plus petite)")
while True:
    val = input(": ")
    try:
        val = int(val)
    except ValueError:
        print("Veuillez entrer un nombre entier")
        continue
    if 2 <= val <= len(dfRatings):
        break
    print("Le nombre doit être compris entre 2 et", len(dfRatings))

# Sample whole rows once and read the columns straight from the sample,
# instead of re-filtering the full table by tconst for every movie.
# NOTE(review): this matches the previous per-tconst lookup as long as tconst
# values are unique in the file - confirm.
echantillon = dfRatings.sample(val)
listMovies = echantillon['tconst'].values
# listMovies = dfRatings['tconst'].values
listRatingsA = []
listRatingsM = []
datas = []
nbDiese = 0
nbFilms = len(listMovies)
lignes = zip(echantillon['averageRatingMovie'].values, echantillon['ratings'].values)
for i, (noteFilm, notesBrutes) in enumerate(lignes):
    valPrct = i / nbFilms * 100
    print("{:.2f}".format(valPrct), "%", end="\r")
    # NOTE(review): eval() executes whatever text the TSV contains; this is
    # only acceptable while the file is produced locally by this project.
    notesActeursFilm = eval(notesBrutes)
    # Features: the first four actor ratings; target: the movie's rating.
    listRatingsA.append(notesActeursFilm[:4])
    listRatingsM.append(noteFilm)
print("")

x = listRatingsA
y = listRatingsM
xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size=0.3)
xtrain = np.array(xtrain)

lnrg = LinearRegression()
# Measure the error on movies the model has not seen: fit on the training
# split only before predicting the test split.
lnrg.fit(xtrain, ytrain)
predictions = lnrg.predict(xtest)
# The model kept for later predictions is then refitted on every sampled movie.
clf = lnrg.fit(x, y)
print("\nPréparation du modèle de regréssion linéaire terminée\n")
print('Erreur quadratique : ', mean_squared_error(ytest, predictions))
print('Écart moyen : ', mean_absolute_error(ytest, predictions), "\n")
def calculPrevision(listNomsActeurs):
    """Print the rating predicted for a movie starring the given four actors.

    Each actor's ``averageRatingMean`` is looked up in the module-level
    ``dfActeurs`` table (matched on ``primaryName``); the four values are
    passed, in the order given, to the module-level model ``clf``.

    Args:
        listNomsActeurs: actor names; exactly four are required.

    Returns:
        None. The per-actor ratings, the prediction and any error message
        are written to standard output.
    """
    if len(listNomsActeurs) != 4:
        print("La liste d'acteurs n'est pas de la bonne taille")
        return

    print('\nPrédiction en cours...\n')
    notesActeurs = []
    for nom in listNomsActeurs:
        notes = dfActeurs.loc[dfActeurs.primaryName == nom].averageRatingMean.values
        if len(notes) == 0:
            # No row for this name: report it rather than fail on notes[0].
            print(nom, " est introuvable dans les données, prédiction annulée")
            return
        note = notes[0]
        print(nom, " a pour note moyenne : ", note)
        notesActeurs.append(note)
    # predict() expects a 2-D input, hence the single-row wrapper list.
    prediction = clf.predict([notesActeurs])[0]
    print("\nNote prédite : ", "{:.2f}".format(prediction), "\n")

@ -3,6 +3,7 @@ import os
from typing import List from typing import List
import numpy as np import numpy as np
import pandas as pd import pandas as pd
from calculsIA import *
def levenshtein_distance(s1, s2): def levenshtein_distance(s1, s2):
if len(s1) < len(s2): if len(s1) < len(s2):
@ -23,6 +24,7 @@ def levenshtein_distance(s1, s2):
return previous_row[-1] return previous_row[-1]
def find_closest_actor_name(input_name, actor_names): def find_closest_actor_name(input_name, actor_names):
closest_name = None closest_name = None
min_distance = float('inf') min_distance = float('inf')
@ -34,12 +36,14 @@ def find_closest_actor_name(input_name, actor_names):
closest_name = actor_name closest_name = actor_name
return (closest_name) return (closest_name)
def getUniqueActorNames(filePath):
    """Load the actor names stored in a tab-separated file.

    Args:
        filePath: path of a TSV file containing a ``primaryName`` column.

    Returns:
        The values of the ``primaryName`` column as a list, in file order.
        No de-duplication is done here.
    """
    # Read the TSV file
    df = pd.read_csv(filePath, sep='\t')
    return df['primaryName'].tolist()
def saveUniqueActorsSorted(inputFilePath, outputFilePath): def saveUniqueActorsSorted(inputFilePath, outputFilePath):
# Lire le fichier TSV # Lire le fichier TSV
df = pd.read_csv(inputFilePath, sep='\t') df = pd.read_csv(inputFilePath, sep='\t')
@ -63,6 +67,7 @@ def ask_user_verification(actor_name):
response = input(f"Est-ce que vous vouliez dire {actor_name}? (Oui/Non) ") response = input(f"Est-ce que vous vouliez dire {actor_name}? (Oui/Non) ")
return response.lower() in ['oui', 'o', 'yes', 'y'] return response.lower() in ['oui', 'o', 'yes', 'y']
def add_actor(actor_names): def add_actor(actor_names):
user_input = input("Entrez le nom de l'acteur à ajouter : ") user_input = input("Entrez le nom de l'acteur à ajouter : ")
closest_name = find_closest_actor_name(user_input, actor_names) closest_name = find_closest_actor_name(user_input, actor_names)
@ -75,6 +80,7 @@ def add_actor(actor_names):
else: else:
return None return None
if __name__ == "__main__": if __name__ == "__main__":
mustContinue = True mustContinue = True
if not os.path.exists("processedData/uniqueActorNames.tsv"): if not os.path.exists("processedData/uniqueActorNames.tsv"):
@ -95,8 +101,8 @@ if __name__ == "__main__":
if choice == '1': if choice == '1':
result = add_actor(actor_names) result = add_actor(actor_names)
if result: if result:
selectedActorNames.append(result[0]) selectedActorNames.append(result)
print(f"{result[0]} a été ajouté à la liste.") print(f"{result} a été ajouté à la liste.")
elif choice == '2': elif choice == '2':
print("Liste des acteurs sélectionnés :") print("Liste des acteurs sélectionnés :")
for actor in selectedActorNames: for actor in selectedActorNames:
@ -105,8 +111,7 @@ if __name__ == "__main__":
selectedActorNames.clear() selectedActorNames.clear()
print("La liste a été vidée.") print("La liste a été vidée.")
elif choice == '4': elif choice == '4':
# TODO: Implémenter la comparaison calculPrevision(selectedActorNames)
print("Lancement de la prévision... (TODO)")
elif choice == '5': elif choice == '5':
print("Au revoir !") print("Au revoir !")
mustContinue = False mustContinue = False

Loading…
Cancel
Save