parent
4c5bc9d059
commit
77e509aec8
@ -1,65 +0,0 @@
|
|||||||
import pandas as pd
|
|
||||||
import numpy as np
|
|
||||||
import matplotlib.pyplot as plt
|
|
||||||
import sklearn as sk
|
|
||||||
from sklearn.model_selection import train_test_split
|
|
||||||
from sklearn.linear_model import LinearRegression
|
|
||||||
from sklearn.metrics import mean_squared_error, mean_absolute_error
|
|
||||||
|
|
||||||
# dfRatingsTropGrand = pd.read_csv("processedData/actorsRatingsPerMovie.tsv",sep='\t')
|
|
||||||
# tconst ratings actorNames averageRatingMovie
|
|
||||||
|
|
||||||
# dfRatings = dfRatingsTropGrand[dfRatingsTropGrand['ratings'].apply(lambda x: len(eval(x)) >= 4)]
|
|
||||||
|
|
||||||
# dfRatings.to_csv("processedData/actorsRatingsPerMovieGoodToUse.tsv", index=False, sep="\t")
|
|
||||||
# Per-movie table used for training and per-actor table used at prediction
# time. Columns read later in this file: tconst, ratings, averageRatingMovie
# (dfRatings) and primaryName, averageRatingMean (dfActeurs).
dfRatings = pd.read_csv("processedData/actorsRatingsPerMovieGoodToUse.tsv", sep="\t")
dfActeurs = pd.read_csv("processedData/actorsRatingsGroupedWithName.tsv", sep="\t")

nbFilmsMax = len(dfRatings)
print("Veuillez entrer un entier positif inférieur ou égal à ", nbFilmsMax)
print("(Plus le nombre est petit, le temps de préparation sera moins long, mais la précision du modèle sera plus petite)")

# Keep asking until the answer is usable: a non-numeric answer would make
# int() raise, a value above the table size would make DataFrame.sample()
# raise, and fewer than 2 movies leaves the 70/30 split further down the
# script with an empty training set.
while True:
    saisie = input(": ")
    try:
        val = int(saisie)
    except ValueError:
        print("Entrée invalide : veuillez entrer un entier entre 2 et ", nbFilmsMax)
        continue
    if 2 <= val <= nbFilmsMax:
        break
    print("Entrée invalide : veuillez entrer un entier entre 2 et ", nbFilmsMax)

# Random subset (without replacement) of movie ids to train on.
listMovies = dfRatings.sample(val)['tconst'].values
|
|
||||||
# listMovies = dfRatings['tconst'].values
|
|
||||||
|
|
||||||
# Map each movie id to the first row seen for it: (movie rating, stringified
# list of actor ratings). Building this once turns the per-movie work into a
# dict lookup instead of two full boolean scans of dfRatings per movie.
# setdefault keeps the first occurrence, matching the former `.values[0]`.
premiereLigneParFilm = {}
for tconst, noteFilm, notesTexte in zip(
    dfRatings.tconst, dfRatings.averageRatingMovie, dfRatings.ratings
):
    premiereLigneParFilm.setdefault(tconst, (noteFilm, notesTexte))

listRatingsA = []  # features: first four actor ratings of each sampled movie
listRatingsM = []  # targets: average rating of each sampled movie
nbFilms = len(listMovies)
for i, film in enumerate(listMovies):
    # Progress indicator, rewritten in place thanks to the carriage return.
    print("{:.2f}".format(i / nbFilms * 100), "%", end="\r")
    noteFilm, notesTexte = premiereLigneParFilm[film]
    # NOTE(review): eval() executes whatever the TSV cell contains. This is
    # acceptable only while the file is produced by this project itself;
    # ast.literal_eval would be the safe parser for a plain list literal.
    listRatingsA.append(eval(notesTexte)[:4])
    listRatingsM.append(noteFilm)
print("")

x = listRatingsA
y = listRatingsM
|
|
||||||
|
|
||||||
# Hold out 30% of the sampled movies to measure the prediction error.
xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size=0.3)

# Score a model fitted on the training split only. Scoring a model that has
# already seen xtest (as happened when the fit used all of x, y) reports an
# error measured on training data, not a held-out error.
modeleEvaluation = LinearRegression().fit(xtrain, ytrain)
predictions = modeleEvaluation.predict(xtest)

# The model actually used for predictions is still fitted on every sampled
# movie, exactly as before; fit() returns the estimator itself, so `clf` and
# `lnrg` are the same fitted object.
lnrg = LinearRegression()
clf = lnrg.fit(x, y)

print("\nPréparation du modèle de regréssion linéaire terminée\n")

print('Erreur quadratique : ', mean_squared_error(ytest, predictions))
print('Écart moyen : ', mean_absolute_error(ytest, predictions),"\n")
|
|
||||||
|
|
||||||
def calculPrevision(listNomsActeurs):
    """Print the rating predicted for a movie cast of exactly four actors.

    Each name is looked up in the module-level ``dfActeurs`` table (column
    ``primaryName``) to get its ``averageRatingMean``; the four values are
    passed, in the order given, to the module-level fitted model ``clf``.
    Four names are required because the model is trained on four actor
    ratings per movie.

    Args:
        listNomsActeurs: sequence of four actor names as they appear in
            ``dfActeurs.primaryName``.

    Returns:
        None. The per-actor ratings, the prediction and any error message
        are printed to standard output.
    """
    if len(listNomsActeurs) != 4:
        print("La liste d'acteurs n'est pas de la bonne taille")
        return

    print('\nPrédiction en cours...\n')
    notesActeurs = []
    for nom in listNomsActeurs:
        notesTrouvees = dfActeurs.loc[dfActeurs.primaryName == nom].averageRatingMean.values
        if len(notesTrouvees) == 0:
            # Unknown name: report it instead of failing with a bare
            # IndexError on the empty lookup result.
            print("Acteur introuvable : ", nom)
            return
        # When several rows share the name, the first one is used.
        note = notesTrouvees[0]
        print(nom, " a pour note moyenne : ", note)
        notesActeurs.append(note)

    # predict() expects a 2-D input: one row holding the four ratings.
    prediction = clf.predict([notesActeurs])[0]
    print("\nNote prédite : ", "{:.2f}".format(prediction), "\n")
|
|
Binary file not shown.
Loading…
Reference in new issue