calculsIA fournit un modèle de régression qui fonctionne : il faut appeler clf.predict() avec une liste de 4 acteurs et ça marche
parent
36cffbfe04
commit
67e38077e9
@ -0,0 +1,42 @@
|
|||||||
|
import pandas as pd
|
||||||
|
import numpy as np
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
import sklearn as sk
|
||||||
|
|
||||||
|
# The input file below was generated once by the following preprocessing,
# kept here as a recipe (it is not executed):
#   1. read "processedData/actorsRatingsPerMovie.tsv" (tab-separated,
#      columns: tconst, ratings, actorNames, averageRatingMovie);
#   2. keep only the rows whose `ratings` list holds at least 4 entries;
#   3. write the result, tab-separated and without the index, to
#      "processedData/actorsRatingsPerMovieGoodToUse.tsv".

# Load the pre-filtered table.
dfRatings = pd.read_csv(
    "processedData/actorsRatingsPerMovieGoodToUse.tsv",
    sep="\t",
)

# Movie identifiers in file order. To experiment on a subset, take
# dfRatings.head(1000)['tconst'].values instead.
listMovies = dfRatings["tconst"].values
|
||||||
|
|
||||||
|
# Training data, kept index-aligned:
#   listRatingsA[k] -> first four entries of movie k's `ratings` list
#   listRatingsM[k] -> movie k's `averageRatingMovie`
listRatingsA = []
listRatingsM = []
datas = []  # not used by the code visible in this script
nbDiese = 0  # not used by the code visible in this script

# Pull both columns out once and walk them in row order. Looking every movie
# up again with a boolean mask (dfRatings.tconst == film) scans the whole
# table on each iteration, which makes the loop quadratic in the row count.
# NOTE(review): row-order iteration matches the per-tconst lookup as long as
# `tconst` values are unique in the file; with duplicated ids each row now
# contributes its own data instead of repeating the first match.
_movieRatings = dfRatings["averageRatingMovie"].values
_actorRatings = dfRatings["ratings"].values
_nbMovies = len(_movieRatings)

for _position, (_ratingMovie, _ratingsRepr) in enumerate(
    zip(_movieRatings, _actorRatings)
):
    # Progress indicator, rewritten in place on the same terminal line.
    print(_position / _nbMovies * 100, "%", end="\r")
    # NOTE(security): `ratings` is stored as the textual form of a Python
    # list and is parsed with eval(), which executes arbitrary code. This is
    # only acceptable because the TSV is generated locally; consider
    # ast.literal_eval once the stored format is confirmed to be a plain
    # literal.
    listRatingsA.append(eval(_ratingsRepr)[:4])
    listRatingsM.append(_ratingMovie)

# Move past the progress line.
print("")
|
||||||
|
|
||||||
|
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

# Features (four actor ratings per movie) and targets (movie average rating).
x = listRatingsA
y = listRatingsM

# Random split holding out 30% of the samples as the test part.
xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size=0.3)
xtrain = np.array(xtrain)

# NOTE(review): the regression is fitted on the complete data set (x, y);
# the split above is not used for training. Fitting on (xtrain, ytrain)
# instead would leave xtest / ytest available for evaluation.
lnrg = LinearRegression()
lnrg.fit(x, y)

# `clf` is the fitted estimator (fit() returns the estimator itself).
clf = lnrg
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in new issue