parent
c78d824d09
commit
222f889240
@ -0,0 +1,47 @@
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import matplotlib as plt
|
||||
|
||||
# # filter by actor / actress
|
||||
# df = pd.read_csv("title.principals.tsv", sep="\t")
|
||||
# actors = df.loc[(df.category == 'actor') | (df.category =='actress')]
|
||||
# actors.to_csv("actors.tsv", sep="\t", index=False) # outputs actors.tsv
|
||||
|
||||
# # merge actors with title ratings (on tconst) so each actor row carries the rating of the title they appeared in
|
||||
# actors = pd.read_csv("actors.tsv", sep="\t")
|
||||
# ratings = pd.read_csv("title.ratings.tsv", sep="\t")
|
||||
# names = pd.read_csv("name.basics.tsv", sep="\t")
|
||||
# actorsRatings = actors.merge(ratings, left_on="tconst", right_on="tconst", how="inner")
|
||||
# actorsRatings.to_csv("actorsRatings.tsv", sep="\t", index=False) # outputs actorsRatings.tsv
|
||||
|
||||
# Load the tab-separated actor/rating rows (presumably the output of the
# commented-out merge step earlier in this script -- confirm before relying on it).
actorsRatings = pd.read_csv("actorsRatings.tsv", sep="\t")

# For each distinct "nconst", compute the mean of "averageRating" and the
# number of non-null ratings. Named aggregation produces flat, already-named
# columns; reset_index() turns "nconst" back into a regular column.
grouped = (
    actorsRatings.groupby("nconst")
    .agg(
        averageRatingMean=("averageRating", "mean"),
        averageRatingCount=("averageRating", "count"),
    )
    .reset_index()
)

# Save the per-"nconst" summary as a TSV, without the row index.
grouped.to_csv("actorsRatingsGrouped.tsv", sep="\t", index=False)
|
||||
|
||||
# actorNames = names[['nconst','primaryName']]
|
||||
# groupedWithActorNames = grouped.merge(actorNames, left_on="nconst", right_on="nconst", how="inner")
|
||||
# groupedWithActorNames.to_csv("actorsRatingsGroupedWithName.tsv", index=True, sep="\t")
|
||||
|
||||
# # Load the data
|
||||
# actorsRatings = pd.read_csv("actorsRatings.tsv", sep="\t")
|
||||
# actorsRatingsGrouped = pd.read_csv("actorsRatingsGrouped.tsv", sep="\t")
|
||||
|
||||
# # Join the two DataFrames on 'nconst' to associate each 'tconst' with the 'averageRating' values
|
||||
# # Note: make sure 'nconst' is present in both DataFrames as the join key
|
||||
# merged = pd.merge(actorsRatings[['tconst', 'nconst']], actorsRatingsGrouped[['nconst', 'averageRating']], on='nconst')
|
||||
|
||||
# # Group by 'tconst' and aggregate 'averageRating' into a list for each 'tconst'
|
||||
# grouped = merged.groupby('tconst')['averageRating'].apply(list).reset_index()
|
||||
|
||||
# # Convert the aggregated DataFrame into a dictionary
|
||||
# allActorRatingsMovies = pd.Series(grouped.averageRating.values,index=grouped.tconst).to_dict()
|
||||
|
||||
# actorsRatingsPerMovie = pd.DataFrame(list(allActorRatingsMovies.items()), columns=['tconst', 'ratings'])
|
||||
# actorsRatingsPerMovie.to_csv("actorsRatingsPerMovie.tsv", index=False, sep="\t")
|
Loading…
Reference in new issue