|
|
@ -26,26 +26,26 @@ def data_manipulation(DataIsLoaded = None):
|
|
|
|
# Démarre le thread de suivi du temps
|
|
|
|
# Démarre le thread de suivi du temps
|
|
|
|
timer_thread.start()
|
|
|
|
timer_thread.start()
|
|
|
|
|
|
|
|
|
|
|
|
names = pd.read_csv("name.basics.tsv", sep="\t")
|
|
|
|
names = pd.read_csv("baseData/name.basics.tsv", sep="\t")
|
|
|
|
|
|
|
|
|
|
|
|
# filter by actor / actress
|
|
|
|
# filter by actor / actress
|
|
|
|
# ------------------------------------------------------------------ #
|
|
|
|
# ------------------------------------------------------------------ #
|
|
|
|
if DataIsLoaded is not None:
|
|
|
|
if DataIsLoaded is not None:
|
|
|
|
df = pd.read_csv("title.principals.tsv", sep="\t")
|
|
|
|
df = pd.read_csv("baseData/title.principals.tsv", sep="\t")
|
|
|
|
actors = df.loc[(df.category == 'actor') | (df.category =='actress')]
|
|
|
|
actors = df.loc[(df.category == 'actor') | (df.category =='actress')]
|
|
|
|
actors.to_csv("actors.tsv", sep="\t", index=False) # outputs actors.tsv
|
|
|
|
actors.to_csv("processedData/actors.tsv", sep="\t", index=False) # outputs actors.tsv
|
|
|
|
# ------------------------------------------------------------------ #
|
|
|
|
# ------------------------------------------------------------------ #
|
|
|
|
|
|
|
|
|
|
|
|
# merges actors and movies to have each actor that played in a movie
|
|
|
|
# merges actors and movies to have each actor that played in a movie
|
|
|
|
# ------------------------------------------------------------------ #
|
|
|
|
# ------------------------------------------------------------------ #
|
|
|
|
actors = pd.read_csv("actors.tsv", sep="\t")
|
|
|
|
actors = pd.read_csv("processedData/actors.tsv", sep="\t")
|
|
|
|
ratings = pd.read_csv("title.ratings.tsv", sep="\t")
|
|
|
|
ratings = pd.read_csv("baseData/title.ratings.tsv", sep="\t")
|
|
|
|
|
|
|
|
|
|
|
|
actorsRatings = actors.merge(ratings, left_on="tconst", right_on="tconst", how="inner")
|
|
|
|
actorsRatings = actors.merge(ratings, left_on="tconst", right_on="tconst", how="inner")
|
|
|
|
actorsRatings.to_csv("actorsRatings.tsv", sep="\t", index=False) # outputs actorsRatings.tsv
|
|
|
|
actorsRatings.to_csv("processedData/actorsRatings.tsv", sep="\t", index=False) # outputs actorsRatings.tsv
|
|
|
|
|
|
|
|
|
|
|
|
else:
|
|
|
|
else:
|
|
|
|
actorsRatings = pd.read_csv("actorsRatings.tsv", sep="\t")
|
|
|
|
actorsRatings = pd.read_csv("processedData/actorsRatings.tsv", sep="\t")
|
|
|
|
|
|
|
|
|
|
|
|
# ------------------------------------------------------------------ #
|
|
|
|
# ------------------------------------------------------------------ #
|
|
|
|
|
|
|
|
|
|
|
@ -64,16 +64,16 @@ def data_manipulation(DataIsLoaded = None):
|
|
|
|
|
|
|
|
|
|
|
|
if DataIsLoaded is not None:
|
|
|
|
if DataIsLoaded is not None:
|
|
|
|
# Enregistrer dans un fichier CSV avec les noms de colonnes personnalisés
|
|
|
|
# Enregistrer dans un fichier CSV avec les noms de colonnes personnalisés
|
|
|
|
grouped.to_csv("actorsRatingsGrouped.tsv", index=False, sep="\t") # outputs actorsRatingsGrouped.tsv
|
|
|
|
grouped.to_csv("processedData/actorsRatingsGrouped.tsv", index=False, sep="\t") # outputs actorsRatingsGrouped.tsv
|
|
|
|
# ------------------------------------------------------------------ #
|
|
|
|
# ------------------------------------------------------------------ #
|
|
|
|
|
|
|
|
|
|
|
|
# merges the last file with the names of the actors
|
|
|
|
# merges the last file with the names of the actors getRatingsActeur.py
|
|
|
|
# ------------------------------------------------------------------ #
|
|
|
|
# ------------------------------------------------------------------ #
|
|
|
|
actorNames = names[['nconst','primaryName']]
|
|
|
|
actorNames = names[['nconst','primaryName']]
|
|
|
|
groupedWithActorNames = grouped.merge(actorNames, left_on="nconst", right_on="nconst", how="inner")
|
|
|
|
groupedWithActorNames = grouped.merge(actorNames, left_on="nconst", right_on="nconst", how="inner")
|
|
|
|
|
|
|
|
|
|
|
|
if DataIsLoaded is not None:
|
|
|
|
if DataIsLoaded is not None:
|
|
|
|
groupedWithActorNames.to_csv("actorsRatingsGroupedWithName.tsv", index=True, sep="\t") # outputs actorsRatingsGroupedWithName.tsv
|
|
|
|
groupedWithActorNames.to_csv("processedData/actorsRatingsGroupedWithName.tsv", index=True, sep="\t") # outputs actorsRatingsGroupedWithName.tsv
|
|
|
|
# ------------------------------------------------------------------ #
|
|
|
|
# ------------------------------------------------------------------ #
|
|
|
|
|
|
|
|
|
|
|
|
# Joindre les deux DataFrames sur 'nconst' pour associer chaque 'tconst' avec les 'averageRating'
|
|
|
|
# Joindre les deux DataFrames sur 'nconst' pour associer chaque 'tconst' avec les 'averageRating'
|
|
|
@ -94,7 +94,7 @@ def data_manipulation(DataIsLoaded = None):
|
|
|
|
'averageRatingMovie': grouped['averageRating'] # Ajoute 'averageRating' comme 'averageRatingMovie'
|
|
|
|
'averageRatingMovie': grouped['averageRating'] # Ajoute 'averageRating' comme 'averageRatingMovie'
|
|
|
|
})
|
|
|
|
})
|
|
|
|
|
|
|
|
|
|
|
|
actorsRatingsPerMovie.to_csv("actorsRatingsPerMovie.tsv", index=False, sep="\t")
|
|
|
|
actorsRatingsPerMovie.to_csv("processedData/actorsRatingsPerMovie.tsv", index=False, sep="\t")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@ -102,7 +102,8 @@ def data_manipulation(DataIsLoaded = None):
|
|
|
|
# actorsRatingsPerMovie.to_csv("actorsRatingsPerMovie.tsv", index=False, sep="\t")
|
|
|
|
# actorsRatingsPerMovie.to_csv("actorsRatingsPerMovie.tsv", index=False, sep="\t")
|
|
|
|
|
|
|
|
|
|
|
|
# Fonction principale pour exécuter à la fois le timer et la manipulation de données
|
|
|
|
# Fonction principale pour exécuter à la fois le timer et la manipulation de données
|
|
|
|
if __name__ == "__main__": # Crée un thread pour suivre le temps
|
|
|
|
if __name__ == "__main__":
|
|
|
|
|
|
|
|
# Crée un thread pour suivre le temps
|
|
|
|
timer_thread = threading.Thread(target=time_tracker)
|
|
|
|
timer_thread = threading.Thread(target=time_tracker)
|
|
|
|
|
|
|
|
|
|
|
|
# Démarre le thread de suivi du temps
|
|
|
|
# Démarre le thread de suivi du temps
|
|
|
|