diff --git a/getRatingsActeur.py b/getRatingsActeur.py index 044dbd1..e4f5a93 100644 --- a/getRatingsActeur.py +++ b/getRatingsActeur.py @@ -26,26 +26,26 @@ def data_manipulation(DataIsLoaded = None): # Démarre le thread de suivi du temps timer_thread.start() - names = pd.read_csv("name.basics.tsv", sep="\t") + names = pd.read_csv("baseData/name.basics.tsv", sep="\t") # filter by actor / actress # ------------------------------------------------------------------ # if DataIsLoaded is not None: - df = pd.read_csv("title.principals.tsv", sep="\t") + df = pd.read_csv("baseData/title.principals.tsv", sep="\t") actors = df.loc[(df.category == 'actor') | (df.category =='actress')] - actors.to_csv("actors.tsv", sep="\t", index=False) # outputs actors.tsv + actors.to_csv("processedData/actors.tsv", sep="\t", index=False) # outputs actors.tsv # ------------------------------------------------------------------ # # merges actors and movies to have each actor that played in a movie # ------------------------------------------------------------------ # - actors = pd.read_csv("actors.tsv", sep="\t") - ratings = pd.read_csv("title.ratings.tsv", sep="\t") + actors = pd.read_csv("processedData/actors.tsv", sep="\t") + ratings = pd.read_csv("baseData/title.ratings.tsv", sep="\t") actorsRatings = actors.merge(ratings, left_on="tconst", right_on="tconst", how="inner") - actorsRatings.to_csv("actorsRatings.tsv", sep="\t", index=False) # outputs actorsRatings.tsv + actorsRatings.to_csv("processedData/actorsRatings.tsv", sep="\t", index=False) # outputs actorsRatings.tsv else: - actorsRatings = pd.read_csv("actorsRatings.tsv", sep="\t") + actorsRatings = pd.read_csv("processedData/actorsRatings.tsv", sep="\t") # ------------------------------------------------------------------ # @@ -64,16 +64,16 @@ def data_manipulation(DataIsLoaded = None): if DataIsLoaded is not None: # Enregistrer dans un fichier CSV avec les noms de colonnes personnalisés - grouped.to_csv("actorsRatingsGrouped.tsv", index=False, sep="\t") # outputs actorsRatingsGrouped.tsv + grouped.to_csv("processedData/actorsRatingsGrouped.tsv", index=False, sep="\t") # outputs actorsRatingsGrouped.tsv # ------------------------------------------------------------------ # - # merges the last file with the names of the actors + # merges the last file with the names of the actors getRatingsActeur.py # ------------------------------------------------------------------ # actorNames = names[['nconst','primaryName']] groupedWithActorNames = grouped.merge(actorNames, left_on="nconst", right_on="nconst", how="inner") if DataIsLoaded is not None: - groupedWithActorNames.to_csv("actorsRatingsGroupedWithName.tsv", index=True, sep="\t") # outputs actorsRatingsGroupedWithName.tsv + groupedWithActorNames.to_csv("processedData/actorsRatingsGroupedWithName.tsv", index=True, sep="\t") # outputs actorsRatingsGroupedWithName.tsv # ------------------------------------------------------------------ # # Joindre les deux DataFrames sur 'nconst' pour associer chaque 'tconst' avec les 'averageRating' @@ -94,7 +94,7 @@ def data_manipulation(DataIsLoaded = None): 'averageRatingMovie': grouped['averageRating'] # Ajoute 'averageRating' comme 'averageRatingMovie' }) - actorsRatingsPerMovie.to_csv("actorsRatingsPerMovie.tsv", index=False, sep="\t") + actorsRatingsPerMovie.to_csv("processedData/actorsRatingsPerMovie.tsv", index=False, sep="\t") @@ -102,7 +102,8 @@ def data_manipulation(DataIsLoaded = None): # actorsRatingsPerMovie.to_csv("actorsRatingsPerMovie.tsv", index=False, sep="\t") # Fonction principale pour exécuter à la fois le timer et la manipulation de données -if __name__ == "__main__": # Crée un thread pour suivre le temps +if __name__ == "__main__": + # Crée un thread pour suivre le temps timer_thread = threading.Thread(target=time_tracker) # Démarre le thread de suivi du temps