🔨 Modification des chemins des fichiers dans le script

master
Aurelien PINTRAND 1 year ago
parent f3986312e3
commit db38ac7b61

@ -26,26 +26,26 @@ def data_manipulation(DataIsLoaded = None):
# Démarre le thread de suivi du temps # Démarre le thread de suivi du temps
timer_thread.start() timer_thread.start()
names = pd.read_csv("name.basics.tsv", sep="\t") names = pd.read_csv("baseData/name.basics.tsv", sep="\t")
# filter by actor / actress # filter by actor / actress
# ------------------------------------------------------------------ # # ------------------------------------------------------------------ #
if DataIsLoaded is not None: if DataIsLoaded is not None:
df = pd.read_csv("title.principals.tsv", sep="\t") df = pd.read_csv("baseData/title.principals.tsv", sep="\t")
actors = df.loc[(df.category == 'actor') | (df.category =='actress')] actors = df.loc[(df.category == 'actor') | (df.category =='actress')]
actors.to_csv("actors.tsv", sep="\t", index=False) # outputs actors.tsv actors.to_csv("processedData/actors.tsv", sep="\t", index=False) # outputs actors.tsv
# ------------------------------------------------------------------ # # ------------------------------------------------------------------ #
# merges actors and movies to have each actor that played in a movie # merges actors and movies to have each actor that played in a movie
# ------------------------------------------------------------------ # # ------------------------------------------------------------------ #
actors = pd.read_csv("actors.tsv", sep="\t") actors = pd.read_csv("processedData/actors.tsv", sep="\t")
ratings = pd.read_csv("title.ratings.tsv", sep="\t") ratings = pd.read_csv("baseData/title.ratings.tsv", sep="\t")
actorsRatings = actors.merge(ratings, left_on="tconst", right_on="tconst", how="inner") actorsRatings = actors.merge(ratings, left_on="tconst", right_on="tconst", how="inner")
actorsRatings.to_csv("actorsRatings.tsv", sep="\t", index=False) # outputs actorsRatings.tsv actorsRatings.to_csv("processedData/actorsRatings.tsv", sep="\t", index=False) # outputs actorsRatings.tsv
else: else:
actorsRatings = pd.read_csv("actorsRatings.tsv", sep="\t") actorsRatings = pd.read_csv("processedData/actorsRatings.tsv", sep="\t")
# ------------------------------------------------------------------ # # ------------------------------------------------------------------ #
@ -64,16 +64,16 @@ def data_manipulation(DataIsLoaded = None):
if DataIsLoaded is not None: if DataIsLoaded is not None:
# Enregistrer dans un fichier CSV avec les noms de colonnes personnalisés # Enregistrer dans un fichier CSV avec les noms de colonnes personnalisés
grouped.to_csv("actorsRatingsGrouped.tsv", index=False, sep="\t") # outputs actorsRatingsGrouped.tsv grouped.to_csv("processedData/actorsRatingsGrouped.tsv", index=False, sep="\t") # outputs actorsRatingsGrouped.tsv
# ------------------------------------------------------------------ # # ------------------------------------------------------------------ #
# merges the last file with the names of the actors # merges the last file with the names of the actors getRatingsActeur.py
# ------------------------------------------------------------------ # # ------------------------------------------------------------------ #
actorNames = names[['nconst','primaryName']] actorNames = names[['nconst','primaryName']]
groupedWithActorNames = grouped.merge(actorNames, left_on="nconst", right_on="nconst", how="inner") groupedWithActorNames = grouped.merge(actorNames, left_on="nconst", right_on="nconst", how="inner")
if DataIsLoaded is not None: if DataIsLoaded is not None:
groupedWithActorNames.to_csv("actorsRatingsGroupedWithName.tsv", index=True, sep="\t") # outputs actorsRatingsGroupedWithName.tsv groupedWithActorNames.to_csv("processedData/actorsRatingsGroupedWithName.tsv", index=True, sep="\t") # outputs actorsRatingsGroupedWithName.tsv
# ------------------------------------------------------------------ # # ------------------------------------------------------------------ #
# Joindre les deux DataFrames sur 'nconst' pour associer chaque 'tconst' avec les 'averageRating' # Joindre les deux DataFrames sur 'nconst' pour associer chaque 'tconst' avec les 'averageRating'
@ -94,7 +94,7 @@ def data_manipulation(DataIsLoaded = None):
'averageRatingMovie': grouped['averageRating'] # Ajoute 'averageRating' comme 'averageRatingMovie' 'averageRatingMovie': grouped['averageRating'] # Ajoute 'averageRating' comme 'averageRatingMovie'
}) })
actorsRatingsPerMovie.to_csv("actorsRatingsPerMovie.tsv", index=False, sep="\t") actorsRatingsPerMovie.to_csv("processedData/actorsRatingsPerMovie.tsv", index=False, sep="\t")
@ -102,7 +102,8 @@ def data_manipulation(DataIsLoaded = None):
# actorsRatingsPerMovie.to_csv("actorsRatingsPerMovie.tsv", index=False, sep="\t") # actorsRatingsPerMovie.to_csv("actorsRatingsPerMovie.tsv", index=False, sep="\t")
# Fonction principale pour exécuter à la fois le timer et la manipulation de données # Fonction principale pour exécuter à la fois le timer et la manipulation de données
if __name__ == "__main__": # Crée un thread pour suivre le temps if __name__ == "__main__":
# Crée un thread pour suivre le temps
timer_thread = threading.Thread(target=time_tracker) timer_thread = threading.Thread(target=time_tracker)
# Démarre le thread de suivi du temps # Démarre le thread de suivi du temps

Loading…
Cancel
Save