🔨 Modification des chemins des fichiers dans le script

master
Aurelien PINTRAND 1 year ago
parent f3986312e3
commit db38ac7b61

@ -26,26 +26,26 @@ def data_manipulation(DataIsLoaded = None):
# Démarre le thread de suivi du temps
timer_thread.start()
names = pd.read_csv("name.basics.tsv", sep="\t")
names = pd.read_csv("baseData/name.basics.tsv", sep="\t")
# filter by actor / actress
# ------------------------------------------------------------------ #
if DataIsLoaded is not None:
df = pd.read_csv("title.principals.tsv", sep="\t")
df = pd.read_csv("baseData/title.principals.tsv", sep="\t")
actors = df.loc[(df.category == 'actor') | (df.category =='actress')]
actors.to_csv("actors.tsv", sep="\t", index=False) # outputs actors.tsv
actors.to_csv("processedData/actors.tsv", sep="\t", index=False) # outputs actors.tsv
# ------------------------------------------------------------------ #
# merges actors and movies to have each actor that played in a movie
# ------------------------------------------------------------------ #
actors = pd.read_csv("actors.tsv", sep="\t")
ratings = pd.read_csv("title.ratings.tsv", sep="\t")
actors = pd.read_csv("processedData/actors.tsv", sep="\t")
ratings = pd.read_csv("baseData/title.ratings.tsv", sep="\t")
actorsRatings = actors.merge(ratings, left_on="tconst", right_on="tconst", how="inner")
actorsRatings.to_csv("actorsRatings.tsv", sep="\t", index=False) # outputs actorsRatings.tsv
actorsRatings.to_csv("processedData/actorsRatings.tsv", sep="\t", index=False) # outputs actorsRatings.tsv
else:
actorsRatings = pd.read_csv("actorsRatings.tsv", sep="\t")
actorsRatings = pd.read_csv("processedData/actorsRatings.tsv", sep="\t")
# ------------------------------------------------------------------ #
@ -64,16 +64,16 @@ def data_manipulation(DataIsLoaded = None):
if DataIsLoaded is not None:
# Enregistrer dans un fichier CSV avec les noms de colonnes personnalisés
grouped.to_csv("actorsRatingsGrouped.tsv", index=False, sep="\t") # outputs actorsRatingsGrouped.tsv
grouped.to_csv("processedData/actorsRatingsGrouped.tsv", index=False, sep="\t") # outputs actorsRatingsGrouped.tsv
# ------------------------------------------------------------------ #
# merges the last file with the names of the actors
# merges the last file with the names of the actors getRatingsActeur.py
# ------------------------------------------------------------------ #
actorNames = names[['nconst','primaryName']]
groupedWithActorNames = grouped.merge(actorNames, left_on="nconst", right_on="nconst", how="inner")
if DataIsLoaded is not None:
groupedWithActorNames.to_csv("actorsRatingsGroupedWithName.tsv", index=True, sep="\t") # outputs actorsRatingsGroupedWithName.tsv
groupedWithActorNames.to_csv("processedData/actorsRatingsGroupedWithName.tsv", index=True, sep="\t") # outputs actorsRatingsGroupedWithName.tsv
# ------------------------------------------------------------------ #
# Joindre les deux DataFrames sur 'nconst' pour associer chaque 'tconst' avec les 'averageRating'
@ -94,7 +94,7 @@ def data_manipulation(DataIsLoaded = None):
'averageRatingMovie': grouped['averageRating'] # Ajoute 'averageRating' comme 'averageRatingMovie'
})
actorsRatingsPerMovie.to_csv("actorsRatingsPerMovie.tsv", index=False, sep="\t")
actorsRatingsPerMovie.to_csv("processedData/actorsRatingsPerMovie.tsv", index=False, sep="\t")
@ -102,7 +102,8 @@ def data_manipulation(DataIsLoaded = None):
# actorsRatingsPerMovie.to_csv("actorsRatingsPerMovie.tsv", index=False, sep="\t")
# Fonction principale pour exécuter à la fois le timer et la manipulation de données
if __name__ == "__main__": # Crée un thread pour suivre le temps
if __name__ == "__main__":
# Crée un thread pour suivre le temps
timer_thread = threading.Thread(target=time_tracker)
# Démarre le thread de suivi du temps

Loading…
Cancel
Save