master
Aurelien PINTRAND 1 year ago
parent 36cffbfe04
commit b73d40fcc4

@ -0,0 +1,105 @@
import csv
import os
from typing import List
import numpy as np
import pandas as pd
def levenshtein_distance(s1, s2):
    """Return the Levenshtein (edit) distance between ``s1`` and ``s2``.

    The distance is the smallest number of single-element insertions,
    deletions and substitutions that turn one sequence into the other.
    Only two rows of the dynamic-programming table are kept at any time.
    """
    # Keep the longer sequence in ``s1`` so each table row follows the shorter one.
    if len(s1) < len(s2):
        s1, s2 = s2, s1

    width = len(s2)
    if width == 0:
        return len(s1)

    row_above = list(range(width + 1))
    for row_number, left in enumerate(s1, start=1):
        row = [row_number]
        for col, right in enumerate(s2):
            candidates = (
                row_above[col + 1] + 1,             # insertion
                row[col] + 1,                       # deletion
                row_above[col] + int(left != right),  # substitution (free on a match)
            )
            row.append(min(candidates))
        row_above = row

    return row_above[-1]
def find_closest_actor_name(input_name, actor_names, actor_ids):
    """Return the actor whose name is closest to ``input_name``.

    Closeness is measured with ``levenshtein_distance``; the comparison is
    case-sensitive. When several names share the smallest distance, the first
    one in ``actor_names`` wins.

    Args:
        input_name: Name typed by the user.
        actor_names: Candidate names.
        actor_ids: Identifiers paired position-by-position with ``actor_names``.

    Returns:
        A ``(closest_name, closest_id)`` tuple, or ``(None, None)`` when there
        is no usable candidate.
    """
    closest_name = None
    closest_id = None
    min_distance = float('inf')

    for actor_name, actor_id in zip(actor_names, actor_ids):
        # Empty cells come back from pandas as float NaN, which has no len()
        # and would make levenshtein_distance raise TypeError.
        if not isinstance(actor_name, str):
            continue

        distance = levenshtein_distance(input_name, actor_name)
        if distance < min_distance:
            min_distance = distance
            closest_name = actor_name
            closest_id = actor_id
            if distance == 0:
                # Exact match: no later candidate can be strictly closer.
                break

    return (closest_name, closest_id)
def getUniqueActorNames(filePath):
    """Load actor names and identifiers from a tab-separated file.

    The file is parsed with pandas and must provide the ``primaryName`` and
    ``nconst`` columns. No de-duplication is performed here: the values are
    returned exactly as read.

    Returns:
        A ``(actor_names, actor_ids)`` tuple of two lists holding the
        ``primaryName`` and ``nconst`` values, in file order.
    """
    table = pd.read_csv(filePath, sep='\t')
    names_column = table['primaryName']
    ids_column = table['nconst']
    return (names_column.tolist(), ids_column.tolist())
def saveUniqueActorsSorted(inputFilePath, outputFilePath):
    """Copy the actors of a TSV file to another TSV file, ordered by name.

    The input is read with pandas and sorted on its ``primaryName`` column.
    The output holds a ``primaryName``/``nconst`` header followed by one line
    per input row with just those two values; any other column is dropped and
    rows are not de-duplicated.
    """
    sorted_actors = pd.read_csv(inputFilePath, sep='\t').sort_values(by='primaryName')

    with open(outputFilePath, mode='w', newline='', encoding='utf-8') as out:
        tsv = csv.writer(out, delimiter='\t')
        tsv.writerow(['primaryName', 'nconst'])
        tsv.writerows(
            [record['primaryName'], record['nconst']]
            for _, record in sorted_actors.iterrows()
        )
def _ask_choice(prompt, allowed):
    """Ask ``prompt`` repeatedly until the reply is one of ``allowed``.

    The reply is stripped and lower-cased before it is compared, and that
    normalised reply is what gets returned.
    """
    while True:
        answer = input(prompt).strip().lower()
        if answer in allowed:
            return answer


def _prompt_for_actor(actor_names, actor_ids):
    """Search for an actor interactively until the user confirms or gives up.

    Returns:
        The confirmed ``(name, id)`` tuple, or ``None`` when the user chooses
        to leave without confirming a match.
    """
    while True:
        actorName = input("Quel acteur voulez-vous chercher ? ")
        actorFound = find_closest_actor_name(actorName, actor_names, actor_ids)
        # actorFound is a (name, id) tuple; concatenating it to a str raises
        # TypeError, so only the name is interpolated.
        print(f"L'acteur/actrice trouvé est: {actorFound[0]}")

        confirmation = _ask_choice(
            "Est-ce le bon acteur/actrice ? (o = oui, n = non)", ("o", "n")
        )
        if confirmation == "o":
            return actorFound

        leave = _ask_choice(
            "Sortir ou relancer ? (sortir = s, relancer = r)", ("s", "r")
        )
        if leave == "s":
            return None
        # leave == "r": loop again and ask for another name.


def main():
    """Run the interactive MoviePrecog menu until the user quits."""
    # Build the sorted actor file once; later runs reuse it.
    if not os.path.exists("processedData/uniqueActorNames.tsv"):
        saveUniqueActorsSorted("processedData/actorsRatingsGroupedWithName.tsv", "processedData/uniqueActorNames.tsv")
    actor_names, actor_ids = getUniqueActorNames("processedData/uniqueActorNames.tsv")

    # Confirmed actors, stored as (name, id) tuples.
    selectedActors = []

    print("Bienvenue dans MoviePrecog!")
    while True:
        print("1: Ajouter un acteur à la liste (4 acteurs / actrices requis)")
        print("2: Afficher la liste")
        print("3: Vider la liste")
        print("4: Lancer la prévision")
        print("5: Quitter")
        userChoice = input("Votre choix: ").strip()

        if userChoice == "1":
            actorFound = _prompt_for_actor(actor_names, actor_ids)
            if actorFound is not None:
                selectedActors.append(actorFound)
        elif userChoice == "2":
            if not selectedActors:
                print("La liste est vide.")
            for name, nconst in selectedActors:
                print(f"{name} ({nconst})")
        elif userChoice == "3":
            selectedActors.clear()
        elif userChoice == "4":
            if len(selectedActors) != 4:
                print("Il faut exactement 4 acteurs / actrices pour lancer la prévision.")
            else:
                # TODO: call the prediction step here; it is not defined in this file.
                print("La prévision n'est pas encore disponible.")
        elif userChoice == "5":
            break
        # Any other input simply shows the menu again.


if __name__ == "__main__":
    main()

File diff suppressed because it is too large Load Diff
Loading…
Cancel
Save